diff --git a/backend/main.py b/backend/main.py
index 85ebc11..c5abf76 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -8,7 +8,6 @@ import json
 import base64
 import uuid
 import logging
-import asyncio
 
 from fastapi import FastAPI, WebSocket, WebSocketDisconnect
 from fastapi.middleware.cors import CORSMiddleware
@@ -29,7 +28,6 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-# System prompt
 BASE_SYSTEM_PROMPT = (
     "You are Kira, a warm, kind, and encouraging AI body double. "
     "You speak in a friendly, girly-pop tone. You are helping someone with ADHD "
@@ -63,25 +61,13 @@ async def health():
     return {"status": "ok", "name": "kira", "memory": mem_status}
 
 
-def build_system_prompt(user_id: str) -> str:
-    prompt = BASE_SYSTEM_PROMPT
-    if kira_memory.enabled:
-        try:
-            kira_memory.ensure_peers(user_id)
-            suffix = kira_memory.build_system_prompt_suffix()
-            if suffix:
-                prompt += suffix
-        except Exception as e:
-            logger.warning(f"Memory context failed: {e}")
-    return prompt
+async def run_conversation(text: str, memory_suffix: str = "") -> str:
+    """LLM call with optional Honcho memory context injected into system prompt."""
+    system_prompt = BASE_SYSTEM_PROMPT
+    if memory_suffix:
+        system_prompt += memory_suffix
 
-
-async def run_conversation(text: str, user_id: str) -> str:
-    """STT → LLM → TTS using the cheapest models."""
-    system_prompt = build_system_prompt(user_id)
     client = get_openai()
-
-    # LLM
     resp = await client.chat.completions.create(
         model="gpt-5.4-nano",
         messages=[
@@ -91,8 +77,7 @@ async def run_conversation(text: str, user_id: str) -> str:
         max_completion_tokens=300,
         temperature=0.7,
     )
-    kira_text = resp.choices[0].message.content or "Mhm, I'm here!"
-    return kira_text
+    return resp.choices[0].message.content or "Mhm, I'm here!"
 
 
 async def transcribe_audio(audio_bytes: bytes) -> str | None:
@@ -126,41 +111,13 @@ async def synthesize_speech(text: str) -> bytes:
         return b""
 
 
-def _pcm16_to_wav(pcm_data: bytes) -> bytes:
-    """Wrap raw PCM16 mono 24kHz data in a WAV container."""
-    import struct
-    num_channels = 1
-    sample_rate = 24000
-    bits_per_sample = 16
-    byte_rate = sample_rate * num_channels * (bits_per_sample // 8)
-    block_align = num_channels * (bits_per_sample // 8)
-    data_size = len(pcm_data)
-    header_size = 44
-    total_size = header_size + data_size
-
-    header = b"RIFF"
-    header += struct.pack("<I", total_size - 8)
-    header += b"WAVE"
-    header += b"fmt "
-    header += struct.pack("<I", 16)           # subchunk size
-    header += struct.pack("<H", 1)            # PCM format
-    header += struct.pack("<H", num_channels)
-    header += struct.pack("<I", sample_rate)
-    header += struct.pack("<I", byte_rate)
-    header += struct.pack("<H", block_align)
-    header += struct.pack("<H", bits_per_sample)
-    header += b"data"
-    header += struct.pack("<I", data_size)
-
-    return header + pcm_data
-
-
 @app.websocket("/api/ws")
 async def conversation_ws(websocket: WebSocket):
     await websocket.accept()
     session_id = str(uuid.uuid4())[:8]
     user_id = "default-user"
     identified = False
+    memory_suffix = ""
     logger.info(f"[{session_id}] WebSocket connected")
 
     audio_buffer = bytearray()
@@ -185,6 +142,13 @@ async def conversation_ws(websocket: WebSocket):
                 if kira_memory.enabled:
                     kira_memory.ensure_peers(user_id)
                     kira_memory.ensure_session(session_id)
+                    # Build the memory context once, at identify time; rebuilding it on every turn is too slow.
+                    try:
+                        ctx = kira_memory.build_system_prompt_suffix()
+                        if ctx:
+                            memory_suffix = ctx
+                    except Exception as e:  # best-effort: keep the base prompt, but leave a trace
+                        logger.warning(f"[{session_id}] Memory context failed: {e}")
 
                 await websocket.send_json({
                     "type": "identified",
@@ -207,7 +171,6 @@ async def conversation_ws(websocket: WebSocket):
 
             # ── Conversation ──
             if msg_type == "audio_chunk":
-                # Single Opus/webm blob from MediaRecorder
                 chunk = base64.b64decode(msg["data"])
                 audio_buffer.extend(chunk)
 
@@ -229,13 +192,12 @@ async def conversation_ws(websocket: WebSocket):
                 await websocket.send_json({"type": "transcript", "role": "user", "text": transcript})
                 conversation_history.append({"role": "user", "content": transcript})
 
-                # 2. LLM
+                # 2. LLM (uses cached memory_suffix from identify)
                 logger.info(f"[{session_id}] User: {transcript}")
-                kira_text = await run_conversation(transcript, user_id)
+                kira_text = await run_conversation(transcript, memory_suffix)
                 conversation_history.append({"role": "assistant", "content": kira_text})
                 logger.info(f"[{session_id}] Kira: {kira_text}")
 
-                # Store in Honcho
                 if kira_memory.enabled and identified:
                     try:
                         kira_memory.store_messages(transcript, kira_text)
@@ -257,7 +219,7 @@ async def conversation_ws(websocket: WebSocket):
                 conversation_history.append({"role": "user", "content": user_text})
                 logger.info(f"[{session_id}] User (text): {user_text}")
 
-                kira_text = await run_conversation(user_text, user_id)
+                kira_text = await run_conversation(user_text, memory_suffix)
                 conversation_history.append({"role": "assistant", "content": kira_text})
                 logger.info(f"[{session_id}] Kira: {kira_text}")