diff --git a/backend/main.py b/backend/main.py
index a54f000..b1d4d59 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -95,20 +95,23 @@ async def transcribe_audio(audio_bytes: bytes) -> str | None:
         return None
 
 
-async def synthesize_speech(text: str) -> bytes:
-    """Generate TTS audio from text."""
+async def synthesize_speech(text: str, websocket, speaking_start_sent: bool = False) -> None:
+    """Stream Opus TTS for text to websocket as base64 'audio' messages; text rides on the first chunk unless speaking_start_sent. Errors are logged, not raised."""
     client = get_openai()
     try:
-        resp = await client.audio.speech.create(
+        async with client.audio.speech.with_streaming_response.create(
             model="tts-1",
             voice="nova",
             input=text,
             response_format="opus",
-        )
-        return resp.content
+        ) as resp:
+            async for chunk in resp.iter_bytes():
+                if chunk:  # skip empty chunks so no zero-length audio message is sent
+                    audio_b64 = base64.b64encode(chunk).decode("utf-8")
+                    await websocket.send_json({"type": "audio", "data": audio_b64, "text": "" if speaking_start_sent else text})
+                    speaking_start_sent = True  # text is sent at most once
     except Exception as e:
         logger.warning(f"TTS error: {e}")
-        return b""
 
 
 @app.websocket("/api/ws")
@@ -213,11 +216,9 @@ async def conversation_ws(websocket: WebSocket):
 
                 # 3. TTS
                 await websocket.send_json({"type": "speaking_start", "text": kira_text})
-                audio_bytes = await synthesize_speech(kira_text)
+                await synthesize_speech(kira_text, websocket)
                 t3 = time.time()
                 logger.info(f"[{session_id}] TTS took {t3-t2:.1f}s. Total: {t3-t0:.1f}s")
-                audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
-                await websocket.send_json({"type": "audio", "data": audio_b64, "text": kira_text})
                 await websocket.send_json({"type": "speaking_end"})
 
             elif msg_type == "conversation_text":
@@ -239,9 +240,7 @@ async def conversation_ws(websocket: WebSocket):
                         pass
 
                 await websocket.send_json({"type": "speaking_start", "text": kira_text})
-                audio_bytes = await synthesize_speech(kira_text)
-                audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
-                await websocket.send_json({"type": "audio", "data": audio_b64, "text": kira_text})
+                await synthesize_speech(kira_text, websocket)
                 await websocket.send_json({"type": "speaking_end"})
 
             elif msg_type == "ping":
diff --git a/frontend/src/hooks/useConversation.ts b/frontend/src/hooks/useConversation.ts
index 8f90e72..5763bef 100644
--- a/frontend/src/hooks/useConversation.ts
+++ b/frontend/src/hooks/useConversation.ts
@@ -44,6 +44,7 @@ export function useConversation() {
   const audioRef = useRef<HTMLAudioElement | null>(null);
   const recorderRef = useRef<MediaRecorder | null>(null);
   const streamRef = useRef<MediaStream | null>(null);
+  const audioBufferRef = useRef<Uint8Array[]>([]);
 
   // Connect WebSocket
   const connect = useCallback(() => {
@@ -115,23 +116,36 @@ export function useConversation() {
         break;
 
       case 'audio': {
-        // Incoming Opus audio from TTS (full response, not streamed)
-        if (msg.data && audioRef.current) {
+        // Incoming Opus audio chunk from streaming TTS
+        if (msg.data) {
           const binary = atob(msg.data);
           const bytes = new Uint8Array(binary.length);
           for (let i = 0; i < binary.length; i++) {
             bytes[i] = binary.charCodeAt(i);
           }
-          const blob = new Blob([bytes], { type: 'audio/ogg' });
-          const url = URL.createObjectURL(blob);
-          audioRef.current.src = url;
-          audioRef.current.play().catch(() => {});
+          audioBufferRef.current.push(bytes);
         }
         break;
       }
 
       case 'speaking_end':
         setIsKiraSpeaking(false);
+        // Play all accumulated chunks as one blob
+        if (audioBufferRef.current.length > 0 && audioRef.current) {
+          const allChunks = audioBufferRef.current;
+          const totalLen = allChunks.reduce((s, c) => s + c.length, 0);
+          const combined = new Uint8Array(totalLen);
+          let offset = 0;
+          for (const chunk of allChunks) {
+            combined.set(chunk, offset);
+            offset += chunk.length;
+          }
+          audioBufferRef.current = [];
+          const blob = new Blob([combined], { type: 'audio/ogg' });
+          const url = URL.createObjectURL(blob);
+          audioRef.current.src = url;
+          audioRef.current.play().catch(() => {});
+        }
         break;
 
       case 'interruption':