"""Kira — AI body double backend
Hybrid pipeline: gpt-realtime-whisper (streaming STT) → gpt-5.4-nano (LLM) → OpenAI TTS
"""

import asyncio
import base64
import json
import logging
import uuid

from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.middleware.cors import CORSMiddleware

from config import settings
from services.hybrid import HybridPipeline
from services.memory import kira_memory

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("kira")

# Seconds to wait for the pipeline's on_ready callback before giving up.
PIPELINE_CONNECT_TIMEOUT_S = 15
# Seconds a forwarding loop blocks on its queue before re-checking whether
# the pipeline is still connected.
QUEUE_POLL_INTERVAL_S = 1

app = FastAPI(title="Kira Backend")

# NOTE(review): wildcard origins together with allow_credentials=True accepts
# credentialed requests from any site. Confirm this is intended before
# exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


def _text_field(msg: dict, key: str) -> str:
    """Return ``msg[key]`` as a stripped string.

    A missing key or JSON ``null`` yields ``""``; any other non-string value
    is converted with ``str()``. This keeps a sloppy client message from
    raising ``AttributeError`` on ``.strip()`` and ending the session.
    """
    value = msg.get(key)
    if value is None:
        return ""
    return str(value).strip()


@app.on_event("startup")
async def startup():
    """Initialise the memory backend and log whether it is available."""
    if kira_memory.init():
        logger.info("Honcho memory initialized")
    else:
        logger.info("Honcho memory not configured")


@app.get("/api/health")
async def health():
    """Report service liveness and whether memory is enabled."""
    mem_status = "active" if kira_memory.enabled else "disabled"
    return {"status": "ok", "name": "kira", "memory": mem_status}


@app.websocket("/api/ws")
async def conversation_ws(websocket: WebSocket):
    """Run one conversation session over a WebSocket.

    Client → server JSON messages (selected by ``type``):
      * ``identify``          – ``user_id``, ``name``; replies with ``identified``.
      * ``set_preference``    – ``key``, ``value``; stored only for identified,
                                non-default users.
      * ``audio``             – ``data``: base64 audio, queued for the pipeline.
      * ``conversation_text`` – ``text``: typed input, queued for the pipeline.
      * ``ping``              – replies with ``pong``.
    Unknown types are ignored. Malformed messages get an ``error`` reply and
    the session continues.

    Server → client messages: ``transcript_delta``, ``transcript``, ``audio``,
    ``speaking_start``, ``speaking_end``, ``error``, ``identified``, ``pong``.
    """
    await websocket.accept()
    session_id = str(uuid.uuid4())[:8]
    user_id = "default-user"
    identified = False
    logger.info(f"[{session_id}] WebSocket connected")

    pipeline_ready = asyncio.Event()
    audio_queue: asyncio.Queue[bytes] = asyncio.Queue()
    text_queue: asyncio.Queue[str] = asyncio.Queue()

    # ── Pipeline callbacks ──

    async def on_ready():
        pipeline_ready.set()
        logger.info(f"[{session_id}] Pipeline ready")

    async def on_transcript_delta(delta: str):
        """Streaming partial transcript."""
        await websocket.send_json({"type": "transcript_delta", "text": delta})

    async def on_transcript_done(full: str):
        """Full utterance received."""
        await websocket.send_json({"type": "transcript", "role": "user", "text": full})

    async def on_audio_delta(audio_bytes: bytes):
        """Forward TTS audio to client (best effort)."""
        try:
            audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
            await websocket.send_json({"type": "audio", "data": audio_b64})
        except Exception:
            # Still best-effort (a dropped audio chunk must not propagate
            # into the pipeline), but no longer completely silent.
            logger.debug(f"[{session_id}] Dropped audio chunk", exc_info=True)

    async def on_speech_start():
        await websocket.send_json({"type": "speaking_start"})

    async def on_speech_end():
        await websocket.send_json({"type": "speaking_end"})

    async def on_error(msg: str):
        await websocket.send_json({"type": "error", "message": msg})

    # Create pipeline
    pipeline = HybridPipeline(
        on_transcript_delta=on_transcript_delta,
        on_transcript_done=on_transcript_done,
        on_audio_delta=on_audio_delta,
        on_speech_start=on_speech_start,
        on_speech_end=on_speech_end,
        on_ready=on_ready,
        on_error=on_error,
        # No memory context yet; it is injected after "identify".
        memory_suffix="",
    )
    pipeline_task = asyncio.create_task(pipeline.connect())

    async def shutdown_pipeline():
        """Disconnect the pipeline; always cancel its connect task afterwards."""
        try:
            await pipeline.disconnect()
        except Exception:
            logger.exception(f"[{session_id}] Pipeline disconnect failed")
        finally:
            pipeline_task.cancel()

    try:
        await asyncio.wait_for(pipeline_ready.wait(), timeout=PIPELINE_CONNECT_TIMEOUT_S)
    except asyncio.TimeoutError:
        logger.error(f"[{session_id}] Pipeline failed to connect")
        try:
            await websocket.send_json({"type": "error", "message": "Failed to connect to AI"})
        except Exception:
            logger.debug(f"[{session_id}] Could not report connect failure", exc_info=True)
        finally:
            # Release whatever the half-open pipeline holds even when the
            # client is already gone and the error could not be delivered.
            await shutdown_pipeline()
        return

    # ── Forward audio/text from client to pipeline ──

    async def forward(queue: asyncio.Queue, deliver, label: str):
        """Drain *queue* into *deliver* while the pipeline stays connected."""
        while pipeline._connected:
            try:
                item = await asyncio.wait_for(queue.get(), timeout=QUEUE_POLL_INTERVAL_S)
            except asyncio.TimeoutError:
                # Nothing queued; loop to re-check the connection flag.
                continue
            try:
                await deliver(item)
            except Exception:
                logger.exception(f"[{session_id}] Forwarding {label} failed")
                break

    async def deliver_text(text: str):
        await pipeline.send_text(text)
        # Store in Honcho
        if kira_memory.enabled and identified:
            kira_memory.store_user_message(text)

    fwd_audio = asyncio.create_task(forward(audio_queue, pipeline.send_audio, "audio"))
    fwd_text = asyncio.create_task(forward(text_queue, deliver_text, "text"))

    try:
        while True:
            raw = await websocket.receive_text()
            try:
                msg = json.loads(raw)
            except json.JSONDecodeError:
                await on_error("Invalid JSON message")
                continue
            if not isinstance(msg, dict):
                await on_error("Message must be a JSON object")
                continue

            msg_type = msg.get("type", "")

            # ── Identity ──
            if msg_type == "identify":
                user_id = _text_field(msg, "user_id")
                user_name = _text_field(msg, "name")
                if user_name and user_id:
                    kira_memory.set_user_preference(user_id, "name", user_name)
                prefs = kira_memory.get_user_preferences(user_id)
                identified = True

                if kira_memory.enabled:
                    kira_memory.ensure_peers(user_id)
                    kira_memory.ensure_session(session_id)

                # Build memory context and update pipeline
                if kira_memory.enabled:
                    try:
                        ctx = kira_memory.build_system_prompt_suffix()
                        if ctx:
                            pipeline._memory_suffix = ctx
                    except Exception:
                        # Memory context is optional; carry on without it.
                        logger.exception(f"[{session_id}] Failed to build memory context")

                await websocket.send_json({
                    "type": "identified",
                    "user_id": user_id,
                    "preferences": prefs,
                })

            # ── Preferences ──
            elif msg_type == "set_preference":
                key = _text_field(msg, "key")
                value = _text_field(msg, "value")
                if key and user_id and user_id != "default-user":
                    kira_memory.set_user_preference(user_id, key, value)

            # ── Audio (PCM16) ──
            elif msg_type == "audio":
                audio_b64 = msg.get("data", "")
                if audio_b64:
                    try:
                        pcm16 = base64.b64decode(audio_b64)
                    except (ValueError, TypeError):
                        # binascii.Error is a ValueError; TypeError covers a
                        # non-string "data" field.
                        await on_error("Invalid audio payload")
                        continue
                    await audio_queue.put(pcm16)

            # ── Text input ──
            elif msg_type == "conversation_text":
                text = _text_field(msg, "text")
                if text:
                    await text_queue.put(text)

            elif msg_type == "ping":
                await websocket.send_json({"type": "pong"})

    except WebSocketDisconnect:
        logger.info(f"[{session_id}] Disconnected")
    except Exception as e:
        logger.exception(f"[{session_id}] Error: {e}")
    finally:
        fwd_audio.cancel()
        fwd_text.cancel()
        # Let the forwarders finish unwinding before the pipeline they write
        # to is torn down; return_exceptions keeps their CancelledError here.
        await asyncio.gather(fwd_audio, fwd_text, return_exceptions=True)
        await shutdown_pipeline()