"""Kira — AI body double backend

Cheapest pipeline: gpt-4o-mini-transcribe STT → gpt-5.4-nano LLM → OpenAI TTS
~$0.019/min total, simple 3-step chat completions.
"""

import json
import base64
import uuid
import logging

from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.middleware.cors import CORSMiddleware

from config import settings
from services.memory import kira_memory

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("kira")

app = FastAPI(title="Kira Backend")

# NOTE(review): wildcard origins together with allow_credentials=True is a very
# permissive CORS policy — confirm this is intended outside local development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Persona prompt sent as the system message on every LLM call.
BASE_SYSTEM_PROMPT = (
    "You are Kira, a warm, kind, and encouraging AI body double. "
    "You speak in a friendly, girly-pop tone. You are helping someone with ADHD "
    "stay focused and on task. Keep responses short, supportive, and uplifting. "
    "Check in on them. Remind them to take breaks. Celebrate small wins. "
    "Use occasional emoji but don't overdo it. Never be judgmental."
)

# Process-wide AsyncOpenAI client, created on first use by get_openai().
_openai = None


def get_openai():
    """Return the shared ``AsyncOpenAI`` client, creating it on first call.

    The ``openai`` package is imported lazily, so it is only loaded once a
    client is actually requested.
    """
    global _openai
    if _openai is not None:
        return _openai

    from openai import AsyncOpenAI

    _openai = AsyncOpenAI(api_key=settings.openai_api_key)
    return _openai


@app.on_event("startup")
async def startup():
    """Initialise the memory service and log whether it is available."""
    message = (
        "Honcho memory initialized"
        if kira_memory.init()
        else "Honcho memory not configured"
    )
    logger.info(message)


@app.get("/api/health")
async def health():
    """Report service liveness and whether the memory service is enabled."""
    if kira_memory.enabled:
        mem_status = "active"
    else:
        mem_status = "disabled"
    return {"status": "ok", "name": "kira", "memory": mem_status}


async def run_conversation(text: str, memory_suffix: str = "") -> str:
    """Ask the LLM for Kira's reply to a single user message.

    Only the system prompt and ``text`` are sent; no earlier turns are
    included in the request.

    Args:
        text: The user's message.
        memory_suffix: Optional memory context appended verbatim to the
            base system prompt.

    Returns:
        The model's reply, or a short fallback phrase when the reply is empty.
    """
    prompt = BASE_SYSTEM_PROMPT
    if memory_suffix:
        prompt = prompt + memory_suffix

    messages = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": text},
    ]
    completion = await get_openai().chat.completions.create(
        model="gpt-5.4-nano",
        messages=messages,
        max_completion_tokens=300,
        temperature=0.7,
    )
    reply = completion.choices[0].message.content
    return reply or "Mhm, I'm here!"
# Upper bound on the audio buffered for one utterance. Without it a client can
# grow the per-connection buffer without limit. 25 MB mirrors the upload limit
# documented for OpenAI's transcription endpoint.
_MAX_AUDIO_BYTES = 25 * 1024 * 1024


async def transcribe_audio(audio_bytes: bytes) -> str | None:
    """Transcribe recorded audio with the ``gpt-4o-mini-transcribe`` model.

    The bytes are uploaded as ``audio.webm`` with MIME type ``audio/webm``.

    Args:
        audio_bytes: The complete recording to transcribe.

    Returns:
        The stripped transcript, or ``None`` when the transcript is empty or
        the request failed (the failure is logged, never raised).
    """
    client = get_openai()
    try:
        transcript = await client.audio.transcriptions.create(
            model="gpt-4o-mini-transcribe",
            file=("audio.webm", audio_bytes, "audio/webm"),
            response_format="text",
        )
        cleaned = transcript.strip() if transcript else ""
        return cleaned or None
    except Exception as e:
        logger.warning(f"STT error: {e}")
        return None


async def synthesize_speech(text: str, websocket, speaking_start_sent: bool = False) -> None:
    """Stream TTS audio for ``text`` to the client as base64 ``audio`` messages.

    Each non-empty chunk is sent as ``{"type": "audio", "data": ..., "text": ...}``.
    Any exception raised while generating or sending is logged and swallowed,
    so this function never raises ``Exception`` subclasses to the caller.

    Args:
        text: The sentence(s) to speak.
        websocket: Connection the audio messages are sent on.
        speaking_start_sent: When false, the first chunk carries an empty
            ``text`` field; every later chunk carries the full ``text``.
    """
    # NOTE(review): the flag reads as if it were inverted (text is omitted on
    # the first chunk and repeated on all later ones). Behaviour is kept as-is
    # because clients may depend on it — confirm against the frontend.
    client = get_openai()
    try:
        async with client.audio.speech.with_streaming_response.create(
            model="tts-1",
            voice="nova",
            input=text,
            response_format="opus",
        ) as resp:
            async for chunk in resp.iter_bytes():
                if chunk:
                    audio_b64 = base64.b64encode(chunk).decode("utf-8")
                    await websocket.send_json({
                        "type": "audio",
                        "data": audio_b64,
                        "text": text if speaking_start_sent else "",
                    })
                    speaking_start_sent = True
    except Exception as e:
        logger.warning(f"TTS error: {e}")


def _text_field(msg: dict, key: str) -> str:
    """Return ``msg[key]`` stripped, or ``""`` if it is missing or not a string."""
    value = msg.get(key, "")
    return value.strip() if isinstance(value, str) else ""


async def _send_error(websocket, message: str) -> None:
    """Send a protocol-level ``error`` message to the client."""
    await websocket.send_json({"type": "error", "message": message})


async def _reply_with_speech(
    websocket,
    session_id: str,
    user_text: str,
    memory_suffix: str,
    history: list[dict],
    *,
    remember: bool,
    started_at: float,
) -> None:
    """Run the LLM → memory → TTS half of a turn for one user utterance.

    Appends both sides of the exchange to ``history``, optionally stores the
    exchange in the memory service, and sends ``speaking_start``, the audio
    chunks and ``speaking_end`` to the client.

    Args:
        websocket: Connection to reply on.
        session_id: Short id used as the log prefix.
        user_text: What the user said or typed.
        memory_suffix: Cached memory context for the system prompt.
        history: Per-connection transcript list, mutated in place.
        remember: Whether the user has identified; the exchange is stored
            only when this is true and the memory service is enabled.
        started_at: ``time.perf_counter()`` reading taken when the turn
            began, used for the total-latency log line.
    """
    import time

    # NOTE(review): history is recorded here but run_conversation is only
    # given the current utterance, so earlier turns never reach the model.
    history.append({"role": "user", "content": user_text})

    llm_start = time.perf_counter()
    kira_text = await run_conversation(user_text, memory_suffix)
    llm_done = time.perf_counter()
    logger.info(f"[{session_id}] LLM took {llm_done - llm_start:.1f}s")

    history.append({"role": "assistant", "content": kira_text})
    logger.info(f"[{session_id}] Kira: {kira_text}")

    if kira_memory.enabled and remember:
        # Memory is best-effort: a failure must not cost the user their reply,
        # but it should be visible in the logs.
        try:
            kira_memory.store_messages(user_text, kira_text)
        except Exception as e:
            logger.warning(f"[{session_id}] Memory store failed: {e}")

    await websocket.send_json({"type": "speaking_start", "text": kira_text})
    await synthesize_speech(kira_text, websocket)
    tts_done = time.perf_counter()
    logger.info(
        f"[{session_id}] TTS took {tts_done - llm_done:.1f}s. "
        f"Total: {tts_done - started_at:.1f}s"
    )
    await websocket.send_json({"type": "speaking_end"})


@app.websocket("/api/ws")
async def conversation_ws(websocket: WebSocket):
    """Serve one client conversation over a JSON-message WebSocket.

    Handled message types (the ``type`` field of each JSON object):

    * ``identify`` — set the user id/name, load preferences and cache the
      memory context; answered with ``identified``.
    * ``set_preference`` — store a key/value preference for an identified
      user; answered with ``preference_saved``.
    * ``audio_chunk`` — append base64 audio in ``data`` to the buffer.
    * ``transcribe`` — transcribe the buffered audio, then reply with speech.
    * ``conversation_text`` — reply with speech to the typed ``text``.
    * ``ping`` — answered with ``pong``.

    Unknown types are ignored. A malformed message is answered with an
    ``error`` message and the connection stays open.
    """
    import time

    await websocket.accept()
    session_id = str(uuid.uuid4())[:8]
    user_id = "default-user"
    identified = False
    memory_suffix = ""
    logger.info(f"[{session_id}] WebSocket connected")

    audio_buffer = bytearray()
    conversation_history: list[dict] = []

    try:
        while True:
            raw = await websocket.receive_text()

            # A single bad frame should not tear down the whole session.
            try:
                msg = json.loads(raw)
            except json.JSONDecodeError:
                await _send_error(websocket, "Invalid JSON")
                continue
            if not isinstance(msg, dict):
                await _send_error(websocket, "Invalid message")
                continue

            msg_type = msg.get("type", "")

            # ── Identity & Preferences ──
            if msg_type == "identify":
                user_id = _text_field(msg, "user_id")
                user_name = _text_field(msg, "name")
                if user_name and user_id:
                    kira_memory.set_user_preference(user_id, "name", user_name)
                prefs = kira_memory.get_user_preferences(user_id)
                identified = True

                if kira_memory.enabled:
                    kira_memory.ensure_peers(user_id)
                    kira_memory.ensure_session(session_id)
                    # Build memory context ONCE on identify (not per-turn — too slow)
                    try:
                        ctx = kira_memory.build_system_prompt_suffix()
                    except Exception as e:
                        logger.warning(
                            f"[{session_id}] Memory context unavailable: {e}"
                        )
                    else:
                        if ctx:
                            memory_suffix = ctx

                await websocket.send_json({
                    "type": "identified",
                    "user_id": user_id,
                    "preferences": prefs,
                })

            elif msg_type == "set_preference":
                key = _text_field(msg, "key")
                raw_value = msg.get("value", "")
                # Only identified users with a usable key/value may store one.
                saved = (
                    bool(key)
                    and isinstance(raw_value, str)
                    and user_id not in ("", "default-user")
                )
                if saved:
                    kira_memory.set_user_preference(user_id, key, raw_value.strip())
                # Always answer, and report what actually happened.
                await websocket.send_json({
                    "type": "preference_saved",
                    "key": key,
                    "success": saved,
                })

            # ── Conversation ──
            elif msg_type == "audio_chunk":
                try:
                    chunk = base64.b64decode(msg["data"])
                except (KeyError, TypeError, ValueError):
                    await _send_error(websocket, "Invalid audio chunk")
                    continue
                if len(audio_buffer) + len(chunk) > _MAX_AUDIO_BYTES:
                    audio_buffer.clear()
                    await _send_error(websocket, "Audio too long")
                    continue
                audio_buffer.extend(chunk)

            elif msg_type == "transcribe":
                if not audio_buffer:
                    await _send_error(websocket, "No audio data")
                    continue

                t0 = time.perf_counter()
                logger.info(f"[{session_id}] Transcribing {len(audio_buffer)} bytes...")

                # 1. STT
                transcript = await transcribe_audio(bytes(audio_buffer))
                t1 = time.perf_counter()
                audio_buffer.clear()

                if not transcript:
                    await _send_error(websocket, "Could not transcribe")
                    continue

                logger.info(f"[{session_id}] STT took {t1 - t0:.1f}s")
                await websocket.send_json(
                    {"type": "transcript", "role": "user", "text": transcript}
                )
                logger.info(f"[{session_id}] User: {transcript}")

                # 2. LLM (uses cached memory_suffix from identify) and 3. TTS
                await _reply_with_speech(
                    websocket,
                    session_id,
                    transcript,
                    memory_suffix,
                    conversation_history,
                    remember=identified,
                    started_at=t0,
                )

            elif msg_type == "conversation_text":
                user_text = _text_field(msg, "text")
                if not user_text:
                    continue

                logger.info(f"[{session_id}] User (text): {user_text}")
                await _reply_with_speech(
                    websocket,
                    session_id,
                    user_text,
                    memory_suffix,
                    conversation_history,
                    remember=identified,
                    started_at=time.perf_counter(),
                )

            elif msg_type == "ping":
                await websocket.send_json({"type": "pong"})

    except WebSocketDisconnect:
        logger.info(f"[{session_id}] Disconnected")
    except Exception as e:
        # logger.exception keeps the traceback, which str(e) alone loses.
        logger.exception(f"[{session_id}] Error: {e}")
        try:
            await websocket.send_json({"type": "error", "message": str(e)})
        except Exception:
            # The socket is most likely already closed; nothing more to do.
            logger.debug(f"[{session_id}] Could not deliver error to client")