"""Kira — AI body double backend REST STT (gpt-4o-transcribe) → gpt-5.4-nano + Honcho → streaming TTS (sage) """ import json import base64 import uuid import logging import asyncio from fastapi import FastAPI, WebSocket, WebSocketDisconnect from fastapi.middleware.cors import CORSMiddleware from config import settings from services.memory import kira_memory logging.basicConfig(level=logging.INFO) logger = logging.getLogger("kira") app = FastAPI(title="Kira Backend") app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) BASE_SYSTEM_PROMPT = ( "You are Kira, a warm, kind, and encouraging AI body double. " "You speak in a friendly, girly-pop tone. You are helping someone with ADHD " "stay focused and on task. Keep responses short, supportive, and uplifting. " "Check in on them. Remind them to take breaks. Celebrate small wins. " "Use occasional emoji but don't overdo it. Never be judgmental." ) _openai = None def get_openai(): global _openai if _openai is None: from openai import AsyncOpenAI _openai = AsyncOpenAI(api_key=settings.openai_api_key) return _openai @app.on_event("startup") async def startup(): if kira_memory.init(): logger.info("Honcho memory initialized") else: logger.info("Honcho memory not configured") @app.get("/api/health") async def health(): mem_status = "active" if kira_memory.enabled else "disabled" return {"status": "ok", "name": "kira", "memory": mem_status} async def transcribe_audio(client, audio_b64: str) -> str | None: """REST transcription via gpt-4o-transcribe (full utterance blob).""" try: audio_bytes = base64.b64decode(audio_b64) import io audio_file = io.BytesIO(audio_bytes) audio_file.name = "audio.webm" transcript = await client.audio.transcriptions.create( model="gpt-4o-transcribe", file=audio_file, ) return transcript.text.strip() if transcript.text else None except Exception as e: logger.error(f"Transcription error: {e}") return None async def get_kira_response(client, user_text: str, memory_suffix: str) -> str: """Get Kira's response from gpt-5.4-nano.""" system_prompt = BASE_SYSTEM_PROMPT if memory_suffix: system_prompt += memory_suffix resp = await client.chat.completions.create( model="gpt-5.4-nano", messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_text}, ], max_completion_tokens=300, temperature=0.7, ) return resp.choices[0].message.content or "Mhm, I'm here! ✨" async def stream_tts(client, text: str, websocket: WebSocket): """Stream TTS audio as Opus chunks over WebSocket.""" await websocket.send_json({"type": "speaking_start", "text": text}) async with client.audio.speech.with_streaming_response.create( model="tts-1", voice="sage", input=text, response_format="opus", ) as tts_resp: async for chunk in tts_resp.iter_bytes(): if chunk: b64 = base64.b64encode(chunk).decode("utf-8") await websocket.send_json({"type": "audio", "data": b64}) await websocket.send_json({"type": "speaking_end"}) @app.websocket("/api/ws") async def conversation_ws(websocket: WebSocket): await websocket.accept() session_id = str(uuid.uuid4())[:8] user_id = "default-user" identified = False memory_suffix = "" logger.info(f"[{session_id}] WebSocket connected") conversation_history: list[dict] = [] try: while True: raw = await websocket.receive_text() msg = json.loads(raw) msg_type = msg.get("type", "") # ── Identity & Preferences ── if msg_type == "identify": user_id = msg.get("user_id", "").strip() user_name = msg.get("name", "").strip() if user_name and user_id: kira_memory.set_user_preference(user_id, "name", user_name) prefs = kira_memory.get_user_preferences(user_id) identified = True if kira_memory.enabled: kira_memory.ensure_peers(user_id) kira_memory.ensure_session(session_id) try: ctx = kira_memory.build_system_prompt_suffix() if ctx: memory_suffix = ctx except Exception: pass await websocket.send_json({ "type": "identified", "user_id": user_id, "preferences": prefs, }) continue if msg_type == "set_preference": key = msg.get("key", "").strip() value = msg.get("value", "").strip() if key and user_id and user_id != "default-user": kira_memory.set_user_preference(user_id, key, value) await websocket.send_json({"type": "preference_saved", "key": key, "success": True}) continue # ── Audio (full webm/opus blob from MediaRecorder) → REST STT → LLM → TTS ── if msg_type == "audio": audio_b64 = msg.get("data", "") if not audio_b64: continue client = get_openai() # STT (REST) transcript = await transcribe_audio(client, audio_b64) if not transcript: await websocket.send_json({"type": "transcript", "role": "user", "text": "(could not transcribe)"}) await websocket.send_json({"type": "error", "message": "Could not transcribe audio"}) continue logger.info(f"[{session_id}] User: {transcript}") await websocket.send_json({"type": "transcript_delta", "text": transcript}) await websocket.send_json({"type": "transcript", "role": "user", "text": transcript}) conversation_history.append({"role": "user", "content": transcript}) # LLM kira_text = await get_kira_response(client, transcript, memory_suffix) conversation_history.append({"role": "assistant", "content": kira_text}) logger.info(f"[{session_id}] Kira: {kira_text}") # Store in Honcho if kira_memory.enabled and identified: try: kira_memory.store_messages(transcript, kira_text) except Exception: pass # TTS (streaming) await stream_tts(client, kira_text, websocket) continue # ── Text input → direct LLM + TTS ── if msg_type == "conversation_text": text = msg.get("text", "").strip() if not text: continue logger.info(f"[{session_id}] User (text): {text}") conversation_history.append({"role": "user", "content": text}) client = get_openai() kira_text = await get_kira_response(client, text, memory_suffix) conversation_history.append({"role": "assistant", "content": kira_text}) logger.info(f"[{session_id}] Kira: {kira_text}") if kira_memory.enabled and identified: try: kira_memory.store_messages(text, kira_text) except Exception: pass await stream_tts(client, kira_text, websocket) continue if msg_type == "ping": await websocket.send_json({"type": "pong"}) except WebSocketDisconnect: logger.info(f"[{session_id}] Disconnected") except Exception as e: logger.error(f"[{session_id}] Error: {e}")