init: Kira — AI body double with Honcho memory

Full voice pipeline (Whisper STT -> DeepSeek LLM -> OpenAI TTS), animated SVG avatar (Live2D-ready), girly-pop UI, lofi music, timer/notes/pets/wardrobe widgets, 10 background scenes with particle effects, Honcho cross-session memory.
2026-06-04 10:51:38 -04:00
commit 97424cb98f
47 changed files with 5691 additions and 0 deletions
@@ -0,0 +1,217 @@
+"""Kira — AI body double backend
+
+Real-time speech-to-speech pipeline:
+  mic audio → Whisper API → text → DeepSeek LLM → response text → OpenAI TTS → audio
+
+Honcho memory integration:
+  Cross-session user context injected into LLM prompts,
+  conversation exchanges stored for continuous learning.
+"""
+
+import json
+import base64
+import uuid
+import logging
+
+from fastapi import FastAPI, WebSocket, WebSocketDisconnect
+from fastapi.middleware.cors import CORSMiddleware
+
+from config import settings
+from services.stt import transcribe_audio
+from services.llm import get_kira_response
+from services.tts import synthesize_speech
+from services.memory import kira_memory
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("kira")
+
+app = FastAPI(title="Kira Backend")
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# ─── Base system prompt (static part) ───
+BASE_SYSTEM_PROMPT = (
+    "You are Kira, a warm, kind, and encouraging AI body double. "
+    "You speak in a friendly, girly-pop tone. You are helping someone with ADHD "
+    "stay focused and on task. Keep responses short, supportive, and uplifting. "
+    "Check in on them. Remind them to take breaks. Celebrate small wins. "
+    "Use occasional emoji but don't overdo it. Never be judgmental. "
+    "You remember things about them between conversations."
+)
+
+
+@app.on_event("startup")
+async def startup():
+    """Initialize Honcho memory on app startup."""
+    if kira_memory.init():
+        logger.info("Honcho memory initialized")
+    else:
+        logger.info("Honcho memory not configured — running without memory")
+
+
+@app.get("/api/health")
+async def health():
+    mem_status = "active" if kira_memory.enabled else "disabled"
+    return {"status": "ok", "name": "kira", "memory": mem_status}
+
+
+def build_system_prompt(user_id: str) -> dict:
+    """Build system prompt with Honcho memory context injected."""
+    base = BASE_SYSTEM_PROMPT
+
+    # Append memory context if Honcho is available
+    if kira_memory.enabled:
+        try:
+            # Get user-specific context from Honcho
+            kira_memory.ensure_peers(user_id)
+            memory_suffix = kira_memory.build_system_prompt_suffix()
+            if memory_suffix:
+                base += memory_suffix
+        except Exception as e:
+            logger.warning(f"Failed to build memory context: {e}")
+
+    return {"role": "system", "content": base}
+
+
+@app.websocket("/api/ws")
+async def conversation_ws(websocket: WebSocket):
+    await websocket.accept()
+    session_id = str(uuid.uuid4())[:8]
+    user_id = "default-user"
+    logger.info(f"[{session_id}] WebSocket connected")
+
+    # Audio buffer accumulates chunks from one utterance
+    audio_buffer = bytearray()
+    conversation_history: list[dict] = []
+
+    # Initialize Honcho for this session
+    if kira_memory.enabled:
+        try:
+            kira_memory.ensure_peers(user_id)
+            kira_memory.ensure_session(session_id)
+            logger.info(f"[{session_id}] Honcho session ready")
+        except Exception as e:
+            logger.warning(f"[{session_id}] Honcho setup failed: {e}")
+
+    try:
+        first_message = True
+
+        while True:
+            raw = await websocket.receive_text()
+            msg = json.loads(raw)
+            msg_type = msg.get("type", "")
+
+            # Build system prompt fresh each turn to get latest Honcho context
+            system_prompt = build_system_prompt(user_id)
+
+            if msg_type == "audio_chunk":
+                chunk = base64.b64decode(msg["data"])
+                audio_buffer.extend(chunk)
+
+            elif msg_type == "transcribe":
+                if not audio_buffer:
+                    await websocket.send_json({"type": "error", "message": "No audio data"})
+                    continue
+
+                logger.info(f"[{session_id}] Transcribing {len(audio_buffer)} bytes...")
+
+                # 1. Speech-to-text
+                transcript = await transcribe_audio(bytes(audio_buffer))
+                audio_buffer.clear()
+
+                if not transcript:
+                    await websocket.send_json({"type": "error", "message": "Could not transcribe audio"})
+                    continue
+
+                # Echo transcript
+                await websocket.send_json({
+                    "type": "transcript",
+                    "text": transcript,
+                })
+
+                # 2. LLM call
+                logger.info(f"[{session_id}] User: {transcript}")
+                user_msg = {"role": "user", "content": transcript}
+                conversation_history.append(user_msg)
+
+                messages = [system_prompt] + conversation_history[-10:]
+                kira_text = await get_kira_response(messages)
+
+                assistant_msg = {"role": "assistant", "content": kira_text}
+                conversation_history.append(assistant_msg)
+                logger.info(f"[{session_id}] Kira: {kira_text}")
+
+                # 3. Store in Honcho
+                if kira_memory.enabled:
+                    try:
+                        kira_memory.store_messages(transcript, kira_text)
+                    except Exception as e:
+                        logger.warning(f"[{session_id}] Failed to store messages: {e}")
+
+                # 4. TTS
+                await websocket.send_json({
+                    "type": "speaking_start",
+                    "text": kira_text,
+                })
+
+                audio_bytes = await synthesize_speech(kira_text)
+                audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
+                await websocket.send_json({
+                    "type": "audio",
+                    "data": audio_b64,
+                    "text": kira_text,
+                })
+
+                await websocket.send_json({"type": "speaking_end"})
+
+            elif msg_type == "ping":
+                await websocket.send_json({"type": "pong"})
+
+            elif msg_type == "conversation_text":
+                user_text = msg.get("text", "").strip()
+                if not user_text:
+                    continue
+
+                logger.info(f"[{session_id}] User (text): {user_text}")
+                user_msg = {"role": "user", "content": user_text}
+                conversation_history.append(user_msg)
+
+                messages = [system_prompt] + conversation_history[-10:]
+                kira_text = await get_kira_response(messages)
+
+                assistant_msg = {"role": "assistant", "content": kira_text}
+                conversation_history.append(assistant_msg)
+                logger.info(f"[{session_id}] Kira: {kira_text}")
+
+                # Store in Honcho
+                if kira_memory.enabled:
+                    try:
+                        kira_memory.store_messages(user_text, kira_text)
+                    except Exception as e:
+                        logger.warning(f"[{session_id}] Failed to store messages: {e}")
+
+                # TTS
+                await websocket.send_json({"type": "speaking_start", "text": kira_text})
+                audio_bytes = await synthesize_speech(kira_text)
+                audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
+                await websocket.send_json({
+                    "type": "audio",
+                    "data": audio_b64,
+                    "text": kira_text,
+                })
+                await websocket.send_json({"type": "speaking_end"})
+
+    except WebSocketDisconnect:
+        logger.info(f"[{session_id}] Disconnected")
+    except Exception as e:
+        logger.error(f"[{session_id}] Error: {e}")
+        try:
+            await websocket.send_json({"type": "error", "message": str(e)})
+        except Exception:
+            pass