# kira/backend/main.py

"""Kira — AI body double backend

OpenAI Realtime API pipeline:
  mic audio → [built-in STT → GPT-4o-mini → built-in TTS] → speaker audio
                    Single WebSocket, ~300-800ms latency
"""

import json
import base64
import uuid
import logging
import asyncio

from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.middleware.cors import CORSMiddleware

from config import settings
from services.realtime import RealtimeRelay
from services.memory import kira_memory

# Configure root logging once at import time; all module logs go through "kira".
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("kira")

# ASGI application object that the route decorators below register against.
app = FastAPI(title="Kira Backend")

# CORS: accept any origin, method and header, with credentials enabled.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive — confirm this is intended outside local development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.on_event("startup")
async def startup():
    """Initialise the Honcho memory service and log whether it is available."""
    message = (
        "Honcho memory initialized"
        if kira_memory.init()
        else "Honcho memory not configured"
    )
    logger.info(message)


@app.get("/api/health")
async def health():
    """Report service liveness plus whether the memory service is enabled."""
    return {
        "status": "ok",
        "name": "kira",
        "memory": "active" if kira_memory.enabled else "disabled",
    }


# Base persona sent to the Realtime session; memory context is appended to it.
_BASE_INSTRUCTIONS = (
    "You are Kira, a warm, kind, and encouraging AI body double. "
    "Speak in a friendly, girly-pop tone. Help someone with ADHD "
    "stay focused. Keep responses short and supportive. "
    "Check in, remind breaks, celebrate wins. Never judgmental."
)


def _text_field(msg: dict, key: str) -> str:
    """Return ``msg[key]`` stripped, or ``""`` if it is missing or not a string."""
    value = msg.get(key, "")
    return value.strip() if isinstance(value, str) else ""


def _split_transcript(text: str) -> tuple[str, str]:
    """Split a ``"role: content"`` transcript line into ``(role, content)``.

    A line without the ``": "`` separator is returned whole with the role
    ``"unknown"`` instead of raising.
    """
    role, sep, content = text.partition(": ")
    if not sep:
        return "unknown", text
    return role, content


@app.websocket("/api/ws")
async def conversation_ws(websocket: WebSocket):
    """Run one client conversation over ``/api/ws``.

    Accepts the socket, starts a ``RealtimeRelay`` and then shuttles data in
    both directions until the client disconnects or an error occurs.

    Client → server JSON messages, selected by ``"type"``:
      * ``identify``          – sets the user id / name and loads preferences
      * ``set_preference``    – stores one preference for an identified user
      * ``audio``             – base64 PCM16 chunk queued for the relay
      * ``conversation_text`` – typed text queued for the relay
      * ``ping``              – answered with ``pong``
    Any other type is ignored.

    Server → client messages: ``audio``, ``transcript``, ``speaking_start``,
    ``speaking_end``, ``interruption``, ``error``, ``identified`` and ``pong``.

    Malformed frames (invalid JSON, non-object payloads, wrongly typed
    fields, undecodable audio) are rejected individually; they do not end
    the session.
    """
    await websocket.accept()
    session_id = str(uuid.uuid4())[:8]
    user_id = "default-user"
    identified = False
    logger.info(f"[{session_id}] WebSocket connected")

    # Raw "role: content" transcript lines, persisted to Honcho on teardown.
    pending_transcripts: list[str] = []

    # Set by the relay's on_ready callback once it can accept input.
    relay_ready = asyncio.Event()
    audio_queue: asyncio.Queue[bytes] = asyncio.Queue()
    text_queue: asyncio.Queue[str] = asyncio.Queue()

    async def send_to_client(payload: dict) -> bool:
        """Best-effort send used by relay callbacks.

        The client may already be gone when the relay fires a callback, so a
        failed send is logged and reported via the return value rather than
        raised back into the relay.
        """
        try:
            await websocket.send_json(payload)
        except Exception as exc:
            logger.debug(
                f"[{session_id}] Dropped {payload.get('type')} message: {exc}"
            )
            return False
        return True

    async def on_ready():
        relay_ready.set()
        logger.info(f"[{session_id}] Realtime relay ready")

    async def on_audio_delta(audio_bytes: bytes):
        """Forward audio chunks from OpenAI to the client."""
        audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
        await send_to_client({"type": "audio", "data": audio_b64})

    async def on_transcript(text: str):
        """Record a transcript line for Honcho and forward it to the client."""
        pending_transcripts.append(text)
        role, content = _split_transcript(text)
        logger.info(f"[{session_id}] {role}: {content}")
        await send_to_client({
            "type": "transcript",
            "role": role,
            "text": content,
        })

    async def on_speech_started():
        """Kira started speaking."""
        await send_to_client({"type": "speaking_start"})

    async def on_speech_stopped():
        """Kira finished speaking."""
        await send_to_client({"type": "speaking_end"})

    async def on_interruption():
        """User interrupted — Kira stops speaking."""
        await send_to_client({"type": "interruption"})

    async def on_error(msg: str):
        await send_to_client({"type": "error", "message": msg})

    # ── Create and start the Realtime relay ──
    relay = RealtimeRelay(
        on_audio_delta=on_audio_delta,
        on_transcript=on_transcript,
        on_speech_started=on_speech_started,
        on_speech_stopped=on_speech_stopped,
        on_interruption=on_interruption,
        on_error=on_error,
        on_ready=on_ready,
    )
    relay_task = asyncio.create_task(relay.connect())

    # Wait for relay to be ready
    try:
        await asyncio.wait_for(relay_ready.wait(), timeout=15)
    except asyncio.TimeoutError:
        logger.error(f"[{session_id}] Realtime relay failed to connect")
        # Cancel first so the task is stopped even if the client is gone.
        relay_task.cancel()
        await send_to_client({"type": "error", "message": "Failed to connect to AI"})
        return

    # ── Forward audio/text from client to relay ──
    async def pump(queue: asyncio.Queue, send, label: str):
        """Drain *queue* into the relay via *send* while it stays connected."""
        while relay._connected:
            try:
                # Short timeout so the connected flag is re-checked regularly.
                item = await asyncio.wait_for(queue.get(), timeout=1)
            except asyncio.TimeoutError:
                continue
            try:
                await send(item)
            except Exception:
                logger.exception(f"[{session_id}] Forwarding {label} to relay failed")
                break

    fwd_audio_task = asyncio.create_task(pump(audio_queue, relay.send_audio, "audio"))
    fwd_text_task = asyncio.create_task(pump(text_queue, relay.send_text, "text"))

    try:
        while True:
            raw = await websocket.receive_text()
            try:
                msg = json.loads(raw)
            except json.JSONDecodeError:
                msg = None
            if not isinstance(msg, dict):
                # Reject the single bad frame; keep the session alive.
                logger.warning(f"[{session_id}] Ignoring malformed message")
                await websocket.send_json({"type": "error", "message": "Invalid message"})
                continue
            msg_type = msg.get("type", "")

            # ── Identity ──
            if msg_type == "identify":
                user_id = _text_field(msg, "user_id")
                user_name = _text_field(msg, "name")
                if user_name and user_id:
                    kira_memory.set_user_preference(user_id, "name", user_name)

                prefs = kira_memory.get_user_preferences(user_id)
                # NOTE(review): the session is marked identified even when
                # user_id is empty — confirm that is intended.
                identified = True

                # Inject Honcho context into the Realtime session instructions
                memory_suffix = ""
                if kira_memory.enabled:
                    kira_memory.ensure_peers(user_id)
                    kira_memory.ensure_session(session_id)
                    try:
                        memory_suffix = kira_memory.build_system_prompt_suffix() or ""
                    except Exception:
                        # Memory context is optional; continue without it.
                        logger.warning(
                            f"[{session_id}] Could not build memory context",
                            exc_info=True,
                        )

                if relay._connected and memory_suffix:
                    await relay._send({
                        "type": "session.update",
                        "session": {
                            "instructions": _BASE_INSTRUCTIONS + memory_suffix,
                        },
                    })

                await websocket.send_json({
                    "type": "identified",
                    "user_id": user_id,
                    "preferences": prefs,
                })
                continue

            # ── Preferences ──
            if msg_type == "set_preference":
                key = _text_field(msg, "key")
                value = msg.get("value", "")
                if not isinstance(value, str):
                    await websocket.send_json({
                        "type": "error",
                        "message": "Preference value must be a string",
                    })
                    continue
                # Preferences are only stored for explicitly identified users.
                if key and user_id and user_id != "default-user":
                    kira_memory.set_user_preference(user_id, key, value.strip())
                continue

            # ── Audio from frontend (PCM16) ──
            if msg_type == "audio":
                audio_b64 = msg.get("data", "")
                if audio_b64:
                    try:
                        pcm16 = base64.b64decode(audio_b64)
                    except (ValueError, TypeError):
                        # binascii.Error is a ValueError; drop the bad chunk.
                        logger.warning(f"[{session_id}] Dropping undecodable audio chunk")
                        continue
                    await audio_queue.put(pcm16)
                continue

            # ── Text input ──
            if msg_type == "conversation_text":
                text = _text_field(msg, "text")
                if text:
                    await text_queue.put(text)
                    # Also store in Honcho immediately
                    if kira_memory.enabled and identified:
                        kira_memory.store_user_message(text)
                continue

            if msg_type == "ping":
                await websocket.send_json({"type": "pong"})

    except WebSocketDisconnect:
        logger.info(f"[{session_id}] Disconnected")
    except Exception as e:
        logger.exception(f"[{session_id}] Error: {e}")
    finally:
        try:
            # Store pending transcripts in Honcho
            if kira_memory.enabled and identified:
                for line in pending_transcripts:
                    if line.startswith("user: "):
                        kira_memory.store_user_message(line.removeprefix("user: "))
                    elif line.startswith("assistant: "):
                        kira_memory.store_kira_message(line.removeprefix("assistant: "))
        except Exception:
            logger.exception(f"[{session_id}] Failed to store transcripts")
        finally:
            # Always release the forwarders and the relay, even if the
            # memory write above failed.
            fwd_audio_task.cancel()
            fwd_text_task.cancel()
            try:
                await relay.disconnect()
            finally:
                relay_task.cancel()