# kira/backend/main.py

"""Kira — AI body double backend

Hybrid pipeline: gpt-realtime-whisper (streaming STT) → gpt-5.4-nano (LLM) → OpenAI TTS
"""

import json
import base64
import uuid
import logging
import asyncio

from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.middleware.cors import CORSMiddleware

from config import settings
from services.hybrid import HybridPipeline
from services.memory import kira_memory

# Configure root logging at import time; this module logs through the
# logger named "kira".
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("kira")

# ASGI application object; the route decorators below register on it.
app = FastAPI(title="Kira Backend")

# NOTE(review): every origin, method and header is allowed here *and*
# allow_credentials is True. That is the most permissive CORS setup possible;
# confirm it is intended outside local development, or restrict
# allow_origins to the known frontend origin(s).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.on_event("startup")
async def startup():
    """Initialise the memory backend at application start and log the outcome.

    ``kira_memory.init()`` returns a truthy value when memory is usable; either
    way the application keeps starting, only the log line differs.
    """
    memory_ready = kira_memory.init()
    logger.info(
        "Honcho memory initialized" if memory_ready else "Honcho memory not configured"
    )


@app.get("/api/health")
async def health():
    """Liveness probe: report service identity and whether memory is enabled.

    Returns:
        A JSON-serialisable dict with ``status``, ``name`` and ``memory``
        (``"active"`` when ``kira_memory.enabled`` is truthy, else
        ``"disabled"``).
    """
    if kira_memory.enabled:
        memory_state = "active"
    else:
        memory_state = "disabled"

    payload = {"status": "ok", "name": "kira"}
    payload["memory"] = memory_state
    return payload


# Seconds to wait for the hybrid pipeline to report ready before giving up.
_PIPELINE_CONNECT_TIMEOUT_S = 15
# Queue poll interval (seconds) so forwarder loops re-check the pipeline state.
_QUEUE_POLL_INTERVAL_S = 1


def _text_field(msg: dict, key: str) -> str:
    """Return ``msg[key]`` stripped of surrounding whitespace, or ``""``.

    Missing keys and non-string values (``null``, numbers, objects) both
    yield ``""`` so a malformed client message cannot raise
    ``AttributeError`` inside the receive loop.
    """
    value = msg.get(key, "")
    return value.strip() if isinstance(value, str) else ""


@app.websocket("/api/ws")
async def conversation_ws(websocket: WebSocket):
    """Run one voice/text conversation session over a WebSocket.

    Accepts the socket, starts a ``HybridPipeline`` and then relays messages
    in both directions until the client disconnects or an error occurs.

    Client -> server JSON messages (selected by ``type``):
        * ``identify``          -- ``user_id`` and optional ``name``; replies
          with ``identified`` carrying the stored preferences.
        * ``set_preference``    -- ``key`` / ``value`` strings, stored only
          for a non-default user id.
        * ``audio``             -- ``data``: base64 audio (PCM16 per the
          protocol comment) queued for the pipeline.
        * ``conversation_text`` -- ``text`` queued for the pipeline.
        * ``ping``              -- answered with ``pong``.
      Any other ``type`` is ignored.

    Server -> client JSON messages: ``transcript_delta``, ``transcript``,
    ``audio``, ``speaking_start``, ``speaking_end``, ``error``,
    ``identified`` and ``pong``.

    A malformed client message (invalid JSON, non-object payload, bad base64,
    non-string preference value) is answered with an ``error`` message and
    skipped; it does not end the session.
    """
    await websocket.accept()
    session_id = str(uuid.uuid4())[:8]
    user_id = "default-user"
    identified = False
    logger.info(f"[{session_id}] WebSocket connected")

    pipeline_ready = asyncio.Event()
    # NOTE(review): both queues are unbounded; if the pipeline drops while the
    # client keeps streaming, queued audio is never drained. Consider a bound.
    audio_queue: asyncio.Queue[bytes] = asyncio.Queue()
    text_queue: asyncio.Queue[str] = asyncio.Queue()

    # ── Pipeline -> client callbacks ──

    async def on_ready():
        pipeline_ready.set()
        logger.info(f"[{session_id}] Pipeline ready")

    async def on_transcript_delta(delta: str):
        """Streaming partial transcript."""
        await websocket.send_json({"type": "transcript_delta", "text": delta})

    async def on_transcript_done(full: str):
        """Full utterance received."""
        await websocket.send_json({"type": "transcript", "role": "user", "text": full})

    async def on_audio_delta(audio_bytes: bytes):
        """Forward TTS audio to client (best-effort)."""
        try:
            audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
            await websocket.send_json({"type": "audio", "data": audio_b64})
        except Exception:
            # Deliberately non-fatal (presumably the client may already be
            # gone while audio is still streaming), but leave a trace.
            logger.debug(f"[{session_id}] Dropped audio chunk", exc_info=True)

    async def on_speech_start():
        await websocket.send_json({"type": "speaking_start"})

    async def on_speech_end():
        await websocket.send_json({"type": "speaking_end"})

    async def on_error(msg: str):
        await websocket.send_json({"type": "error", "message": msg})

    async def reject(reason: str):
        """Tell the client a message was dropped while keeping the session alive."""
        logger.warning(f"[{session_id}] Rejected client message: {reason}")
        await websocket.send_json({"type": "error", "message": reason})

    # Memory context is unknown until the client identifies, so start empty.
    pipeline = HybridPipeline(
        on_transcript_delta=on_transcript_delta,
        on_transcript_done=on_transcript_done,
        on_audio_delta=on_audio_delta,
        on_speech_start=on_speech_start,
        on_speech_end=on_speech_end,
        on_ready=on_ready,
        on_error=on_error,
        memory_suffix="",
    )

    # ── Client -> pipeline forwarders ──
    # NOTE(review): both loops read the private ``pipeline._connected`` flag;
    # a public accessor on HybridPipeline would be preferable.

    async def forward_audio():
        while pipeline._connected:
            try:
                pcm16 = await asyncio.wait_for(
                    audio_queue.get(), timeout=_QUEUE_POLL_INTERVAL_S
                )
                await pipeline.send_audio(pcm16)
            except asyncio.TimeoutError:
                continue
            except Exception:
                logger.exception(f"[{session_id}] Audio forwarding stopped")
                break

    async def forward_text():
        while pipeline._connected:
            try:
                text = await asyncio.wait_for(
                    text_queue.get(), timeout=_QUEUE_POLL_INTERVAL_S
                )
                await pipeline.send_text(text)
                # Store in Honcho once the client has identified itself.
                # NOTE(review): this is a synchronous call on the event loop;
                # confirm it does not block on network I/O.
                if kira_memory.enabled and identified:
                    kira_memory.store_user_message(text)
            except asyncio.TimeoutError:
                continue
            except Exception:
                logger.exception(f"[{session_id}] Text forwarding stopped")
                break

    pipeline_task = asyncio.create_task(pipeline.connect())
    forwarders: list = []

    try:
        try:
            await asyncio.wait_for(
                pipeline_ready.wait(), timeout=_PIPELINE_CONNECT_TIMEOUT_S
            )
        except asyncio.TimeoutError:
            logger.error(f"[{session_id}] Pipeline failed to connect")
            # Cancel first so a failing send below cannot leave it running.
            pipeline_task.cancel()
            await websocket.send_json({"type": "error", "message": "Failed to connect to AI"})
            return

        forwarders = [
            asyncio.create_task(forward_audio()),
            asyncio.create_task(forward_text()),
        ]

        while True:
            raw = await websocket.receive_text()
            try:
                msg = json.loads(raw)
            except json.JSONDecodeError:
                await reject("Invalid JSON")
                continue
            if not isinstance(msg, dict):
                await reject("Message must be a JSON object")
                continue
            msg_type = msg.get("type", "")

            # ── Identity ──
            if msg_type == "identify":
                # NOTE(review): a blank/missing user_id still marks the
                # session as identified, as before; confirm that is intended.
                user_id = _text_field(msg, "user_id")
                user_name = _text_field(msg, "name")
                if user_name and user_id:
                    kira_memory.set_user_preference(user_id, "name", user_name)

                prefs = kira_memory.get_user_preferences(user_id)
                identified = True

                if kira_memory.enabled:
                    kira_memory.ensure_peers(user_id)
                    kira_memory.ensure_session(session_id)

                # Build memory context and update pipeline (best-effort).
                if kira_memory.enabled:
                    try:
                        ctx = kira_memory.build_system_prompt_suffix()
                        if ctx:
                            pipeline._memory_suffix = ctx
                    except Exception:
                        logger.warning(
                            f"[{session_id}] Could not build memory context",
                            exc_info=True,
                        )

                await websocket.send_json({
                    "type": "identified",
                    "user_id": user_id,
                    "preferences": prefs,
                })
                continue

            # ── Preferences ──
            if msg_type == "set_preference":
                key = _text_field(msg, "key")
                raw_value = msg.get("value", "")
                if not isinstance(raw_value, str):
                    await reject("Preference value must be a string")
                    continue
                value = raw_value.strip()
                if key and user_id and user_id != "default-user":
                    kira_memory.set_user_preference(user_id, key, value)
                continue

            # ── Audio (PCM16) ──
            if msg_type == "audio":
                audio_b64 = msg.get("data", "")
                if audio_b64:
                    try:
                        pcm16 = base64.b64decode(audio_b64)
                    except (ValueError, TypeError):
                        # binascii.Error (bad padding) subclasses ValueError;
                        # TypeError covers non-string payloads.
                        await reject("Invalid base64 audio data")
                        continue
                    await audio_queue.put(pcm16)
                continue

            # ── Text input ──
            if msg_type == "conversation_text":
                text = _text_field(msg, "text")
                if text:
                    await text_queue.put(text)
                continue

            if msg_type == "ping":
                await websocket.send_json({"type": "pong"})

    except WebSocketDisconnect:
        logger.info(f"[{session_id}] Disconnected")
    except Exception as e:
        logger.exception(f"[{session_id}] Error: {e}")
    finally:
        for task in forwarders:
            task.cancel()
        try:
            await pipeline.disconnect()
        except Exception:
            logger.exception(f"[{session_id}] Pipeline disconnect failed")
        finally:
            # Must run even if disconnect() raised, or the task would leak.
            pipeline_task.cancel()
        if forwarders:
            # Let the cancelled forwarders unwind before the handler returns.
            await asyncio.gather(*forwarders, return_exceptions=True)