Files
kira/backend/main.py
T
hobokenchicken 78ea059f08 feat: user personalization with Honcho-backed preferences
- WelcomeScreen: first-time name entry with cute onboarding
- identify WS message: sets user_id, loads saved prefs from Honcho
- set_preference WS message: saves scene/outfit/accessory to Honcho metadata
- Preferences auto-load on return visits via localStorage + Honcho peer meta
- Kira uses the user's name in greeting and prompts
- Backend: get/set preference methods in KiraMemory service
- Frontend: optimistic preference updates, synced to backend on change
2026-06-04 11:00:58 -04:00

244 lines
8.9 KiB
Python

"""Kira — AI body double backend
Real-time speech-to-speech pipeline:
mic audio → Whisper API → text → DeepSeek LLM → response text → OpenAI TTS → audio
Honcho memory integration:
Cross-session user context injected into LLM prompts,
conversation exchanges stored for continuous learning.
User preferences (name, scene, outfit, accessory) persisted in peer metadata.
"""
import json
import base64
import uuid
import logging
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.middleware.cors import CORSMiddleware
from config import settings
from services.stt import transcribe_audio
from services.llm import get_kira_response
from services.tts import synthesize_speech
from services.memory import kira_memory
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("kira")

app = FastAPI(title="Kira Backend")

# CORS: a wildcard origin must not be combined with credentialed requests.
# With allow_origins=["*"] and allow_credentials=True the middleware ends up
# approving credentialed cross-origin requests from any site. Credentials are
# therefore disabled while the origin list is a wildcard; to support cookies
# or Authorization headers cross-origin, list the frontend origins explicitly
# and turn allow_credentials back on.
# (CORS does not govern the WebSocket endpoint, only plain HTTP routes.)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ─── Base system prompt: the static persona text, one sentence per entry ───
BASE_SYSTEM_PROMPT = " ".join(
    (
        "You are Kira, a warm, kind, and encouraging AI body double.",
        "You speak in a friendly, girly-pop tone.",
        "You are helping someone with ADHD stay focused and on task.",
        "Keep responses short, supportive, and uplifting.",
        "Check in on them.",
        "Remind them to take breaks.",
        "Celebrate small wins.",
        "Use occasional emoji but don't overdo it.",
        "Never be judgmental.",
        "You remember things about them between conversations.",
    )
)
@app.on_event("startup")
async def startup():
    """Try to initialize Honcho memory at app startup and log the outcome."""
    # A falsy result from init() means the app runs without memory.
    memory_ready = kira_memory.init()
    status_line = (
        "Honcho memory initialized"
        if memory_ready
        else "Honcho memory not configured — running without memory"
    )
    logger.info(status_line)
@app.get("/api/health")
async def health():
    """Report service liveness and whether the memory service is enabled."""
    return {
        "status": "ok",
        "name": "kira",
        "memory": "active" if kira_memory.enabled else "disabled",
    }
def build_system_prompt(user_id: str) -> dict:
    """Return the system message for the LLM, extended with memory context.

    The static persona prompt is always used. When the memory service is
    enabled, the suffix it produces for ``user_id`` is appended. Any failure
    while talking to the memory service is logged and the plain persona
    prompt is returned instead.
    """
    content = BASE_SYSTEM_PROMPT
    if not kira_memory.enabled:
        return {"role": "system", "content": content}

    try:
        kira_memory.ensure_peers(user_id)
        suffix = kira_memory.build_system_prompt_suffix()
        # An empty/None suffix leaves the base prompt untouched.
        content = content + suffix if suffix else content
    except Exception as e:
        logger.warning(f"Failed to build memory context: {e}")
    return {"role": "system", "content": content}
def handle_identify(msg: dict, session_id: str) -> dict | None:
"""Handle user identification. Returns user preferences or None."""
user_id = msg.get("user_id", "").strip()
if not user_id:
return {"type": "error", "message": "user_id is required"}
user_name = msg.get("name", "").strip()
if user_name:
kira_memory.set_user_preference(user_id, "name", user_name)
prefs = kira_memory.get_user_preferences(user_id)
logger.info(f"[{session_id}] Identified as {user_id} (name={user_name or prefs.get('name', '')})")
return {
"type": "identified",
"user_id": user_id,
"preferences": prefs,
}
def handle_set_preference(msg: dict, session_id: str, user_id: str) -> dict | None:
"""Handle preference update. Returns success status."""
if not user_id or user_id == "default-user":
return {"type": "error", "message": "Must identify first"}
key = msg.get("key", "").strip()
value = msg.get("value", "").strip()
if not key:
return {"type": "error", "message": "key is required"}
ok = kira_memory.set_user_preference(user_id, key, value)
return {
"type": "preference_saved",
"key": key,
"success": ok,
}
# Upper bound on audio buffered for a single utterance, so a client cannot
# grow server memory without limit. NOTE(review): 25 MB mirrors the upload
# limit documented for OpenAI's transcription API — confirm against
# services.stt and adjust if a different backend is used.
_MAX_AUDIO_BUFFER_BYTES = 25 * 1024 * 1024

# Number of most recent history entries sent to the LLM with each request.
_HISTORY_WINDOW = 10


async def _respond_to_user(
    websocket: WebSocket,
    session_id: str,
    user_id: str,
    identified: bool,
    conversation_history: list[dict],
    user_text: str,
) -> None:
    """Run one conversational turn: LLM reply, memory store, then TTS audio.

    Appends the user and assistant messages to ``conversation_history`` and
    sends ``speaking_start``, ``audio`` and ``speaking_end`` to the client.
    """
    conversation_history.append({"role": "user", "content": user_text})

    # Built once per turn (not per incoming frame) so pings and audio chunks
    # do not trigger memory-service calls.
    system_prompt = build_system_prompt(user_id)
    messages = [system_prompt] + conversation_history[-_HISTORY_WINDOW:]
    kira_text = await get_kira_response(messages)
    conversation_history.append({"role": "assistant", "content": kira_text})
    # Only the last _HISTORY_WINDOW entries are ever read, so drop older ones
    # to keep long sessions from accumulating memory.
    del conversation_history[:-_HISTORY_WINDOW]
    logger.info(f"[{session_id}] Kira: {kira_text}")

    # Exchanges are persisted only for identified users; failures are
    # non-fatal so the spoken reply is still delivered.
    if kira_memory.enabled and identified:
        try:
            kira_memory.store_messages(user_text, kira_text)
        except Exception as e:
            logger.warning(f"[{session_id}] Failed to store messages: {e}")

    await websocket.send_json({"type": "speaking_start", "text": kira_text})
    audio_bytes = await synthesize_speech(kira_text)
    audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
    await websocket.send_json({"type": "audio", "data": audio_b64, "text": kira_text})
    await websocket.send_json({"type": "speaking_end"})


@app.websocket("/api/ws")
async def conversation_ws(websocket: WebSocket):
    """Serve one client conversation over a JSON-message WebSocket.

    Each text frame is a JSON object with a ``type`` field:

    - ``identify``: set the user id and return saved preferences.
    - ``set_preference``: save one preference for the identified user.
    - ``audio_chunk``: append base64 audio in ``data`` to the buffer.
    - ``transcribe``: transcribe the buffered audio and answer it.
    - ``conversation_text``: answer the text in ``text``.
    - ``ping``: reply with ``pong``.

    Frames with any other ``type`` are ignored. Malformed frames get an
    ``error`` reply and the connection stays open; unexpected failures are
    logged, reported to the client when possible, and end the handler.
    """
    await websocket.accept()
    session_id = str(uuid.uuid4())[:8]
    user_id = "default-user"
    identified = False
    logger.info(f"[{session_id}] WebSocket connected")
    audio_buffer = bytearray()
    conversation_history: list[dict] = []

    # NOTE(review): kira_memory is a module-level object and the calls made
    # here (ensure_session, store_messages, build_system_prompt_suffix) carry
    # no per-connection handle — confirm in services.memory that concurrent
    # connections cannot overwrite each other's peer/session state.
    try:
        while True:
            raw = await websocket.receive_text()

            # One bad frame from the client should not end the session.
            try:
                msg = json.loads(raw)
            except json.JSONDecodeError:
                await websocket.send_json({"type": "error", "message": "Invalid JSON"})
                continue
            if not isinstance(msg, dict):
                await websocket.send_json({"type": "error", "message": "Message must be a JSON object"})
                continue

            msg_type = msg.get("type", "")

            # ── Identity & Preferences ──
            if msg_type == "identify":
                response = handle_identify(msg, session_id)
                if response:
                    await websocket.send_json(response)
                    if response["type"] == "identified":
                        user_id = response["user_id"]
                        identified = True
                        # Set up Honcho for this user
                        if kira_memory.enabled:
                            try:
                                kira_memory.ensure_peers(user_id)
                                kira_memory.ensure_session(session_id)
                                logger.info(f"[{session_id}] Honcho session ready for {user_id}")
                            except Exception as e:
                                logger.warning(f"[{session_id}] Honcho setup failed: {e}")

            elif msg_type == "set_preference":
                response = handle_set_preference(msg, session_id, user_id)
                if response:
                    await websocket.send_json(response)

            # ── Conversation ──
            elif msg_type == "audio_chunk":
                try:
                    chunk = base64.b64decode(msg["data"])
                except (KeyError, TypeError, ValueError):
                    # Missing field, non-string payload, or invalid base64
                    # (binascii.Error is a ValueError subclass).
                    await websocket.send_json({"type": "error", "message": "Invalid audio chunk"})
                    continue
                if len(audio_buffer) + len(chunk) > _MAX_AUDIO_BUFFER_BYTES:
                    audio_buffer.clear()
                    await websocket.send_json({"type": "error", "message": "Audio buffer limit exceeded"})
                    continue
                audio_buffer.extend(chunk)

            elif msg_type == "transcribe":
                if not audio_buffer:
                    await websocket.send_json({"type": "error", "message": "No audio data"})
                    continue
                logger.info(f"[{session_id}] Transcribing {len(audio_buffer)} bytes...")
                transcript = await transcribe_audio(bytes(audio_buffer))
                audio_buffer.clear()
                if not transcript:
                    await websocket.send_json({"type": "error", "message": "Could not transcribe audio"})
                    continue
                await websocket.send_json({"type": "transcript", "text": transcript})
                logger.info(f"[{session_id}] User: {transcript}")
                await _respond_to_user(
                    websocket, session_id, user_id, identified, conversation_history, transcript
                )

            elif msg_type == "ping":
                await websocket.send_json({"type": "pong"})

            elif msg_type == "conversation_text":
                raw_text = msg.get("text", "")
                # Non-string text is treated like empty text and ignored.
                user_text = raw_text.strip() if isinstance(raw_text, str) else ""
                if not user_text:
                    continue
                logger.info(f"[{session_id}] User (text): {user_text}")
                await _respond_to_user(
                    websocket, session_id, user_id, identified, conversation_history, user_text
                )

    except WebSocketDisconnect:
        logger.info(f"[{session_id}] Disconnected")
    except Exception as e:
        # logger.exception keeps the traceback, which the plain message lost.
        logger.exception(f"[{session_id}] Error: {e}")
        try:
            await websocket.send_json({"type": "error", "message": str(e)})
        except Exception:
            # Best effort only: the socket may already be closed.
            logger.debug(f"[{session_id}] Could not deliver error to client")