"""Kira — AI body double backend Cheapest pipeline: gpt-4o-mini-transcribe STT → gpt-5.4-nano LLM → OpenAI TTS ~$0.019/min total, simple 3-step chat completions. """ import json import base64 import uuid import logging import asyncio from fastapi import FastAPI, WebSocket, WebSocketDisconnect from fastapi.middleware.cors import CORSMiddleware from config import settings from services.memory import kira_memory logging.basicConfig(level=logging.INFO) logger = logging.getLogger("kira") app = FastAPI(title="Kira Backend") app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) # System prompt BASE_SYSTEM_PROMPT = ( "You are Kira, a warm, kind, and encouraging AI body double. " "You speak in a friendly, girly-pop tone. You are helping someone with ADHD " "stay focused and on task. Keep responses short, supportive, and uplifting. " "Check in on them. Remind them to take breaks. Celebrate small wins. " "Use occasional emoji but don't overdo it. Never be judgmental." ) _openai = None def get_openai(): global _openai if _openai is None: from openai import AsyncOpenAI _openai = AsyncOpenAI(api_key=settings.openai_api_key) return _openai @app.on_event("startup") async def startup(): if kira_memory.init(): logger.info("Honcho memory initialized") else: logger.info("Honcho memory not configured") @app.get("/api/health") async def health(): mem_status = "active" if kira_memory.enabled else "disabled" return {"status": "ok", "name": "kira", "memory": mem_status} def build_system_prompt(user_id: str) -> str: prompt = BASE_SYSTEM_PROMPT if kira_memory.enabled: try: kira_memory.ensure_peers(user_id) suffix = kira_memory.build_system_prompt_suffix() if suffix: prompt += suffix except Exception as e: logger.warning(f"Memory context failed: {e}") return prompt async def run_conversation(text: str, user_id: str) -> str: """STT → LLM → TTS using the cheapest models.""" system_prompt = build_system_prompt(user_id) client = get_openai() # LLM resp = await client.chat.completions.create( model="gpt-5.4-nano", messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": text}, ], max_tokens=300, temperature=0.7, ) kira_text = resp.choices[0].message.content or "Mhm, I'm here!" return kira_text async def transcribe_audio(audio_bytes: bytes) -> str | None: """Transcribe audio bytes using cheapest STT model. Accepts raw PCM16 mono 24kHz data — wraps in WAV container automatically. """ client = get_openai() try: # Wrap raw PCM16 in WAV container for the API wav_bytes = _pcm16_to_wav(audio_bytes) transcript = await client.audio.transcriptions.create( model="gpt-4o-mini-transcribe", file=("audio.wav", wav_bytes, "audio/wav"), response_format="text", ) return transcript.strip() if transcript and transcript.strip() else None except Exception as e: logger.warning(f"STT error: {e}") return None async def synthesize_speech(text: str) -> bytes: """Generate TTS audio from text.""" client = get_openai() try: resp = await client.audio.speech.create( model="tts-1", voice="nova", input=text, response_format="opus", ) return resp.content except Exception as e: logger.warning(f"TTS error: {e}") return b"" def _pcm16_to_wav(pcm_data: bytes) -> bytes: """Wrap raw PCM16 mono 24kHz data in a WAV container.""" import struct num_channels = 1 sample_rate = 24000 bits_per_sample = 16 byte_rate = sample_rate * num_channels * (bits_per_sample // 8) block_align = num_channels * (bits_per_sample // 8) data_size = len(pcm_data) header_size = 44 total_size = header_size + data_size header = b"RIFF" header += struct.pack("