diff --git a/backend/main.py b/backend/main.py index 85ebc11..c5abf76 100644 --- a/backend/main.py +++ b/backend/main.py @@ -8,7 +8,6 @@ import json import base64 import uuid import logging -import asyncio from fastapi import FastAPI, WebSocket, WebSocketDisconnect from fastapi.middleware.cors import CORSMiddleware @@ -29,7 +28,6 @@ app.add_middleware( allow_headers=["*"], ) -# System prompt BASE_SYSTEM_PROMPT = ( "You are Kira, a warm, kind, and encouraging AI body double. " "You speak in a friendly, girly-pop tone. You are helping someone with ADHD " @@ -63,25 +61,13 @@ async def health(): return {"status": "ok", "name": "kira", "memory": mem_status} -def build_system_prompt(user_id: str) -> str: - prompt = BASE_SYSTEM_PROMPT - if kira_memory.enabled: - try: - kira_memory.ensure_peers(user_id) - suffix = kira_memory.build_system_prompt_suffix() - if suffix: - prompt += suffix - except Exception as e: - logger.warning(f"Memory context failed: {e}") - return prompt +async def run_conversation(text: str, memory_suffix: str = "") -> str: + """LLM call with optional Honcho memory context injected into system prompt.""" + system_prompt = BASE_SYSTEM_PROMPT + if memory_suffix: + system_prompt += memory_suffix - -async def run_conversation(text: str, user_id: str) -> str: - """STT → LLM → TTS using the cheapest models.""" - system_prompt = build_system_prompt(user_id) client = get_openai() - - # LLM resp = await client.chat.completions.create( model="gpt-5.4-nano", messages=[ @@ -91,8 +77,7 @@ async def run_conversation(text: str, user_id: str) -> str: max_completion_tokens=300, temperature=0.7, ) - kira_text = resp.choices[0].message.content or "Mhm, I'm here!" - return kira_text + return resp.choices[0].message.content or "Mhm, I'm here!" async def transcribe_audio(audio_bytes: bytes) -> str | None: @@ -126,41 +111,13 @@ async def synthesize_speech(text: str) -> bytes: return b"" -def _pcm16_to_wav(pcm_data: bytes) -> bytes: - """Wrap raw PCM16 mono 24kHz data in a WAV container.""" - import struct - num_channels = 1 - sample_rate = 24000 - bits_per_sample = 16 - byte_rate = sample_rate * num_channels * (bits_per_sample // 8) - block_align = num_channels * (bits_per_sample // 8) - data_size = len(pcm_data) - header_size = 44 - total_size = header_size + data_size - - header = b"RIFF" - header += struct.pack("