init: Kira — AI body double with Honcho memory
Full voice pipeline (Whisper STT -> DeepSeek LLM -> OpenAI TTS), animated SVG avatar (Live2D-ready), girly-pop UI, lofi music, timer/notes/pets/wardrobe widgets, 10 background scenes with particle effects, Honcho cross-session memory.
This commit is contained in:
@@ -0,0 +1,12 @@
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY . .
|
||||
|
||||
EXPOSE 8000
|
||||
|
||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
@@ -0,0 +1,27 @@
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
class Settings(BaseSettings):
    # Application configuration. Every field below has a default, so the
    # class can be instantiated with nothing set; the `.env` file named in
    # `model_config` is consulted and unknown keys in it are ignored.

    # --- Speech: one OpenAI key shared by STT and TTS ---
    openai_api_key: str = ""

    # --- Chat model: DeepSeek ---
    deepseek_api_key: str = ""
    deepseek_base_url: str = "https://api.deepseek.com/v1"
    deepseek_model: str = "deepseek-chat"

    # --- Memory: Honcho (an empty key leaves memory switched off) ---
    honcho_api_key: str = ""
    honcho_base_url: str = ""

    # --- HTTP server bind address ---
    host: str = "0.0.0.0"
    port: int = 8000

    model_config = dict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )
|
||||
|
||||
|
||||
settings = Settings()
|
||||
+217
@@ -0,0 +1,217 @@
|
||||
"""Kira — AI body double backend
|
||||
|
||||
Real-time speech-to-speech pipeline:
|
||||
mic audio → Whisper API → text → DeepSeek LLM → response text → OpenAI TTS → audio
|
||||
|
||||
Honcho memory integration:
|
||||
Cross-session user context injected into LLM prompts,
|
||||
conversation exchanges stored for continuous learning.
|
||||
"""
|
||||
|
||||
import json
|
||||
import base64
|
||||
import uuid
|
||||
import logging
|
||||
|
||||
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from config import settings
|
||||
from services.stt import transcribe_audio
|
||||
from services.llm import get_kira_response
|
||||
from services.tts import synthesize_speech
|
||||
from services.memory import kira_memory
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger("kira")
|
||||
|
||||
app = FastAPI(title="Kira Backend")
|
||||
|
||||
# CORS is left wide open (any origin, method and header) so the frontend can
# be served from anywhere. Credentialed CORS is switched off: FastAPI's CORS
# documentation states that wildcard origins/methods/headers must not be
# combined with allow_credentials=True, and none of the endpoints in this
# module read cookies or auth headers.
# NOTE(review): if cookie-based auth is added later, replace the "*" entries
# with an explicit origin list before re-enabling credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||
|
||||
# ─── Base system prompt (static part) ───
|
||||
BASE_SYSTEM_PROMPT = (
|
||||
"You are Kira, a warm, kind, and encouraging AI body double. "
|
||||
"You speak in a friendly, girly-pop tone. You are helping someone with ADHD "
|
||||
"stay focused and on task. Keep responses short, supportive, and uplifting. "
|
||||
"Check in on them. Remind them to take breaks. Celebrate small wins. "
|
||||
"Use occasional emoji but don't overdo it. Never be judgmental. "
|
||||
"You remember things about them between conversations."
|
||||
)
|
||||
|
||||
|
||||
@app.on_event("startup")
async def startup():
    """Try to bring up Honcho memory when the app starts and log the outcome."""
    memory_ready = kira_memory.init()
    if not memory_ready:
        # init() returned False: the app keeps running, just without memory.
        logger.info("Honcho memory not configured — running without memory")
        return
    logger.info("Honcho memory initialized")
|
||||
|
||||
|
||||
@app.get("/api/health")
async def health():
    """Liveness probe that also reports whether Honcho memory is in use."""
    return {
        "status": "ok",
        "name": "kira",
        "memory": "active" if kira_memory.enabled else "disabled",
    }
|
||||
|
||||
|
||||
def build_system_prompt(user_id: str) -> dict:
    """Return the system message for the LLM, enriched with memory if possible.

    The message always starts with ``BASE_SYSTEM_PROMPT``. When Honcho memory
    is enabled, the suffix produced by ``kira_memory`` for ``user_id`` is
    appended; any failure while fetching it is logged and the plain base
    prompt is used instead.
    """
    content = BASE_SYSTEM_PROMPT

    if not kira_memory.enabled:
        return {"role": "system", "content": content}

    try:
        # Peers must be bound to this user before the suffix is requested.
        kira_memory.ensure_peers(user_id)
        content += kira_memory.build_system_prompt_suffix() or ""
    except Exception as e:
        logger.warning(f"Failed to build memory context: {e}")

    return {"role": "system", "content": content}
|
||||
|
||||
|
||||
async def _respond_to_user(
    websocket: WebSocket,
    session_id: str,
    user_id: str,
    conversation_history: list[dict],
    user_text: str,
) -> None:
    """Run one LLM -> memory -> TTS turn for ``user_text`` and send the result.

    Appends the user and assistant messages to ``conversation_history`` (in
    place), stores the exchange in Honcho when memory is enabled, and emits
    ``speaking_start``, ``audio`` and ``speaking_end`` frames on ``websocket``.
    """
    conversation_history.append({"role": "user", "content": user_text})

    # Built here, once per conversational turn, because building it queries
    # Honcho; doing it for every incoming frame (audio chunks, pings) would
    # repeat those queries for messages that never reach the LLM.
    system_prompt = build_system_prompt(user_id)

    # Only the 10 most recent history entries are sent along with the prompt.
    messages = [system_prompt] + conversation_history[-10:]
    kira_text = await get_kira_response(messages)

    conversation_history.append({"role": "assistant", "content": kira_text})
    logger.info(f"[{session_id}] Kira: {kira_text}")

    # Persist the exchange; a storage failure must not interrupt the reply.
    if kira_memory.enabled:
        try:
            kira_memory.store_messages(user_text, kira_text)
        except Exception as e:
            logger.warning(f"[{session_id}] Failed to store messages: {e}")

    # The client receives the text first, then the synthesized audio.
    await websocket.send_json({"type": "speaking_start", "text": kira_text})
    audio_bytes = await synthesize_speech(kira_text)
    audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
    await websocket.send_json({
        "type": "audio",
        "data": audio_b64,
        "text": kira_text,
    })
    await websocket.send_json({"type": "speaking_end"})


@app.websocket("/api/ws")
async def conversation_ws(websocket: WebSocket):
    """Serve one conversation over a WebSocket.

    Incoming frames are JSON objects dispatched on their ``type``:

    - ``audio_chunk``: base64 audio in ``data`` is appended to the buffer.
    - ``transcribe``: the buffered audio is transcribed, echoed back as a
      ``transcript`` frame, then answered by Kira.
    - ``conversation_text``: the non-empty ``text`` is answered by Kira.
    - ``ping``: answered with ``pong``.

    Any other exception ends the loop after a best-effort ``error`` frame.
    """
    await websocket.accept()
    session_id = str(uuid.uuid4())[:8]
    user_id = "default-user"
    logger.info(f"[{session_id}] WebSocket connected")

    # Audio buffer accumulates chunks from one utterance
    audio_buffer = bytearray()
    conversation_history: list[dict] = []

    # Initialize Honcho for this session.
    # NOTE(review): kira_memory is a module-level singleton, so the session
    # set here replaces the one used by any other open connection — confirm
    # that a single client at a time is the intended deployment.
    if kira_memory.enabled:
        try:
            kira_memory.ensure_peers(user_id)
            kira_memory.ensure_session(session_id)
            logger.info(f"[{session_id}] Honcho session ready")
        except Exception as e:
            logger.warning(f"[{session_id}] Honcho setup failed: {e}")

    try:
        while True:
            raw = await websocket.receive_text()
            msg = json.loads(raw)
            msg_type = msg.get("type", "")

            if msg_type == "audio_chunk":
                audio_buffer.extend(base64.b64decode(msg["data"]))

            elif msg_type == "transcribe":
                if not audio_buffer:
                    await websocket.send_json({"type": "error", "message": "No audio data"})
                    continue

                logger.info(f"[{session_id}] Transcribing {len(audio_buffer)} bytes...")

                # Speech-to-text; the buffer is reset whether or not it succeeds.
                transcript = await transcribe_audio(bytes(audio_buffer))
                audio_buffer.clear()

                if not transcript:
                    await websocket.send_json({"type": "error", "message": "Could not transcribe audio"})
                    continue

                # Echo transcript
                await websocket.send_json({
                    "type": "transcript",
                    "text": transcript,
                })

                logger.info(f"[{session_id}] User: {transcript}")
                await _respond_to_user(
                    websocket, session_id, user_id, conversation_history, transcript
                )

            elif msg_type == "ping":
                await websocket.send_json({"type": "pong"})

            elif msg_type == "conversation_text":
                user_text = msg.get("text", "").strip()
                if not user_text:
                    continue

                logger.info(f"[{session_id}] User (text): {user_text}")
                await _respond_to_user(
                    websocket, session_id, user_id, conversation_history, user_text
                )

    except WebSocketDisconnect:
        logger.info(f"[{session_id}] Disconnected")
    except Exception as e:
        logger.error(f"[{session_id}] Error: {e}")
        try:
            await websocket.send_json({"type": "error", "message": str(e)})
        except Exception:
            # Best effort only: the socket may already be closed.
            logger.debug(f"[{session_id}] Could not deliver error to client")
|
||||
@@ -0,0 +1,9 @@
|
||||
fastapi>=0.115.0
|
||||
uvicorn[standard]>=0.34.0
|
||||
python-dotenv>=1.1.0
|
||||
openai>=1.55.0
|
||||
websockets>=14.1
|
||||
pydantic>=2.10.0
|
||||
pydantic-settings>=2.7.0
|
||||
httpx>=0.28.0
|
||||
honcho-ai>=2.1.0
|
||||
@@ -0,0 +1,30 @@
|
||||
"""LLM service — DeepSeek API"""
|
||||
|
||||
import logging
|
||||
from openai import AsyncOpenAI
|
||||
from config import settings
|
||||
|
||||
logger = logging.getLogger("kira.llm")
|
||||
|
||||
|
||||
# Lazily created DeepSeek client shared by every call (AsyncOpenAI or None).
_client = None


def _get_client() -> AsyncOpenAI:
    """Return the shared AsyncOpenAI client configured for the DeepSeek API.

    The client is built on first use from ``settings.deepseek_api_key`` and
    ``settings.deepseek_base_url`` and then reused, so each request no longer
    constructs (and abandons) a fresh client and its connection pool.
    """
    global _client
    if _client is None:
        _client = AsyncOpenAI(
            api_key=settings.deepseek_api_key,
            base_url=settings.deepseek_base_url,
        )
    return _client
|
||||
|
||||
|
||||
async def get_kira_response(messages: list[dict]) -> str:
    """Ask the configured chat model for Kira's next reply.

    ``messages`` is passed straight through as the chat history. An empty
    completion is replaced by a short filler line, and any exception is
    logged and answered with a fixed fallback sentence, so this coroutine
    always returns text.
    """
    try:
        completion = await _get_client().chat.completions.create(
            model=settings.deepseek_model,
            messages=messages,
            max_tokens=300,
            temperature=0.7,
        )
        reply = completion.choices[0].message.content
        if reply:
            return reply
        return "Mhm, I'm here!"
    except Exception as e:
        logger.error(f"LLM error: {e}")
        return "I'm still here with you! Could you say that again?"
|
||||
@@ -0,0 +1,183 @@
|
||||
"""Honcho memory service for Kira.
|
||||
|
||||
Integrates Honcho persistent memory into Kira's conversation pipeline:
|
||||
- User context retrieval before LLM calls
|
||||
- Message storage after each exchange
|
||||
- Cross-session memory for personalized responses
|
||||
"""
|
||||
|
||||
import logging
|
||||
from honcho import Honcho
|
||||
from honcho.peer import Peer
|
||||
from honcho.session import Session
|
||||
from config import settings
|
||||
|
||||
logger = logging.getLogger("kira.memory")
|
||||
|
||||
|
||||
class KiraMemory:
    """Manages Honcho memory for Kira conversations.

    Holds one Honcho client together with the current user peer, Kira peer
    and session. Until ``init()`` has succeeded, ``enabled`` is False and
    every other method is a no-op (or returns an empty string).
    """

    def __init__(self):
        self._honcho: Honcho | None = None
        self._user_peer: Peer | None = None
        self._kira_peer: Peer | None = None
        self._session: Session | None = None
        self._initialized = False

    def init(self) -> bool:
        """Initialize the Honcho connection.

        Returns:
            True when a client was created. False when no API key is
            configured or when creating the client raised; in both cases
            memory stays disabled and the reason is logged.
        """
        api_key = settings.honcho_api_key
        base_url = settings.honcho_base_url

        if not api_key:
            logger.warning("HONCHO_API_KEY not set — memory disabled")
            return False

        # A custom base URL replaces the "production" environment selector.
        client_kwargs = {"api_key": api_key, "workspace_id": "kira"}
        if base_url:
            client_kwargs["base_url"] = base_url
        else:
            client_kwargs["environment"] = "production"

        try:
            self._honcho = Honcho(**client_kwargs)
        except Exception as e:
            # Memory is optional everywhere else in the app, so a failure to
            # create the client must not abort application startup.
            self._honcho = None
            self._initialized = False
            logger.warning(f"Honcho initialization failed — memory disabled: {e}")
            return False

        logger.info("Honcho connected to workspace 'kira'")
        self._initialized = True
        return True

    @property
    def enabled(self) -> bool:
        """True once ``init()`` has succeeded and a client is available."""
        return self._initialized and self._honcho is not None

    def ensure_peers(self, user_id: str = "default-user") -> None:
        """Get or create Honcho peers for the user and Kira."""
        if not self.enabled:
            return

        self._user_peer = self._honcho.peer(user_id)
        self._kira_peer = self._honcho.peer("kira")

        logger.info(f"Peers ready: user={user_id}, kira")

    def ensure_session(self, session_id: str) -> None:
        """Get or create a Honcho session for this conversation.

        When both peers are already set (see ``ensure_peers``) they are
        added to the session.
        """
        if not self.enabled:
            return

        self._session = self._honcho.session(session_id)

        if self._user_peer and self._kira_peer:
            self._session.add_peers([self._user_peer, self._kira_peer])

        logger.info(f"Session ready: {session_id}")

    def _ask_user_peer(self, question, label, failure, target=None) -> str:
        """Query the user peer and wrap a non-empty answer as ``\\n[label: answer]``.

        Returns "" when memory is disabled, no user peer is set, the answer
        is empty, or the query raises (the error is logged with ``failure``).
        """
        if not self.enabled or not self._user_peer:
            return ""

        try:
            if target is None:
                answer = self._user_peer.chat(question)
            else:
                answer = self._user_peer.chat(question, target=target)
        except Exception as e:
            logger.warning(f"{failure}: {e}")
            return ""

        return f"\n[{label}: {answer}]" if answer else ""

    def get_user_context(self) -> str:
        """Query Honcho for context about the user.

        Returns a bracketed ``[Memory: ...]`` line to inject into the LLM
        system prompt, or an empty string if there is no context.
        """
        return self._ask_user_peer(
            "What should Kira know about this user? "
            "Summarize their preferences, current projects, mood, "
            "and any important context in 2-3 sentences.",
            label="Memory",
            failure="Failed to get user context",
        )

    def get_kira_context(self) -> str:
        """Query Honcho, targeting the "kira" peer, for relationship context.

        Returns a bracketed ``[Kira Context: ...]`` line, or an empty string.
        """
        return self._ask_user_peer(
            "What is the user's relationship with Kira? "
            "How do they feel about their focus sessions? "
            "Summarize in 1-2 sentences.",
            label="Kira Context",
            failure="Failed to get relationship context",
            target="kira",
        )

    def build_system_prompt_suffix(self) -> str:
        """Build a context suffix to append to Kira's system prompt.

        Returns "" when memory is disabled or neither context query
        produced anything.
        """
        if not self.enabled:
            return ""

        parts = [
            ctx for ctx in (self.get_user_context(), self.get_kira_context()) if ctx
        ]
        if not parts:
            return ""

        return "\n\n---\n### What Kira remembers:" + "".join(parts)

    def store_messages(
        self,
        user_message: str,
        kira_message: str,
    ) -> None:
        """Store a conversation exchange in Honcho.

        Each side is stored only if its peer is set; failures are logged
        and never raised to the caller.
        """
        if not self.enabled or not self._session:
            return

        try:
            messages = []
            if self._user_peer:
                messages.append(self._user_peer.message(user_message))
            if self._kira_peer:
                messages.append(self._kira_peer.message(kira_message))

            if messages:
                self._session.add_messages(messages)
                logger.debug("Stored conversation exchange in Honcho")
        except Exception as e:
            logger.warning(f"Failed to store messages: {e}")

    def store_user_message(self, text: str) -> None:
        """Store a single user message; failures are logged, not raised."""
        if not self.enabled or not self._session or not self._user_peer:
            return
        try:
            self._session.add_messages([self._user_peer.message(text)])
        except Exception as e:
            logger.warning(f"Failed to store user message: {e}")

    def store_kira_message(self, text: str) -> None:
        """Store a single Kira message; failures are logged, not raised."""
        if not self.enabled or not self._session or not self._kira_peer:
            return
        try:
            self._session.add_messages([self._kira_peer.message(text)])
        except Exception as e:
            logger.warning(f"Failed to store Kira message: {e}")
|
||||
|
||||
|
||||
# Singleton instance for the app
|
||||
kira_memory = KiraMemory()
|
||||
@@ -0,0 +1,27 @@
|
||||
"""Speech-to-text via OpenAI Whisper API"""
|
||||
|
||||
import logging
|
||||
from openai import AsyncOpenAI
|
||||
from config import settings
|
||||
|
||||
logger = logging.getLogger("kira.stt")
|
||||
|
||||
|
||||
# Lazily created OpenAI client shared by every transcription (AsyncOpenAI or None).
_client = None


def _get_client() -> AsyncOpenAI:
    """Return the shared AsyncOpenAI client used for speech-to-text.

    Built on first use from ``settings.openai_api_key`` and then reused, so
    each transcription no longer constructs (and abandons) a fresh client
    and its connection pool.
    """
    global _client
    if _client is None:
        _client = AsyncOpenAI(api_key=settings.openai_api_key)
    return _client
|
||||
|
||||
|
||||
async def transcribe_audio(audio_bytes: bytes) -> str | None:
    """Send ``audio_bytes`` to the Whisper API and return the recognized text.

    The audio is uploaded as ``audio.webm`` with English as the language
    and plain text as the response format. Returns the stripped transcript,
    or ``None`` when it is empty or the request fails (the error is logged).
    """
    upload = ("audio.webm", audio_bytes, "audio/webm")
    try:
        whisper = _get_client()
        raw_text = await whisper.audio.transcriptions.create(
            model="whisper-1",
            file=upload,
            language="en",
            response_format="text",
        )
        cleaned = raw_text.strip() if raw_text else ""
        return cleaned or None
    except Exception as e:
        logger.error(f"STT error: {e}")
        return None
|
||||
@@ -0,0 +1,31 @@
|
||||
"""Text-to-speech via OpenAI TTS API"""
|
||||
|
||||
import logging
|
||||
from openai import AsyncOpenAI
|
||||
from config import settings
|
||||
|
||||
logger = logging.getLogger("kira.tts")
|
||||
|
||||
|
||||
# Lazily created OpenAI client shared by every synthesis call (AsyncOpenAI or None).
_client = None


def _get_client() -> AsyncOpenAI:
    """Return the shared AsyncOpenAI client used for text-to-speech.

    Built on first use from ``settings.openai_api_key`` and then reused, so
    each synthesis no longer constructs (and abandons) a fresh client and
    its connection pool.
    """
    global _client
    if _client is None:
        _client = AsyncOpenAI(api_key=settings.openai_api_key)
    return _client
|
||||
|
||||
|
||||
async def synthesize_speech(text: str, voice: str = "nova") -> bytes:
    """Turn ``text`` into spoken audio with the OpenAI TTS API.

    Uses the ``tts-1`` model and requests Opus output; ``voice`` defaults
    to "nova". Returns the raw audio bytes, or ``b""`` when the request
    fails (the error is logged).
    """
    try:
        speech = await _get_client().audio.speech.create(
            model="tts-1",
            voice=voice,
            input=text,
            response_format="opus",
        )
        return speech.content
    except Exception as e:
        logger.error(f"TTS error: {e}")
        return b""
|
||||
Reference in New Issue
Block a user