import { useState, useCallback, useRef, useEffect } from 'react';

/** User-selectable settings that the server echoes back in `identified` messages. */
export interface UserPreferences {
  name: string;
  scene: string;
  outfit: string;
  accessory: string;
}

/** One entry of the visible conversation transcript. */
interface Message {
  id: string;
  role: 'user' | 'kira';
  text: string;
  timestamp: number;
}

/**
 * Loose shape of a JSON frame received over the WebSocket. Only the fields
 * this module actually reads are listed; every one is optional because the
 * payload is not validated beyond being a non-null object.
 */
interface ServerMessage {
  type?: string;
  user_id?: string;
  preferences?: Partial<UserPreferences>;
  role?: string;
  text?: string;
  data?: string;
  message?: string;
}

const WS_URL = `${location.protocol === 'https:' ? 'wss:' : 'ws:'}//${location.host}/api/ws`;
const USER_ID_KEY = 'kira-user-id';
/** Delay before a reconnect attempt after the socket closes. */
const RECONNECT_DELAY_MS = 3000;

/** Read the persisted user id from localStorage ('' when none is stored). */
function loadUserId(): string {
  return localStorage.getItem(USER_ID_KEY) || '';
}

/** Persist the user id to localStorage. */
function saveUserId(id: string) {
  localStorage.setItem(USER_ID_KEY, id);
}

/**
 * Capture PCM16 mono 24kHz audio from mic and send via callback.
 *
 * @param stream  Microphone stream to read from.
 * @param onChunk Called with the raw bytes of each converted Int16 sample block.
 * @returns A handle whose `stop()` disconnects the audio graph and closes the
 *          AudioContext. Calling `stop()` more than once is a no-op.
 */
function startPCMCapture(
  stream: MediaStream,
  onChunk: (pcm16: Uint8Array) => void,
): { stop: () => void } {
  const ctx = new AudioContext({ sampleRate: 24000 });
  const source = ctx.createMediaStreamSource(stream);
  const processor = ctx.createScriptProcessor(4096, 1, 1);
  let running = true;

  processor.onaudioprocess = (e) => {
    if (!running) return;
    const input = e.inputBuffer.getChannelData(0); // Float32Array [-1, 1]

    // Convert float32 → PCM16 int16
    const pcm16 = new Int16Array(input.length);
    for (let i = 0; i < input.length; i++) {
      const s = Math.max(-1, Math.min(1, input[i]));
      // Negative and positive halves use different scales so that both -1 and
      // +1 land inside the int16 range.
      pcm16[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
    }
    onChunk(new Uint8Array(pcm16.buffer));
  };

  source.connect(processor);
  processor.connect(ctx.destination);

  return {
    stop: () => {
      if (!running) return;
      running = false;
      source.disconnect();
      processor.disconnect();
      // close() returns a promise; swallow a rejection instead of leaving it unhandled.
      void ctx.close().catch(() => {});
    },
  };
}

/**
 * React hook that owns the WebSocket conversation with the backend: identity,
 * preferences, transcript messages, microphone streaming and reply playback.
 *
 * The socket is opened on mount, re-opened RECONNECT_DELAY_MS after it closes,
 * and torn down (together with any pending reconnect timer, active capture,
 * mic stream and audio object URL) on unmount.
 */
export function useConversation() {
  const [messages, setMessages] = useState<Message[]>([]);
  const [isConnected, setIsConnected] = useState(false);
  const [isKiraSpeaking, setIsKiraSpeaking] = useState(false);
  const [isRecording, setIsRecording] = useState(false);
  const [identified, setIdentified] = useState(false);
  const [preferences, setPreferences] = useState<UserPreferences>({
    name: '',
    scene: 'cozy-room',
    outfit: 'cozy-hoodie',
    accessory: '',
  });
  const [loadingPrefs, setLoadingPrefs] = useState(true);
  const [micError, setMicError] = useState<string | null>(null);

  const wsRef = useRef<WebSocket | null>(null);
  const audioRef = useRef<HTMLAudioElement | null>(null);
  const captureRef = useRef<{ stop: () => void } | null>(null);
  const streamRef = useRef<MediaStream | null>(null);
  // NOTE(review): only ever reset to [] in this file; element type is assumed — confirm.
  const audioBufferRef = useRef<Uint8Array[]>([]);
  // Object URL currently assigned to the audio element, kept so it can be revoked.
  const audioUrlRef = useRef<string | null>(null);
  const reconnectTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  // True once the owning component has unmounted; blocks reconnects and late mic starts.
  const unmountedRef = useRef(false);
  // True while startRecording is awaiting mic permission, to reject re-entrant calls.
  const startingRef = useRef(false);

  /** Revoke the object URL of the previously played reply, if any. */
  const releaseAudioUrl = useCallback(() => {
    if (audioUrlRef.current !== null) {
      URL.revokeObjectURL(audioUrlRef.current);
      audioUrlRef.current = null;
    }
  }, []);

  /** Append a message to the transcript. */
  const addMessage = useCallback((role: 'user' | 'kira', text: string) => {
    setMessages((prev) => [
      ...prev,
      { id: crypto.randomUUID(), role, text, timestamp: Date.now() },
    ]);
  }, []);

  // Handle incoming WS messages
  const handleMessage = useCallback(
    (msg: ServerMessage) => {
      switch (msg.type) {
        case 'identified': {
          setIdentified(true);
          setLoadingPrefs(false);
          if (msg.user_id) saveUserId(msg.user_id);
          if (msg.preferences) {
            // `||` is deliberate: empty strings fall back to the defaults too.
            setPreferences({
              name: msg.preferences.name || '',
              scene: msg.preferences.scene || 'cozy-room',
              outfit: msg.preferences.outfit || 'cozy-hoodie',
              accessory: msg.preferences.accessory || '',
            });
          }
          break;
        }
        case 'transcript':
          addMessage(msg.role === 'user' ? 'user' : 'kira', msg.text ?? '');
          break;
        case 'transcript_delta':
          // Streaming partial transcript — could show as typing indicator
          break;
        case 'speaking_start':
          setIsKiraSpeaking(true);
          break;
        case 'audio': {
          // Incoming Opus audio from TTS (full response, not streamed)
          const player = audioRef.current;
          if (msg.data && player) {
            const binary = atob(msg.data);
            const bytes = new Uint8Array(binary.length);
            for (let i = 0; i < binary.length; i++) {
              bytes[i] = binary.charCodeAt(i);
            }
            const blob = new Blob([bytes], { type: 'audio/ogg' });
            const previousUrl = audioUrlRef.current;
            const url = URL.createObjectURL(blob);
            audioUrlRef.current = url;
            player.src = url;
            // The element no longer references the old blob; release it.
            if (previousUrl !== null) URL.revokeObjectURL(previousUrl);
            // Best effort: play() rejects on autoplay blocks and when a newer
            // src/pause interrupts it, neither of which is actionable here.
            player.play().catch(() => {});
          }
          break;
        }
        case 'speaking_end':
          setIsKiraSpeaking(false);
          audioBufferRef.current = [];
          break;
        case 'interruption':
          setIsKiraSpeaking(false);
          audioBufferRef.current = [];
          if (audioRef.current) {
            audioRef.current.pause();
            audioRef.current.currentTime = 0;
          }
          break;
        case 'error':
          console.error('[Kira]', msg.message);
          break;
      }
    },
    [addMessage],
  );

  // Connect WebSocket
  const connect = useCallback(() => {
    if (unmountedRef.current) return;
    const existing = wsRef.current;
    if (
      existing &&
      (existing.readyState === WebSocket.OPEN ||
        existing.readyState === WebSocket.CONNECTING)
    ) {
      return;
    }

    setLoadingPrefs(true);
    const ws = new WebSocket(WS_URL);
    wsRef.current = ws;

    ws.onopen = () => {
      setIsConnected(true);
      const savedId = loadUserId();
      if (savedId) {
        ws.send(JSON.stringify({ type: 'identify', user_id: savedId }));
      } else {
        setLoadingPrefs(false);
      }
    };

    ws.onclose = () => {
      // Ignore close events from a socket that has already been replaced or
      // released by the unmount cleanup.
      if (wsRef.current !== ws) return;
      setIsConnected(false);
      if (unmountedRef.current) return;
      reconnectTimerRef.current = setTimeout(connect, RECONNECT_DELAY_MS);
    };

    ws.onmessage = (event) => {
      let parsed: unknown;
      try {
        parsed = JSON.parse(event.data);
      } catch {
        return; // ignore parse errors
      }
      if (typeof parsed !== 'object' || parsed === null) return;
      try {
        handleMessage(parsed as ServerMessage);
      } catch (err) {
        // e.g. atob() on malformed base64; report instead of hiding it.
        console.error('[Kira]', err);
      }
    };
  }, [handleMessage]);

  // Audio playback element
  useEffect(() => {
    if (!audioRef.current) {
      audioRef.current = new Audio();
      audioRef.current.onended = () => setIsKiraSpeaking(false);
    }
  }, []);

  // ── Identity ──

  /** Derive a user id from the display name, persist it and announce it to the server. */
  const identify = useCallback((name: string) => {
    const userId = `kira-${name.toLowerCase().replace(/[^a-z0-9]/g, '-')}`;
    saveUserId(userId);
    setPreferences((p) => ({ ...p, name }));
    if (wsRef.current?.readyState === WebSocket.OPEN) {
      wsRef.current.send(JSON.stringify({ type: 'identify', user_id: userId, name }));
    }
  }, []);

  // ── Preferences ──

  /** Update one preference locally and, once identified, forward it to the server. */
  const setPreference = useCallback(
    (key: string, value: string) => {
      setPreferences((p) => ({ ...p, [key]: value }));
      if (wsRef.current?.readyState === WebSocket.OPEN && identified) {
        wsRef.current.send(JSON.stringify({ type: 'set_preference', key, value }));
      }
    },
    [identified],
  );

  // ── Audio (Realtime PCM16) ──

  /**
   * Request the microphone and start streaming base64 PCM16 chunks as
   * `{ type: 'audio' }` frames. Does nothing if a recording is already active
   * or starting. Failures are surfaced through `micError`.
   */
  const startRecording = useCallback(async () => {
    // Check HTTPS
    if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
      addMessage('kira', 'Mic requires HTTPS. Try accessing via HTTPS!');
      return;
    }
    // A second capture would orphan the first one's AudioContext and stream.
    if (captureRef.current || startingRef.current) return;

    startingRef.current = true;
    let stream: MediaStream | null = null;
    try {
      setMicError(null);
      stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      if (unmountedRef.current) {
        stream.getTracks().forEach((t) => t.stop());
        return;
      }

      const ws = wsRef.current;
      if (!ws || ws.readyState !== WebSocket.OPEN) {
        addMessage('kira', 'Not connected to server yet...');
        stream.getTracks().forEach((t) => t.stop());
        return;
      }

      // Start PCM16 capture — each chunk sent as WS message
      const capture = startPCMCapture(stream, (pcm16) => {
        if (ws.readyState === WebSocket.OPEN) {
          const base64 = arrayBufferToBase64(pcm16.buffer);
          ws.send(JSON.stringify({ type: 'audio', data: base64 }));
        }
      });
      streamRef.current = stream;
      captureRef.current = capture;
      setIsRecording(true);
    } catch (err) {
      // Release the mic if it was acquired before the failure.
      stream?.getTracks().forEach((t) => t.stop());
      const msg = err instanceof Error ? err.message : String(err);
      setMicError(msg);
      console.error('[Kira Mic]', msg);
    } finally {
      startingRef.current = false;
    }
  }, [addMessage]);

  /** Stop capturing, release the mic and ask the backend to transcribe. */
  const stopRecording = useCallback(() => {
    captureRef.current?.stop();
    captureRef.current = null;
    streamRef.current?.getTracks().forEach((t) => t.stop());
    streamRef.current = null;
    setIsRecording(false);

    // Tell backend to process accumulated audio
    if (wsRef.current?.readyState === WebSocket.OPEN) {
      wsRef.current.send(JSON.stringify({ type: 'transcribe' }));
    }
  }, []);

  // ── Text ──

  /** Send a typed message; blank input is ignored. */
  const sendText = useCallback((text: string) => {
    if (!text.trim()) return;
    if (wsRef.current?.readyState === WebSocket.OPEN) {
      wsRef.current.send(JSON.stringify({ type: 'conversation_text', text: text.trim() }));
    }
  }, []);

  // Connect on mount
  useEffect(() => {
    // Reset for the case where the effect is re-run after a cleanup.
    unmountedRef.current = false;
    connect();
    return () => {
      unmountedRef.current = true;
      if (reconnectTimerRef.current !== null) {
        clearTimeout(reconnectTimerRef.current);
        reconnectTimerRef.current = null;
      }
      // Clear the ref first so the socket's onclose sees itself as stale and
      // does not schedule a reconnect.
      const ws = wsRef.current;
      wsRef.current = null;
      ws?.close();
      captureRef.current?.stop();
      captureRef.current = null;
      streamRef.current?.getTracks().forEach((t) => t.stop());
      streamRef.current = null;
      audioRef.current?.pause();
      releaseAudioUrl();
    };
  }, [connect, releaseAudioUrl]);

  return {
    messages,
    isConnected,
    isKiraSpeaking,
    isRecording,
    identified,
    preferences,
    loadingPrefs,
    micError,
    identify,
    setPreference,
    sendText,
    startRecording,
    stopRecording,
  };
}

// ── Helpers
/**
 * Encode the bytes of a buffer as a base64 string.
 *
 * Bytes are turned into a binary string in fixed-size chunks so that neither
 * the number of string concatenations nor the argument count passed to
 * `String.fromCharCode` grows with the input size.
 *
 * @param buffer Buffer whose entire contents are encoded.
 * @returns Base64 text; the empty string for an empty buffer.
 */
function arrayBufferToBase64(buffer: ArrayBufferLike): string {
  const bytes = new Uint8Array(buffer);
  const chunkSize = 0x8000;
  let binary = '';
  for (let start = 0; start < bytes.length; start += chunkSize) {
    binary += String.fromCharCode(...bytes.subarray(start, start + chunkSize));
  }
  return btoa(binary);
}

/**
 * Convert raw PCM16 mono 24kHz to a playable WAV blob.
 *
 * @param pcm16 Sample bytes, written verbatim after the header.
 * @returns A new buffer holding a 44-byte RIFF/WAVE header followed by the samples.
 */
function pcm16ToWav(pcm16: Uint8Array): ArrayBuffer {
  const numChannels = 1;
  const sampleRate = 24000;
  const bitsPerSample = 16;
  const byteRate = sampleRate * numChannels * (bitsPerSample / 8);
  const blockAlign = numChannels * (bitsPerSample / 8);
  const dataSize = pcm16.length;
  const headerSize = 44;
  const totalSize = headerSize + dataSize;

  const buf = new ArrayBuffer(totalSize);
  const view = new DataView(buf);

  // RIFF header
  writeString(view, 0, 'RIFF');
  view.setUint32(4, totalSize - 8, true); // size of everything after this field
  writeString(view, 8, 'WAVE');

  // fmt subchunk
  writeString(view, 12, 'fmt ');
  view.setUint32(16, 16, true); // subchunk size
  view.setUint16(20, 1, true); // PCM
  view.setUint16(22, numChannels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, byteRate, true);
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, bitsPerSample, true);

  // data subchunk
  writeString(view, 36, 'data');
  view.setUint32(40, dataSize, true);

  // PCM data — one bulk copy; honours the source view's byteOffset/length.
  new Uint8Array(buf, headerSize).set(pcm16);

  return buf;
}

/** Write `str` into `view` starting at `offset`, one byte per UTF-16 code unit. */
function writeString(view: DataView, offset: number, str: string) {
  for (let i = 0; i < str.length; i++) {
    view.setUint8(offset + i, str.charCodeAt(i));
  }
}