// kira/frontend/src/hooks/useConversation.ts

import { useState, useCallback, useRef, useEffect } from 'react';

/**
 * Per-user settings held in the conversation hook's state. They are populated
 * from the server's `identified` message and changed via `set_preference`.
 */
export interface UserPreferences {
  /** Name the user identified with; empty string until one is known. */
  name: string;
  /** Scene identifier; the hook falls back to 'cozy-room'. */
  scene: string;
  /** Outfit identifier; the hook falls back to 'cozy-hoodie'. */
  outfit: string;
  /** Accessory identifier; empty string when none is set. */
  accessory: string;
}

/** Author of a conversation entry: the local user or Kira. */
type MessageRole = 'user' | 'kira';

/** A single entry in the conversation transcript kept by the hook. */
interface Message {
  /** Client-generated unique identifier for the entry. */
  id: string;
  /** Which side of the conversation produced the text. */
  role: MessageRole;
  /** Transcript text of the entry. */
  text: string;
  /** Creation time as returned by `Date.now()` (milliseconds since epoch). */
  timestamp: number;
}

/** WebSocket scheme matching the page: `wss:` when served over HTTPS, `ws:` otherwise. */
const wsScheme = location.protocol === 'https:' ? 'wss:' : 'ws:';
/** Conversation WebSocket endpoint on the host that served the page. */
const WS_URL = `${wsScheme}//${location.host}/api/ws`;
/** localStorage key under which the user id is persisted between visits. */
const USER_ID_KEY = 'kira-user-id';

/**
 * Read the persisted user id.
 *
 * Storage access is best-effort: browsers may throw when storage is blocked
 * (e.g. a SecurityError with site data disabled), and callers run inside
 * WebSocket event handlers where an exception would abort the handler.
 *
 * @returns The stored id, or an empty string when none is stored or storage
 *          cannot be read.
 */
function loadUserId(): string {
  try {
    return localStorage.getItem(USER_ID_KEY) ?? '';
  } catch (err) {
    console.warn('[Kira] Could not read user id from localStorage', err);
    return '';
  }
}

/**
 * Persist the user id so the next session can re-identify automatically.
 *
 * Failures (blocked storage, exceeded quota) are logged and otherwise ignored
 * so that the caller's remaining work still runs.
 *
 * @param id - User id to store.
 */
function saveUserId(id: string): void {
  try {
    localStorage.setItem(USER_ID_KEY, id);
  } catch (err) {
    console.warn('[Kira] Could not save user id to localStorage', err);
  }
}

/** Delay before a dropped WebSocket connection is retried, in milliseconds. */
const RECONNECT_DELAY_MS = 3000;

/**
 * Loose shape of a JSON frame received from the server. Every field is
 * optional because the payload is only checked to be a JSON object; each
 * handler branch verifies the fields it needs.
 */
interface ServerMessage {
  type?: string;
  user_id?: string;
  preferences?: Partial<UserPreferences> | null;
  role?: string;
  text?: string;
  data?: string;
  message?: string;
}

/**
 * React hook that owns the conversation with the server over a WebSocket.
 *
 * Responsibilities:
 * - opens the socket on mount, re-identifies with the saved user id, and
 *   reconnects after a delay whenever the socket closes while mounted;
 * - collects transcript messages and tracks connection / speaking / recording
 *   state;
 * - buffers incoming base64 audio chunks and plays them as one `audio/ogg`
 *   blob when the server signals `speaking_end`;
 * - records microphone audio with `MediaRecorder` and sends the whole
 *   utterance as base64 when recording stops.
 *
 * @returns Conversation state plus the actions `identify`, `setPreference`,
 *          `sendText`, `startRecording` and `stopRecording`.
 */
export function useConversation() {
  const [messages, setMessages] = useState<Message[]>([]);
  const [isConnected, setIsConnected] = useState(false);
  const [isKiraSpeaking, setIsKiraSpeaking] = useState(false);
  const [isRecording, setIsRecording] = useState(false);
  const [identified, setIdentified] = useState(false);
  const [preferences, setPreferences] = useState<UserPreferences>({
    name: '',
    scene: 'cozy-room',
    outfit: 'cozy-hoodie',
    accessory: '',
  });
  const [loadingPrefs, setLoadingPrefs] = useState(true);
  const [micError, setMicError] = useState<string | null>(null);

  const wsRef = useRef<WebSocket | null>(null);
  const audioRef = useRef<HTMLAudioElement | null>(null);
  const captureRef = useRef<{ stop: () => void } | null>(null);
  const recorderRef = useRef<MediaRecorder | null>(null);
  const streamRef = useRef<MediaStream | null>(null);
  const audioBufferRef = useRef<Uint8Array[]>([]);
  // Object URL currently assigned to the audio element, kept so it can be revoked.
  const audioUrlRef = useRef<string | null>(null);
  // Pending reconnect timer, kept so unmount can cancel it.
  const reconnectTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  // True once the component has unmounted; stops reconnects and late work.
  const unmountedRef = useRef(false);

  /** Revoke the object URL of the previous utterance, if any. */
  const releaseAudioUrl = useCallback(() => {
    if (audioUrlRef.current !== null) {
      URL.revokeObjectURL(audioUrlRef.current);
      audioUrlRef.current = null;
    }
  }, []);

  /** Append one entry to the transcript. */
  const addMessage = useCallback((role: 'user' | 'kira', text: string) => {
    // Build the entry outside the updater so the updater stays pure.
    const entry: Message = { id: createMessageId(), role, text, timestamp: Date.now() };
    setMessages((prev) => [...prev, entry]);
  }, []);

  // Audio playback element
  useEffect(() => {
    const audio = new Audio();
    audio.onended = () => setIsKiraSpeaking(false);
    // A decode/load failure also ends "speaking", otherwise the flag would stick.
    audio.onerror = () => setIsKiraSpeaking(false);
    audioRef.current = audio;

    return () => {
      audio.onended = null;
      audio.onerror = null;
      audio.pause();
      audioRef.current = null;
      releaseAudioUrl();
    };
  }, [releaseAudioUrl]);

  // Handle incoming WS messages
  const handleMessage = useCallback(
    (msg: ServerMessage) => {
      switch (msg.type) {
        case 'identified': {
          setIdentified(true);
          setLoadingPrefs(false);
          if (msg.user_id) saveUserId(msg.user_id);
          const prefs = msg.preferences;
          if (prefs) {
            // `||` is deliberate: empty strings fall back to the defaults too.
            setPreferences({
              name: prefs.name || '',
              scene: prefs.scene || 'cozy-room',
              outfit: prefs.outfit || 'cozy-hoodie',
              accessory: prefs.accessory || '',
            });
          }
          break;
        }

        case 'transcript':
          if (typeof msg.text === 'string') {
            addMessage(msg.role === 'user' ? 'user' : 'kira', msg.text);
          }
          break;

        case 'transcript_delta':
          // Streaming partial transcript — could show as typing indicator
          break;

        case 'speaking_start':
          setIsKiraSpeaking(true);
          break;

        case 'audio':
          // Incoming base64-encoded audio chunk from streaming TTS.
          if (msg.data) {
            audioBufferRef.current.push(
              Uint8Array.from(atob(msg.data), (ch) => ch.charCodeAt(0)),
            );
          }
          break;

        case 'speaking_end': {
          // Take ownership of the buffered chunks and reset the buffer so the
          // next utterance does not replay this one.
          const chunks = audioBufferRef.current;
          audioBufferRef.current = [];
          const audio = audioRef.current;

          if (chunks.length === 0 || !audio) {
            setIsKiraSpeaking(false);
            break;
          }

          const totalLen = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
          const combined = new Uint8Array(totalLen);
          let offset = 0;
          for (const chunk of chunks) {
            combined.set(chunk, offset);
            offset += chunk.length;
          }

          releaseAudioUrl();
          const url = URL.createObjectURL(new Blob([combined], { type: 'audio/ogg' }));
          audioUrlRef.current = url;
          audio.src = url;

          // Playback starts only now, so the speaking flag stays on until the
          // element reports `ended` / `error` or playback is rejected.
          setIsKiraSpeaking(true);
          audio.play().catch((err: unknown) => {
            console.warn('[Kira] Audio playback failed', err);
            setIsKiraSpeaking(false);
          });
          break;
        }

        case 'interruption':
          setIsKiraSpeaking(false);
          // Drop chunks of the interrupted utterance so they are never played.
          audioBufferRef.current = [];
          if (audioRef.current) {
            audioRef.current.pause();
            audioRef.current.currentTime = 0;
          }
          break;

        case 'error':
          console.error('[Kira]', msg.message);
          break;

        default:
          break;
      }
    },
    [addMessage, releaseAudioUrl],
  );

  // Connect WebSocket
  const connect = useCallback(() => {
    if (unmountedRef.current) return;

    // Skip when a socket is already open or still connecting, so at most one
    // live connection exists.
    const current = wsRef.current;
    if (
      current &&
      (current.readyState === WebSocket.OPEN || current.readyState === WebSocket.CONNECTING)
    ) {
      return;
    }
    setLoadingPrefs(true);

    const ws = new WebSocket(WS_URL);
    wsRef.current = ws;

    ws.onopen = () => {
      setIsConnected(true);
      const savedId = loadUserId();
      if (savedId) {
        ws.send(JSON.stringify({ type: 'identify', user_id: savedId }));
      } else {
        setLoadingPrefs(false);
      }
    };

    ws.onclose = () => {
      // Ignore close events from a socket that has already been replaced.
      if (wsRef.current !== ws) return;
      setIsConnected(false);
      if (unmountedRef.current) return;
      reconnectTimerRef.current = setTimeout(() => {
        reconnectTimerRef.current = null;
        connect();
      }, RECONNECT_DELAY_MS);
    };

    ws.onmessage = (event) => {
      if (typeof event.data !== 'string') return;
      try {
        const parsed: unknown = JSON.parse(event.data);
        if (typeof parsed === 'object' && parsed !== null) {
          // Fields are checked individually by handleMessage.
          handleMessage(parsed as ServerMessage);
        }
      } catch (err) {
        // Malformed frames are non-fatal, but leave a trace for debugging.
        console.warn('[Kira] Failed to process server message', err);
      }
    };
  }, [handleMessage]);

  // ── Identity ──

  /** Derive a user id from `name`, persist it, and announce it to the server. */
  const identify = useCallback((name: string) => {
    const userId = `kira-${name.toLowerCase().replace(/[^a-z0-9]/g, '-')}`;
    saveUserId(userId);
    setPreferences((p) => ({ ...p, name }));

    if (wsRef.current?.readyState === WebSocket.OPEN) {
      wsRef.current.send(JSON.stringify({ type: 'identify', user_id: userId, name }));
    }
  }, []);

  // ── Preferences ──

  /** Update one preference locally and, once identified and connected, on the server. */
  const setPreference = useCallback(
    (key: string, value: string) => {
      setPreferences((p) => ({ ...p, [key]: value }));
      if (wsRef.current?.readyState === WebSocket.OPEN && identified) {
        wsRef.current.send(JSON.stringify({ type: 'set_preference', key, value }));
      }
    },
    [identified],
  );

  // ── Audio (MediaRecorder utterance capture) ──

  /**
   * Start recording the microphone. The full utterance is sent to the server
   * as one base64 `audio` message when the recorder stops.
   */
  const startRecording = useCallback(async () => {
    if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
      addMessage('kira', 'Mic requires HTTPS. Try accessing via HTTPS!');
      return;
    }
    if (recorderRef.current?.state === 'recording') return;

    let stream: MediaStream | null = null;
    try {
      setMicError(null);
      stream = await navigator.mediaDevices.getUserMedia({
        audio: { echoCancellation: true, noiseSuppression: true },
      });

      // State may have changed while the permission prompt was open.
      if (unmountedRef.current || recorderRef.current?.state === 'recording') {
        stopTracks(stream);
        return;
      }

      const ws = wsRef.current;
      if (!ws || ws.readyState !== WebSocket.OPEN) {
        addMessage('kira', 'Not connected to server yet...');
        stopTracks(stream);
        return;
      }

      const activeStream = stream;
      streamRef.current = activeStream;

      // Use MediaRecorder for full utterance blob (Opus/webm) — sent on stop for REST STT
      const recorder = new MediaRecorder(activeStream, { mimeType: 'audio/webm;codecs=opus' });
      const chunks: Blob[] = [];

      const sendRecording = async (recording: Blob): Promise<void> => {
        try {
          const base64 = arrayBufferToBase64(await recording.arrayBuffer());
          // Use the current socket: the one open at start may have been
          // replaced by a reconnect while recording.
          const socket = wsRef.current;
          if (socket?.readyState === WebSocket.OPEN) {
            socket.send(JSON.stringify({ type: 'audio', data: base64 }));
          } else {
            console.warn('[Kira Mic] Recording dropped: not connected');
          }
        } catch (err) {
          console.error('[Kira Mic] Failed to send recording', err);
        }
      };

      recorder.ondataavailable = (e) => {
        if (e.data.size > 0) chunks.push(e.data);
      };
      recorder.onstop = () => {
        const recording = chunks.length > 0 ? new Blob(chunks, { type: 'audio/webm' }) : null;
        chunks.length = 0;
        stopTracks(activeStream);
        if (streamRef.current === activeStream) streamRef.current = null;
        if (recorderRef.current === recorder) recorderRef.current = null;
        setIsRecording(false);
        if (recording) void sendRecording(recording);
      };

      recorderRef.current = recorder;
      recorder.start();
      setIsRecording(true);
    } catch (err) {
      // Release the microphone if anything failed after it was acquired
      // (e.g. the MediaRecorder constructor rejecting the mime type).
      stopTracks(stream);
      if (streamRef.current === stream) streamRef.current = null;
      const msg = err instanceof Error ? err.message : String(err);
      setMicError(msg);
      console.error('[Kira Mic]', msg);
    }
  }, [addMessage]);

  /** Stop the active recording; the recorder's `onstop` sends the audio and cleans up. */
  const stopRecording = useCallback(() => {
    if (recorderRef.current && recorderRef.current.state === 'recording') {
      recorderRef.current.stop();
      // onstop will handle sending the blob and cleanup
    } else {
      // fallback cleanup
      stopTracks(streamRef.current);
      streamRef.current = null;
      setIsRecording(false);
    }
    captureRef.current = null; // legacy
  }, []);

  // ── Text ──

  /** Send a typed message; blank input is ignored, as is input while disconnected. */
  const sendText = useCallback((text: string) => {
    if (!text.trim()) return;
    if (wsRef.current?.readyState === WebSocket.OPEN) {
      wsRef.current.send(JSON.stringify({ type: 'conversation_text', text: text.trim() }));
    }
  }, []);

  // Connect on mount
  useEffect(() => {
    unmountedRef.current = false;
    connect();

    return () => {
      unmountedRef.current = true;

      if (reconnectTimerRef.current !== null) {
        clearTimeout(reconnectTimerRef.current);
        reconnectTimerRef.current = null;
      }

      const ws = wsRef.current;
      if (ws) {
        // Detach handlers first so closing cannot schedule a reconnect or
        // update state after unmount.
        ws.onopen = null;
        ws.onmessage = null;
        ws.onclose = null;
        ws.close();
        wsRef.current = null;
      }

      if (recorderRef.current && recorderRef.current.state === 'recording') {
        recorderRef.current.stop();
      }
      captureRef.current?.stop();
      stopTracks(streamRef.current);
      streamRef.current = null;
    };
  }, [connect]);

  return {
    messages,
    isConnected,
    isKiraSpeaking,
    isRecording,
    identified,
    preferences,
    loadingPrefs,
    micError,
    identify,
    setPreference,
    sendText,
    startRecording,
    stopRecording,
  };
}

/**
 * Create an id for a transcript entry. `crypto.randomUUID` exists only in
 * secure contexts, so pages served over plain HTTP get a time + random id.
 */
function createMessageId(): string {
  if (typeof crypto !== 'undefined' && typeof crypto.randomUUID === 'function') {
    return crypto.randomUUID();
  }
  return `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 10)}`;
}

/** Stop every track of `stream`, releasing the microphone; no-op for null. */
function stopTracks(stream: MediaStream | null): void {
  stream?.getTracks().forEach((track) => track.stop());
}

// ── Helpers ──

/**
 * Encode the bytes of `buffer` as a base64 string.
 *
 * Each byte is mapped to the character with the same code, and the resulting
 * binary string is passed to `btoa`.
 *
 * @param buffer - Raw bytes to encode.
 * @returns The base64 text; an empty string for an empty buffer.
 */
function arrayBufferToBase64(buffer: ArrayBufferLike): string {
  const view = new Uint8Array(buffer);
  const chars = Array.from(view, (byte) => String.fromCharCode(byte));
  return btoa(chars.join(''));
}

/**
 * Capture PCM16 mono 24kHz audio from mic and send via callback.
 *
 * An `AudioContext` at 24000 Hz feeds the stream through a one-channel
 * `ScriptProcessorNode` with 4096-frame buffers. Each buffer is clamped to
 * [-1, 1], scaled to signed 16-bit integers and passed to `onChunk` as the
 * raw bytes of an `Int16Array` (the platform's native byte order).
 *
 * @param stream - Microphone stream to read from.
 * @param onChunk - Called with the PCM16 bytes of every processed buffer.
 * @returns A handle whose `stop()` ends the capture and closes the context.
 *          `stop()` may be called more than once; later calls do nothing.
 */
function startPCMCapture(
  stream: MediaStream,
  onChunk: (pcm16: Uint8Array) => void,
): { stop: () => void } {
  const ctx = new AudioContext({ sampleRate: 24000 });
  const source = ctx.createMediaStreamSource(stream);
  const processor = ctx.createScriptProcessor(4096, 1, 1);
  let running = true;

  processor.onaudioprocess = (e) => {
    if (!running) return;
    const input = e.inputBuffer.getChannelData(0);
    const pcm16 = new Int16Array(input.length);
    for (let i = 0; i < input.length; i++) {
      const s = Math.max(-1, Math.min(1, input[i]));
      // Asymmetric scale: negatives reach -32768, positives stop at 32767.
      pcm16[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
    }
    onChunk(new Uint8Array(pcm16.buffer));
  };

  source.connect(processor);
  // The processor is wired to the destination as in the original graph.
  processor.connect(ctx.destination);

  return {
    stop: () => {
      // Closing an already closed AudioContext rejects, so only stop once.
      if (!running) return;
      running = false;
      processor.onaudioprocess = null;
      source.disconnect();
      processor.disconnect();
      void ctx.close().catch((err: unknown) => {
        console.warn('[Kira Mic] Failed to close AudioContext', err);
      });
    },
  };
}