chore: initial clean commit
This commit is contained in:
3
src/utils/mod.rs
Normal file
3
src/utils/mod.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
pub mod tokens;
|
||||
pub mod registry;
|
||||
pub mod streaming;
|
||||
24
src/utils/registry.rs
Normal file
24
src/utils/registry.rs
Normal file
@@ -0,0 +1,24 @@
|
||||
use anyhow::{Context, Result};
use tracing::info;

use crate::models::registry::ModelRegistry;
|
||||
|
||||
const MODELS_DEV_URL: &str = "https://models.dev/api.json";
|
||||
|
||||
pub async fn fetch_registry() -> Result<ModelRegistry> {
|
||||
info!("Fetching model registry from {}", MODELS_DEV_URL);
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(10))
|
||||
.build()?;
|
||||
|
||||
let response = client.get(MODELS_DEV_URL).send().await?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
return Err(anyhow::anyhow!("Failed to fetch registry: HTTP {}", response.status()));
|
||||
}
|
||||
|
||||
let registry: ModelRegistry = response.json().await?;
|
||||
info!("Successfully loaded model registry");
|
||||
|
||||
Ok(registry)
|
||||
}
|
||||
200
src/utils/streaming.rs
Normal file
200
src/utils/streaming.rs
Normal file
@@ -0,0 +1,200 @@
|
||||
use futures::stream::Stream;
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use std::sync::Arc;
|
||||
use crate::logging::{RequestLogger, RequestLog};
|
||||
use crate::client::ClientManager;
|
||||
use crate::providers::{Provider, ProviderStreamChunk};
|
||||
use crate::errors::AppError;
|
||||
use crate::utils::tokens::estimate_completion_tokens;
|
||||
|
||||
/// Stream adapter that forwards provider chunks unchanged while accumulating
/// their text, so token usage and cost can be logged once the stream ends.
pub struct AggregatingStream<S> {
    /// The wrapped provider stream.
    inner: S,
    /// Identifier of the client that issued the request, used for usage logging.
    client_id: String,
    /// Provider handling this request; supplies the provider name and cost calculation.
    provider: Arc<dyn Provider>,
    /// Model name the request was made against.
    model: String,
    /// Prompt token count estimated before streaming started.
    prompt_tokens: u32,
    /// Whether the request contained image content (recorded in the log entry).
    has_images: bool,
    /// Completion text gathered from every chunk seen so far.
    accumulated_content: String,
    /// Reasoning text gathered from chunks that carry `reasoning_content`.
    accumulated_reasoning: String,
    /// Sink for the final request log entry.
    logger: Arc<RequestLogger>,
    /// Used to update the client's cumulative token/cost usage.
    client_manager: Arc<ClientManager>,
    /// Registry consulted for per-model pricing.
    model_registry: Arc<crate::models::registry::ModelRegistry>,
    /// Set at construction (not first poll); used to compute request duration.
    start_time: std::time::Instant,
    /// Guards `finalize()` so the request is logged at most once.
    has_logged: bool,
}
|
||||
|
||||
impl<S> AggregatingStream<S>
where
    S: Stream<Item = Result<ProviderStreamChunk, AppError>> + Unpin
{
    /// Wrap `inner`, capturing the request metadata needed for the final
    /// usage log. The duration clock starts here, at construction.
    pub fn new(
        inner: S,
        client_id: String,
        provider: Arc<dyn Provider>,
        model: String,
        prompt_tokens: u32,
        has_images: bool,
        logger: Arc<RequestLogger>,
        client_manager: Arc<ClientManager>,
        model_registry: Arc<crate::models::registry::ModelRegistry>,
    ) -> Self {
        Self {
            inner,
            client_id,
            provider,
            model,
            prompt_tokens,
            has_images,
            accumulated_content: String::new(),
            accumulated_reasoning: String::new(),
            logger,
            client_manager,
            model_registry,
            start_time: std::time::Instant::now(),
            has_logged: false,
        }
    }

    /// Estimate usage from the accumulated text, then fire-and-forget a task
    /// that logs the request and updates client usage. The `has_logged` guard
    /// makes this a no-op on every call after the first.
    ///
    /// Everything the spawned task needs is cloned up front because the task
    /// must be `'static` and cannot borrow from `self`.
    fn finalize(&mut self) {
        if self.has_logged {
            return;
        }
        self.has_logged = true;

        let duration = self.start_time.elapsed();
        let client_id = self.client_id.clone();
        let provider_name = self.provider.name().to_string();
        let model = self.model.clone();
        let logger = self.logger.clone();
        let client_manager = self.client_manager.clone();
        let provider = self.provider.clone();
        let prompt_tokens = self.prompt_tokens;
        let has_images = self.has_images;
        let registry = self.model_registry.clone();

        // Estimate completion tokens (including reasoning if present)
        let content_tokens = estimate_completion_tokens(&self.accumulated_content, &model);
        let reasoning_tokens = if !self.accumulated_reasoning.is_empty() {
            estimate_completion_tokens(&self.accumulated_reasoning, &model)
        } else {
            0
        };

        let completion_tokens = content_tokens + reasoning_tokens;
        let total_tokens = prompt_tokens + completion_tokens;
        let cost = provider.calculate_cost(&model, prompt_tokens, completion_tokens, &registry);

        // Spawn a background task to log the completion
        tokio::spawn(async move {
            // Log to database
            // NOTE(review): log_request is not awaited and its result is not
            // checked — confirm it is a synchronous, infallible enqueue.
            logger.log_request(RequestLog {
                timestamp: chrono::Utc::now(),
                client_id: client_id.clone(),
                provider: provider_name,
                model,
                prompt_tokens,
                completion_tokens,
                total_tokens,
                cost,
                has_images,
                // NOTE(review): status is always "success", even when finalize
                // is reached from the stream's error path — confirm intended.
                status: "success".to_string(),
                error_message: None,
                duration_ms: duration.as_millis() as u64,
            });

            // Update client usage
            // Best-effort: a failed usage update is deliberately ignored.
            let _ = client_manager.update_client_usage(
                &client_id,
                total_tokens as i64,
                cost,
            ).await;
        });
    }
}
|
||||
|
||||
impl<S> Stream for AggregatingStream<S>
where
    S: Stream<Item = Result<ProviderStreamChunk, AppError>> + Unpin
{
    type Item = Result<ProviderStreamChunk, AppError>;

    /// Forward the inner stream's output unchanged, observing each item as it
    /// passes through: successful chunks are accumulated, and end-of-stream
    /// (or an error after some content) triggers `finalize()`.
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        // `S: Unpin`, so re-pinning the inner stream for polling is sound.
        let result = Pin::new(&mut self.inner).poll_next(cx);

        // Inspect by reference so `result` can still be returned untouched.
        match &result {
            Poll::Ready(Some(Ok(chunk))) => {
                self.accumulated_content.push_str(&chunk.content);
                if let Some(reasoning) = &chunk.reasoning_content {
                    self.accumulated_reasoning.push_str(reasoning);
                }
            }
            Poll::Ready(Some(Err(_))) => {
                // If there's an error, we might still want to log what we got so far?
                // For now, just finalize if we have content
                // NOTE(review): finalize() records status "success" even on
                // this error path — confirm that is intended.
                if !self.accumulated_content.is_empty() {
                    self.finalize();
                }
            }
            Poll::Ready(None) => {
                // Stream exhausted normally: log usage exactly once.
                self.finalize();
            }
            Poll::Pending => {}
        }

        result
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use futures::stream::{self, StreamExt};
    use anyhow::Result;

    /// Minimal `Provider` stub: constant name, flat token estimate and cost.
    /// The completion entry points are never exercised by these tests.
    struct MockProvider;

    #[async_trait::async_trait]
    impl Provider for MockProvider {
        fn name(&self) -> &str { "mock" }
        fn supports_model(&self, _model: &str) -> bool { true }
        fn supports_multimodal(&self) -> bool { false }
        async fn chat_completion(&self, _req: crate::models::UnifiedRequest) -> Result<crate::providers::ProviderResponse, AppError> { unimplemented!() }
        async fn chat_completion_stream(&self, _req: crate::models::UnifiedRequest) -> Result<futures::stream::BoxStream<'static, Result<ProviderStreamChunk, AppError>>, AppError> { unimplemented!() }
        fn estimate_tokens(&self, _req: &crate::models::UnifiedRequest) -> Result<u32> { Ok(10) }
        fn calculate_cost(&self, _model: &str, _p: u32, _c: u32, _r: &crate::models::registry::ModelRegistry) -> f64 { 0.05 }
    }

    /// Drives two chunks through the wrapper and checks that content is
    /// accumulated and the request was logged exactly once at end-of-stream.
    #[tokio::test]
    async fn test_aggregating_stream() {
        // Fix: `ProviderStreamChunk` has a `reasoning_content` field (poll_next
        // reads `chunk.reasoning_content`), so the struct literals must
        // initialize it or this test fails to compile.
        let chunks = vec![
            Ok(ProviderStreamChunk {
                content: "Hello".to_string(),
                reasoning_content: None,
                finish_reason: None,
                model: "test".to_string(),
            }),
            Ok(ProviderStreamChunk {
                content: " World".to_string(),
                reasoning_content: None,
                finish_reason: Some("stop".to_string()),
                model: "test".to_string(),
            }),
        ];
        let inner_stream = stream::iter(chunks);

        // In-memory database so the logger/client manager have a real pool.
        let pool = sqlx::SqlitePool::connect("sqlite::memory:").await.unwrap();
        let logger = Arc::new(RequestLogger::new(pool.clone()));
        let client_manager = Arc::new(ClientManager::new(pool.clone()));
        let registry = Arc::new(crate::models::registry::ModelRegistry { providers: std::collections::HashMap::new() });

        let mut agg_stream = AggregatingStream::new(
            inner_stream,
            "client_1".to_string(),
            Arc::new(MockProvider),
            "test".to_string(),
            10,
            false,
            logger,
            client_manager,
            registry,
        );

        while let Some(item) = agg_stream.next().await {
            assert!(item.is_ok());
        }

        assert_eq!(agg_stream.accumulated_content, "Hello World");
        assert!(agg_stream.has_logged);
    }
}
|
||||
51
src/utils/tokens.rs
Normal file
51
src/utils/tokens.rs
Normal file
@@ -0,0 +1,51 @@
|
||||
use tiktoken_rs::get_bpe_from_model;
|
||||
use crate::models::UnifiedRequest;
|
||||
|
||||
/// Count tokens for a given model and text
|
||||
pub fn count_tokens(model: &str, text: &str) -> u32 {
|
||||
// If we can't get the bpe for the model, fallback to a safe default (cl100k_base for GPT-4/o1)
|
||||
let bpe = get_bpe_from_model(model).unwrap_or_else(|_| {
|
||||
tiktoken_rs::cl100k_base().expect("Failed to get cl100k_base encoding")
|
||||
});
|
||||
|
||||
bpe.encode_with_special_tokens(text).len() as u32
|
||||
}
|
||||
|
||||
/// Estimate tokens for a unified request
|
||||
pub fn estimate_request_tokens(model: &str, request: &UnifiedRequest) -> u32 {
|
||||
let mut total_tokens = 0;
|
||||
|
||||
// Base tokens per message for OpenAI (approximate)
|
||||
let tokens_per_message = 3;
|
||||
let _tokens_per_name = 1;
|
||||
|
||||
for msg in &request.messages {
|
||||
total_tokens += tokens_per_message;
|
||||
|
||||
for part in &msg.content {
|
||||
match part {
|
||||
crate::models::ContentPart::Text { text } => {
|
||||
total_tokens += count_tokens(model, text);
|
||||
}
|
||||
crate::models::ContentPart::Image { .. } => {
|
||||
// Vision models usually have a fixed cost or calculation based on size
|
||||
// For now, let's use a conservative estimate of 1000 tokens
|
||||
total_tokens += 1000;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add name tokens if we had names (we don't in UnifiedMessage yet)
|
||||
// total_tokens += tokens_per_name;
|
||||
}
|
||||
|
||||
// Add 3 tokens for the assistant reply header
|
||||
total_tokens += 3;
|
||||
|
||||
total_tokens
|
||||
}
|
||||
|
||||
/// Estimate tokens for completion text
///
/// Thin wrapper over [`count_tokens`] with the arguments in `(text, model)`
/// order, matching how the streaming code accumulates completion text.
pub fn estimate_completion_tokens(text: &str, model: &str) -> u32 {
    count_tokens(model, text)
}
|
||||
Reference in New Issue
Block a user