chore: initial clean commit

2026-02-26 13:56:21 -05:00
commit 1755075657
53 changed files with 18068 additions and 0 deletions
--- a/src/server/mod.rs
+++ b/src/server/mod.rs
@@ -0,0 +1,224 @@
+use uuid::Uuid;
+use axum::{
+    extract::State,
+    routing::post,
+    Json, Router,
+    response::sse::{Event, Sse},
+    response::IntoResponse,
+};
+use futures::stream::StreamExt;
+use tracing::{info, warn};
+
+use crate::{
+    auth::AuthenticatedClient,
+    errors::AppError,
+    models::{ChatCompletionRequest, ChatCompletionResponse, ChatCompletionStreamResponse, ChatStreamChoice, ChatStreamDelta, ChatMessage, ChatChoice, Usage},
+    state::AppState,
+    rate_limiting,
+};
+
+/// Builds the router that serves the chat completions endpoint.
+///
+/// Registers `POST /v1/chat/completions`, wraps it in the rate-limiting
+/// middleware (which receives its own clone of `state`), and finally binds
+/// `state` to the router.
+pub fn router(state: AppState) -> Router {
+    // Constructed up front so the builder chain below reads as a flat list.
+    let rate_limit_layer = axum::middleware::from_fn_with_state(
+        state.clone(),
+        rate_limiting::middleware::rate_limit_middleware,
+    );
+
+    Router::new()
+        .route("/v1/chat/completions", post(chat_completions))
+        .layer(rate_limit_layer)
+        .with_state(state)
+}
+
+/// Handles `POST /v1/chat/completions`.
+///
+/// Resolves a provider for the requested model, checks that provider's circuit
+/// breaker, converts the request into the unified format (hydrating images when
+/// present) and forwards it to the provider. Depending on the request's
+/// `stream` flag the reply is either a server-sent-event stream or a single
+/// JSON body.
+///
+/// # Errors
+///
+/// Returns `AppError::ProviderError` when no provider is registered for the
+/// model and `AppError::ValidationError` when request conversion or image
+/// hydration fails. Errors from the circuit breaker check and from the
+/// provider call are propagated to the caller.
+async fn chat_completions(
+    State(state): State<AppState>,
+    auth: AuthenticatedClient,
+    Json(request): Json<ChatCompletionRequest>,
+) -> Result<axum::response::Response, AppError> {
+    let start_time = std::time::Instant::now();
+    let client_id = auth.client_id.clone();
+    let model = request.model.clone();
+
+    info!("Chat completion request from client {} for model {}", client_id, model);
+
+    // Find appropriate provider for the model
+    let provider = state.provider_manager.get_provider_for_model(&request.model)
+        .ok_or_else(|| AppError::ProviderError(format!("No provider found for model: {}", request.model)))?;
+
+    let provider_name = provider.name().to_string();
+
+    // Check circuit breaker for this provider
+    rate_limiting::middleware::circuit_breaker_middleware(&provider_name, &state).await?;
+
+    // Convert to unified request format
+    let mut unified_request = crate::models::UnifiedRequest::try_from(request)
+        .map_err(|e| AppError::ValidationError(e.to_string()))?;
+
+    // Set client_id from authentication
+    unified_request.client_id = client_id.clone();
+
+    // Hydrate images if present
+    if unified_request.has_images {
+        unified_request.hydrate_images().await
+            .map_err(|e| AppError::ValidationError(format!("Failed to process images: {}", e)))?;
+    }
+
+    // Captured here because `unified_request` is moved into the provider call
+    // below, while every request log (success or failure) needs this flag.
+    let has_images = unified_request.has_images;
+
+    // Check if streaming is requested
+    if unified_request.stream {
+        // Estimate prompt tokens for logging later
+        let prompt_tokens = crate::utils::tokens::estimate_request_tokens(&model, &unified_request);
+
+        // Handle streaming response
+        let stream_result = provider.chat_completion_stream(unified_request).await;
+
+        match stream_result {
+            Ok(stream) => {
+                // Record provider success
+                state.rate_limit_manager.record_provider_success(&provider_name).await;
+
+                // Wrap with AggregatingStream for token counting and database logging
+                let aggregating_stream = crate::utils::streaming::AggregatingStream::new(
+                    stream,
+                    client_id.clone(),
+                    provider.clone(),
+                    model.clone(),
+                    prompt_tokens,
+                    has_images,
+                    state.request_logger.clone(),
+                    state.client_manager.clone(),
+                    state.model_registry.clone(),
+                );
+
+                // In the OpenAI streaming format every chunk of one completion
+                // shares the same id and creation timestamp, so both are
+                // generated once per stream rather than once per chunk.
+                let completion_id = format!("chatcmpl-{}", Uuid::new_v4());
+                let created = chrono::Utc::now().timestamp() as u64;
+
+                // Create SSE stream from aggregating stream
+                let sse_stream = aggregating_stream.map(move |chunk_result| {
+                    match chunk_result {
+                        Ok(chunk) => {
+                            // Convert provider chunk to OpenAI-compatible SSE event
+                            let response = ChatCompletionStreamResponse {
+                                id: completion_id.clone(),
+                                object: "chat.completion.chunk".to_string(),
+                                created,
+                                model: chunk.model,
+                                choices: vec![ChatStreamChoice {
+                                    index: 0,
+                                    delta: ChatStreamDelta {
+                                        role: None,
+                                        content: Some(chunk.content),
+                                        reasoning_content: chunk.reasoning_content,
+                                    },
+                                    finish_reason: chunk.finish_reason,
+                                }],
+                            };
+
+                            // NOTE(review): still panics if serialization fails;
+                            // assumed unreachable for this response type — confirm.
+                            Ok(Event::default()
+                                .json_data(response)
+                                .expect("stream chunk response should serialize to JSON"))
+                        }
+                        Err(e) => {
+                            warn!("Error in streaming response: {}", e);
+                            Err(e)
+                        }
+                    }
+                });
+
+                Ok(Sse::new(sse_stream).into_response())
+            }
+            Err(e) => {
+                // Record provider failure
+                state.rate_limit_manager.record_provider_failure(&provider_name).await;
+
+                // Log failed request to database, mirroring the non-streaming
+                // failure path so failed stream starts are not lost.
+                let duration = start_time.elapsed();
+                state.request_logger.log_request(crate::logging::RequestLog {
+                    timestamp: chrono::Utc::now(),
+                    client_id: client_id.clone(),
+                    provider: provider_name.clone(),
+                    model: model.clone(),
+                    prompt_tokens: 0,
+                    completion_tokens: 0,
+                    total_tokens: 0,
+                    cost: 0.0,
+                    has_images,
+                    status: "error".to_string(),
+                    error_message: Some(e.to_string()),
+                    duration_ms: duration.as_millis() as u64,
+                });
+
+                warn!("Streaming request failed after {:?}: {}", duration, e);
+
+                Err(e)
+            }
+        }
+    } else {
+        // Handle non-streaming response
+        let result = provider.chat_completion(unified_request).await;
+
+        match result {
+            Ok(response) => {
+                // Record provider success
+                state.rate_limit_manager.record_provider_success(&provider_name).await;
+
+                let duration = start_time.elapsed();
+                let cost = provider.calculate_cost(&response.model, response.prompt_tokens, response.completion_tokens, &state.model_registry);
+
+                // Log request to database
+                state.request_logger.log_request(crate::logging::RequestLog {
+                    timestamp: chrono::Utc::now(),
+                    client_id: client_id.clone(),
+                    provider: provider_name.clone(),
+                    model: response.model.clone(),
+                    prompt_tokens: response.prompt_tokens,
+                    completion_tokens: response.completion_tokens,
+                    total_tokens: response.total_tokens,
+                    cost,
+                    has_images,
+                    status: "success".to_string(),
+                    error_message: None,
+                    duration_ms: duration.as_millis() as u64,
+                });
+
+                // Update client usage; the result is deliberately discarded so
+                // a bookkeeping problem does not fail a completed request.
+                let _ = state.client_manager.update_client_usage(
+                    &client_id,
+                    response.total_tokens as i64,
+                    cost,
+                ).await;
+
+                // Convert ProviderResponse to ChatCompletionResponse
+                let chat_response = ChatCompletionResponse {
+                    id: format!("chatcmpl-{}", Uuid::new_v4()),
+                    object: "chat.completion".to_string(),
+                    created: chrono::Utc::now().timestamp() as u64,
+                    model: response.model,
+                    choices: vec![ChatChoice {
+                        index: 0,
+                        message: ChatMessage {
+                            role: "assistant".to_string(),
+                            content: crate::models::MessageContent::Text {
+                                content: response.content
+                            },
+                            reasoning_content: response.reasoning_content,
+                        },
+                        finish_reason: Some("stop".to_string()),
+                    }],
+                    usage: Some(Usage {
+                        prompt_tokens: response.prompt_tokens,
+                        completion_tokens: response.completion_tokens,
+                        total_tokens: response.total_tokens,
+                    }),
+                };
+
+                // Log successful request
+                info!("Request completed successfully in {:?}", duration);
+
+                Ok(Json(chat_response).into_response())
+            }
+            Err(e) => {
+                // Record provider failure
+                state.rate_limit_manager.record_provider_failure(&provider_name).await;
+
+                // Log failed request to database
+                let duration = start_time.elapsed();
+                state.request_logger.log_request(crate::logging::RequestLog {
+                    timestamp: chrono::Utc::now(),
+                    client_id: client_id.clone(),
+                    provider: provider_name.clone(),
+                    model: model.clone(),
+                    prompt_tokens: 0,
+                    completion_tokens: 0,
+                    total_tokens: 0,
+                    cost: 0.0,
+                    has_images,
+                    status: "error".to_string(),
+                    error_message: Some(e.to_string()),
+                    duration_ms: duration.as_millis() as u64,
+                });
+
+                warn!("Request failed after {:?}: {}", duration, e);
+
+                Err(e)
+            }
+        }
+    }
+}