refactor: comprehensive audit — fix bugs, harden security, deduplicate providers, add CI/Docker
Phase 1: Fix compilation (config_path Option<PathBuf>, streaming test, stale test cleanup)
Phase 2: Fix critical bugs (remove block_on deadlocks in 4 providers, fix broken SQL query builder)
Phase 3: Security hardening (session manager, real auth, token masking, Gemini key to header, password policy)
Phase 4: Implement stubs (real provider test, /proc health metrics, client/provider/backup endpoints, has_images)
Phase 5: Code quality (shared provider helpers, explicit re-exports, all Clippy warnings fixed, unwrap removal, 6 unused deps removed, dashboard split into 7 sub-modules)
Phase 6: Infrastructure (GitHub Actions CI, multi-stage Dockerfile, rustfmt.toml, clippy.toml, script fixes)
This commit is contained in:
@@ -1,22 +1,25 @@
|
||||
use std::sync::Arc;
|
||||
use sqlx::Row;
|
||||
use uuid::Uuid;
|
||||
use axum::{
|
||||
extract::State,
|
||||
routing::post,
|
||||
Json, Router,
|
||||
response::sse::{Event, Sse},
|
||||
extract::State,
|
||||
response::IntoResponse,
|
||||
response::sse::{Event, Sse},
|
||||
routing::post,
|
||||
};
|
||||
use futures::stream::StreamExt;
|
||||
use sqlx::Row;
|
||||
use std::sync::Arc;
|
||||
use tracing::{info, warn};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{
|
||||
auth::AuthenticatedClient,
|
||||
errors::AppError,
|
||||
models::{ChatCompletionRequest, ChatCompletionResponse, ChatCompletionStreamResponse, ChatStreamChoice, ChatStreamDelta, ChatMessage, ChatChoice, Usage},
|
||||
state::AppState,
|
||||
models::{
|
||||
ChatChoice, ChatCompletionRequest, ChatCompletionResponse, ChatCompletionStreamResponse, ChatMessage,
|
||||
ChatStreamChoice, ChatStreamDelta, Usage,
|
||||
},
|
||||
rate_limiting,
|
||||
state::AppState,
|
||||
};
|
||||
|
||||
pub fn router(state: AppState) -> Router {
|
||||
@@ -65,13 +68,13 @@ async fn chat_completions(
|
||||
if !state.auth_tokens.is_empty() && !state.auth_tokens.contains(&auth.token) {
|
||||
return Err(AppError::AuthError("Invalid authentication token".to_string()));
|
||||
}
|
||||
|
||||
|
||||
let start_time = std::time::Instant::now();
|
||||
let client_id = auth.client_id.clone();
|
||||
let model = request.model.clone();
|
||||
|
||||
|
||||
info!("Chat completion request from client {} for model {}", client_id, model);
|
||||
|
||||
|
||||
// Check if model is enabled in database and get potential mapping
|
||||
let model_config = sqlx::query("SELECT enabled, mapping FROM model_configs WHERE id = ?")
|
||||
.bind(&model)
|
||||
@@ -85,7 +88,10 @@ async fn chat_completions(
|
||||
};
|
||||
|
||||
if !model_enabled {
|
||||
return Err(AppError::ValidationError(format!("Model {} is currently disabled", model)));
|
||||
return Err(AppError::ValidationError(format!(
|
||||
"Model {} is currently disabled",
|
||||
model
|
||||
)));
|
||||
}
|
||||
|
||||
// Apply mapping if present
|
||||
@@ -95,53 +101,61 @@ async fn chat_completions(
|
||||
}
|
||||
|
||||
// Find appropriate provider for the model
|
||||
let provider = state.provider_manager.get_provider_for_model(&request.model).await
|
||||
let provider = state
|
||||
.provider_manager
|
||||
.get_provider_for_model(&request.model)
|
||||
.await
|
||||
.ok_or_else(|| AppError::ProviderError(format!("No provider found for model: {}", request.model)))?;
|
||||
|
||||
|
||||
let provider_name = provider.name().to_string();
|
||||
|
||||
|
||||
// Check circuit breaker for this provider
|
||||
rate_limiting::middleware::circuit_breaker_middleware(&provider_name, &state).await?;
|
||||
|
||||
// Convert to unified request format
|
||||
let mut unified_request = crate::models::UnifiedRequest::try_from(request)
|
||||
.map_err(|e| AppError::ValidationError(e.to_string()))?;
|
||||
|
||||
let mut unified_request =
|
||||
crate::models::UnifiedRequest::try_from(request).map_err(|e| AppError::ValidationError(e.to_string()))?;
|
||||
|
||||
// Set client_id from authentication
|
||||
unified_request.client_id = client_id.clone();
|
||||
|
||||
// Hydrate images if present
|
||||
if unified_request.has_images {
|
||||
unified_request.hydrate_images().await
|
||||
unified_request
|
||||
.hydrate_images()
|
||||
.await
|
||||
.map_err(|e| AppError::ValidationError(format!("Failed to process images: {}", e)))?;
|
||||
}
|
||||
|
||||
let has_images = unified_request.has_images;
|
||||
|
||||
// Check if streaming is requested
|
||||
if unified_request.stream {
|
||||
// Estimate prompt tokens for logging later
|
||||
let prompt_tokens = crate::utils::tokens::estimate_request_tokens(&model, &unified_request);
|
||||
let has_images = unified_request.has_images;
|
||||
|
||||
// Handle streaming response
|
||||
let stream_result = provider.chat_completion_stream(unified_request).await;
|
||||
|
||||
|
||||
match stream_result {
|
||||
Ok(stream) => {
|
||||
// Record provider success
|
||||
state.rate_limit_manager.record_provider_success(&provider_name).await;
|
||||
|
||||
|
||||
// Wrap with AggregatingStream for token counting and database logging
|
||||
let aggregating_stream = crate::utils::streaming::AggregatingStream::new(
|
||||
stream,
|
||||
client_id.clone(),
|
||||
provider.clone(),
|
||||
model.clone(),
|
||||
prompt_tokens,
|
||||
has_images,
|
||||
state.request_logger.clone(),
|
||||
state.client_manager.clone(),
|
||||
state.model_registry.clone(),
|
||||
state.db_pool.clone(),
|
||||
crate::utils::streaming::StreamConfig {
|
||||
client_id: client_id.clone(),
|
||||
provider: provider.clone(),
|
||||
model: model.clone(),
|
||||
prompt_tokens,
|
||||
has_images,
|
||||
logger: state.request_logger.clone(),
|
||||
client_manager: state.client_manager.clone(),
|
||||
model_registry: state.model_registry.clone(),
|
||||
db_pool: state.db_pool.clone(),
|
||||
},
|
||||
);
|
||||
|
||||
// Create SSE stream from aggregating stream
|
||||
@@ -164,8 +178,14 @@ async fn chat_completions(
|
||||
finish_reason: chunk.finish_reason,
|
||||
}],
|
||||
};
|
||||
|
||||
Ok(Event::default().json_data(response).unwrap())
|
||||
|
||||
match Event::default().json_data(response) {
|
||||
Ok(event) => Ok(event),
|
||||
Err(e) => {
|
||||
warn!("Failed to serialize SSE event: {}", e);
|
||||
Err(AppError::InternalError("SSE serialization failed".to_string()))
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Error in streaming response: {}", e);
|
||||
@@ -173,17 +193,17 @@ async fn chat_completions(
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
Ok(Sse::new(sse_stream).into_response())
|
||||
}
|
||||
Err(e) => {
|
||||
// Record provider failure
|
||||
state.rate_limit_manager.record_provider_failure(&provider_name).await;
|
||||
|
||||
|
||||
// Log failed request
|
||||
let duration = start_time.elapsed();
|
||||
warn!("Streaming request failed after {:?}: {}", duration, e);
|
||||
|
||||
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
@@ -193,12 +213,19 @@ async fn chat_completions(
|
||||
|
||||
match result {
|
||||
Ok(response) => {
|
||||
// Record provider success
|
||||
state.rate_limit_manager.record_provider_success(&provider_name).await;
|
||||
|
||||
let duration = start_time.elapsed();
|
||||
let cost = get_model_cost(&response.model, response.prompt_tokens, response.completion_tokens, &provider, &state).await;
|
||||
// Log request to database
|
||||
// Record provider success
|
||||
state.rate_limit_manager.record_provider_success(&provider_name).await;
|
||||
|
||||
let duration = start_time.elapsed();
|
||||
let cost = get_model_cost(
|
||||
&response.model,
|
||||
response.prompt_tokens,
|
||||
response.completion_tokens,
|
||||
&provider,
|
||||
&state,
|
||||
)
|
||||
.await;
|
||||
// Log request to database
|
||||
state.request_logger.log_request(crate::logging::RequestLog {
|
||||
timestamp: chrono::Utc::now(),
|
||||
client_id: client_id.clone(),
|
||||
@@ -208,18 +235,17 @@ async fn chat_completions(
|
||||
completion_tokens: response.completion_tokens,
|
||||
total_tokens: response.total_tokens,
|
||||
cost,
|
||||
has_images: false, // TODO: check images
|
||||
has_images,
|
||||
status: "success".to_string(),
|
||||
error_message: None,
|
||||
duration_ms: duration.as_millis() as u64,
|
||||
});
|
||||
|
||||
// Update client usage
|
||||
let _ = state.client_manager.update_client_usage(
|
||||
&client_id,
|
||||
response.total_tokens as i64,
|
||||
cost,
|
||||
).await;
|
||||
let _ = state
|
||||
.client_manager
|
||||
.update_client_usage(&client_id, response.total_tokens as i64, cost)
|
||||
.await;
|
||||
|
||||
// Convert ProviderResponse to ChatCompletionResponse
|
||||
let chat_response = ChatCompletionResponse {
|
||||
@@ -231,8 +257,8 @@ async fn chat_completions(
|
||||
index: 0,
|
||||
message: ChatMessage {
|
||||
role: "assistant".to_string(),
|
||||
content: crate::models::MessageContent::Text {
|
||||
content: response.content
|
||||
content: crate::models::MessageContent::Text {
|
||||
content: response.content,
|
||||
},
|
||||
reasoning_content: response.reasoning_content,
|
||||
},
|
||||
@@ -244,16 +270,16 @@ async fn chat_completions(
|
||||
total_tokens: response.total_tokens,
|
||||
}),
|
||||
};
|
||||
|
||||
|
||||
// Log successful request
|
||||
info!("Request completed successfully in {:?}", duration);
|
||||
|
||||
|
||||
Ok(Json(chat_response).into_response())
|
||||
}
|
||||
Err(e) => {
|
||||
// Record provider failure
|
||||
state.rate_limit_manager.record_provider_failure(&provider_name).await;
|
||||
|
||||
|
||||
// Log failed request to database
|
||||
let duration = start_time.elapsed();
|
||||
state.request_logger.log_request(crate::logging::RequestLog {
|
||||
@@ -272,7 +298,7 @@ async fn chat_completions(
|
||||
});
|
||||
|
||||
warn!("Request failed after {:?}: {}", duration, e);
|
||||
|
||||
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user