- Add in-memory ModelConfigCache (30s refresh, explicit invalidation) replacing two SQLite queries per request (model lookup + cost override)
- Configure all five provider HTTP clients with proper timeouts (300s), connection pooling (4 idle connections/host, 90s idle timeout), and TCP keepalive
- Move the client_usage update into tokio::spawn in the non-streaming path
- Use a fast chars/4 heuristic for token estimation on large inputs (>1 KB)
- Generate a single UUID/timestamp per SSE stream instead of one per chunk
- Add a shared LazyLock<Client> for image fetching in the multimodal module
- Add proxy-overhead timing instrumentation for both request paths
- Fix the test helper to include the new model_config_cache field
152 lines · 5.4 KiB · Rust
//! LLM Proxy Library
|
|
//!
|
|
//! This library provides the core functionality for the LLM proxy gateway,
|
|
//! including provider integration, token tracking, and API endpoints.
|
|
|
|
pub mod auth;
|
|
pub mod client;
|
|
pub mod config;
|
|
pub mod dashboard;
|
|
pub mod database;
|
|
pub mod errors;
|
|
pub mod logging;
|
|
pub mod models;
|
|
pub mod multimodal;
|
|
pub mod providers;
|
|
pub mod rate_limiting;
|
|
pub mod server;
|
|
pub mod state;
|
|
pub mod utils;
|
|
|
|
// Re-exports for convenience
|
|
pub use auth::{AuthenticatedClient, validate_token};
|
|
pub use config::{
|
|
AppConfig, DatabaseConfig, DeepSeekConfig, GeminiConfig, GrokConfig, ModelMappingConfig, ModelPricing,
|
|
OllamaConfig, OpenAIConfig, PricingConfig, ProviderConfig, ServerConfig,
|
|
};
|
|
pub use database::{DbPool, init as init_db, test_connection};
|
|
pub use errors::AppError;
|
|
pub use logging::{LoggingContext, RequestLog, RequestLogger};
|
|
pub use models::{
|
|
ChatChoice, ChatCompletionRequest, ChatCompletionResponse, ChatCompletionStreamResponse, ChatMessage,
|
|
ChatStreamChoice, ChatStreamDelta, ContentPart, ContentPartValue, FromOpenAI, ImageUrl, MessageContent,
|
|
OpenAIContentPart, OpenAIMessage, OpenAIRequest, ToOpenAI, UnifiedMessage, UnifiedRequest, Usage,
|
|
};
|
|
pub use providers::{Provider, ProviderManager, ProviderResponse, ProviderStreamChunk};
|
|
pub use server::router;
|
|
pub use state::AppState;
|
|
|
|
/// Test utilities for integration testing
#[cfg(test)]
pub mod test_utils {
    use std::sync::Arc;

    use crate::{client::ClientManager, providers::ProviderManager, rate_limiting::RateLimitManager, state::AppState};
    use sqlx::sqlite::SqlitePool;

    /// Build the static test `AppConfig`: every provider enabled but pointed at
    /// empty base URLs / default models, no pricing entries, no model-mapping
    /// patterns, and no auth tokens.
    fn test_config() -> Arc<crate::config::AppConfig> {
        Arc::new(crate::config::AppConfig {
            server: crate::config::ServerConfig {
                port: 8080,
                host: "127.0.0.1".to_string(),
                auth_tokens: vec![],
            },
            database: crate::config::DatabaseConfig {
                path: std::path::PathBuf::from(":memory:"),
                max_connections: 5,
            },
            providers: crate::config::ProviderConfig {
                openai: crate::config::OpenAIConfig {
                    api_key_env: "OPENAI_API_KEY".to_string(),
                    base_url: "".to_string(),
                    default_model: "".to_string(),
                    enabled: true,
                },
                gemini: crate::config::GeminiConfig {
                    api_key_env: "GEMINI_API_KEY".to_string(),
                    base_url: "".to_string(),
                    default_model: "".to_string(),
                    enabled: true,
                },
                deepseek: crate::config::DeepSeekConfig {
                    api_key_env: "DEEPSEEK_API_KEY".to_string(),
                    base_url: "".to_string(),
                    default_model: "".to_string(),
                    enabled: true,
                },
                grok: crate::config::GrokConfig {
                    api_key_env: "GROK_API_KEY".to_string(),
                    base_url: "".to_string(),
                    default_model: "".to_string(),
                    enabled: true,
                },
                ollama: crate::config::OllamaConfig {
                    base_url: "".to_string(),
                    enabled: true,
                    models: vec![],
                },
            },
            model_mapping: crate::config::ModelMappingConfig { patterns: vec![] },
            pricing: crate::config::PricingConfig {
                openai: vec![],
                gemini: vec![],
                deepseek: vec![],
                grok: vec![],
                ollama: vec![],
            },
            config_path: None,
        })
    }

    /// Create a test application state backed by an in-memory SQLite database.
    ///
    /// The returned state wires up default rate limiting, an empty model
    /// registry, a fresh `ModelConfigCache`, and a dashboard broadcast channel
    /// whose initial receiver is dropped (subscribers attach later via
    /// `dashboard_tx`).
    ///
    /// # Panics
    /// Panics if the in-memory database cannot be created or initialized —
    /// acceptable in test-only code.
    pub async fn create_test_state() -> Arc<AppState> {
        // Create in-memory database
        let pool = SqlitePool::connect("sqlite::memory:")
            .await
            .expect("Failed to create test database");

        // Run migrations
        // NOTE(review): `database::init` is handed its own `:memory:` path, so
        // it may open a *separate* in-memory database from `pool` above —
        // confirm the migrations actually reach the pool used by AppState.
        crate::database::init(&crate::config::DatabaseConfig {
            path: std::path::PathBuf::from(":memory:"),
            max_connections: 5,
        })
        .await
        .expect("Failed to initialize test database");

        let rate_limit_manager = RateLimitManager::new(
            crate::rate_limiting::RateLimiterConfig::default(),
            crate::rate_limiting::CircuitBreakerConfig::default(),
        );

        let client_manager = Arc::new(ClientManager::new(pool.clone()));

        // Create provider manager
        let provider_manager = ProviderManager::new();

        // Empty registry: tests that need models should populate it themselves.
        let model_registry = crate::models::registry::ModelRegistry {
            providers: std::collections::HashMap::new(),
        };

        let (dashboard_tx, _) = tokio::sync::broadcast::channel(100);

        Arc::new(AppState {
            config: test_config(),
            provider_manager,
            db_pool: pool.clone(),
            rate_limit_manager: Arc::new(rate_limit_manager),
            client_manager,
            request_logger: Arc::new(crate::logging::RequestLogger::new(pool.clone(), dashboard_tx.clone())),
            model_registry: Arc::new(model_registry),
            model_config_cache: crate::state::ModelConfigCache::new(pool.clone()),
            dashboard_tx,
            auth_tokens: vec![],
        })
    }

    /// Create a test HTTP client with a 30-second overall request timeout.
    ///
    /// # Panics
    /// Panics if the client cannot be built (TLS backend initialization
    /// failure) — acceptable in test-only code.
    pub fn create_test_client() -> reqwest::Client {
        reqwest::Client::builder()
            .timeout(std::time::Duration::from_secs(30))
            .build()
            .expect("Failed to create test HTTP client")
    }
}