Files
GopherGate/src/main.rs
hobokenchicken 8d50ce7c22
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
perf: eliminate per-request SQLite queries and optimize proxy latency
- Add in-memory ModelConfigCache (30s refresh, explicit invalidation)
  replacing 2 SQLite queries per request (model lookup + cost override)
- Configure all 5 provider HTTP clients with proper timeouts (300s),
  connection pooling (4 idle/host, 90s idle timeout), and TCP keepalive
- Move client_usage update to tokio::spawn in non-streaming path
- Use fast chars/4 heuristic for token estimation on large inputs (>1KB)
- Generate single UUID/timestamp per SSE stream instead of per chunk
- Add shared LazyLock<Client> for image fetching in multimodal module
- Add proxy overhead timing instrumentation for both request paths
- Fix test helper to include new model_config_cache field
2026-03-02 12:53:22 -05:00

92 lines
2.9 KiB
Rust

use anyhow::Result;
use axum::{Router, routing::get};
use std::net::SocketAddr;
use tracing::{error, info};
use llm_proxy::{
config::AppConfig,
dashboard, database,
providers::ProviderManager,
rate_limiting::{CircuitBreakerConfig, RateLimitManager, RateLimiterConfig},
server,
state::AppState,
};
#[tokio::main]
async fn main() -> Result<()> {
    // Initialize tracing (logging): INFO level, module targets suppressed.
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::INFO)
        .with_target(false)
        .init();
    info!("Starting LLM Proxy Gateway v{}", env!("CARGO_PKG_VERSION"));

    // Load configuration (path resolution happens inside AppConfig::load).
    let config = AppConfig::load().await?;
    info!("Configuration loaded from {:?}", config.config_path);

    // Initialize the database connection pool (file-backed; see config.database.path).
    let db_pool = database::init(&config.database).await?;
    info!("Database initialized at {:?}", config.database.path);

    // Initialize provider manager with configured providers.
    let provider_manager = ProviderManager::new();
    // Every supported provider is attempted; each one performs its own
    // `enabled` check. A failed initialization is logged and skipped instead
    // of aborting startup, so the gateway still serves whatever came up.
    // A fixed-size array avoids the heap allocation `vec!` would incur for
    // this static list (clippy::useless_vec).
    let supported_providers = ["openai", "gemini", "deepseek", "grok", "ollama"];
    for name in supported_providers {
        if let Err(e) = provider_manager.initialize_provider(name, &config, &db_pool).await {
            error!("Failed to initialize provider {}: {}", name, e);
        }
    }

    // Create rate limit manager with default limiter and circuit-breaker settings.
    let rate_limit_manager =
        RateLimitManager::new(RateLimiterConfig::default(), CircuitBreakerConfig::default());

    // Fetch the model registry from models.dev. A fetch failure degrades to an
    // empty registry so a network hiccup never prevents the proxy from starting.
    let model_registry = match llm_proxy::utils::registry::fetch_registry().await {
        Ok(registry) => registry,
        Err(e) => {
            error!("Failed to fetch model registry: {}. Using empty registry.", e);
            llm_proxy::models::registry::ModelRegistry {
                providers: std::collections::HashMap::new(),
            }
        }
    };

    // Assemble shared application state handed to every route handler.
    let state = AppState::new(
        config.clone(),
        provider_manager,
        db_pool,
        rate_limit_manager,
        model_registry,
        config.server.auth_tokens.clone(),
    );

    // Prime the model config cache once, then refresh it in the background
    // every 30 seconds (per the cache's design it is also invalidated
    // explicitly on writes).
    state.model_config_cache.refresh().await;
    state.model_config_cache.clone().start_refresh_task(30);
    info!("Model config cache initialized");

    // Compose the router: health probe plus the proxy and dashboard routes.
    let app = Router::new()
        .route("/health", get(health_check))
        .merge(server::router(state.clone()))
        .merge(dashboard::router(state.clone()));

    // Bind on all interfaces at the configured port and serve until stopped.
    let addr = SocketAddr::from(([0, 0, 0, 0], config.server.port));
    info!("Server listening on http://{}", addr);
    let listener = tokio::net::TcpListener::bind(&addr).await?;
    axum::serve(listener, app).await?;
    Ok(())
}
/// Liveness probe handler for `GET /health`.
///
/// Always responds with a static `"OK"` body; performs no I/O, so load
/// balancers get an answer even when downstream providers are degraded.
async fn health_check() -> &'static str {
    const HEALTHY: &'static str = "OK";
    HEALTHY
}