perf: eliminate per-request SQLite queries and optimize proxy latency
- Add in-memory ModelConfigCache (30s refresh, explicit invalidation) replacing 2 SQLite queries per request (model lookup + cost override) - Configure all 5 provider HTTP clients with proper timeouts (300s), connection pooling (4 idle/host, 90s idle timeout), and TCP keepalive - Move client_usage update to tokio::spawn in non-streaming path - Use fast chars/4 heuristic for token estimation on large inputs (>1KB) - Generate single UUID/timestamp per SSE stream instead of per chunk - Add shared LazyLock<Client> for image fetching in multimodal module - Add proxy overhead timing instrumentation for both request paths - Fix test helper to include new model_config_cache field
This commit is contained in:
@@ -20,8 +20,16 @@ impl GrokProvider {
|
||||
}
|
||||
|
||||
pub fn new_with_key(config: &crate::config::GrokConfig, app_config: &AppConfig, api_key: String) -> Result<Self> {
|
||||
let client = reqwest::Client::builder()
|
||||
.connect_timeout(std::time::Duration::from_secs(5))
|
||||
.timeout(std::time::Duration::from_secs(300))
|
||||
.pool_idle_timeout(std::time::Duration::from_secs(90))
|
||||
.pool_max_idle_per_host(4)
|
||||
.tcp_keepalive(std::time::Duration::from_secs(30))
|
||||
.build()?;
|
||||
|
||||
Ok(Self {
|
||||
client: reqwest::Client::new(),
|
||||
client,
|
||||
config: config.clone(),
|
||||
api_key,
|
||||
pricing: app_config.pricing.grok.clone(),
|
||||
|
||||
Reference in New Issue
Block a user