perf: eliminate per-request SQLite queries and optimize proxy latency
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled

- Add in-memory ModelConfigCache (30s refresh, explicit invalidation)
  replacing 2 SQLite queries per request (model lookup + cost override)
- Configure all 5 provider HTTP clients with proper timeouts (300s),
  connection pooling (4 idle/host, 90s idle timeout), and TCP keepalive
- Move client_usage update to tokio::spawn in non-streaming path
- Use fast chars/4 heuristic for token estimation on large inputs (>1KB)
- Generate single UUID/timestamp per SSE stream instead of per chunk
- Add shared LazyLock<Client> for image fetching in multimodal module
- Add proxy overhead timing instrumentation for both request paths
- Fix test helper to include new model_config_cache field
This commit is contained in:
2026-03-02 12:53:22 -05:00
parent e4cf088071
commit 8d50ce7c22
13 changed files with 232 additions and 74 deletions

View File

@@ -8,8 +8,20 @@
use anyhow::{Context, Result};
use base64::{Engine as _, engine::general_purpose};
use std::sync::LazyLock;
use tracing::{info, warn};
/// Process-wide `reqwest` client used for every image download.
///
/// Keeping one lazily-initialised client means image fetches go through a
/// single shared instance rather than building a fresh client (and a fresh
/// TCP+TLS connection) for each image URL.
///
/// Settings applied at construction:
/// - connect timeout: 5 seconds
/// - overall request timeout: 30 seconds
/// - pooled idle-connection timeout: 60 seconds
///
/// # Panics
///
/// Panics on first access if the client builder fails.
static IMAGE_CLIENT: LazyLock<reqwest::Client> = LazyLock::new(|| {
    use std::time::Duration;

    let builder = reqwest::Client::builder()
        .connect_timeout(Duration::from_secs(5))
        .timeout(Duration::from_secs(30))
        .pool_idle_timeout(Duration::from_secs(60));

    builder.build().expect("Failed to build image HTTP client")
});
/// Supported image formats for multimodal input
#[derive(Debug, Clone)]
pub enum ImageInput {
@@ -55,9 +67,13 @@ impl ImageInput {
Ok((base64_data, mime_type.clone()))
}
Self::Url(url) => {
// Fetch image from URL
// Fetch image from URL using shared client
info!("Fetching image from URL: {}", url);
let response = reqwest::get(url).await.context("Failed to fetch image from URL")?;
let response = IMAGE_CLIENT
.get(url)
.send()
.await
.context("Failed to fetch image from URL")?;
if !response.status().is_success() {
anyhow::bail!("Failed to fetch image: HTTP {}", response.status());