perf: eliminate per-request SQLite queries and optimize proxy latency
- Add in-memory ModelConfigCache (30s refresh, explicit invalidation) replacing 2 SQLite queries per request (model lookup + cost override)
- Configure all 5 provider HTTP clients with proper timeouts (300s), connection pooling (4 idle/host, 90s idle timeout), and TCP keepalive
- Move client_usage update to tokio::spawn in non-streaming path
- Use fast chars/4 heuristic for token estimation on large inputs (>1KB)
- Generate single UUID/timestamp per SSE stream instead of per chunk
- Add shared LazyLock<Client> for image fetching in multimodal module
- Add proxy overhead timing instrumentation for both request paths
- Fix test helper to include new model_config_cache field
This commit is contained in:
@@ -8,8 +8,20 @@
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use base64::{Engine as _, engine::general_purpose};
|
||||
use std::sync::LazyLock;
|
||||
use tracing::{info, warn};
|
||||
|
||||
/// Shared HTTP client for image fetching — avoids creating a new TCP+TLS
|
||||
/// connection for every image URL.
|
||||
static IMAGE_CLIENT: LazyLock<reqwest::Client> = LazyLock::new(|| {
|
||||
reqwest::Client::builder()
|
||||
.connect_timeout(std::time::Duration::from_secs(5))
|
||||
.timeout(std::time::Duration::from_secs(30))
|
||||
.pool_idle_timeout(std::time::Duration::from_secs(60))
|
||||
.build()
|
||||
.expect("Failed to build image HTTP client")
|
||||
});
|
||||
|
||||
/// Supported image formats for multimodal input
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ImageInput {
|
||||
@@ -55,9 +67,13 @@ impl ImageInput {
|
||||
Ok((base64_data, mime_type.clone()))
|
||||
}
|
||||
Self::Url(url) => {
|
||||
// Fetch image from URL
|
||||
// Fetch image from URL using shared client
|
||||
info!("Fetching image from URL: {}", url);
|
||||
let response = reqwest::get(url).await.context("Failed to fetch image from URL")?;
|
||||
let response = IMAGE_CLIENT
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to fetch image from URL")?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
anyhow::bail!("Failed to fetch image: HTTP {}", response.status());
|
||||
|
||||
Reference in New Issue
Block a user