merge
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled

This commit is contained in:
2026-03-06 15:22:06 -05:00
parent a243a3987d
commit e8955fd36c
13 changed files with 150 additions and 25 deletions

View File

@@ -137,8 +137,23 @@ async fn get_model_cost(
// Check in-memory cache for cost overrides (no SQLite hit)
if let Some(cached) = state.model_config_cache.get(model).await {
if let (Some(p), Some(c)) = (cached.prompt_cost_per_m, cached.completion_cost_per_m) {
// Manual overrides don't have cache-specific rates, so use simple formula
return (prompt_tokens as f64 * p / 1_000_000.0) + (completion_tokens as f64 * c / 1_000_000.0);
// Manual overrides logic: if cache rates are provided, use cache-aware formula.
// Formula: (non_cached_prompt * input_rate) + (cache_read * read_rate) + (cache_write * write_rate) + (completion * output_rate)
let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
let mut total = (non_cached_prompt as f64 * p / 1_000_000.0) + (completion_tokens as f64 * c / 1_000_000.0);
if let Some(cr) = cached.cache_read_cost_per_m {
total += cache_read_tokens as f64 * cr / 1_000_000.0;
} else {
// No manual cache_read rate — charge cached tokens at full input rate (backwards compatibility)
total += cache_read_tokens as f64 * p / 1_000_000.0;
}
if let Some(cw) = cached.cache_write_cost_per_m {
total += cache_write_tokens as f64 * cw / 1_000_000.0;
}
return total;
}
}