merge
This commit is contained in:
@@ -137,8 +137,23 @@ async fn get_model_cost(
|
||||
// Check in-memory cache for cost overrides (no SQLite hit)
|
||||
if let Some(cached) = state.model_config_cache.get(model).await {
|
||||
if let (Some(p), Some(c)) = (cached.prompt_cost_per_m, cached.completion_cost_per_m) {
|
||||
// Manual overrides don't have cache-specific rates, so use simple formula
|
||||
return (prompt_tokens as f64 * p / 1_000_000.0) + (completion_tokens as f64 * c / 1_000_000.0);
|
||||
// Manual overrides logic: if cache rates are provided, use cache-aware formula.
|
||||
// Formula: (non_cached_prompt * input_rate) + (cache_read * read_rate) + (cache_write * write_rate) + (completion * output_rate)
|
||||
let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
|
||||
let mut total = (non_cached_prompt as f64 * p / 1_000_000.0) + (completion_tokens as f64 * c / 1_000_000.0);
|
||||
|
||||
if let Some(cr) = cached.cache_read_cost_per_m {
|
||||
total += cache_read_tokens as f64 * cr / 1_000_000.0;
|
||||
} else {
|
||||
// No manual cache_read rate — charge cached tokens at full input rate (backwards compatibility)
|
||||
total += cache_read_tokens as f64 * p / 1_000_000.0;
|
||||
}
|
||||
|
||||
if let Some(cw) = cached.cache_write_cost_per_m {
|
||||
total += cache_write_tokens as f64 * cw / 1_000_000.0;
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user