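Fixed cost tracking for cached tokens (bill cache hits at the discounted rate; stop counting cache-miss tokens as cache writes)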
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled

This commit is contained in:
2026-03-11 16:21:32 +00:00
parent c2595f7a74
commit 3ab00fb188
4 changed files with 64 additions and 28 deletions

Binary file not shown.

View File

@@ -128,7 +128,9 @@ impl super::Provider for DeepSeekProvider {
cache_write_tokens: u32, cache_write_tokens: u32,
registry: &crate::models::registry::ModelRegistry, registry: &crate::models::registry::ModelRegistry,
) -> f64 { ) -> f64 {
helpers::calculate_cost_with_registry( if let Some(metadata) = registry.find_model(model) {
if metadata.cost.is_some() {
return helpers::calculate_cost_with_registry(
model, model,
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
@@ -136,9 +138,26 @@ impl super::Provider for DeepSeekProvider {
cache_write_tokens, cache_write_tokens,
registry, registry,
&self.pricing, &self.pricing,
0.14,
0.28, 0.28,
) 0.42,
);
}
}
// Custom DeepSeek fallback that correctly handles cache hits
let (prompt_rate, completion_rate) = self
.pricing
.iter()
.find(|p| model.contains(&p.model))
.map(|p| (p.prompt_tokens_per_million, p.completion_tokens_per_million))
.unwrap_or((0.28, 0.42)); // Default to DeepSeek's current API pricing
let cache_hit_rate = prompt_rate / 10.0;
let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
(non_cached_prompt as f64 * prompt_rate / 1_000_000.0)
+ (cache_read_tokens as f64 * cache_hit_rate / 1_000_000.0)
+ (completion_tokens as f64 * completion_rate / 1_000_000.0)
} }
async fn chat_completion_stream( async fn chat_completion_stream(

View File

@@ -772,7 +772,9 @@ impl super::Provider for GeminiProvider {
cache_write_tokens: u32, cache_write_tokens: u32,
registry: &crate::models::registry::ModelRegistry, registry: &crate::models::registry::ModelRegistry,
) -> f64 { ) -> f64 {
super::helpers::calculate_cost_with_registry( if let Some(metadata) = registry.find_model(model) {
if metadata.cost.is_some() {
return super::helpers::calculate_cost_with_registry(
model, model,
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
@@ -782,7 +784,24 @@ impl super::Provider for GeminiProvider {
&self.pricing, &self.pricing,
0.075, 0.075,
0.30, 0.30,
) );
}
}
// Custom Gemini fallback that correctly handles cache hits (25% of input cost)
let (prompt_rate, completion_rate) = self
.pricing
.iter()
.find(|p| model.contains(&p.model))
.map(|p| (p.prompt_tokens_per_million, p.completion_tokens_per_million))
.unwrap_or((0.075, 0.30)); // Default to Gemini 1.5 Flash current API pricing
let cache_hit_rate = prompt_rate * 0.25;
let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
(non_cached_prompt as f64 * prompt_rate / 1_000_000.0)
+ (cache_read_tokens as f64 * cache_hit_rate / 1_000_000.0)
+ (completion_tokens as f64 * completion_rate / 1_000_000.0)
} }
async fn chat_completion_stream( async fn chat_completion_stream(

View File

@@ -261,9 +261,9 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
.or_else(|| usage["prompt_cache_hit_tokens"].as_u64()) .or_else(|| usage["prompt_cache_hit_tokens"].as_u64())
.unwrap_or(0) as u32; .unwrap_or(0) as u32;
// DeepSeek reports cache_write as prompt_cache_miss_tokens (tokens written to cache for future use). // DeepSeek reports prompt_cache_miss_tokens which are just regular non-cached tokens.
// OpenAI doesn't report cache_write in this location, but may in the future. // They do not incur a separate cache_write fee, so we don't map them here to avoid double-charging.
let cache_write_tokens = usage["prompt_cache_miss_tokens"].as_u64().unwrap_or(0) as u32; let cache_write_tokens = 0;
Ok(ProviderResponse { Ok(ProviderResponse {
content, content,
@@ -300,9 +300,7 @@ pub fn parse_openai_stream_chunk(
.or_else(|| u["prompt_cache_hit_tokens"].as_u64()) .or_else(|| u["prompt_cache_hit_tokens"].as_u64())
.unwrap_or(0) as u32; .unwrap_or(0) as u32;
let cache_write_tokens = u["prompt_cache_miss_tokens"] let cache_write_tokens = 0;
.as_u64()
.unwrap_or(0) as u32;
Some(StreamUsage { Some(StreamUsage {
prompt_tokens, prompt_tokens,