fixed tracking
This commit is contained in:
Binary file not shown.
@@ -128,17 +128,36 @@ impl super::Provider for DeepSeekProvider {
|
||||
cache_write_tokens: u32,
|
||||
registry: &crate::models::registry::ModelRegistry,
|
||||
) -> f64 {
|
||||
helpers::calculate_cost_with_registry(
|
||||
model,
|
||||
prompt_tokens,
|
||||
completion_tokens,
|
||||
cache_read_tokens,
|
||||
cache_write_tokens,
|
||||
registry,
|
||||
&self.pricing,
|
||||
0.14,
|
||||
0.28,
|
||||
)
|
||||
if let Some(metadata) = registry.find_model(model) {
|
||||
if metadata.cost.is_some() {
|
||||
return helpers::calculate_cost_with_registry(
|
||||
model,
|
||||
prompt_tokens,
|
||||
completion_tokens,
|
||||
cache_read_tokens,
|
||||
cache_write_tokens,
|
||||
registry,
|
||||
&self.pricing,
|
||||
0.28,
|
||||
0.42,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Custom DeepSeek fallback that correctly handles cache hits
|
||||
let (prompt_rate, completion_rate) = self
|
||||
.pricing
|
||||
.iter()
|
||||
.find(|p| model.contains(&p.model))
|
||||
.map(|p| (p.prompt_tokens_per_million, p.completion_tokens_per_million))
|
||||
.unwrap_or((0.28, 0.42)); // Default to DeepSeek's current API pricing
|
||||
|
||||
let cache_hit_rate = prompt_rate / 10.0;
|
||||
let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
|
||||
|
||||
(non_cached_prompt as f64 * prompt_rate / 1_000_000.0)
|
||||
+ (cache_read_tokens as f64 * cache_hit_rate / 1_000_000.0)
|
||||
+ (completion_tokens as f64 * completion_rate / 1_000_000.0)
|
||||
}
|
||||
|
||||
async fn chat_completion_stream(
|
||||
|
||||
@@ -772,17 +772,36 @@ impl super::Provider for GeminiProvider {
|
||||
cache_write_tokens: u32,
|
||||
registry: &crate::models::registry::ModelRegistry,
|
||||
) -> f64 {
|
||||
super::helpers::calculate_cost_with_registry(
|
||||
model,
|
||||
prompt_tokens,
|
||||
completion_tokens,
|
||||
cache_read_tokens,
|
||||
cache_write_tokens,
|
||||
registry,
|
||||
&self.pricing,
|
||||
0.075,
|
||||
0.30,
|
||||
)
|
||||
if let Some(metadata) = registry.find_model(model) {
|
||||
if metadata.cost.is_some() {
|
||||
return super::helpers::calculate_cost_with_registry(
|
||||
model,
|
||||
prompt_tokens,
|
||||
completion_tokens,
|
||||
cache_read_tokens,
|
||||
cache_write_tokens,
|
||||
registry,
|
||||
&self.pricing,
|
||||
0.075,
|
||||
0.30,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Custom Gemini fallback that correctly handles cache hits (25% of input cost)
|
||||
let (prompt_rate, completion_rate) = self
|
||||
.pricing
|
||||
.iter()
|
||||
.find(|p| model.contains(&p.model))
|
||||
.map(|p| (p.prompt_tokens_per_million, p.completion_tokens_per_million))
|
||||
.unwrap_or((0.075, 0.30)); // Default to Gemini 1.5 Flash current API pricing
|
||||
|
||||
let cache_hit_rate = prompt_rate * 0.25;
|
||||
let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
|
||||
|
||||
(non_cached_prompt as f64 * prompt_rate / 1_000_000.0)
|
||||
+ (cache_read_tokens as f64 * cache_hit_rate / 1_000_000.0)
|
||||
+ (completion_tokens as f64 * completion_rate / 1_000_000.0)
|
||||
}
|
||||
|
||||
async fn chat_completion_stream(
|
||||
|
||||
@@ -261,9 +261,9 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
|
||||
.or_else(|| usage["prompt_cache_hit_tokens"].as_u64())
|
||||
.unwrap_or(0) as u32;
|
||||
|
||||
// DeepSeek reports cache_write as prompt_cache_miss_tokens (tokens written to cache for future use).
|
||||
// OpenAI doesn't report cache_write in this location, but may in the future.
|
||||
let cache_write_tokens = usage["prompt_cache_miss_tokens"].as_u64().unwrap_or(0) as u32;
|
||||
// DeepSeek reports prompt_cache_miss_tokens which are just regular non-cached tokens.
|
||||
// They do not incur a separate cache_write fee, so we don't map them here to avoid double-charging.
|
||||
let cache_write_tokens = 0;
|
||||
|
||||
Ok(ProviderResponse {
|
||||
content,
|
||||
@@ -300,9 +300,7 @@ pub fn parse_openai_stream_chunk(
|
||||
.or_else(|| u["prompt_cache_hit_tokens"].as_u64())
|
||||
.unwrap_or(0) as u32;
|
||||
|
||||
let cache_write_tokens = u["prompt_cache_miss_tokens"]
|
||||
.as_u64()
|
||||
.unwrap_or(0) as u32;
|
||||
let cache_write_tokens = 0;
|
||||
|
||||
Some(StreamUsage {
|
||||
prompt_tokens,
|
||||
|
||||
Reference in New Issue
Block a user