feat: implement reasoning_tokens tracking and enhanced usage logging
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled

This commit is contained in:
2026-03-11 17:14:49 +00:00
parent 3ab00fb188
commit cc5eba1957
11 changed files with 75 additions and 4 deletions

View File

@@ -722,6 +722,10 @@ impl super::Provider for GeminiProvider {
let reasoning_content = candidate
.and_then(|c| c.content.parts.iter().find_map(|p| p.thought.clone()));
let reasoning_tokens = reasoning_content.as_ref()
.map(|r| crate::utils::tokens::estimate_completion_tokens(r, &model))
.unwrap_or(0);
// Extract function calls → OpenAI tool_calls
let tool_calls = candidate.and_then(|c| Self::extract_tool_calls(&c.content.parts));
@@ -752,6 +756,7 @@ impl super::Provider for GeminiProvider {
tool_calls,
prompt_tokens,
completion_tokens,
reasoning_tokens,
total_tokens,
cache_read_tokens,
cache_write_tokens: 0, // Gemini doesn't report cache writes separately
@@ -902,6 +907,7 @@ impl super::Provider for GeminiProvider {
super::StreamUsage {
prompt_tokens: u.prompt_token_count,
completion_tokens: u.candidates_token_count,
reasoning_tokens: 0,
total_tokens: u.total_token_count,
cache_read_tokens: u.cached_content_token_count,
cache_write_tokens: 0,

View File

@@ -254,6 +254,11 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
let completion_tokens = usage["completion_tokens"].as_u64().unwrap_or(0) as u32;
let total_tokens = usage["total_tokens"].as_u64().unwrap_or(0) as u32;
// Extract reasoning tokens from usage.completion_tokens_details.reasoning_tokens (0 when the field is absent)
let reasoning_tokens = usage["completion_tokens_details"]["reasoning_tokens"]
.as_u64()
.unwrap_or(0) as u32;
// Extract cache tokens — try OpenAI/Grok format first, then DeepSeek format
let cache_read_tokens = usage["prompt_tokens_details"]["cached_tokens"]
.as_u64()
@@ -271,6 +276,7 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
tool_calls,
prompt_tokens,
completion_tokens,
reasoning_tokens,
total_tokens,
cache_read_tokens,
cache_write_tokens,
@@ -295,6 +301,10 @@ pub fn parse_openai_stream_chunk(
let completion_tokens = u["completion_tokens"].as_u64().unwrap_or(0) as u32;
let total_tokens = u["total_tokens"].as_u64().unwrap_or(0) as u32;
let reasoning_tokens = u["completion_tokens_details"]["reasoning_tokens"]
.as_u64()
.unwrap_or(0) as u32;
let cache_read_tokens = u["prompt_tokens_details"]["cached_tokens"]
.as_u64()
.or_else(|| u["prompt_cache_hit_tokens"].as_u64())
@@ -305,6 +315,7 @@ pub fn parse_openai_stream_chunk(
Some(StreamUsage {
prompt_tokens,
completion_tokens,
reasoning_tokens,
total_tokens,
cache_read_tokens,
cache_write_tokens,

View File

@@ -75,6 +75,7 @@ pub struct ProviderResponse {
pub tool_calls: Option<Vec<crate::models::ToolCall>>,
pub prompt_tokens: u32,
pub completion_tokens: u32,
pub reasoning_tokens: u32,
pub total_tokens: u32,
pub cache_read_tokens: u32,
pub cache_write_tokens: u32,
@@ -86,6 +87,7 @@ pub struct ProviderResponse {
pub struct StreamUsage {
pub prompt_tokens: u32,
pub completion_tokens: u32,
pub reasoning_tokens: u32,
pub total_tokens: u32,
pub cache_read_tokens: u32,
pub cache_write_tokens: u32,

View File

@@ -177,6 +177,7 @@ impl super::Provider for OpenAIProvider {
tool_calls: None,
prompt_tokens,
completion_tokens,
reasoning_tokens: 0,
total_tokens,
cache_read_tokens: 0,
cache_write_tokens: 0,
@@ -275,6 +276,7 @@ impl super::Provider for OpenAIProvider {
tool_calls: None,
prompt_tokens,
completion_tokens,
reasoning_tokens: 0,
total_tokens,
cache_read_tokens: 0,
cache_write_tokens: 0,