feat: implement reasoning_tokens tracking and enhanced usage logging
This commit is contained in:
@@ -722,6 +722,10 @@ impl super::Provider for GeminiProvider {
|
||||
let reasoning_content = candidate
|
||||
.and_then(|c| c.content.parts.iter().find_map(|p| p.thought.clone()));
|
||||
|
||||
let reasoning_tokens = reasoning_content.as_ref()
|
||||
.map(|r| crate::utils::tokens::estimate_completion_tokens(r, &model))
|
||||
.unwrap_or(0);
|
||||
|
||||
// Extract function calls → OpenAI tool_calls
|
||||
let tool_calls = candidate.and_then(|c| Self::extract_tool_calls(&c.content.parts));
|
||||
|
||||
@@ -752,6 +756,7 @@ impl super::Provider for GeminiProvider {
|
||||
tool_calls,
|
||||
prompt_tokens,
|
||||
completion_tokens,
|
||||
reasoning_tokens,
|
||||
total_tokens,
|
||||
cache_read_tokens,
|
||||
cache_write_tokens: 0, // Gemini doesn't report cache writes separately
|
||||
@@ -902,6 +907,7 @@ impl super::Provider for GeminiProvider {
|
||||
super::StreamUsage {
|
||||
prompt_tokens: u.prompt_token_count,
|
||||
completion_tokens: u.candidates_token_count,
|
||||
reasoning_tokens: 0,
|
||||
total_tokens: u.total_token_count,
|
||||
cache_read_tokens: u.cached_content_token_count,
|
||||
cache_write_tokens: 0,
|
||||
|
||||
@@ -254,6 +254,11 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
|
||||
let completion_tokens = usage["completion_tokens"].as_u64().unwrap_or(0) as u32;
|
||||
let total_tokens = usage["total_tokens"].as_u64().unwrap_or(0) as u32;
|
||||
|
||||
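// Extract reasoning tokens from `completion_tokens_details.reasoning_tokens`; falls back to 0 when the field is missing or is not an unsigned integer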
|
||||
let reasoning_tokens = usage["completion_tokens_details"]["reasoning_tokens"]
|
||||
.as_u64()
|
||||
.unwrap_or(0) as u32;
|
||||
|
||||
// Extract cache tokens — try OpenAI/Grok format first, then DeepSeek format
|
||||
let cache_read_tokens = usage["prompt_tokens_details"]["cached_tokens"]
|
||||
.as_u64()
|
||||
@@ -271,6 +276,7 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
|
||||
tool_calls,
|
||||
prompt_tokens,
|
||||
completion_tokens,
|
||||
reasoning_tokens,
|
||||
total_tokens,
|
||||
cache_read_tokens,
|
||||
cache_write_tokens,
|
||||
@@ -295,6 +301,10 @@ pub fn parse_openai_stream_chunk(
|
||||
let completion_tokens = u["completion_tokens"].as_u64().unwrap_or(0) as u32;
|
||||
let total_tokens = u["total_tokens"].as_u64().unwrap_or(0) as u32;
|
||||
|
||||
let reasoning_tokens = u["completion_tokens_details"]["reasoning_tokens"]
|
||||
.as_u64()
|
||||
.unwrap_or(0) as u32;
|
||||
|
||||
let cache_read_tokens = u["prompt_tokens_details"]["cached_tokens"]
|
||||
.as_u64()
|
||||
.or_else(|| u["prompt_cache_hit_tokens"].as_u64())
|
||||
@@ -305,6 +315,7 @@ pub fn parse_openai_stream_chunk(
|
||||
Some(StreamUsage {
|
||||
prompt_tokens,
|
||||
completion_tokens,
|
||||
reasoning_tokens,
|
||||
total_tokens,
|
||||
cache_read_tokens,
|
||||
cache_write_tokens,
|
||||
|
||||
@@ -75,6 +75,7 @@ pub struct ProviderResponse {
|
||||
pub tool_calls: Option<Vec<crate::models::ToolCall>>,
|
||||
pub prompt_tokens: u32,
|
||||
pub completion_tokens: u32,
|
||||
pub reasoning_tokens: u32,
|
||||
pub total_tokens: u32,
|
||||
pub cache_read_tokens: u32,
|
||||
pub cache_write_tokens: u32,
|
||||
@@ -86,6 +87,7 @@ pub struct ProviderResponse {
|
||||
pub struct StreamUsage {
|
||||
pub prompt_tokens: u32,
|
||||
pub completion_tokens: u32,
|
||||
pub reasoning_tokens: u32,
|
||||
pub total_tokens: u32,
|
||||
pub cache_read_tokens: u32,
|
||||
pub cache_write_tokens: u32,
|
||||
|
||||
@@ -177,6 +177,7 @@ impl super::Provider for OpenAIProvider {
|
||||
tool_calls: None,
|
||||
prompt_tokens,
|
||||
completion_tokens,
|
||||
reasoning_tokens: 0,
|
||||
total_tokens,
|
||||
cache_read_tokens: 0,
|
||||
cache_write_tokens: 0,
|
||||
@@ -275,6 +276,7 @@ impl super::Provider for OpenAIProvider {
|
||||
tool_calls: None,
|
||||
prompt_tokens,
|
||||
completion_tokens,
|
||||
reasoning_tokens: 0,
|
||||
total_tokens,
|
||||
cache_read_tokens: 0,
|
||||
cache_write_tokens: 0,
|
||||
|
||||
Reference in New Issue
Block a user