feat: implement reasoning_tokens tracking and enhanced usage logging
This commit is contained in:
@@ -312,6 +312,14 @@ async fn chat_completions(
|
||||
},
|
||||
finish_reason: chunk.finish_reason,
|
||||
}],
|
||||
usage: chunk.usage.as_ref().map(|u| crate::models::Usage {
|
||||
prompt_tokens: u.prompt_tokens,
|
||||
completion_tokens: u.completion_tokens,
|
||||
total_tokens: u.total_tokens,
|
||||
reasoning_tokens: if u.reasoning_tokens > 0 { Some(u.reasoning_tokens) } else { None },
|
||||
cache_read_tokens: if u.cache_read_tokens > 0 { Some(u.cache_read_tokens) } else { None },
|
||||
cache_write_tokens: if u.cache_write_tokens > 0 { Some(u.cache_write_tokens) } else { None },
|
||||
}),
|
||||
};
|
||||
|
||||
// Use axum's Event directly, wrap in Ok
|
||||
@@ -383,6 +391,7 @@ async fn chat_completions(
|
||||
model: response.model.clone(),
|
||||
prompt_tokens: response.prompt_tokens,
|
||||
completion_tokens: response.completion_tokens,
|
||||
reasoning_tokens: response.reasoning_tokens,
|
||||
total_tokens: response.total_tokens,
|
||||
cache_read_tokens: response.cache_read_tokens,
|
||||
cache_write_tokens: response.cache_write_tokens,
|
||||
@@ -423,6 +432,7 @@ async fn chat_completions(
|
||||
prompt_tokens: response.prompt_tokens,
|
||||
completion_tokens: response.completion_tokens,
|
||||
total_tokens: response.total_tokens,
|
||||
reasoning_tokens: if response.reasoning_tokens > 0 { Some(response.reasoning_tokens) } else { None },
|
||||
cache_read_tokens: if response.cache_read_tokens > 0 { Some(response.cache_read_tokens) } else { None },
|
||||
cache_write_tokens: if response.cache_write_tokens > 0 { Some(response.cache_write_tokens) } else { None },
|
||||
}),
|
||||
@@ -452,6 +462,7 @@ async fn chat_completions(
|
||||
model: model.clone(),
|
||||
prompt_tokens: 0,
|
||||
completion_tokens: 0,
|
||||
reasoning_tokens: 0,
|
||||
total_tokens: 0,
|
||||
cache_read_tokens: 0,
|
||||
cache_write_tokens: 0,
|
||||
|
||||
Reference in New Issue
Block a user