feat: implement reasoning_tokens tracking and enhanced usage logging
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled

This commit is contained in:
2026-03-11 17:14:49 +00:00
parent 3ab00fb188
commit cc5eba1957
11 changed files with 75 additions and 4 deletions

View File

@@ -255,7 +255,10 @@ pub(super) async fn handle_system_logs(
model,
prompt_tokens,
completion_tokens,
reasoning_tokens,
total_tokens,
cache_read_tokens,
cache_write_tokens,
cost,
status,
error_message,
@@ -279,6 +282,11 @@ pub(super) async fn handle_system_logs(
"client_id": row.get::<String, _>("client_id"),
"provider": row.get::<String, _>("provider"),
"model": row.get::<String, _>("model"),
"prompt_tokens": row.get::<i64, _>("prompt_tokens"),
"completion_tokens": row.get::<i64, _>("completion_tokens"),
"reasoning_tokens": row.get::<i64, _>("reasoning_tokens"),
"cache_read_tokens": row.get::<i64, _>("cache_read_tokens"),
"cache_write_tokens": row.get::<i64, _>("cache_write_tokens"),
"tokens": row.get::<i64, _>("total_tokens"),
"cost": row.get::<f64, _>("cost"),
"status": row.get::<String, _>("status"),

View File

@@ -64,6 +64,7 @@ pub async fn run_migrations(pool: &DbPool) -> Result<()> {
model TEXT,
prompt_tokens INTEGER,
completion_tokens INTEGER,
reasoning_tokens INTEGER DEFAULT 0,
total_tokens INTEGER,
cost REAL,
has_images BOOLEAN DEFAULT FALSE,
@@ -172,6 +173,9 @@ pub async fn run_migrations(pool: &DbPool) -> Result<()> {
let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN cache_write_tokens INTEGER DEFAULT 0")
.execute(pool)
.await;
let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN reasoning_tokens INTEGER DEFAULT 0")
.execute(pool)
.await;
// Add billing_mode column if it doesn't exist (migration for existing DBs)
let _ = sqlx::query("ALTER TABLE provider_configs ADD COLUMN billing_mode TEXT")

View File

@@ -15,6 +15,7 @@ pub struct RequestLog {
pub model: String,
pub prompt_tokens: u32,
pub completion_tokens: u32,
pub reasoning_tokens: u32,
pub total_tokens: u32,
pub cache_read_tokens: u32,
pub cache_write_tokens: u32,
@@ -77,8 +78,8 @@ impl RequestLogger {
sqlx::query(
r#"
INSERT INTO llm_requests
(timestamp, client_id, provider, model, prompt_tokens, completion_tokens, total_tokens, cache_read_tokens, cache_write_tokens, cost, has_images, status, error_message, duration_ms, request_body, response_body)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
(timestamp, client_id, provider, model, prompt_tokens, completion_tokens, reasoning_tokens, total_tokens, cache_read_tokens, cache_write_tokens, cost, has_images, status, error_message, duration_ms, request_body, response_body)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"#,
)
.bind(log.timestamp)
@@ -87,6 +88,7 @@ impl RequestLogger {
.bind(&log.model)
.bind(log.prompt_tokens as i64)
.bind(log.completion_tokens as i64)
.bind(log.reasoning_tokens as i64)
.bind(log.total_tokens as i64)
.bind(log.cache_read_tokens as i64)
.bind(log.cache_write_tokens as i64)

View File

@@ -165,6 +165,8 @@ pub struct Usage {
pub completion_tokens: u32,
pub total_tokens: u32,
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_read_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_write_tokens: Option<u32>,
@@ -179,6 +181,8 @@ pub struct ChatCompletionStreamResponse {
pub created: u64,
pub model: String,
pub choices: Vec<ChatStreamChoice>,
#[serde(skip_serializing_if = "Option::is_none")]
pub usage: Option<Usage>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]

View File

@@ -722,6 +722,10 @@ impl super::Provider for GeminiProvider {
let reasoning_content = candidate
.and_then(|c| c.content.parts.iter().find_map(|p| p.thought.clone()));
let reasoning_tokens = reasoning_content.as_ref()
.map(|r| crate::utils::tokens::estimate_completion_tokens(r, &model))
.unwrap_or(0);
// Extract function calls → OpenAI tool_calls
let tool_calls = candidate.and_then(|c| Self::extract_tool_calls(&c.content.parts));
@@ -752,6 +756,7 @@ impl super::Provider for GeminiProvider {
tool_calls,
prompt_tokens,
completion_tokens,
reasoning_tokens,
total_tokens,
cache_read_tokens,
cache_write_tokens: 0, // Gemini doesn't report cache writes separately
@@ -902,6 +907,7 @@ impl super::Provider for GeminiProvider {
super::StreamUsage {
prompt_tokens: u.prompt_token_count,
completion_tokens: u.candidates_token_count,
reasoning_tokens: 0,
total_tokens: u.total_token_count,
cache_read_tokens: u.cached_content_token_count,
cache_write_tokens: 0,

View File

@@ -254,6 +254,11 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
let completion_tokens = usage["completion_tokens"].as_u64().unwrap_or(0) as u32;
let total_tokens = usage["total_tokens"].as_u64().unwrap_or(0) as u32;
// Extract reasoning tokens
let reasoning_tokens = usage["completion_tokens_details"]["reasoning_tokens"]
.as_u64()
.unwrap_or(0) as u32;
// Extract cache tokens — try OpenAI/Grok format first, then DeepSeek format
let cache_read_tokens = usage["prompt_tokens_details"]["cached_tokens"]
.as_u64()
@@ -271,6 +276,7 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
tool_calls,
prompt_tokens,
completion_tokens,
reasoning_tokens,
total_tokens,
cache_read_tokens,
cache_write_tokens,
@@ -295,6 +301,10 @@ pub fn parse_openai_stream_chunk(
let completion_tokens = u["completion_tokens"].as_u64().unwrap_or(0) as u32;
let total_tokens = u["total_tokens"].as_u64().unwrap_or(0) as u32;
let reasoning_tokens = u["completion_tokens_details"]["reasoning_tokens"]
.as_u64()
.unwrap_or(0) as u32;
let cache_read_tokens = u["prompt_tokens_details"]["cached_tokens"]
.as_u64()
.or_else(|| u["prompt_cache_hit_tokens"].as_u64())
@@ -305,6 +315,7 @@ pub fn parse_openai_stream_chunk(
Some(StreamUsage {
prompt_tokens,
completion_tokens,
reasoning_tokens,
total_tokens,
cache_read_tokens,
cache_write_tokens,

View File

@@ -75,6 +75,7 @@ pub struct ProviderResponse {
pub tool_calls: Option<Vec<crate::models::ToolCall>>,
pub prompt_tokens: u32,
pub completion_tokens: u32,
pub reasoning_tokens: u32,
pub total_tokens: u32,
pub cache_read_tokens: u32,
pub cache_write_tokens: u32,
@@ -86,6 +87,7 @@ pub struct ProviderResponse {
pub struct StreamUsage {
pub prompt_tokens: u32,
pub completion_tokens: u32,
pub reasoning_tokens: u32,
pub total_tokens: u32,
pub cache_read_tokens: u32,
pub cache_write_tokens: u32,

View File

@@ -177,6 +177,7 @@ impl super::Provider for OpenAIProvider {
tool_calls: None,
prompt_tokens,
completion_tokens,
reasoning_tokens: 0,
total_tokens,
cache_read_tokens: 0,
cache_write_tokens: 0,
@@ -275,6 +276,7 @@ impl super::Provider for OpenAIProvider {
tool_calls: None,
prompt_tokens,
completion_tokens,
reasoning_tokens: 0,
total_tokens,
cache_read_tokens: 0,
cache_write_tokens: 0,

View File

@@ -312,6 +312,14 @@ async fn chat_completions(
},
finish_reason: chunk.finish_reason,
}],
usage: chunk.usage.as_ref().map(|u| crate::models::Usage {
prompt_tokens: u.prompt_tokens,
completion_tokens: u.completion_tokens,
total_tokens: u.total_tokens,
reasoning_tokens: if u.reasoning_tokens > 0 { Some(u.reasoning_tokens) } else { None },
cache_read_tokens: if u.cache_read_tokens > 0 { Some(u.cache_read_tokens) } else { None },
cache_write_tokens: if u.cache_write_tokens > 0 { Some(u.cache_write_tokens) } else { None },
}),
};
// Use axum's Event directly, wrap in Ok
@@ -383,6 +391,7 @@ async fn chat_completions(
model: response.model.clone(),
prompt_tokens: response.prompt_tokens,
completion_tokens: response.completion_tokens,
reasoning_tokens: response.reasoning_tokens,
total_tokens: response.total_tokens,
cache_read_tokens: response.cache_read_tokens,
cache_write_tokens: response.cache_write_tokens,
@@ -423,6 +432,7 @@ async fn chat_completions(
prompt_tokens: response.prompt_tokens,
completion_tokens: response.completion_tokens,
total_tokens: response.total_tokens,
reasoning_tokens: if response.reasoning_tokens > 0 { Some(response.reasoning_tokens) } else { None },
cache_read_tokens: if response.cache_read_tokens > 0 { Some(response.cache_read_tokens) } else { None },
cache_write_tokens: if response.cache_write_tokens > 0 { Some(response.cache_write_tokens) } else { None },
}),
@@ -452,6 +462,7 @@ async fn chat_completions(
model: model.clone(),
prompt_tokens: 0,
completion_tokens: 0,
reasoning_tokens: 0,
total_tokens: 0,
cache_read_tokens: 0,
cache_write_tokens: 0,

View File

@@ -96,11 +96,12 @@ where
// Spawn a background task to log the completion
tokio::spawn(async move {
// Use real usage from the provider when available, otherwise fall back to estimates
let (prompt_tokens, completion_tokens, total_tokens, cache_read_tokens, cache_write_tokens) =
let (prompt_tokens, completion_tokens, reasoning_tokens, total_tokens, cache_read_tokens, cache_write_tokens) =
if let Some(usage) = &real_usage {
(
usage.prompt_tokens,
usage.completion_tokens,
usage.reasoning_tokens,
usage.total_tokens,
usage.cache_read_tokens,
usage.cache_write_tokens,
@@ -109,6 +110,7 @@ where
(
estimated_prompt_tokens,
estimated_completion,
estimated_reasoning_tokens,
estimated_prompt_tokens + estimated_completion,
0u32,
0u32,
@@ -163,6 +165,7 @@ where
model,
prompt_tokens,
completion_tokens,
reasoning_tokens,
total_tokens,
cache_read_tokens,
cache_write_tokens,

View File

@@ -38,6 +38,24 @@ class LogsPage {
const statusClass = log.status === 'success' ? 'success' : 'danger';
const timestamp = luxon.DateTime.fromISO(log.timestamp).toFormat('yyyy-MM-dd HH:mm:ss');
let tokenDetails = `${log.tokens} total tokens`;
if (log.status === 'success') {
const parts = [];
parts.push(`${log.prompt_tokens} in`);
let completionStr = `${log.completion_tokens} out`;
if (log.reasoning_tokens > 0) {
completionStr += ` (${log.reasoning_tokens} reasoning)`;
}
parts.push(completionStr);
if (log.cache_read_tokens > 0) {
parts.push(`${log.cache_read_tokens} cache-hit`);
}
tokenDetails = parts.join(', ');
}
return `
<tr class="log-row">
<td class="whitespace-nowrap">${timestamp}</td>
@@ -55,7 +73,7 @@ class LogsPage {
<td>
<div class="log-message-container">
<code class="log-model">${log.model}</code>
<span class="log-tokens">${log.tokens} tokens</span>
<span class="log-tokens" title="${log.tokens} total tokens">${tokenDetails}</span>
<span class="log-duration">${log.duration}ms</span>
${log.error ? `<div class="log-error-msg">${log.error}</div>` : ''}
</div>