feat: implement reasoning_tokens tracking and enhanced usage logging
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled

This commit is contained in:
2026-03-11 17:14:49 +00:00
parent 3ab00fb188
commit cc5eba1957
11 changed files with 75 additions and 4 deletions

View File

@@ -255,7 +255,10 @@ pub(super) async fn handle_system_logs(
model, model,
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
reasoning_tokens,
total_tokens, total_tokens,
cache_read_tokens,
cache_write_tokens,
cost, cost,
status, status,
error_message, error_message,
@@ -279,6 +282,11 @@ pub(super) async fn handle_system_logs(
"client_id": row.get::<String, _>("client_id"), "client_id": row.get::<String, _>("client_id"),
"provider": row.get::<String, _>("provider"), "provider": row.get::<String, _>("provider"),
"model": row.get::<String, _>("model"), "model": row.get::<String, _>("model"),
"prompt_tokens": row.get::<i64, _>("prompt_tokens"),
"completion_tokens": row.get::<i64, _>("completion_tokens"),
"reasoning_tokens": row.get::<i64, _>("reasoning_tokens"),
"cache_read_tokens": row.get::<i64, _>("cache_read_tokens"),
"cache_write_tokens": row.get::<i64, _>("cache_write_tokens"),
"tokens": row.get::<i64, _>("total_tokens"), "tokens": row.get::<i64, _>("total_tokens"),
"cost": row.get::<f64, _>("cost"), "cost": row.get::<f64, _>("cost"),
"status": row.get::<String, _>("status"), "status": row.get::<String, _>("status"),

View File

@@ -64,6 +64,7 @@ pub async fn run_migrations(pool: &DbPool) -> Result<()> {
model TEXT, model TEXT,
prompt_tokens INTEGER, prompt_tokens INTEGER,
completion_tokens INTEGER, completion_tokens INTEGER,
reasoning_tokens INTEGER DEFAULT 0,
total_tokens INTEGER, total_tokens INTEGER,
cost REAL, cost REAL,
has_images BOOLEAN DEFAULT FALSE, has_images BOOLEAN DEFAULT FALSE,
@@ -172,6 +173,9 @@ pub async fn run_migrations(pool: &DbPool) -> Result<()> {
let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN cache_write_tokens INTEGER DEFAULT 0") let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN cache_write_tokens INTEGER DEFAULT 0")
.execute(pool) .execute(pool)
.await; .await;
let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN reasoning_tokens INTEGER DEFAULT 0")
.execute(pool)
.await;
// Add billing_mode column if it doesn't exist (migration for existing DBs) // Add billing_mode column if it doesn't exist (migration for existing DBs)
let _ = sqlx::query("ALTER TABLE provider_configs ADD COLUMN billing_mode TEXT") let _ = sqlx::query("ALTER TABLE provider_configs ADD COLUMN billing_mode TEXT")

View File

@@ -15,6 +15,7 @@ pub struct RequestLog {
pub model: String, pub model: String,
pub prompt_tokens: u32, pub prompt_tokens: u32,
pub completion_tokens: u32, pub completion_tokens: u32,
pub reasoning_tokens: u32,
pub total_tokens: u32, pub total_tokens: u32,
pub cache_read_tokens: u32, pub cache_read_tokens: u32,
pub cache_write_tokens: u32, pub cache_write_tokens: u32,
@@ -77,8 +78,8 @@ impl RequestLogger {
sqlx::query( sqlx::query(
r#" r#"
INSERT INTO llm_requests INSERT INTO llm_requests
(timestamp, client_id, provider, model, prompt_tokens, completion_tokens, total_tokens, cache_read_tokens, cache_write_tokens, cost, has_images, status, error_message, duration_ms, request_body, response_body) (timestamp, client_id, provider, model, prompt_tokens, completion_tokens, reasoning_tokens, total_tokens, cache_read_tokens, cache_write_tokens, cost, has_images, status, error_message, duration_ms, request_body, response_body)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"#, "#,
) )
.bind(log.timestamp) .bind(log.timestamp)
@@ -87,6 +88,7 @@ impl RequestLogger {
.bind(&log.model) .bind(&log.model)
.bind(log.prompt_tokens as i64) .bind(log.prompt_tokens as i64)
.bind(log.completion_tokens as i64) .bind(log.completion_tokens as i64)
.bind(log.reasoning_tokens as i64)
.bind(log.total_tokens as i64) .bind(log.total_tokens as i64)
.bind(log.cache_read_tokens as i64) .bind(log.cache_read_tokens as i64)
.bind(log.cache_write_tokens as i64) .bind(log.cache_write_tokens as i64)

View File

@@ -165,6 +165,8 @@ pub struct Usage {
pub completion_tokens: u32, pub completion_tokens: u32,
pub total_tokens: u32, pub total_tokens: u32,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_read_tokens: Option<u32>, pub cache_read_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub cache_write_tokens: Option<u32>, pub cache_write_tokens: Option<u32>,
@@ -179,6 +181,8 @@ pub struct ChatCompletionStreamResponse {
pub created: u64, pub created: u64,
pub model: String, pub model: String,
pub choices: Vec<ChatStreamChoice>, pub choices: Vec<ChatStreamChoice>,
#[serde(skip_serializing_if = "Option::is_none")]
pub usage: Option<Usage>,
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]

View File

@@ -722,6 +722,10 @@ impl super::Provider for GeminiProvider {
let reasoning_content = candidate let reasoning_content = candidate
.and_then(|c| c.content.parts.iter().find_map(|p| p.thought.clone())); .and_then(|c| c.content.parts.iter().find_map(|p| p.thought.clone()));
let reasoning_tokens = reasoning_content.as_ref()
.map(|r| crate::utils::tokens::estimate_completion_tokens(r, &model))
.unwrap_or(0);
// Extract function calls → OpenAI tool_calls // Extract function calls → OpenAI tool_calls
let tool_calls = candidate.and_then(|c| Self::extract_tool_calls(&c.content.parts)); let tool_calls = candidate.and_then(|c| Self::extract_tool_calls(&c.content.parts));
@@ -752,6 +756,7 @@ impl super::Provider for GeminiProvider {
tool_calls, tool_calls,
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
reasoning_tokens,
total_tokens, total_tokens,
cache_read_tokens, cache_read_tokens,
cache_write_tokens: 0, // Gemini doesn't report cache writes separately cache_write_tokens: 0, // Gemini doesn't report cache writes separately
@@ -902,6 +907,7 @@ impl super::Provider for GeminiProvider {
super::StreamUsage { super::StreamUsage {
prompt_tokens: u.prompt_token_count, prompt_tokens: u.prompt_token_count,
completion_tokens: u.candidates_token_count, completion_tokens: u.candidates_token_count,
reasoning_tokens: 0,
total_tokens: u.total_token_count, total_tokens: u.total_token_count,
cache_read_tokens: u.cached_content_token_count, cache_read_tokens: u.cached_content_token_count,
cache_write_tokens: 0, cache_write_tokens: 0,

View File

@@ -254,6 +254,11 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
let completion_tokens = usage["completion_tokens"].as_u64().unwrap_or(0) as u32; let completion_tokens = usage["completion_tokens"].as_u64().unwrap_or(0) as u32;
let total_tokens = usage["total_tokens"].as_u64().unwrap_or(0) as u32; let total_tokens = usage["total_tokens"].as_u64().unwrap_or(0) as u32;
// Extract reasoning tokens
let reasoning_tokens = usage["completion_tokens_details"]["reasoning_tokens"]
.as_u64()
.unwrap_or(0) as u32;
// Extract cache tokens — try OpenAI/Grok format first, then DeepSeek format // Extract cache tokens — try OpenAI/Grok format first, then DeepSeek format
let cache_read_tokens = usage["prompt_tokens_details"]["cached_tokens"] let cache_read_tokens = usage["prompt_tokens_details"]["cached_tokens"]
.as_u64() .as_u64()
@@ -271,6 +276,7 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
tool_calls, tool_calls,
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
reasoning_tokens,
total_tokens, total_tokens,
cache_read_tokens, cache_read_tokens,
cache_write_tokens, cache_write_tokens,
@@ -295,6 +301,10 @@ pub fn parse_openai_stream_chunk(
let completion_tokens = u["completion_tokens"].as_u64().unwrap_or(0) as u32; let completion_tokens = u["completion_tokens"].as_u64().unwrap_or(0) as u32;
let total_tokens = u["total_tokens"].as_u64().unwrap_or(0) as u32; let total_tokens = u["total_tokens"].as_u64().unwrap_or(0) as u32;
let reasoning_tokens = u["completion_tokens_details"]["reasoning_tokens"]
.as_u64()
.unwrap_or(0) as u32;
let cache_read_tokens = u["prompt_tokens_details"]["cached_tokens"] let cache_read_tokens = u["prompt_tokens_details"]["cached_tokens"]
.as_u64() .as_u64()
.or_else(|| u["prompt_cache_hit_tokens"].as_u64()) .or_else(|| u["prompt_cache_hit_tokens"].as_u64())
@@ -305,6 +315,7 @@ pub fn parse_openai_stream_chunk(
Some(StreamUsage { Some(StreamUsage {
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
reasoning_tokens,
total_tokens, total_tokens,
cache_read_tokens, cache_read_tokens,
cache_write_tokens, cache_write_tokens,

View File

@@ -75,6 +75,7 @@ pub struct ProviderResponse {
pub tool_calls: Option<Vec<crate::models::ToolCall>>, pub tool_calls: Option<Vec<crate::models::ToolCall>>,
pub prompt_tokens: u32, pub prompt_tokens: u32,
pub completion_tokens: u32, pub completion_tokens: u32,
pub reasoning_tokens: u32,
pub total_tokens: u32, pub total_tokens: u32,
pub cache_read_tokens: u32, pub cache_read_tokens: u32,
pub cache_write_tokens: u32, pub cache_write_tokens: u32,
@@ -86,6 +87,7 @@ pub struct ProviderResponse {
pub struct StreamUsage { pub struct StreamUsage {
pub prompt_tokens: u32, pub prompt_tokens: u32,
pub completion_tokens: u32, pub completion_tokens: u32,
pub reasoning_tokens: u32,
pub total_tokens: u32, pub total_tokens: u32,
pub cache_read_tokens: u32, pub cache_read_tokens: u32,
pub cache_write_tokens: u32, pub cache_write_tokens: u32,

View File

@@ -177,6 +177,7 @@ impl super::Provider for OpenAIProvider {
tool_calls: None, tool_calls: None,
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
reasoning_tokens: 0,
total_tokens, total_tokens,
cache_read_tokens: 0, cache_read_tokens: 0,
cache_write_tokens: 0, cache_write_tokens: 0,
@@ -275,6 +276,7 @@ impl super::Provider for OpenAIProvider {
tool_calls: None, tool_calls: None,
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
reasoning_tokens: 0,
total_tokens, total_tokens,
cache_read_tokens: 0, cache_read_tokens: 0,
cache_write_tokens: 0, cache_write_tokens: 0,

View File

@@ -312,6 +312,14 @@ async fn chat_completions(
}, },
finish_reason: chunk.finish_reason, finish_reason: chunk.finish_reason,
}], }],
usage: chunk.usage.as_ref().map(|u| crate::models::Usage {
prompt_tokens: u.prompt_tokens,
completion_tokens: u.completion_tokens,
total_tokens: u.total_tokens,
reasoning_tokens: if u.reasoning_tokens > 0 { Some(u.reasoning_tokens) } else { None },
cache_read_tokens: if u.cache_read_tokens > 0 { Some(u.cache_read_tokens) } else { None },
cache_write_tokens: if u.cache_write_tokens > 0 { Some(u.cache_write_tokens) } else { None },
}),
}; };
// Use axum's Event directly, wrap in Ok // Use axum's Event directly, wrap in Ok
@@ -383,6 +391,7 @@ async fn chat_completions(
model: response.model.clone(), model: response.model.clone(),
prompt_tokens: response.prompt_tokens, prompt_tokens: response.prompt_tokens,
completion_tokens: response.completion_tokens, completion_tokens: response.completion_tokens,
reasoning_tokens: response.reasoning_tokens,
total_tokens: response.total_tokens, total_tokens: response.total_tokens,
cache_read_tokens: response.cache_read_tokens, cache_read_tokens: response.cache_read_tokens,
cache_write_tokens: response.cache_write_tokens, cache_write_tokens: response.cache_write_tokens,
@@ -423,6 +432,7 @@ async fn chat_completions(
prompt_tokens: response.prompt_tokens, prompt_tokens: response.prompt_tokens,
completion_tokens: response.completion_tokens, completion_tokens: response.completion_tokens,
total_tokens: response.total_tokens, total_tokens: response.total_tokens,
reasoning_tokens: if response.reasoning_tokens > 0 { Some(response.reasoning_tokens) } else { None },
cache_read_tokens: if response.cache_read_tokens > 0 { Some(response.cache_read_tokens) } else { None }, cache_read_tokens: if response.cache_read_tokens > 0 { Some(response.cache_read_tokens) } else { None },
cache_write_tokens: if response.cache_write_tokens > 0 { Some(response.cache_write_tokens) } else { None }, cache_write_tokens: if response.cache_write_tokens > 0 { Some(response.cache_write_tokens) } else { None },
}), }),
@@ -452,6 +462,7 @@ async fn chat_completions(
model: model.clone(), model: model.clone(),
prompt_tokens: 0, prompt_tokens: 0,
completion_tokens: 0, completion_tokens: 0,
reasoning_tokens: 0,
total_tokens: 0, total_tokens: 0,
cache_read_tokens: 0, cache_read_tokens: 0,
cache_write_tokens: 0, cache_write_tokens: 0,

View File

@@ -96,11 +96,12 @@ where
// Spawn a background task to log the completion // Spawn a background task to log the completion
tokio::spawn(async move { tokio::spawn(async move {
// Use real usage from the provider when available, otherwise fall back to estimates // Use real usage from the provider when available, otherwise fall back to estimates
let (prompt_tokens, completion_tokens, total_tokens, cache_read_tokens, cache_write_tokens) = let (prompt_tokens, completion_tokens, reasoning_tokens, total_tokens, cache_read_tokens, cache_write_tokens) =
if let Some(usage) = &real_usage { if let Some(usage) = &real_usage {
( (
usage.prompt_tokens, usage.prompt_tokens,
usage.completion_tokens, usage.completion_tokens,
usage.reasoning_tokens,
usage.total_tokens, usage.total_tokens,
usage.cache_read_tokens, usage.cache_read_tokens,
usage.cache_write_tokens, usage.cache_write_tokens,
@@ -109,6 +110,7 @@ where
( (
estimated_prompt_tokens, estimated_prompt_tokens,
estimated_completion, estimated_completion,
estimated_reasoning_tokens,
estimated_prompt_tokens + estimated_completion, estimated_prompt_tokens + estimated_completion,
0u32, 0u32,
0u32, 0u32,
@@ -163,6 +165,7 @@ where
model, model,
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
reasoning_tokens,
total_tokens, total_tokens,
cache_read_tokens, cache_read_tokens,
cache_write_tokens, cache_write_tokens,

View File

@@ -38,6 +38,24 @@ class LogsPage {
const statusClass = log.status === 'success' ? 'success' : 'danger'; const statusClass = log.status === 'success' ? 'success' : 'danger';
const timestamp = luxon.DateTime.fromISO(log.timestamp).toFormat('yyyy-MM-dd HH:mm:ss'); const timestamp = luxon.DateTime.fromISO(log.timestamp).toFormat('yyyy-MM-dd HH:mm:ss');
let tokenDetails = `${log.tokens} total tokens`;
if (log.status === 'success') {
const parts = [];
parts.push(`${log.prompt_tokens} in`);
let completionStr = `${log.completion_tokens} out`;
if (log.reasoning_tokens > 0) {
completionStr += ` (${log.reasoning_tokens} reasoning)`;
}
parts.push(completionStr);
if (log.cache_read_tokens > 0) {
parts.push(`${log.cache_read_tokens} cache-hit`);
}
tokenDetails = parts.join(', ');
}
return ` return `
<tr class="log-row"> <tr class="log-row">
<td class="whitespace-nowrap">${timestamp}</td> <td class="whitespace-nowrap">${timestamp}</td>
@@ -55,7 +73,7 @@ class LogsPage {
<td> <td>
<div class="log-message-container"> <div class="log-message-container">
<code class="log-model">${log.model}</code> <code class="log-model">${log.model}</code>
<span class="log-tokens">${log.tokens} tokens</span> <span class="log-tokens" title="${log.tokens} total tokens">${tokenDetails}</span>
<span class="log-duration">${log.duration}ms</span> <span class="log-duration">${log.duration}ms</span>
${log.error ? `<div class="log-error-msg">${log.error}</div>` : ''} ${log.error ? `<div class="log-error-msg">${log.error}</div>` : ''}
</div> </div>