diff --git a/src/dashboard/system.rs b/src/dashboard/system.rs index 7a1be5bf..2cfdc503 100644 --- a/src/dashboard/system.rs +++ b/src/dashboard/system.rs @@ -255,7 +255,10 @@ pub(super) async fn handle_system_logs( model, prompt_tokens, completion_tokens, + reasoning_tokens, total_tokens, + cache_read_tokens, + cache_write_tokens, cost, status, error_message, @@ -279,6 +282,11 @@ pub(super) async fn handle_system_logs( "client_id": row.get::<String, _>("client_id"), "provider": row.get::<String, _>("provider"), "model": row.get::<String, _>("model"), + "prompt_tokens": row.get::<i64, _>("prompt_tokens"), + "completion_tokens": row.get::<i64, _>("completion_tokens"), + "reasoning_tokens": row.get::<i64, _>("reasoning_tokens"), + "cache_read_tokens": row.get::<i64, _>("cache_read_tokens"), + "cache_write_tokens": row.get::<i64, _>("cache_write_tokens"), "tokens": row.get::<i64, _>("total_tokens"), "cost": row.get::<f64, _>("cost"), "status": row.get::<String, _>("status"), diff --git a/src/database/mod.rs b/src/database/mod.rs index 8848e8ea..374599df 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -64,6 +64,7 @@ pub async fn run_migrations(pool: &DbPool) -> Result<()> { model TEXT, prompt_tokens INTEGER, completion_tokens INTEGER, + reasoning_tokens INTEGER DEFAULT 0, total_tokens INTEGER, cost REAL, has_images BOOLEAN DEFAULT FALSE, @@ -172,6 +173,9 @@ pub async fn run_migrations(pool: &DbPool) -> Result<()> { let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN cache_write_tokens INTEGER DEFAULT 0") .execute(pool) .await; + let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN reasoning_tokens INTEGER DEFAULT 0") + .execute(pool) + .await; // Add billing_mode column if it doesn't exist (migration for existing DBs) let _ = sqlx::query("ALTER TABLE provider_configs ADD COLUMN billing_mode TEXT") diff --git a/src/logging/mod.rs b/src/logging/mod.rs index bf1c7359..ab5df368 100644 --- a/src/logging/mod.rs +++ b/src/logging/mod.rs @@ -15,6 +15,7 @@ pub struct RequestLog { pub model: String, pub prompt_tokens: u32, pub completion_tokens: u32, +
pub reasoning_tokens: u32, pub total_tokens: u32, pub cache_read_tokens: u32, pub cache_write_tokens: u32, @@ -77,8 +78,8 @@ impl RequestLogger { sqlx::query( r#" INSERT INTO llm_requests - (timestamp, client_id, provider, model, prompt_tokens, completion_tokens, total_tokens, cache_read_tokens, cache_write_tokens, cost, has_images, status, error_message, duration_ms, request_body, response_body) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + (timestamp, client_id, provider, model, prompt_tokens, completion_tokens, reasoning_tokens, total_tokens, cache_read_tokens, cache_write_tokens, cost, has_images, status, error_message, duration_ms, request_body, response_body) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) "#, ) .bind(log.timestamp) @@ -87,6 +88,7 @@ impl RequestLogger { .bind(&log.model) .bind(log.prompt_tokens as i64) .bind(log.completion_tokens as i64) + .bind(log.reasoning_tokens as i64) .bind(log.total_tokens as i64) .bind(log.cache_read_tokens as i64) .bind(log.cache_write_tokens as i64) diff --git a/src/models/mod.rs index 0a3f8671..5b44bf7a 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -165,6 +165,8 @@ pub struct Usage { pub completion_tokens: u32, pub total_tokens: u32, #[serde(skip_serializing_if = "Option::is_none")] + pub reasoning_tokens: Option<u32>, + #[serde(skip_serializing_if = "Option::is_none")] pub cache_read_tokens: Option<u32>, #[serde(skip_serializing_if = "Option::is_none")] pub cache_write_tokens: Option<u32>, @@ -179,6 +181,8 @@ pub struct ChatCompletionStreamResponse { pub created: u64, pub model: String, pub choices: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub usage: Option<Usage>, } #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/src/providers/gemini.rs index fc84861a..cc2ef10b 100644 --- a/src/providers/gemini.rs +++ b/src/providers/gemini.rs @@ -722,6 +722,10 @@ impl super::Provider for GeminiProvider { let reasoning_content =
candidate .and_then(|c| c.content.parts.iter().find_map(|p| p.thought.clone())); + let reasoning_tokens = reasoning_content.as_ref() + .map(|r| crate::utils::tokens::estimate_completion_tokens(r, &model)) + .unwrap_or(0); + // Extract function calls → OpenAI tool_calls let tool_calls = candidate.and_then(|c| Self::extract_tool_calls(&c.content.parts)); @@ -752,6 +756,7 @@ impl super::Provider for GeminiProvider { tool_calls, prompt_tokens, completion_tokens, + reasoning_tokens, total_tokens, cache_read_tokens, cache_write_tokens: 0, // Gemini doesn't report cache writes separately @@ -902,6 +907,7 @@ impl super::Provider for GeminiProvider { super::StreamUsage { prompt_tokens: u.prompt_token_count, completion_tokens: u.candidates_token_count, + reasoning_tokens: 0, total_tokens: u.total_token_count, cache_read_tokens: u.cached_content_token_count, cache_write_tokens: 0, diff --git a/src/providers/helpers.rs b/src/providers/helpers.rs index a49261d9..a845d065 100644 --- a/src/providers/helpers.rs +++ b/src/providers/helpers.rs @@ -254,6 +254,11 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result Result>, pub prompt_tokens: u32, pub completion_tokens: u32, + pub reasoning_tokens: u32, pub total_tokens: u32, pub cache_read_tokens: u32, pub cache_write_tokens: u32, @@ -86,6 +87,7 @@ pub struct ProviderResponse { pub struct StreamUsage { pub prompt_tokens: u32, pub completion_tokens: u32, + pub reasoning_tokens: u32, pub total_tokens: u32, pub cache_read_tokens: u32, pub cache_write_tokens: u32, diff --git a/src/providers/openai.rs b/src/providers/openai.rs index 7fbbcb89..04f07e08 100644 --- a/src/providers/openai.rs +++ b/src/providers/openai.rs @@ -177,6 +177,7 @@ impl super::Provider for OpenAIProvider { tool_calls: None, prompt_tokens, completion_tokens, + reasoning_tokens: 0, total_tokens, cache_read_tokens: 0, cache_write_tokens: 0, @@ -275,6 +276,7 @@ impl super::Provider for OpenAIProvider { tool_calls: None, prompt_tokens, 
completion_tokens, + reasoning_tokens: 0, total_tokens, cache_read_tokens: 0, cache_write_tokens: 0, diff --git a/src/server/mod.rs b/src/server/mod.rs index 9fe39b34..11a692b8 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -312,6 +312,14 @@ async fn chat_completions( }, finish_reason: chunk.finish_reason, }], + usage: chunk.usage.as_ref().map(|u| crate::models::Usage { + prompt_tokens: u.prompt_tokens, + completion_tokens: u.completion_tokens, + total_tokens: u.total_tokens, + reasoning_tokens: if u.reasoning_tokens > 0 { Some(u.reasoning_tokens) } else { None }, + cache_read_tokens: if u.cache_read_tokens > 0 { Some(u.cache_read_tokens) } else { None }, + cache_write_tokens: if u.cache_write_tokens > 0 { Some(u.cache_write_tokens) } else { None }, + }), }; // Use axum's Event directly, wrap in Ok @@ -383,6 +391,7 @@ async fn chat_completions( model: response.model.clone(), prompt_tokens: response.prompt_tokens, completion_tokens: response.completion_tokens, + reasoning_tokens: response.reasoning_tokens, total_tokens: response.total_tokens, cache_read_tokens: response.cache_read_tokens, cache_write_tokens: response.cache_write_tokens, @@ -423,6 +432,7 @@ async fn chat_completions( prompt_tokens: response.prompt_tokens, completion_tokens: response.completion_tokens, total_tokens: response.total_tokens, + reasoning_tokens: if response.reasoning_tokens > 0 { Some(response.reasoning_tokens) } else { None }, cache_read_tokens: if response.cache_read_tokens > 0 { Some(response.cache_read_tokens) } else { None }, cache_write_tokens: if response.cache_write_tokens > 0 { Some(response.cache_write_tokens) } else { None }, }), @@ -452,6 +462,7 @@ async fn chat_completions( model: model.clone(), prompt_tokens: 0, completion_tokens: 0, + reasoning_tokens: 0, total_tokens: 0, cache_read_tokens: 0, cache_write_tokens: 0, diff --git a/src/utils/streaming.rs b/src/utils/streaming.rs index 50a8075b..2885c9ef 100644 --- a/src/utils/streaming.rs +++ 
b/src/utils/streaming.rs @@ -96,11 +96,12 @@ where // Spawn a background task to log the completion tokio::spawn(async move { // Use real usage from the provider when available, otherwise fall back to estimates - let (prompt_tokens, completion_tokens, total_tokens, cache_read_tokens, cache_write_tokens) = + let (prompt_tokens, completion_tokens, reasoning_tokens, total_tokens, cache_read_tokens, cache_write_tokens) = if let Some(usage) = &real_usage { ( usage.prompt_tokens, usage.completion_tokens, + usage.reasoning_tokens, usage.total_tokens, usage.cache_read_tokens, usage.cache_write_tokens, @@ -109,6 +110,7 @@ where ( estimated_prompt_tokens, estimated_completion, + estimated_reasoning_tokens, estimated_prompt_tokens + estimated_completion, 0u32, 0u32, @@ -163,6 +165,7 @@ where model, prompt_tokens, completion_tokens, + reasoning_tokens, total_tokens, cache_read_tokens, cache_write_tokens, diff --git a/static/js/pages/logs.js b/static/js/pages/logs.js index ebc5c92e..1672e201 100644 --- a/static/js/pages/logs.js +++ b/static/js/pages/logs.js @@ -38,6 +38,24 @@ class LogsPage { const statusClass = log.status === 'success' ? 'success' : 'danger'; const timestamp = luxon.DateTime.fromISO(log.timestamp).toFormat('yyyy-MM-dd HH:mm:ss'); + let tokenDetails = `${log.tokens} total tokens`; + if (log.status === 'success') { + const parts = []; + parts.push(`${log.prompt_tokens} in`); + + let completionStr = `${log.completion_tokens} out`; + if (log.reasoning_tokens > 0) { + completionStr += ` (${log.reasoning_tokens} reasoning)`; + } + parts.push(completionStr); + + if (log.cache_read_tokens > 0) { + parts.push(`${log.cache_read_tokens} cache-hit`); + } + + tokenDetails = parts.join(', '); + } + return ` ${timestamp} @@ -55,7 +73,7 @@ class LogsPage {
${log.model} - ${log.tokens} tokens + ${tokenDetails} ${log.duration}ms ${log.error ? `
${log.error}
` : ''}