diff --git a/src/dashboard/models.rs b/src/dashboard/models.rs
index b75c3944..eb870e91 100644
--- a/src/dashboard/models.rs
+++ b/src/dashboard/models.rs
@@ -84,6 +84,8 @@ pub(super) async fn handle_get_models(
         let mut enabled = true;
         let mut prompt_cost = m_meta.cost.as_ref().map(|c| c.input).unwrap_or(0.0);
         let mut completion_cost = m_meta.cost.as_ref().map(|c| c.output).unwrap_or(0.0);
+        let cache_read_cost = m_meta.cost.as_ref().and_then(|c| c.cache_read);
+        let cache_write_cost = m_meta.cost.as_ref().and_then(|c| c.cache_write);
         let mut mapping = None::<String>;
 
         if let Some(row) = db_models.get(m_key) {
@@ -105,6 +107,8 @@ pub(super) async fn handle_get_models(
             "enabled": enabled,
             "prompt_cost": prompt_cost,
             "completion_cost": completion_cost,
+            "cache_read_cost": cache_read_cost,
+            "cache_write_cost": cache_write_cost,
             "mapping": mapping,
             "context_limit": m_meta.limit.as_ref().map(|l| l.context).unwrap_or(0),
             "output_limit": m_meta.limit.as_ref().map(|l| l.output).unwrap_or(0),
diff --git a/src/dashboard/usage.rs b/src/dashboard/usage.rs
index 2b522b18..5a0c7133 100644
--- a/src/dashboard/usage.rs
+++ b/src/dashboard/usage.rs
@@ -16,7 +16,9 @@ pub(super) async fn handle_usage_summary(State(state): State<AppState>) ->
             COUNT(*) as total_requests,
             COALESCE(SUM(total_tokens), 0) as total_tokens,
             COALESCE(SUM(cost), 0.0) as total_cost,
-            COUNT(DISTINCT client_id) as active_clients
+            COUNT(DISTINCT client_id) as active_clients,
+            COALESCE(SUM(cache_read_tokens), 0) as total_cache_read,
+            COALESCE(SUM(cache_write_tokens), 0) as total_cache_write
         FROM llm_requests
         "#,
     )
@@ -64,6 +66,8 @@ pub(super) async fn handle_usage_summary(State(state): State<AppState>) ->
             let total_tokens: i64 = t.get("total_tokens");
             let total_cost: f64 = t.get("total_cost");
             let active_clients: i64 = t.get("active_clients");
+            let total_cache_read: i64 = t.get("total_cache_read");
+            let total_cache_write: i64 = t.get("total_cache_write");
 
             let today_requests: i64 = d.get("today_requests");
             let today_cost: f64 = d.get("today_cost");
@@ -87,6 +91,8 @@ pub(super) async fn handle_usage_summary(State(state): State<AppState>) ->
                 "today_cost": today_cost,
                 "error_rate": error_rate,
                 "avg_response_time": avg_response_time,
+                "total_cache_read_tokens": total_cache_read,
+                "total_cache_write_tokens": total_cache_write,
             })))
         }
         _ => Json(ApiResponse::error("Failed to fetch usage statistics".to_string())),
@@ -208,7 +214,9 @@ pub(super) async fn handle_providers_usage(
             provider,
             COUNT(*) as requests,
             COALESCE(SUM(total_tokens), 0) as tokens,
-            COALESCE(SUM(cost), 0.0) as cost
+            COALESCE(SUM(cost), 0.0) as cost,
+            COALESCE(SUM(cache_read_tokens), 0) as cache_read,
+            COALESCE(SUM(cache_write_tokens), 0) as cache_write
         FROM llm_requests
         GROUP BY provider
         ORDER BY requests DESC
@@ -226,12 +234,16 @@ pub(super) async fn handle_providers_usage(
         let requests: i64 = row.get("requests");
         let tokens: i64 = row.get("tokens");
         let cost: f64 = row.get("cost");
+        let cache_read: i64 = row.get("cache_read");
+        let cache_write: i64 = row.get("cache_write");
 
         provider_usage.push(serde_json::json!({
             "provider": provider,
             "requests": requests,
             "tokens": tokens,
             "cost": cost,
+            "cache_read_tokens": cache_read,
+            "cache_write_tokens": cache_write,
         }));
     }
@@ -256,7 +268,9 @@ pub(super) async fn handle_detailed_usage(State(state): State<AppState>) ->
             model,
             COUNT(*) as requests,
             COALESCE(SUM(total_tokens), 0) as tokens,
-            COALESCE(SUM(cost), 0.0) as cost
+            COALESCE(SUM(cost), 0.0) as cost,
+            COALESCE(SUM(cache_read_tokens), 0) as cache_read,
+            COALESCE(SUM(cache_write_tokens), 0) as cache_write
         FROM llm_requests
         GROUP BY date, client_id, provider, model
         ORDER BY date DESC
@@ -279,6 +293,8 @@ pub(super) async fn handle_detailed_usage(State(state): State<AppState>) ->
                 "requests": row.get::<i64, _>("requests"),
                 "tokens": row.get::<i64, _>("tokens"),
                 "cost": row.get::<f64, _>("cost"),
+                "cache_read_tokens": row.get::<i64, _>("cache_read"),
+                "cache_write_tokens": row.get::<i64, _>("cache_write"),
             })
         })
         .collect();
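Note on the new aggregates: `SUM` over zero rows yields `NULL` in SQLite, so each cache column is wrapped in `COALESCE` before the typed `row.get` reads above can run. A minimal standalone sketch of the failure mode being avoided (not part of this patch; assumes sqlx with the `sqlite` and `runtime-tokio` features):

```rust
use sqlx::{Row, SqlitePool};

#[tokio::main]
async fn main() -> Result<(), sqlx::Error> {
    let pool = SqlitePool::connect("sqlite::memory:").await?;
    sqlx::query("CREATE TABLE llm_requests (cache_read_tokens INTEGER DEFAULT 0)")
        .execute(&pool)
        .await?;

    // With no rows, SUM(cache_read_tokens) is NULL; COALESCE maps it to 0 so the
    // typed i64 read below cannot fail on a fresh database.
    let row = sqlx::query(
        "SELECT COALESCE(SUM(cache_read_tokens), 0) AS total_cache_read FROM llm_requests",
    )
    .fetch_one(&pool)
    .await?;
    assert_eq!(row.get::<i64, _>("total_cache_read"), 0);
    Ok(())
}
```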
diff --git a/src/database/mod.rs b/src/database/mod.rs
index c352abfb..1c796a40 100644
--- a/src/database/mod.rs
+++ b/src/database/mod.rs
@@ -135,6 +135,14 @@ async fn run_migrations(pool: &DbPool) -> Result<()> {
         .execute(pool)
         .await;
 
+    // Add cache token columns if they don't exist (migration for existing DBs)
+    let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN cache_read_tokens INTEGER DEFAULT 0")
+        .execute(pool)
+        .await;
+    let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN cache_write_tokens INTEGER DEFAULT 0")
+        .execute(pool)
+        .await;
+
     // Insert default admin user if none exists (default password: admin)
     let user_count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM users").fetch_one(pool).await?;
diff --git a/src/logging/mod.rs b/src/logging/mod.rs
index d80fdd10..b1bd1c42 100644
--- a/src/logging/mod.rs
+++ b/src/logging/mod.rs
@@ -16,6 +16,8 @@ pub struct RequestLog {
     pub prompt_tokens: u32,
     pub completion_tokens: u32,
     pub total_tokens: u32,
+    pub cache_read_tokens: u32,
+    pub cache_write_tokens: u32,
     pub cost: f64,
     pub has_images: bool,
     pub status: String, // "success", "error"
@@ -75,8 +77,8 @@ impl RequestLogger {
         sqlx::query(
             r#"
             INSERT INTO llm_requests
-            (timestamp, client_id, provider, model, prompt_tokens, completion_tokens, total_tokens, cost, has_images, status, error_message, duration_ms, request_body, response_body)
-            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+            (timestamp, client_id, provider, model, prompt_tokens, completion_tokens, total_tokens, cache_read_tokens, cache_write_tokens, cost, has_images, status, error_message, duration_ms, request_body, response_body)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
             "#,
         )
         .bind(log.timestamp)
@@ -86,6 +88,8 @@ impl RequestLogger {
         .bind(log.prompt_tokens as i64)
         .bind(log.completion_tokens as i64)
         .bind(log.total_tokens as i64)
+        .bind(log.cache_read_tokens as i64)
+        .bind(log.cache_write_tokens as i64)
        .bind(log.cost)
         .bind(log.has_images)
         .bind(log.status)
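On the ignored-error migration above: SQLite has no `ADD COLUMN IF NOT EXISTS`, so on an already-migrated database the `ALTER TABLE` fails with a "duplicate column name" error, which the discarded `Result` (`let _ =`) deliberately swallows, making the migration idempotent. The same idea with the error handled explicitly, as a hypothetical helper that is not in this patch:

```rust
use sqlx::SqlitePool;

/// Attempt a single ALTER TABLE and treat "duplicate column name" as a no-op.
async fn add_column_if_missing(pool: &SqlitePool, ddl: &str) -> Result<(), sqlx::Error> {
    match sqlx::query(ddl).execute(pool).await {
        Ok(_) => Ok(()),
        // Expected on databases that already ran this migration.
        Err(e) if e.to_string().contains("duplicate column name") => Ok(()),
        Err(e) => Err(e),
    }
}
```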
"#, ) .bind(log.timestamp) @@ -86,6 +88,8 @@ impl RequestLogger { .bind(log.prompt_tokens as i64) .bind(log.completion_tokens as i64) .bind(log.total_tokens as i64) + .bind(log.cache_read_tokens as i64) + .bind(log.cache_write_tokens as i64) .bind(log.cost) .bind(log.has_images) .bind(log.status) diff --git a/src/models/mod.rs b/src/models/mod.rs index d632d9d7..2eea0b8f 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -152,6 +152,10 @@ pub struct Usage { pub prompt_tokens: u32, pub completion_tokens: u32, pub total_tokens: u32, + #[serde(skip_serializing_if = "Option::is_none")] + pub cache_read_tokens: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub cache_write_tokens: Option, } // ========== Streaming Response Structs ========== diff --git a/src/providers/deepseek.rs b/src/providers/deepseek.rs index 905452c7..611294cd 100644 --- a/src/providers/deepseek.rs +++ b/src/providers/deepseek.rs @@ -90,12 +90,16 @@ impl super::Provider for DeepSeekProvider { model: &str, prompt_tokens: u32, completion_tokens: u32, + cache_read_tokens: u32, + cache_write_tokens: u32, registry: &crate::models::registry::ModelRegistry, ) -> f64 { helpers::calculate_cost_with_registry( model, prompt_tokens, completion_tokens, + cache_read_tokens, + cache_write_tokens, registry, &self.pricing, 0.14, diff --git a/src/providers/gemini.rs b/src/providers/gemini.rs index 53ad336a..357a7866 100644 --- a/src/providers/gemini.rs +++ b/src/providers/gemini.rs @@ -119,6 +119,8 @@ struct GeminiUsageMetadata { candidates_token_count: u32, #[serde(default)] total_token_count: u32, + #[serde(default)] + cached_content_token_count: u32, } #[derive(Debug, Deserialize)] @@ -454,6 +456,11 @@ impl super::Provider for GeminiProvider { .as_ref() .map(|u| u.total_token_count) .unwrap_or(0); + let cache_read_tokens = gemini_response + .usage_metadata + .as_ref() + .map(|u| u.cached_content_token_count) + .unwrap_or(0); Ok(ProviderResponse { content, @@ -462,6 +469,8 @@ impl super::Provider for GeminiProvider { prompt_tokens, completion_tokens, total_tokens, + cache_read_tokens, + cache_write_tokens: 0, // Gemini doesn't report cache writes separately model, }) } @@ -475,12 +484,16 @@ impl super::Provider for GeminiProvider { model: &str, prompt_tokens: u32, completion_tokens: u32, + cache_read_tokens: u32, + cache_write_tokens: u32, registry: &crate::models::registry::ModelRegistry, ) -> f64 { super::helpers::calculate_cost_with_registry( model, prompt_tokens, completion_tokens, + cache_read_tokens, + cache_write_tokens, registry, &self.pricing, 0.075, @@ -537,6 +550,17 @@ impl super::Provider for GeminiProvider { let gemini_response: GeminiResponse = serde_json::from_str(&msg.data) .map_err(|e| AppError::ProviderError(format!("Failed to parse stream chunk: {}", e)))?; + // Extract usage from usageMetadata if present (reported on every/last chunk) + let stream_usage = gemini_response.usage_metadata.as_ref().map(|u| { + super::StreamUsage { + prompt_tokens: u.prompt_token_count, + completion_tokens: u.candidates_token_count, + total_tokens: u.total_token_count, + cache_read_tokens: u.cached_content_token_count, + cache_write_tokens: 0, + } + }); + if let Some(candidate) = gemini_response.candidates.first() { let content = candidate .content @@ -561,6 +585,7 @@ impl super::Provider for GeminiProvider { finish_reason, tool_calls, model: model.clone(), + usage: stream_usage, }; } } diff --git a/src/providers/grok.rs b/src/providers/grok.rs index 858c83b9..3da7fa89 100644 --- a/src/providers/grok.rs +++ 
diff --git a/src/providers/grok.rs b/src/providers/grok.rs
index 858c83b9..3da7fa89 100644
--- a/src/providers/grok.rs
+++ b/src/providers/grok.rs
@@ -86,12 +86,16 @@ impl super::Provider for GrokProvider {
         model: &str,
         prompt_tokens: u32,
         completion_tokens: u32,
+        cache_read_tokens: u32,
+        cache_write_tokens: u32,
         registry: &crate::models::registry::ModelRegistry,
     ) -> f64 {
         helpers::calculate_cost_with_registry(
             model,
             prompt_tokens,
             completion_tokens,
+            cache_read_tokens,
+            cache_write_tokens,
             registry,
             &self.pricing,
             5.0,
diff --git a/src/providers/helpers.rs b/src/providers/helpers.rs
index 81bc214d..4a048323 100644
--- a/src/providers/helpers.rs
+++ b/src/providers/helpers.rs
@@ -1,4 +1,4 @@
-use super::{ProviderResponse, ProviderStreamChunk};
+use super::{ProviderResponse, ProviderStreamChunk, StreamUsage};
 use crate::errors::AppError;
 use crate::models::{ContentPart, ToolCall, ToolCallDelta, UnifiedMessage, UnifiedRequest};
 use futures::stream::{BoxStream, StreamExt};
@@ -156,6 +156,8 @@ pub async fn messages_to_openai_json_text_only(
 
 /// Build an OpenAI-compatible request body from a UnifiedRequest and pre-converted messages.
 /// Includes tools and tool_choice when present.
+/// When streaming, adds `stream_options.include_usage: true` so providers report
+/// token counts in the final SSE chunk.
 pub fn build_openai_body(
     request: &UnifiedRequest,
     messages_json: Vec<serde_json::Value>,
@@ -167,6 +169,10 @@ pub fn build_openai_body(
         "stream": stream,
     });
 
+    if stream {
+        body["stream_options"] = serde_json::json!({ "include_usage": true });
+    }
+
     if let Some(temp) = request.temperature {
         body["temperature"] = serde_json::json!(temp);
     }
@@ -185,6 +191,9 @@ pub fn build_openai_body(
 
 /// Parse an OpenAI-compatible chat completion response JSON into a ProviderResponse.
 /// Extracts tool_calls from the message when present.
+/// Extracts cache token counts from:
+/// - OpenAI/Grok: `usage.prompt_tokens_details.cached_tokens`
+/// - DeepSeek: `usage.prompt_cache_hit_tokens` / `usage.prompt_cache_miss_tokens`
 pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<ProviderResponse> {
     let choice = resp_json["choices"]
         .get(0)
@@ -204,6 +213,17 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<ProviderResponse> {
+    // OpenAI/Grok put cached prompt tokens under prompt_tokens_details;
+    // DeepSeek reports an explicit prompt_cache_hit_tokens count instead.
+    let cache_read_tokens = resp_json["usage"]["prompt_tokens_details"]["cached_tokens"]
+        .as_u64()
+        .or_else(|| resp_json["usage"]["prompt_cache_hit_tokens"].as_u64())
+        .unwrap_or(0) as u32;
+    // Neither format reports cache writes in the response body.
+    let cache_write_tokens = 0;
+
     Ok(ProviderResponse {
         ...
         prompt_tokens,
         completion_tokens,
         total_tokens,
+        cache_read_tokens,
+        cache_write_tokens,
         model,
     })
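The two usage formats named in the doc comment above differ in shape; a standalone sketch of the fallback order (sample payloads invented, field paths per the comment):

```rust
use serde_json::{json, Value};

// Prefer the OpenAI/Grok location, then fall back to DeepSeek's field.
fn cache_read_tokens(usage: &Value) -> u32 {
    usage["prompt_tokens_details"]["cached_tokens"]
        .as_u64()
        .or_else(|| usage["prompt_cache_hit_tokens"].as_u64())
        .unwrap_or(0) as u32
}

fn main() {
    let openai = json!({ "prompt_tokens_details": { "cached_tokens": 512 } });
    let deepseek = json!({ "prompt_cache_hit_tokens": 256, "prompt_cache_miss_tokens": 64 });
    assert_eq!(cache_read_tokens(&openai), 512);
    assert_eq!(cache_read_tokens(&deepseek), 256);
    assert_eq!(cache_read_tokens(&json!({})), 0); // no cache info reported
}
```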
diff --git a/src/providers/mod.rs b/src/providers/mod.rs
--- a/src/providers/mod.rs
+++ b/src/providers/mod.rs
@@ ... @@ pub trait Provider {
     fn estimate_tokens(&self, request: &UnifiedRequest) -> Result<u32>;
 
-    /// Calculate cost based on token usage and model using the registry
+    /// Calculate cost based on token usage and model using the registry.
+    /// `cache_read_tokens` / `cache_write_tokens` allow cache-aware pricing
+    /// when the registry provides `cache_read` / `cache_write` rates.
     fn calculate_cost(
         &self,
         model: &str,
         prompt_tokens: u32,
         completion_tokens: u32,
+        cache_read_tokens: u32,
+        cache_write_tokens: u32,
         registry: &crate::models::registry::ModelRegistry,
     ) -> f64;
 }
 
@@ -54,9 +58,21 @@ pub struct ProviderResponse {
     pub prompt_tokens: u32,
     pub completion_tokens: u32,
     pub total_tokens: u32,
+    pub cache_read_tokens: u32,
+    pub cache_write_tokens: u32,
     pub model: String,
 }
 
+/// Usage data from the final streaming chunk (when providers report real token counts).
+#[derive(Debug, Clone, Default)]
+pub struct StreamUsage {
+    pub prompt_tokens: u32,
+    pub completion_tokens: u32,
+    pub total_tokens: u32,
+    pub cache_read_tokens: u32,
+    pub cache_write_tokens: u32,
+}
+
 #[derive(Debug, Clone)]
 pub struct ProviderStreamChunk {
     pub content: String,
@@ -64,6 +80,8 @@ pub struct ProviderStreamChunk {
     pub finish_reason: Option<String>,
     pub tool_calls: Option<Vec<ToolCallDelta>>,
     pub model: String,
+    /// Populated only on the final chunk when providers report usage (e.g. stream_options.include_usage).
+    pub usage: Option<StreamUsage>,
 }
 
 use tokio::sync::RwLock;
@@ -299,6 +317,8 @@ pub mod placeholder {
         _model: &str,
         _prompt_tokens: u32,
         _completion_tokens: u32,
+        _cache_read_tokens: u32,
+        _cache_write_tokens: u32,
         _registry: &crate::models::registry::ModelRegistry,
     ) -> f64 {
         0.0
diff --git a/src/providers/ollama.rs b/src/providers/ollama.rs
index 8f5fed90..90b63d10 100644
--- a/src/providers/ollama.rs
+++ b/src/providers/ollama.rs
@@ -95,12 +95,16 @@ impl super::Provider for OllamaProvider {
         model: &str,
         prompt_tokens: u32,
         completion_tokens: u32,
+        cache_read_tokens: u32,
+        cache_write_tokens: u32,
         registry: &crate::models::registry::ModelRegistry,
     ) -> f64 {
         helpers::calculate_cost_with_registry(
             model,
             prompt_tokens,
             completion_tokens,
+            cache_read_tokens,
+            cache_write_tokens,
             registry,
             &self.pricing,
             0.0,
diff --git a/src/providers/openai.rs b/src/providers/openai.rs
index b424065d..15001f0a 100644
--- a/src/providers/openai.rs
+++ b/src/providers/openai.rs
@@ -86,12 +86,16 @@ impl super::Provider for OpenAIProvider {
         model: &str,
         prompt_tokens: u32,
         completion_tokens: u32,
+        cache_read_tokens: u32,
+        cache_write_tokens: u32,
         registry: &crate::models::registry::ModelRegistry,
     ) -> f64 {
         helpers::calculate_cost_with_registry(
             model,
             prompt_tokens,
             completion_tokens,
+            cache_read_tokens,
+            cache_write_tokens,
             registry,
             &self.pricing,
             0.15,
diff --git a/src/server/mod.rs b/src/server/mod.rs
index 69666bf1..962131d0 100644
--- a/src/server/mod.rs
+++ b/src/server/mod.rs
@@ -97,18 +97,21 @@ async fn get_model_cost(
     model: &str,
     prompt_tokens: u32,
     completion_tokens: u32,
+    cache_read_tokens: u32,
+    cache_write_tokens: u32,
     provider: &Arc<dyn Provider>,
     state: &AppState,
 ) -> f64 {
     // Check in-memory cache for cost overrides (no SQLite hit)
     if let Some(cached) = state.model_config_cache.get(model).await {
         if let (Some(p), Some(c)) = (cached.prompt_cost_per_m, cached.completion_cost_per_m) {
+            // Manual overrides don't have cache-specific rates, so use the simple formula
             return (prompt_tokens as f64 * p / 1_000_000.0)
                 + (completion_tokens as f64 * c / 1_000_000.0);
         }
     }
 
-    // Fallback to provider's registry-based calculation
-    provider.calculate_cost(model, prompt_tokens, completion_tokens, &state.model_registry)
+    // Fall back to the provider's registry-based calculation (cache-aware)
+    provider.calculate_cost(model, prompt_tokens, completion_tokens, cache_read_tokens, cache_write_tokens, &state.model_registry)
 }
 
 async fn chat_completions(
@@ -281,6 +284,8 @@ async fn chat_completions(
         &response.model,
         response.prompt_tokens,
         response.completion_tokens,
+        response.cache_read_tokens,
+        response.cache_write_tokens,
         &provider,
         &state,
     )
@@ -294,6 +299,8 @@ async fn chat_completions(
         prompt_tokens: response.prompt_tokens,
         completion_tokens: response.completion_tokens,
         total_tokens: response.total_tokens,
+        cache_read_tokens: response.cache_read_tokens,
+        cache_write_tokens: response.cache_write_tokens,
         cost,
         has_images,
         status: "success".to_string(),
@@ -340,6 +347,8 @@ async fn chat_completions(
         prompt_tokens: response.prompt_tokens,
         completion_tokens: response.completion_tokens,
         total_tokens: response.total_tokens,
+        cache_read_tokens: if response.cache_read_tokens > 0 { Some(response.cache_read_tokens) } else { None },
+        cache_write_tokens: if response.cache_write_tokens > 0 { Some(response.cache_write_tokens) } else { None },
     }),
 };
@@ -368,6 +377,8 @@ async fn chat_completions(
         prompt_tokens: 0,
         completion_tokens: 0,
         total_tokens: 0,
+        cache_read_tokens: 0,
+        cache_write_tokens: 0,
         cost: 0.0,
         has_images: false,
         status: "error".to_string(),
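`calculate_cost_with_registry` itself is not shown in this diff; the cache-aware math it is expected to apply looks roughly like the sketch below, under the assumption that cached prompt tokens are billed at the `cache_read` rate instead of the input rate, with all rates per 1M tokens:

```rust
fn cache_aware_cost(
    prompt_tokens: u32,
    completion_tokens: u32,
    cache_read_tokens: u32,
    cache_write_tokens: u32,
    input_rate: f64,       // $/1M uncached prompt tokens
    output_rate: f64,      // $/1M completion tokens
    cache_read_rate: f64,  // $/1M cached prompt tokens
    cache_write_rate: f64, // $/1M tokens written to cache
) -> f64 {
    // Cached tokens are a subset of the prompt, so bill them separately
    // and charge the input rate only on the uncached remainder.
    let uncached_prompt = prompt_tokens.saturating_sub(cache_read_tokens) as f64;
    (uncached_prompt * input_rate
        + cache_read_tokens as f64 * cache_read_rate
        + cache_write_tokens as f64 * cache_write_rate
        + completion_tokens as f64 * output_rate)
        / 1_000_000.0
}

fn main() {
    // 1,000 prompt tokens, 768 of them cached; rates are invented examples.
    let cost = cache_aware_cost(1_000, 200, 768, 0, 0.15, 0.60, 0.075, 0.0);
    assert!((cost - (232.0 * 0.15 + 768.0 * 0.075 + 200.0 * 0.60) / 1e6).abs() < 1e-12);
}
```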
"error".to_string(), diff --git a/src/utils/streaming.rs b/src/utils/streaming.rs index db1cb893..123eba2c 100644 --- a/src/utils/streaming.rs +++ b/src/utils/streaming.rs @@ -2,7 +2,7 @@ use crate::client::ClientManager; use crate::errors::AppError; use crate::logging::{RequestLog, RequestLogger}; use crate::models::ToolCall; -use crate::providers::{Provider, ProviderStreamChunk}; +use crate::providers::{Provider, ProviderStreamChunk, StreamUsage}; use crate::state::ModelConfigCache; use crate::utils::tokens::estimate_completion_tokens; use futures::stream::Stream; @@ -33,6 +33,8 @@ pub struct AggregatingStream { accumulated_content: String, accumulated_reasoning: String, accumulated_tool_calls: Vec, + /// Real usage data from the provider's final stream chunk (when available). + real_usage: Option, logger: Arc, client_manager: Arc, model_registry: Arc, @@ -56,6 +58,7 @@ where accumulated_content: String::new(), accumulated_reasoning: String::new(), accumulated_tool_calls: Vec::new(), + real_usage: None, logger: config.logger, client_manager: config.client_manager, model_registry: config.model_registry, @@ -78,33 +81,68 @@ where let logger = self.logger.clone(); let client_manager = self.client_manager.clone(); let provider = self.provider.clone(); - let prompt_tokens = self.prompt_tokens; + let estimated_prompt_tokens = self.prompt_tokens; let has_images = self.has_images; let registry = self.model_registry.clone(); let config_cache = self.model_config_cache.clone(); + let real_usage = self.real_usage.take(); // Estimate completion tokens (including reasoning if present) - let content_tokens = estimate_completion_tokens(&self.accumulated_content, &model); - let reasoning_tokens = if !self.accumulated_reasoning.is_empty() { + let estimated_content_tokens = estimate_completion_tokens(&self.accumulated_content, &model); + let estimated_reasoning_tokens = if !self.accumulated_reasoning.is_empty() { estimate_completion_tokens(&self.accumulated_reasoning, &model) } else { 0 }; - let completion_tokens = content_tokens + reasoning_tokens; - let total_tokens = prompt_tokens + completion_tokens; + let estimated_completion = estimated_content_tokens + estimated_reasoning_tokens; // Spawn a background task to log the completion tokio::spawn(async move { + // Use real usage from the provider when available, otherwise fall back to estimates + let (prompt_tokens, completion_tokens, total_tokens, cache_read_tokens, cache_write_tokens) = + if let Some(usage) = &real_usage { + ( + usage.prompt_tokens, + usage.completion_tokens, + usage.total_tokens, + usage.cache_read_tokens, + usage.cache_write_tokens, + ) + } else { + ( + estimated_prompt_tokens, + estimated_completion, + estimated_prompt_tokens + estimated_completion, + 0u32, + 0u32, + ) + }; + // Check in-memory cache for cost overrides (no SQLite hit) let cost = if let Some(cached) = config_cache.get(&model).await { if let (Some(p), Some(c)) = (cached.prompt_cost_per_m, cached.completion_cost_per_m) { + // Cost override doesn't have cache-aware pricing, use simple formula (prompt_tokens as f64 * p / 1_000_000.0) + (completion_tokens as f64 * c / 1_000_000.0) } else { - provider.calculate_cost(&model, prompt_tokens, completion_tokens, ®istry) + provider.calculate_cost( + &model, + prompt_tokens, + completion_tokens, + cache_read_tokens, + cache_write_tokens, + ®istry, + ) } } else { - provider.calculate_cost(&model, prompt_tokens, completion_tokens, ®istry) + provider.calculate_cost( + &model, + prompt_tokens, + completion_tokens, + cache_read_tokens, 
diff --git a/static/index.html b/static/index.html
index f73cb3e4..913d6b64 100644
--- a/static/index.html
+++ b/static/index.html
@@ -4,7 +4,7 @@
     <title>LLM Proxy Gateway - Admin Dashboard</title>
-    [tag markup stripped in extraction]
+    [tag markup stripped in extraction]
@@ -166,19 +166,19 @@
-    [14 removed lines; markup stripped in extraction]
+    [14 added lines; markup stripped in extraction]
\ No newline at end of file
diff --git a/static/js/api.js b/static/js/api.js
index 1ef094a2..140e9511 100644
--- a/static/js/api.js
+++ b/static/js/api.js
@@ -17,61 +17,25 @@ class ApiClient {
             headers['Authorization'] = `Bearer ${window.authManager.token}`;
         }
 
+        const response = await fetch(url, {
+            ...options,
+            headers
+        });
+
+        const text = await response.text();
+
+        let result;
         try {
-            console.log(`[API] Fetching ${url}...`);
-            const response = await fetch(url, {
-                ...options,
-                headers
-            });
-
-            console.log(`[API] ${url} → status=${response.status} ok=${response.ok} type=${response.headers.get('content-type')}`);
-
-            const text = await response.text();
-            console.log(`[API] ${url} → body length=${text.length}, first 200 chars:`, text.substring(0, 200));
-
-            let result;
-            try {
-                result = JSON.parse(text);
-            } catch (parseErr) {
-                const msg = `JSON parse failed for ${url}: ${parseErr.message}. Body: ${text.substring(0, 300)}`;
-                console.error(`[API] ${msg}`);
-                this._addDebugEntry(url, 'JSON_PARSE_ERROR', msg);
-                throw new Error(msg);
-            }
-
-            if (!response.ok || !result.success) {
-                const msg = `API error for ${url}: ok=${response.ok} success=${result.success} error=${result.error} status=${response.status}`;
-                console.error(`[API] ${msg}`);
-                this._addDebugEntry(url, 'API_ERROR', msg);
-                throw new Error(result.error || `HTTP error! status: ${response.status}`);
-            }
-
-            console.log(`[API] ${url} → SUCCESS, data keys:`, result.data ? Object.keys(result.data) : 'null');
-            return result.data;
-        } catch (error) {
-            console.error(`[API] Request failed (${path}):`, error);
-            this._addDebugEntry(url, 'EXCEPTION', error.message);
-            throw error;
+            result = JSON.parse(text);
+        } catch (parseErr) {
+            throw new Error(`JSON parse failed for ${url}: ${parseErr.message}`);
         }
-    }
 
-    // Visible on-page debug panel for diagnosing fetch failures
-    _addDebugEntry(url, status, detail) {
-        let panel = document.getElementById('api-debug-panel');
-        if (!panel) {
-            panel = document.createElement('div');
-            panel.id = 'api-debug-panel';
-            panel.style.cssText = 'position:fixed;bottom:0;left:0;right:0;max-height:200px;overflow-y:auto;background:#1d2021;color:#fbf1c7;font-family:monospace;font-size:11px;padding:8px;z-index:99999;border-top:2px solid #cc241d;';
-            const title = document.createElement('div');
-            title.style.cssText = 'font-weight:bold;margin-bottom:4px;color:#fb4934;';
-            title.textContent = 'API Debug Panel (remove after fixing)';
-            panel.appendChild(title);
-            document.body.appendChild(panel);
+        if (!response.ok || !result.success) {
+            throw new Error(result.error || `HTTP error! status: ${response.status}`);
         }
-        const entry = document.createElement('div');
-        entry.style.cssText = 'margin:2px 0;padding:2px 4px;background:#282828;border-left:3px solid ' + (status === 'EXCEPTION' ? '#fb4934' : '#fabd2f') + ';';
-        entry.textContent = `[${status}] ${url}: ${detail}`;
-        panel.appendChild(entry);
+
+        return result.data;
     }
 
     async get(path) {
diff --git a/static/js/dashboard.js b/static/js/dashboard.js
index fd88935c..41d19636 100644
--- a/static/js/dashboard.js
+++ b/static/js/dashboard.js
@@ -348,7 +348,7 @@ class Dashboard {
-                        <tr><th>Date</th><th>Client</th><th>Provider</th><th>Model</th><th>Requests</th><th>Tokens</th><th>Cost</th></tr>
+                        <tr><th>Date</th><th>Client</th><th>Provider</th><th>Model</th><th>Requests</th><th>Tokens</th><th>Cache Read</th><th>Cache Write</th><th>Cost</th></tr>
@@ -383,7 +383,7 @@ class Dashboard {
-                        <tr><th>Provider</th><th>Model</th><th>Input Cost</th><th>Output Cost</th><th>Last Updated</th></tr>
+                        <tr><th>Provider</th><th>Model</th><th>Input Cost</th><th>Output Cost</th><th>Cache Read</th><th>Cache Write</th></tr>
diff --git a/static/js/pages/analytics.js b/static/js/pages/analytics.js
index a6a6c6de..6f926462 100644
--- a/static/js/pages/analytics.js
+++ b/static/js/pages/analytics.js
@@ -184,21 +184,27 @@ class AnalyticsPage {
         if (!tableBody) return;
 
         if (data.length === 0) {
-            tableBody.innerHTML = '<tr><td colspan="7">No historical data found</td></tr>';
+            tableBody.innerHTML = '<tr><td colspan="9">No historical data found</td></tr>';
             return;
         }
 
-        tableBody.innerHTML = data.map(row => `
-            <tr>
-                <td>${row.date}</td>
-                <td>${row.client}</td>
-                <td>${row.provider}</td>
-                <td>${row.model}</td>
-                <td>${row.requests.toLocaleString()}</td>
-                <td>${window.api.formatNumber(row.tokens)}</td>
-                <td>${window.api.formatCurrency(row.cost)}</td>
-            </tr>
-        `).join('');
+        tableBody.innerHTML = data.map(row => {
+            const cacheRead = row.cache_read_tokens || 0;
+            const cacheWrite = row.cache_write_tokens || 0;
+            return `
+                <tr>
+                    <td>${row.date}</td>
+                    <td>${row.client}</td>
+                    <td>${row.provider}</td>
+                    <td>${row.model}</td>
+                    <td>${row.requests.toLocaleString()}</td>
+                    <td>${window.api.formatNumber(row.tokens)}</td>
+                    <td>${window.api.formatNumber(cacheRead)}</td>
+                    <td>${window.api.formatNumber(cacheWrite)}</td>
+                    <td>${window.api.formatCurrency(row.cost)}</td>
+                </tr>
+            `;
+        }).join('');
     }
 
     setupEventListeners() {
diff --git a/static/js/pages/costs.js b/static/js/pages/costs.js
index d84a1257..034ee167 100644
--- a/static/js/pages/costs.js
+++ b/static/js/pages/costs.js
@@ -31,7 +31,10 @@ class CostsPage {
             avgDailyCost: data.total_cost / 30, // Simplified
             costTrend: 5.2,
             budgetUsed: Math.min(Math.round((data.total_cost / 100) * 100), 100), // Assuming $100 budget
-            projectedMonthEnd: data.today_cost * 30
+            projectedMonthEnd: data.today_cost * 30,
+            cacheReadTokens: data.total_cache_read_tokens || 0,
+            cacheWriteTokens: data.total_cache_write_tokens || 0,
+            totalTokens: data.total_tokens || 0,
         };
 
         this.renderCostStats();
@@ -44,6 +47,10 @@ class CostsPage {
     renderCostStats() {
         const container = document.getElementById('cost-stats');
         if (!container) return;
 
+        const cacheHitRate = this.costData.totalTokens > 0
+            ? ((this.costData.cacheReadTokens / this.costData.totalTokens) * 100).toFixed(1)
+            : '0.0';
+
         container.innerHTML = `
@@ -74,6 +81,19 @@ class CostsPage {
+                [stat-card markup stripped in extraction; recoverable template content:]
+                ${cacheHitRate}%
+                Cache Hit Rate
+                ${window.api.formatNumber(this.costData.cacheReadTokens)} cached tokens
@@ -181,15 +201,24 @@ class CostsPage {
         const tableBody = document.querySelector('#pricing-table tbody');
         if (!tableBody) return;
 
-        tableBody.innerHTML = data.map(row => `
-            <tr>
-                <td>${row.provider.toUpperCase()}</td>
-                <td>${row.id}</td>
-                <td>${window.api.formatCurrency(row.prompt_cost)} / 1M</td>
-                <td>${window.api.formatCurrency(row.completion_cost)} / 1M</td>
-                <td>Now</td>
-            </tr>
-        `).join('');
+        tableBody.innerHTML = data.map(row => {
+            const cacheRead = row.cache_read_cost != null
+                ? `${window.api.formatCurrency(row.cache_read_cost)} / 1M`
+                : '--';
+            const cacheWrite = row.cache_write_cost != null
+                ? `${window.api.formatCurrency(row.cache_write_cost)} / 1M`
+                : '--';
+            return `
+                <tr>
+                    <td>${row.provider.toUpperCase()}</td>
+                    <td>${row.id}</td>
+                    <td>${window.api.formatCurrency(row.prompt_cost)} / 1M</td>
+                    <td>${window.api.formatCurrency(row.completion_cost)} / 1M</td>
+                    <td>${cacheRead}</td>
+                    <td>${cacheWrite}</td>
+                </tr>
+            `;
+        }).join('');
     }
 
     setupEventListeners() {
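A quick sanity check on the hit-rate math used in `renderCostStats` above, as a Rust mirror of the JS expression (sample numbers invented):

```rust
fn cache_hit_rate(cache_read: u64, total: u64) -> f64 {
    // Same guard as the JS ternary: report 0.0 when there is no traffic yet.
    if total == 0 { 0.0 } else { cache_read as f64 / total as f64 * 100.0 }
}

fn main() {
    assert_eq!(format!("{:.1}", cache_hit_rate(0, 0)), "0.0");
    assert_eq!(format!("{:.1}", cache_hit_rate(768, 1020)), "75.3");
}
```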