feat: add cache token tracking and cache-aware cost calculation
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled

Track cache_read_tokens and cache_write_tokens end-to-end: parse from
provider responses (OpenAI, DeepSeek, Grok, Gemini), persist to SQLite,
apply cache-aware pricing from the model registry, and surface in API
responses and the dashboard.

- Add cache fields to ProviderResponse, StreamUsage, RequestLog structs
- Parse cached_tokens (OpenAI/Grok), prompt_cache_hit/miss (DeepSeek),
  cachedContentTokenCount (Gemini) from provider responses
- Send stream_options.include_usage for streaming; capture real usage
  from final SSE chunk in AggregatingStream
- ALTER TABLE migration for cache_read_tokens/cache_write_tokens columns
- Cache-aware cost formula using registry cache_read/cache_write rates
- Update Provider trait calculate_cost signature across all providers
- Add cache_read_tokens/cache_write_tokens to Usage API response
- Dashboard: cache hit rate card, cache columns in pricing and usage
  tables, cache token aggregation in SQL queries
- Remove API debug panel and verbose console logging from api.js
- Bump static asset cache-bust to v5
This commit is contained in:
2026-03-02 14:45:21 -05:00
parent 232f092f27
commit db5824f0fb
19 changed files with 352 additions and 109 deletions

View File

@@ -84,6 +84,8 @@ pub(super) async fn handle_get_models(
let mut enabled = true; let mut enabled = true;
let mut prompt_cost = m_meta.cost.as_ref().map(|c| c.input).unwrap_or(0.0); let mut prompt_cost = m_meta.cost.as_ref().map(|c| c.input).unwrap_or(0.0);
let mut completion_cost = m_meta.cost.as_ref().map(|c| c.output).unwrap_or(0.0); let mut completion_cost = m_meta.cost.as_ref().map(|c| c.output).unwrap_or(0.0);
let cache_read_cost = m_meta.cost.as_ref().and_then(|c| c.cache_read);
let cache_write_cost = m_meta.cost.as_ref().and_then(|c| c.cache_write);
let mut mapping = None::<String>; let mut mapping = None::<String>;
if let Some(row) = db_models.get(m_key) { if let Some(row) = db_models.get(m_key) {
@@ -105,6 +107,8 @@ pub(super) async fn handle_get_models(
"enabled": enabled, "enabled": enabled,
"prompt_cost": prompt_cost, "prompt_cost": prompt_cost,
"completion_cost": completion_cost, "completion_cost": completion_cost,
"cache_read_cost": cache_read_cost,
"cache_write_cost": cache_write_cost,
"mapping": mapping, "mapping": mapping,
"context_limit": m_meta.limit.as_ref().map(|l| l.context).unwrap_or(0), "context_limit": m_meta.limit.as_ref().map(|l| l.context).unwrap_or(0),
"output_limit": m_meta.limit.as_ref().map(|l| l.output).unwrap_or(0), "output_limit": m_meta.limit.as_ref().map(|l| l.output).unwrap_or(0),

View File

@@ -16,7 +16,9 @@ pub(super) async fn handle_usage_summary(State(state): State<DashboardState>) ->
COUNT(*) as total_requests, COUNT(*) as total_requests,
COALESCE(SUM(total_tokens), 0) as total_tokens, COALESCE(SUM(total_tokens), 0) as total_tokens,
COALESCE(SUM(cost), 0.0) as total_cost, COALESCE(SUM(cost), 0.0) as total_cost,
COUNT(DISTINCT client_id) as active_clients COUNT(DISTINCT client_id) as active_clients,
COALESCE(SUM(cache_read_tokens), 0) as total_cache_read,
COALESCE(SUM(cache_write_tokens), 0) as total_cache_write
FROM llm_requests FROM llm_requests
"#, "#,
) )
@@ -64,6 +66,8 @@ pub(super) async fn handle_usage_summary(State(state): State<DashboardState>) ->
let total_tokens: i64 = t.get("total_tokens"); let total_tokens: i64 = t.get("total_tokens");
let total_cost: f64 = t.get("total_cost"); let total_cost: f64 = t.get("total_cost");
let active_clients: i64 = t.get("active_clients"); let active_clients: i64 = t.get("active_clients");
let total_cache_read: i64 = t.get("total_cache_read");
let total_cache_write: i64 = t.get("total_cache_write");
let today_requests: i64 = d.get("today_requests"); let today_requests: i64 = d.get("today_requests");
let today_cost: f64 = d.get("today_cost"); let today_cost: f64 = d.get("today_cost");
@@ -87,6 +91,8 @@ pub(super) async fn handle_usage_summary(State(state): State<DashboardState>) ->
"today_cost": today_cost, "today_cost": today_cost,
"error_rate": error_rate, "error_rate": error_rate,
"avg_response_time": avg_response_time, "avg_response_time": avg_response_time,
"total_cache_read_tokens": total_cache_read,
"total_cache_write_tokens": total_cache_write,
}))) })))
} }
_ => Json(ApiResponse::error("Failed to fetch usage statistics".to_string())), _ => Json(ApiResponse::error("Failed to fetch usage statistics".to_string())),
@@ -208,7 +214,9 @@ pub(super) async fn handle_providers_usage(
provider, provider,
COUNT(*) as requests, COUNT(*) as requests,
COALESCE(SUM(total_tokens), 0) as tokens, COALESCE(SUM(total_tokens), 0) as tokens,
COALESCE(SUM(cost), 0.0) as cost COALESCE(SUM(cost), 0.0) as cost,
COALESCE(SUM(cache_read_tokens), 0) as cache_read,
COALESCE(SUM(cache_write_tokens), 0) as cache_write
FROM llm_requests FROM llm_requests
GROUP BY provider GROUP BY provider
ORDER BY requests DESC ORDER BY requests DESC
@@ -226,12 +234,16 @@ pub(super) async fn handle_providers_usage(
let requests: i64 = row.get("requests"); let requests: i64 = row.get("requests");
let tokens: i64 = row.get("tokens"); let tokens: i64 = row.get("tokens");
let cost: f64 = row.get("cost"); let cost: f64 = row.get("cost");
let cache_read: i64 = row.get("cache_read");
let cache_write: i64 = row.get("cache_write");
provider_usage.push(serde_json::json!({ provider_usage.push(serde_json::json!({
"provider": provider, "provider": provider,
"requests": requests, "requests": requests,
"tokens": tokens, "tokens": tokens,
"cost": cost, "cost": cost,
"cache_read_tokens": cache_read,
"cache_write_tokens": cache_write,
})); }));
} }
@@ -256,7 +268,9 @@ pub(super) async fn handle_detailed_usage(State(state): State<DashboardState>) -
model, model,
COUNT(*) as requests, COUNT(*) as requests,
COALESCE(SUM(total_tokens), 0) as tokens, COALESCE(SUM(total_tokens), 0) as tokens,
COALESCE(SUM(cost), 0.0) as cost COALESCE(SUM(cost), 0.0) as cost,
COALESCE(SUM(cache_read_tokens), 0) as cache_read,
COALESCE(SUM(cache_write_tokens), 0) as cache_write
FROM llm_requests FROM llm_requests
GROUP BY date, client_id, provider, model GROUP BY date, client_id, provider, model
ORDER BY date DESC ORDER BY date DESC
@@ -279,6 +293,8 @@ pub(super) async fn handle_detailed_usage(State(state): State<DashboardState>) -
"requests": row.get::<i64, _>("requests"), "requests": row.get::<i64, _>("requests"),
"tokens": row.get::<i64, _>("tokens"), "tokens": row.get::<i64, _>("tokens"),
"cost": row.get::<f64, _>("cost"), "cost": row.get::<f64, _>("cost"),
"cache_read_tokens": row.get::<i64, _>("cache_read"),
"cache_write_tokens": row.get::<i64, _>("cache_write"),
}) })
}) })
.collect(); .collect();

View File

@@ -135,6 +135,14 @@ async fn run_migrations(pool: &DbPool) -> Result<()> {
.execute(pool) .execute(pool)
.await; .await;
// Add cache token columns if they don't exist (migration for existing DBs)
let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN cache_read_tokens INTEGER DEFAULT 0")
.execute(pool)
.await;
let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN cache_write_tokens INTEGER DEFAULT 0")
.execute(pool)
.await;
// Insert default admin user if none exists (default password: admin) // Insert default admin user if none exists (default password: admin)
let user_count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM users").fetch_one(pool).await?; let user_count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM users").fetch_one(pool).await?;

View File

@@ -16,6 +16,8 @@ pub struct RequestLog {
pub prompt_tokens: u32, pub prompt_tokens: u32,
pub completion_tokens: u32, pub completion_tokens: u32,
pub total_tokens: u32, pub total_tokens: u32,
pub cache_read_tokens: u32,
pub cache_write_tokens: u32,
pub cost: f64, pub cost: f64,
pub has_images: bool, pub has_images: bool,
pub status: String, // "success", "error" pub status: String, // "success", "error"
@@ -75,8 +77,8 @@ impl RequestLogger {
sqlx::query( sqlx::query(
r#" r#"
INSERT INTO llm_requests INSERT INTO llm_requests
(timestamp, client_id, provider, model, prompt_tokens, completion_tokens, total_tokens, cost, has_images, status, error_message, duration_ms, request_body, response_body) (timestamp, client_id, provider, model, prompt_tokens, completion_tokens, total_tokens, cache_read_tokens, cache_write_tokens, cost, has_images, status, error_message, duration_ms, request_body, response_body)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"#, "#,
) )
.bind(log.timestamp) .bind(log.timestamp)
@@ -86,6 +88,8 @@ impl RequestLogger {
.bind(log.prompt_tokens as i64) .bind(log.prompt_tokens as i64)
.bind(log.completion_tokens as i64) .bind(log.completion_tokens as i64)
.bind(log.total_tokens as i64) .bind(log.total_tokens as i64)
.bind(log.cache_read_tokens as i64)
.bind(log.cache_write_tokens as i64)
.bind(log.cost) .bind(log.cost)
.bind(log.has_images) .bind(log.has_images)
.bind(log.status) .bind(log.status)

View File

@@ -152,6 +152,10 @@ pub struct Usage {
pub prompt_tokens: u32, pub prompt_tokens: u32,
pub completion_tokens: u32, pub completion_tokens: u32,
pub total_tokens: u32, pub total_tokens: u32,
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_read_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_write_tokens: Option<u32>,
} }
// ========== Streaming Response Structs ========== // ========== Streaming Response Structs ==========

View File

@@ -90,12 +90,16 @@ impl super::Provider for DeepSeekProvider {
model: &str, model: &str,
prompt_tokens: u32, prompt_tokens: u32,
completion_tokens: u32, completion_tokens: u32,
cache_read_tokens: u32,
cache_write_tokens: u32,
registry: &crate::models::registry::ModelRegistry, registry: &crate::models::registry::ModelRegistry,
) -> f64 { ) -> f64 {
helpers::calculate_cost_with_registry( helpers::calculate_cost_with_registry(
model, model,
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
cache_read_tokens,
cache_write_tokens,
registry, registry,
&self.pricing, &self.pricing,
0.14, 0.14,

View File

@@ -119,6 +119,8 @@ struct GeminiUsageMetadata {
candidates_token_count: u32, candidates_token_count: u32,
#[serde(default)] #[serde(default)]
total_token_count: u32, total_token_count: u32,
#[serde(default)]
cached_content_token_count: u32,
} }
#[derive(Debug, Deserialize)] #[derive(Debug, Deserialize)]
@@ -454,6 +456,11 @@ impl super::Provider for GeminiProvider {
.as_ref() .as_ref()
.map(|u| u.total_token_count) .map(|u| u.total_token_count)
.unwrap_or(0); .unwrap_or(0);
let cache_read_tokens = gemini_response
.usage_metadata
.as_ref()
.map(|u| u.cached_content_token_count)
.unwrap_or(0);
Ok(ProviderResponse { Ok(ProviderResponse {
content, content,
@@ -462,6 +469,8 @@ impl super::Provider for GeminiProvider {
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
total_tokens, total_tokens,
cache_read_tokens,
cache_write_tokens: 0, // Gemini doesn't report cache writes separately
model, model,
}) })
} }
@@ -475,12 +484,16 @@ impl super::Provider for GeminiProvider {
model: &str, model: &str,
prompt_tokens: u32, prompt_tokens: u32,
completion_tokens: u32, completion_tokens: u32,
cache_read_tokens: u32,
cache_write_tokens: u32,
registry: &crate::models::registry::ModelRegistry, registry: &crate::models::registry::ModelRegistry,
) -> f64 { ) -> f64 {
super::helpers::calculate_cost_with_registry( super::helpers::calculate_cost_with_registry(
model, model,
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
cache_read_tokens,
cache_write_tokens,
registry, registry,
&self.pricing, &self.pricing,
0.075, 0.075,
@@ -537,6 +550,17 @@ impl super::Provider for GeminiProvider {
let gemini_response: GeminiResponse = serde_json::from_str(&msg.data) let gemini_response: GeminiResponse = serde_json::from_str(&msg.data)
.map_err(|e| AppError::ProviderError(format!("Failed to parse stream chunk: {}", e)))?; .map_err(|e| AppError::ProviderError(format!("Failed to parse stream chunk: {}", e)))?;
// Extract usage from usageMetadata if present (reported on every/last chunk)
let stream_usage = gemini_response.usage_metadata.as_ref().map(|u| {
super::StreamUsage {
prompt_tokens: u.prompt_token_count,
completion_tokens: u.candidates_token_count,
total_tokens: u.total_token_count,
cache_read_tokens: u.cached_content_token_count,
cache_write_tokens: 0,
}
});
if let Some(candidate) = gemini_response.candidates.first() { if let Some(candidate) = gemini_response.candidates.first() {
let content = candidate let content = candidate
.content .content
@@ -561,6 +585,7 @@ impl super::Provider for GeminiProvider {
finish_reason, finish_reason,
tool_calls, tool_calls,
model: model.clone(), model: model.clone(),
usage: stream_usage,
}; };
} }
} }

View File

@@ -86,12 +86,16 @@ impl super::Provider for GrokProvider {
model: &str, model: &str,
prompt_tokens: u32, prompt_tokens: u32,
completion_tokens: u32, completion_tokens: u32,
cache_read_tokens: u32,
cache_write_tokens: u32,
registry: &crate::models::registry::ModelRegistry, registry: &crate::models::registry::ModelRegistry,
) -> f64 { ) -> f64 {
helpers::calculate_cost_with_registry( helpers::calculate_cost_with_registry(
model, model,
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
cache_read_tokens,
cache_write_tokens,
registry, registry,
&self.pricing, &self.pricing,
5.0, 5.0,

View File

@@ -1,4 +1,4 @@
use super::{ProviderResponse, ProviderStreamChunk}; use super::{ProviderResponse, ProviderStreamChunk, StreamUsage};
use crate::errors::AppError; use crate::errors::AppError;
use crate::models::{ContentPart, ToolCall, ToolCallDelta, UnifiedMessage, UnifiedRequest}; use crate::models::{ContentPart, ToolCall, ToolCallDelta, UnifiedMessage, UnifiedRequest};
use futures::stream::{BoxStream, StreamExt}; use futures::stream::{BoxStream, StreamExt};
@@ -156,6 +156,8 @@ pub async fn messages_to_openai_json_text_only(
/// Build an OpenAI-compatible request body from a UnifiedRequest and pre-converted messages. /// Build an OpenAI-compatible request body from a UnifiedRequest and pre-converted messages.
/// Includes tools and tool_choice when present. /// Includes tools and tool_choice when present.
/// When streaming, adds `stream_options.include_usage: true` so providers report
/// token counts in the final SSE chunk.
pub fn build_openai_body( pub fn build_openai_body(
request: &UnifiedRequest, request: &UnifiedRequest,
messages_json: Vec<serde_json::Value>, messages_json: Vec<serde_json::Value>,
@@ -167,6 +169,10 @@ pub fn build_openai_body(
"stream": stream, "stream": stream,
}); });
if stream {
body["stream_options"] = serde_json::json!({ "include_usage": true });
}
if let Some(temp) = request.temperature { if let Some(temp) = request.temperature {
body["temperature"] = serde_json::json!(temp); body["temperature"] = serde_json::json!(temp);
} }
@@ -185,6 +191,9 @@ pub fn build_openai_body(
/// Parse an OpenAI-compatible chat completion response JSON into a ProviderResponse. /// Parse an OpenAI-compatible chat completion response JSON into a ProviderResponse.
/// Extracts tool_calls from the message when present. /// Extracts tool_calls from the message when present.
/// Extracts cache token counts from:
/// - OpenAI/Grok: `usage.prompt_tokens_details.cached_tokens`
/// - DeepSeek: `usage.prompt_cache_hit_tokens` / `usage.prompt_cache_miss_tokens`
pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<ProviderResponse, AppError> { pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<ProviderResponse, AppError> {
let choice = resp_json["choices"] let choice = resp_json["choices"]
.get(0) .get(0)
@@ -204,6 +213,17 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
let completion_tokens = usage["completion_tokens"].as_u64().unwrap_or(0) as u32; let completion_tokens = usage["completion_tokens"].as_u64().unwrap_or(0) as u32;
let total_tokens = usage["total_tokens"].as_u64().unwrap_or(0) as u32; let total_tokens = usage["total_tokens"].as_u64().unwrap_or(0) as u32;
// Extract cache tokens — try OpenAI/Grok format first, then DeepSeek format
let cache_read_tokens = usage["prompt_tokens_details"]["cached_tokens"]
.as_u64()
// DeepSeek uses a different field name
.or_else(|| usage["prompt_cache_hit_tokens"].as_u64())
.unwrap_or(0) as u32;
// DeepSeek's prompt_cache_miss_tokens are the non-cached portion of the prompt.
// NOTE(review): mapping them to cache_write_tokens may double-count cost when a
// cache_write rate is configured, because the cost formula already charges the
// non-cached prompt (prompt_tokens - cache_read_tokens) at the input rate —
// confirm against DeepSeek's pricing semantics before relying on this.
// OpenAI doesn't report cache_write in this location, but may in the future.
let cache_write_tokens = usage["prompt_cache_miss_tokens"].as_u64().unwrap_or(0) as u32;
Ok(ProviderResponse { Ok(ProviderResponse {
content, content,
reasoning_content, reasoning_content,
@@ -211,6 +231,8 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
total_tokens, total_tokens,
cache_read_tokens,
cache_write_tokens,
model, model,
}) })
} }
@@ -220,6 +242,9 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
/// The optional `reasoning_field` allows overriding the field name for /// The optional `reasoning_field` allows overriding the field name for
/// reasoning content (e.g., "thought" for Ollama). /// reasoning content (e.g., "thought" for Ollama).
/// Parses tool_calls deltas from streaming chunks when present. /// Parses tool_calls deltas from streaming chunks when present.
/// When `stream_options.include_usage: true` was sent, the provider sends a
/// final chunk with `usage` data — this is parsed into `StreamUsage` and
/// attached to the yielded `ProviderStreamChunk`.
pub fn create_openai_stream( pub fn create_openai_stream(
es: reqwest_eventsource::EventSource, es: reqwest_eventsource::EventSource,
model: String, model: String,
@@ -239,6 +264,34 @@ pub fn create_openai_stream(
let chunk: Value = serde_json::from_str(&msg.data) let chunk: Value = serde_json::from_str(&msg.data)
.map_err(|e| AppError::ProviderError(format!("Failed to parse stream chunk: {}", e)))?; .map_err(|e| AppError::ProviderError(format!("Failed to parse stream chunk: {}", e)))?;
// Parse usage from the final chunk (sent when stream_options.include_usage is true).
// This chunk may have an empty `choices` array.
let stream_usage = chunk.get("usage").and_then(|u| {
if u.is_null() {
return None;
}
let prompt_tokens = u["prompt_tokens"].as_u64().unwrap_or(0) as u32;
let completion_tokens = u["completion_tokens"].as_u64().unwrap_or(0) as u32;
let total_tokens = u["total_tokens"].as_u64().unwrap_or(0) as u32;
let cache_read_tokens = u["prompt_tokens_details"]["cached_tokens"]
.as_u64()
.or_else(|| u["prompt_cache_hit_tokens"].as_u64())
.unwrap_or(0) as u32;
let cache_write_tokens = u["prompt_cache_miss_tokens"]
.as_u64()
.unwrap_or(0) as u32;
Some(StreamUsage {
prompt_tokens,
completion_tokens,
total_tokens,
cache_read_tokens,
cache_write_tokens,
})
});
if let Some(choice) = chunk["choices"].get(0) { if let Some(choice) = chunk["choices"].get(0) {
let delta = &choice["delta"]; let delta = &choice["delta"];
let content = delta["content"].as_str().unwrap_or_default().to_string(); let content = delta["content"].as_str().unwrap_or_default().to_string();
@@ -259,6 +312,18 @@ pub fn create_openai_stream(
finish_reason, finish_reason,
tool_calls, tool_calls,
model: model.clone(), model: model.clone(),
usage: stream_usage,
};
} else if stream_usage.is_some() {
// Final usage-only chunk (empty choices array) — yield it so
// AggregatingStream can capture the real token counts.
yield ProviderStreamChunk {
content: String::new(),
reasoning_content: None,
finish_reason: None,
tool_calls: None,
model: model.clone(),
usage: stream_usage,
}; };
} }
} }
@@ -274,10 +339,20 @@ pub fn create_openai_stream(
} }
/// Calculate cost using the model registry first, then falling back to provider pricing config. /// Calculate cost using the model registry first, then falling back to provider pricing config.
///
/// When the registry provides `cache_read` / `cache_write` rates, the formula is:
/// (prompt_tokens - cache_read_tokens) * input_rate
/// + cache_read_tokens * cache_read_rate
/// + cache_write_tokens * cache_write_rate (if applicable)
/// + completion_tokens * output_rate
///
/// All rates are per million tokens (the registry stores per-million-token rates);
/// the implementation divides by 1_000_000 to obtain per-token costs.
pub fn calculate_cost_with_registry( pub fn calculate_cost_with_registry(
model: &str, model: &str,
prompt_tokens: u32, prompt_tokens: u32,
completion_tokens: u32, completion_tokens: u32,
cache_read_tokens: u32,
cache_write_tokens: u32,
registry: &crate::models::registry::ModelRegistry, registry: &crate::models::registry::ModelRegistry,
pricing: &[crate::config::ModelPricing], pricing: &[crate::config::ModelPricing],
default_prompt_rate: f64, default_prompt_rate: f64,
@@ -286,10 +361,25 @@ pub fn calculate_cost_with_registry(
if let Some(metadata) = registry.find_model(model) if let Some(metadata) = registry.find_model(model)
&& let Some(cost) = &metadata.cost && let Some(cost) = &metadata.cost
{ {
return (prompt_tokens as f64 * cost.input / 1_000_000.0) let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
let mut total = (non_cached_prompt as f64 * cost.input / 1_000_000.0)
+ (completion_tokens as f64 * cost.output / 1_000_000.0); + (completion_tokens as f64 * cost.output / 1_000_000.0);
if let Some(cache_read_rate) = cost.cache_read {
total += cache_read_tokens as f64 * cache_read_rate / 1_000_000.0;
} else {
// No cache_read rate — charge cached tokens at full input rate
total += cache_read_tokens as f64 * cost.input / 1_000_000.0;
}
if let Some(cache_write_rate) = cost.cache_write {
total += cache_write_tokens as f64 * cache_write_rate / 1_000_000.0;
}
return total;
} }
// Fallback: no registry entry — use provider pricing config (no cache awareness)
let (prompt_rate, completion_rate) = pricing let (prompt_rate, completion_rate) = pricing
.iter() .iter()
.find(|p| model.contains(&p.model)) .find(|p| model.contains(&p.model))

View File

@@ -37,12 +37,16 @@ pub trait Provider: Send + Sync {
/// Estimate token count for a request (for cost calculation) /// Estimate token count for a request (for cost calculation)
fn estimate_tokens(&self, request: &UnifiedRequest) -> Result<u32>; fn estimate_tokens(&self, request: &UnifiedRequest) -> Result<u32>;
/// Calculate cost based on token usage and model using the registry /// Calculate cost based on token usage and model using the registry.
/// `cache_read_tokens` / `cache_write_tokens` allow cache-aware pricing
/// when the registry provides `cache_read` / `cache_write` rates.
fn calculate_cost( fn calculate_cost(
&self, &self,
model: &str, model: &str,
prompt_tokens: u32, prompt_tokens: u32,
completion_tokens: u32, completion_tokens: u32,
cache_read_tokens: u32,
cache_write_tokens: u32,
registry: &crate::models::registry::ModelRegistry, registry: &crate::models::registry::ModelRegistry,
) -> f64; ) -> f64;
} }
@@ -54,9 +58,21 @@ pub struct ProviderResponse {
pub prompt_tokens: u32, pub prompt_tokens: u32,
pub completion_tokens: u32, pub completion_tokens: u32,
pub total_tokens: u32, pub total_tokens: u32,
pub cache_read_tokens: u32,
pub cache_write_tokens: u32,
pub model: String, pub model: String,
} }
/// Usage data from the final streaming chunk (when providers report real token counts).
#[derive(Debug, Clone, Default)]
pub struct StreamUsage {
pub prompt_tokens: u32,
pub completion_tokens: u32,
pub total_tokens: u32,
pub cache_read_tokens: u32,
pub cache_write_tokens: u32,
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct ProviderStreamChunk { pub struct ProviderStreamChunk {
pub content: String, pub content: String,
@@ -64,6 +80,8 @@ pub struct ProviderStreamChunk {
pub finish_reason: Option<String>, pub finish_reason: Option<String>,
pub tool_calls: Option<Vec<crate::models::ToolCallDelta>>, pub tool_calls: Option<Vec<crate::models::ToolCallDelta>>,
pub model: String, pub model: String,
/// Populated only on the final chunk when providers report usage (e.g. stream_options.include_usage).
pub usage: Option<StreamUsage>,
} }
use tokio::sync::RwLock; use tokio::sync::RwLock;
@@ -299,6 +317,8 @@ pub mod placeholder {
_model: &str, _model: &str,
_prompt_tokens: u32, _prompt_tokens: u32,
_completion_tokens: u32, _completion_tokens: u32,
_cache_read_tokens: u32,
_cache_write_tokens: u32,
_registry: &crate::models::registry::ModelRegistry, _registry: &crate::models::registry::ModelRegistry,
) -> f64 { ) -> f64 {
0.0 0.0

View File

@@ -95,12 +95,16 @@ impl super::Provider for OllamaProvider {
model: &str, model: &str,
prompt_tokens: u32, prompt_tokens: u32,
completion_tokens: u32, completion_tokens: u32,
cache_read_tokens: u32,
cache_write_tokens: u32,
registry: &crate::models::registry::ModelRegistry, registry: &crate::models::registry::ModelRegistry,
) -> f64 { ) -> f64 {
helpers::calculate_cost_with_registry( helpers::calculate_cost_with_registry(
model, model,
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
cache_read_tokens,
cache_write_tokens,
registry, registry,
&self.pricing, &self.pricing,
0.0, 0.0,

View File

@@ -86,12 +86,16 @@ impl super::Provider for OpenAIProvider {
model: &str, model: &str,
prompt_tokens: u32, prompt_tokens: u32,
completion_tokens: u32, completion_tokens: u32,
cache_read_tokens: u32,
cache_write_tokens: u32,
registry: &crate::models::registry::ModelRegistry, registry: &crate::models::registry::ModelRegistry,
) -> f64 { ) -> f64 {
helpers::calculate_cost_with_registry( helpers::calculate_cost_with_registry(
model, model,
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
cache_read_tokens,
cache_write_tokens,
registry, registry,
&self.pricing, &self.pricing,
0.15, 0.15,

View File

@@ -97,18 +97,21 @@ async fn get_model_cost(
model: &str, model: &str,
prompt_tokens: u32, prompt_tokens: u32,
completion_tokens: u32, completion_tokens: u32,
cache_read_tokens: u32,
cache_write_tokens: u32,
provider: &Arc<dyn crate::providers::Provider>, provider: &Arc<dyn crate::providers::Provider>,
state: &AppState, state: &AppState,
) -> f64 { ) -> f64 {
// Check in-memory cache for cost overrides (no SQLite hit) // Check in-memory cache for cost overrides (no SQLite hit)
if let Some(cached) = state.model_config_cache.get(model).await { if let Some(cached) = state.model_config_cache.get(model).await {
if let (Some(p), Some(c)) = (cached.prompt_cost_per_m, cached.completion_cost_per_m) { if let (Some(p), Some(c)) = (cached.prompt_cost_per_m, cached.completion_cost_per_m) {
// Manual overrides don't have cache-specific rates, so use simple formula
return (prompt_tokens as f64 * p / 1_000_000.0) + (completion_tokens as f64 * c / 1_000_000.0); return (prompt_tokens as f64 * p / 1_000_000.0) + (completion_tokens as f64 * c / 1_000_000.0);
} }
} }
// Fallback to provider's registry-based calculation // Fallback to provider's registry-based calculation (cache-aware)
provider.calculate_cost(model, prompt_tokens, completion_tokens, &state.model_registry) provider.calculate_cost(model, prompt_tokens, completion_tokens, cache_read_tokens, cache_write_tokens, &state.model_registry)
} }
async fn chat_completions( async fn chat_completions(
@@ -281,6 +284,8 @@ async fn chat_completions(
&response.model, &response.model,
response.prompt_tokens, response.prompt_tokens,
response.completion_tokens, response.completion_tokens,
response.cache_read_tokens,
response.cache_write_tokens,
&provider, &provider,
&state, &state,
) )
@@ -294,6 +299,8 @@ async fn chat_completions(
prompt_tokens: response.prompt_tokens, prompt_tokens: response.prompt_tokens,
completion_tokens: response.completion_tokens, completion_tokens: response.completion_tokens,
total_tokens: response.total_tokens, total_tokens: response.total_tokens,
cache_read_tokens: response.cache_read_tokens,
cache_write_tokens: response.cache_write_tokens,
cost, cost,
has_images, has_images,
status: "success".to_string(), status: "success".to_string(),
@@ -340,6 +347,8 @@ async fn chat_completions(
prompt_tokens: response.prompt_tokens, prompt_tokens: response.prompt_tokens,
completion_tokens: response.completion_tokens, completion_tokens: response.completion_tokens,
total_tokens: response.total_tokens, total_tokens: response.total_tokens,
cache_read_tokens: if response.cache_read_tokens > 0 { Some(response.cache_read_tokens) } else { None },
cache_write_tokens: if response.cache_write_tokens > 0 { Some(response.cache_write_tokens) } else { None },
}), }),
}; };
@@ -368,6 +377,8 @@ async fn chat_completions(
prompt_tokens: 0, prompt_tokens: 0,
completion_tokens: 0, completion_tokens: 0,
total_tokens: 0, total_tokens: 0,
cache_read_tokens: 0,
cache_write_tokens: 0,
cost: 0.0, cost: 0.0,
has_images: false, has_images: false,
status: "error".to_string(), status: "error".to_string(),

View File

@@ -2,7 +2,7 @@ use crate::client::ClientManager;
use crate::errors::AppError; use crate::errors::AppError;
use crate::logging::{RequestLog, RequestLogger}; use crate::logging::{RequestLog, RequestLogger};
use crate::models::ToolCall; use crate::models::ToolCall;
use crate::providers::{Provider, ProviderStreamChunk}; use crate::providers::{Provider, ProviderStreamChunk, StreamUsage};
use crate::state::ModelConfigCache; use crate::state::ModelConfigCache;
use crate::utils::tokens::estimate_completion_tokens; use crate::utils::tokens::estimate_completion_tokens;
use futures::stream::Stream; use futures::stream::Stream;
@@ -33,6 +33,8 @@ pub struct AggregatingStream<S> {
accumulated_content: String, accumulated_content: String,
accumulated_reasoning: String, accumulated_reasoning: String,
accumulated_tool_calls: Vec<ToolCall>, accumulated_tool_calls: Vec<ToolCall>,
/// Real usage data from the provider's final stream chunk (when available).
real_usage: Option<StreamUsage>,
logger: Arc<RequestLogger>, logger: Arc<RequestLogger>,
client_manager: Arc<ClientManager>, client_manager: Arc<ClientManager>,
model_registry: Arc<crate::models::registry::ModelRegistry>, model_registry: Arc<crate::models::registry::ModelRegistry>,
@@ -56,6 +58,7 @@ where
accumulated_content: String::new(), accumulated_content: String::new(),
accumulated_reasoning: String::new(), accumulated_reasoning: String::new(),
accumulated_tool_calls: Vec::new(), accumulated_tool_calls: Vec::new(),
real_usage: None,
logger: config.logger, logger: config.logger,
client_manager: config.client_manager, client_manager: config.client_manager,
model_registry: config.model_registry, model_registry: config.model_registry,
@@ -78,33 +81,68 @@ where
let logger = self.logger.clone(); let logger = self.logger.clone();
let client_manager = self.client_manager.clone(); let client_manager = self.client_manager.clone();
let provider = self.provider.clone(); let provider = self.provider.clone();
let prompt_tokens = self.prompt_tokens; let estimated_prompt_tokens = self.prompt_tokens;
let has_images = self.has_images; let has_images = self.has_images;
let registry = self.model_registry.clone(); let registry = self.model_registry.clone();
let config_cache = self.model_config_cache.clone(); let config_cache = self.model_config_cache.clone();
let real_usage = self.real_usage.take();
// Estimate completion tokens (including reasoning if present) // Estimate completion tokens (including reasoning if present)
let content_tokens = estimate_completion_tokens(&self.accumulated_content, &model); let estimated_content_tokens = estimate_completion_tokens(&self.accumulated_content, &model);
let reasoning_tokens = if !self.accumulated_reasoning.is_empty() { let estimated_reasoning_tokens = if !self.accumulated_reasoning.is_empty() {
estimate_completion_tokens(&self.accumulated_reasoning, &model) estimate_completion_tokens(&self.accumulated_reasoning, &model)
} else { } else {
0 0
}; };
let completion_tokens = content_tokens + reasoning_tokens; let estimated_completion = estimated_content_tokens + estimated_reasoning_tokens;
let total_tokens = prompt_tokens + completion_tokens;
// Spawn a background task to log the completion // Spawn a background task to log the completion
tokio::spawn(async move { tokio::spawn(async move {
// Use real usage from the provider when available, otherwise fall back to estimates
let (prompt_tokens, completion_tokens, total_tokens, cache_read_tokens, cache_write_tokens) =
if let Some(usage) = &real_usage {
(
usage.prompt_tokens,
usage.completion_tokens,
usage.total_tokens,
usage.cache_read_tokens,
usage.cache_write_tokens,
)
} else {
(
estimated_prompt_tokens,
estimated_completion,
estimated_prompt_tokens + estimated_completion,
0u32,
0u32,
)
};
// Check in-memory cache for cost overrides (no SQLite hit) // Check in-memory cache for cost overrides (no SQLite hit)
let cost = if let Some(cached) = config_cache.get(&model).await { let cost = if let Some(cached) = config_cache.get(&model).await {
if let (Some(p), Some(c)) = (cached.prompt_cost_per_m, cached.completion_cost_per_m) { if let (Some(p), Some(c)) = (cached.prompt_cost_per_m, cached.completion_cost_per_m) {
// Cost override doesn't have cache-aware pricing, use simple formula
(prompt_tokens as f64 * p / 1_000_000.0) + (completion_tokens as f64 * c / 1_000_000.0) (prompt_tokens as f64 * p / 1_000_000.0) + (completion_tokens as f64 * c / 1_000_000.0)
} else { } else {
provider.calculate_cost(&model, prompt_tokens, completion_tokens, &registry) provider.calculate_cost(
&model,
prompt_tokens,
completion_tokens,
cache_read_tokens,
cache_write_tokens,
&registry,
)
} }
} else { } else {
provider.calculate_cost(&model, prompt_tokens, completion_tokens, &registry) provider.calculate_cost(
&model,
prompt_tokens,
completion_tokens,
cache_read_tokens,
cache_write_tokens,
&registry,
)
}; };
// Log to database // Log to database
@@ -116,6 +154,8 @@ where
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
total_tokens, total_tokens,
cache_read_tokens,
cache_write_tokens,
cost, cost,
has_images, has_images,
status: "success".to_string(), status: "success".to_string(),
@@ -146,6 +186,10 @@ where
if let Some(reasoning) = &chunk.reasoning_content { if let Some(reasoning) = &chunk.reasoning_content {
self.accumulated_reasoning.push_str(reasoning); self.accumulated_reasoning.push_str(reasoning);
} }
// Capture real usage from the provider when present (typically on the final chunk)
if let Some(usage) = &chunk.usage {
self.real_usage = Some(usage.clone());
}
// Accumulate tool call deltas into complete tool calls // Accumulate tool call deltas into complete tool calls
if let Some(deltas) = &chunk.tool_calls { if let Some(deltas) = &chunk.tool_calls {
for delta in deltas { for delta in deltas {
@@ -230,7 +274,7 @@ mod tests {
fn estimate_tokens(&self, _req: &crate::models::UnifiedRequest) -> Result<u32> { fn estimate_tokens(&self, _req: &crate::models::UnifiedRequest) -> Result<u32> {
Ok(10) Ok(10)
} }
fn calculate_cost(&self, _model: &str, _p: u32, _c: u32, _r: &crate::models::registry::ModelRegistry) -> f64 { fn calculate_cost(&self, _model: &str, _p: u32, _c: u32, _cr: u32, _cw: u32, _r: &crate::models::registry::ModelRegistry) -> f64 {
0.05 0.05
} }
} }
@@ -244,6 +288,7 @@ mod tests {
finish_reason: None, finish_reason: None,
tool_calls: None, tool_calls: None,
model: "test".to_string(), model: "test".to_string(),
usage: None,
}), }),
Ok(ProviderStreamChunk { Ok(ProviderStreamChunk {
content: " World".to_string(), content: " World".to_string(),
@@ -251,6 +296,7 @@ mod tests {
finish_reason: Some("stop".to_string()), finish_reason: Some("stop".to_string()),
tool_calls: None, tool_calls: None,
model: "test".to_string(), model: "test".to_string(),
usage: None,
}), }),
]; ];
let inner_stream = stream::iter(chunks); let inner_stream = stream::iter(chunks);

View File

@@ -4,7 +4,7 @@
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>LLM Proxy Gateway - Admin Dashboard</title> <title>LLM Proxy Gateway - Admin Dashboard</title>
<link rel="stylesheet" href="/css/dashboard.css?v=4"> <link rel="stylesheet" href="/css/dashboard.css?v=5">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css"> <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<link rel="icon" href="img/logo-icon.png" type="image/png" sizes="any"> <link rel="icon" href="img/logo-icon.png" type="image/png" sizes="any">
<link rel="apple-touch-icon" href="img/logo-icon.png"> <link rel="apple-touch-icon" href="img/logo-icon.png">
@@ -166,19 +166,19 @@
</div> </div>
<!-- Scripts (cache-busted with version query params) --> <!-- Scripts (cache-busted with version query params) -->
<script src="/js/api.js?v=4"></script> <script src="/js/api.js?v=5"></script>
<script src="/js/auth.js?v=4"></script> <script src="/js/auth.js?v=5"></script>
<script src="/js/dashboard.js?v=4"></script> <script src="/js/dashboard.js?v=5"></script>
<script src="/js/websocket.js?v=4"></script> <script src="/js/websocket.js?v=5"></script>
<script src="/js/charts.js?v=4"></script> <script src="/js/charts.js?v=5"></script>
<script src="/js/pages/overview.js?v=4"></script> <script src="/js/pages/overview.js?v=5"></script>
<script src="/js/pages/analytics.js?v=4"></script> <script src="/js/pages/analytics.js?v=5"></script>
<script src="/js/pages/costs.js?v=4"></script> <script src="/js/pages/costs.js?v=5"></script>
<script src="/js/pages/clients.js?v=4"></script> <script src="/js/pages/clients.js?v=5"></script>
<script src="/js/pages/providers.js?v=4"></script> <script src="/js/pages/providers.js?v=5"></script>
<script src="/js/pages/models.js?v=4"></script> <script src="/js/pages/models.js?v=5"></script>
<script src="/js/pages/monitoring.js?v=4"></script> <script src="/js/pages/monitoring.js?v=5"></script>
<script src="/js/pages/settings.js?v=4"></script> <script src="/js/pages/settings.js?v=5"></script>
<script src="/js/pages/logs.js?v=4"></script> <script src="/js/pages/logs.js?v=5"></script>
</body> </body>
</html> </html>

View File

@@ -17,61 +17,25 @@ class ApiClient {
headers['Authorization'] = `Bearer ${window.authManager.token}`; headers['Authorization'] = `Bearer ${window.authManager.token}`;
} }
const response = await fetch(url, {
...options,
headers
});
const text = await response.text();
let result;
try { try {
console.log(`[API] Fetching ${url}...`); result = JSON.parse(text);
const response = await fetch(url, { } catch (parseErr) {
...options, throw new Error(`JSON parse failed for ${url}: ${parseErr.message}`);
headers
});
console.log(`[API] ${url} → status=${response.status} ok=${response.ok} type=${response.headers.get('content-type')}`);
const text = await response.text();
console.log(`[API] ${url} → body length=${text.length}, first 200 chars:`, text.substring(0, 200));
let result;
try {
result = JSON.parse(text);
} catch (parseErr) {
const msg = `JSON parse failed for ${url}: ${parseErr.message}. Body: ${text.substring(0, 300)}`;
console.error(`[API] ${msg}`);
this._addDebugEntry(url, 'JSON_PARSE_ERROR', msg);
throw new Error(msg);
}
if (!response.ok || !result.success) {
const msg = `API error for ${url}: ok=${response.ok} success=${result.success} error=${result.error} status=${response.status}`;
console.error(`[API] ${msg}`);
this._addDebugEntry(url, 'API_ERROR', msg);
throw new Error(result.error || `HTTP error! status: ${response.status}`);
}
console.log(`[API] ${url} → SUCCESS, data keys:`, result.data ? Object.keys(result.data) : 'null');
return result.data;
} catch (error) {
console.error(`[API] Request failed (${path}):`, error);
this._addDebugEntry(url, 'EXCEPTION', error.message);
throw error;
} }
}
// Visible on-page debug panel for diagnosing fetch failures if (!response.ok || !result.success) {
_addDebugEntry(url, status, detail) { throw new Error(result.error || `HTTP error! status: ${response.status}`);
let panel = document.getElementById('api-debug-panel');
if (!panel) {
panel = document.createElement('div');
panel.id = 'api-debug-panel';
panel.style.cssText = 'position:fixed;bottom:0;left:0;right:0;max-height:200px;overflow-y:auto;background:#1d2021;color:#fbf1c7;font-family:monospace;font-size:11px;padding:8px;z-index:99999;border-top:2px solid #cc241d;';
const title = document.createElement('div');
title.style.cssText = 'font-weight:bold;margin-bottom:4px;color:#fb4934;';
title.textContent = 'API Debug Panel (remove after fixing)';
panel.appendChild(title);
document.body.appendChild(panel);
} }
const entry = document.createElement('div');
entry.style.cssText = 'margin:2px 0;padding:2px 4px;background:#282828;border-left:3px solid ' + (status === 'EXCEPTION' ? '#fb4934' : '#fabd2f') + ';'; return result.data;
entry.textContent = `[${status}] ${url}: ${detail}`;
panel.appendChild(entry);
} }
async get(path) { async get(path) {

View File

@@ -348,7 +348,7 @@ class Dashboard {
<div class="table-container"> <div class="table-container">
<table class="table" id="usage-table"> <table class="table" id="usage-table">
<thead> <thead>
<tr><th>Date</th><th>Client</th><th>Provider</th><th>Model</th><th>Requests</th><th>Tokens</th><th>Cost</th></tr> <tr><th>Date</th><th>Client</th><th>Provider</th><th>Model</th><th>Requests</th><th>Tokens</th><th>Cache Read</th><th>Cache Write</th><th>Cost</th></tr>
</thead> </thead>
<tbody></tbody> <tbody></tbody>
</table> </table>
@@ -383,7 +383,7 @@ class Dashboard {
<div class="table-container"> <div class="table-container">
<table class="table" id="pricing-table"> <table class="table" id="pricing-table">
<thead> <thead>
<tr><th>Provider</th><th>Model</th><th>Input Cost</th><th>Output Cost</th><th>Last Updated</th></tr> <tr><th>Provider</th><th>Model</th><th>Input Cost</th><th>Output Cost</th><th>Cache Read</th><th>Cache Write</th></tr>
</thead> </thead>
<tbody></tbody> <tbody></tbody>
</table> </table>

View File

@@ -184,21 +184,27 @@ class AnalyticsPage {
if (!tableBody) return; if (!tableBody) return;
if (data.length === 0) { if (data.length === 0) {
tableBody.innerHTML = '<tr><td colspan="7" class="text-center">No historical data found</td></tr>'; tableBody.innerHTML = '<tr><td colspan="9" class="text-center">No historical data found</td></tr>';
return; return;
} }
tableBody.innerHTML = data.map(row => ` tableBody.innerHTML = data.map(row => {
<tr> const cacheRead = row.cache_read_tokens || 0;
<td>${row.date}</td> const cacheWrite = row.cache_write_tokens || 0;
<td><span class="badge-client">${row.client}</span></td> return `
<td>${row.provider}</td> <tr>
<td><code class="code-sm">${row.model}</code></td> <td>${row.date}</td>
<td>${row.requests.toLocaleString()}</td> <td><span class="badge-client">${row.client}</span></td>
<td>${window.api.formatNumber(row.tokens)}</td> <td>${row.provider}</td>
<td>${window.api.formatCurrency(row.cost)}</td> <td><code class="code-sm">${row.model}</code></td>
</tr> <td>${row.requests.toLocaleString()}</td>
`).join(''); <td>${window.api.formatNumber(row.tokens)}</td>
<td>${window.api.formatNumber(cacheRead)}</td>
<td>${window.api.formatNumber(cacheWrite)}</td>
<td>${window.api.formatCurrency(row.cost)}</td>
</tr>
`;
}).join('');
} }
setupEventListeners() { setupEventListeners() {

View File

@@ -31,7 +31,10 @@ class CostsPage {
avgDailyCost: data.total_cost / 30, // Simplified avgDailyCost: data.total_cost / 30, // Simplified
costTrend: 5.2, costTrend: 5.2,
budgetUsed: Math.min(Math.round((data.total_cost / 100) * 100), 100), // Assuming $100 budget budgetUsed: Math.min(Math.round((data.total_cost / 100) * 100), 100), // Assuming $100 budget
projectedMonthEnd: data.today_cost * 30 projectedMonthEnd: data.today_cost * 30,
cacheReadTokens: data.total_cache_read_tokens || 0,
cacheWriteTokens: data.total_cache_write_tokens || 0,
totalTokens: data.total_tokens || 0,
}; };
this.renderCostStats(); this.renderCostStats();
@@ -45,6 +48,10 @@ class CostsPage {
const container = document.getElementById('cost-stats'); const container = document.getElementById('cost-stats');
if (!container) return; if (!container) return;
const cacheHitRate = this.costData.totalTokens > 0
? ((this.costData.cacheReadTokens / this.costData.totalTokens) * 100).toFixed(1)
: '0.0';
container.innerHTML = ` container.innerHTML = `
<div class="stat-card"> <div class="stat-card">
<div class="stat-icon warning"> <div class="stat-icon warning">
@@ -74,6 +81,19 @@ class CostsPage {
</div> </div>
</div> </div>
<div class="stat-card">
<div class="stat-icon primary">
<i class="fas fa-bolt"></i>
</div>
<div class="stat-content">
<div class="stat-value">${cacheHitRate}%</div>
<div class="stat-label">Cache Hit Rate</div>
<div class="stat-change">
${window.api.formatNumber(this.costData.cacheReadTokens)} cached tokens
</div>
</div>
</div>
<div class="stat-card"> <div class="stat-card">
<div class="stat-icon danger"> <div class="stat-icon danger">
<i class="fas fa-piggy-bank"></i> <i class="fas fa-piggy-bank"></i>
@@ -181,15 +201,24 @@ class CostsPage {
const tableBody = document.querySelector('#pricing-table tbody'); const tableBody = document.querySelector('#pricing-table tbody');
if (!tableBody) return; if (!tableBody) return;
tableBody.innerHTML = data.map(row => ` tableBody.innerHTML = data.map(row => {
<tr> const cacheRead = row.cache_read_cost != null
<td><span class="badge-client">${row.provider.toUpperCase()}</span></td> ? `${window.api.formatCurrency(row.cache_read_cost)} / 1M`
<td><code class="code-sm">${row.id}</code></td> : '<span style="color:var(--fg4)">--</span>';
<td>${window.api.formatCurrency(row.prompt_cost)} / 1M</td> const cacheWrite = row.cache_write_cost != null
<td>${window.api.formatCurrency(row.completion_cost)} / 1M</td> ? `${window.api.formatCurrency(row.cache_write_cost)} / 1M`
<td>Now</td> : '<span style="color:var(--fg4)">--</span>';
</tr> return `
`).join(''); <tr>
<td><span class="badge-client">${row.provider.toUpperCase()}</span></td>
<td><code class="code-sm">${row.id}</code></td>
<td>${window.api.formatCurrency(row.prompt_cost)} / 1M</td>
<td>${window.api.formatCurrency(row.completion_cost)} / 1M</td>
<td>${cacheRead}</td>
<td>${cacheWrite}</td>
</tr>
`;
}).join('');
} }
setupEventListeners() { setupEventListeners() {