feat: add cache token tracking and cache-aware cost calculation
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled

Track cache_read_tokens and cache_write_tokens end-to-end: parse from
provider responses (OpenAI, DeepSeek, Grok, Gemini), persist to SQLite,
apply cache-aware pricing from the model registry, and surface in API
responses and the dashboard.

- Add cache fields to ProviderResponse, StreamUsage, RequestLog structs
- Parse cached_tokens (OpenAI/Grok), prompt_cache_hit_tokens and
  prompt_cache_miss_tokens (DeepSeek), cachedContentTokenCount (Gemini)
  from provider responses
- Send stream_options.include_usage for streaming; capture real usage
  from final SSE chunk in AggregatingStream
- ALTER TABLE migration for cache_read_tokens/cache_write_tokens columns
- Cache-aware cost formula using registry cache_read/cache_write rates
- Update Provider trait calculate_cost signature across all providers
- Add cache_read_tokens/cache_write_tokens to Usage API response
- Dashboard: cache hit rate card, cache columns in pricing and usage
  tables, cache token aggregation in SQL queries
- Remove API debug panel and verbose console logging from api.js
- Bump static asset cache-bust to v5
This commit is contained in:
2026-03-02 14:45:21 -05:00
parent 232f092f27
commit db5824f0fb
19 changed files with 352 additions and 109 deletions

View File

@@ -84,6 +84,8 @@ pub(super) async fn handle_get_models(
let mut enabled = true;
let mut prompt_cost = m_meta.cost.as_ref().map(|c| c.input).unwrap_or(0.0);
let mut completion_cost = m_meta.cost.as_ref().map(|c| c.output).unwrap_or(0.0);
let cache_read_cost = m_meta.cost.as_ref().and_then(|c| c.cache_read);
let cache_write_cost = m_meta.cost.as_ref().and_then(|c| c.cache_write);
let mut mapping = None::<String>;
if let Some(row) = db_models.get(m_key) {
@@ -105,6 +107,8 @@ pub(super) async fn handle_get_models(
"enabled": enabled,
"prompt_cost": prompt_cost,
"completion_cost": completion_cost,
"cache_read_cost": cache_read_cost,
"cache_write_cost": cache_write_cost,
"mapping": mapping,
"context_limit": m_meta.limit.as_ref().map(|l| l.context).unwrap_or(0),
"output_limit": m_meta.limit.as_ref().map(|l| l.output).unwrap_or(0),

View File

@@ -16,7 +16,9 @@ pub(super) async fn handle_usage_summary(State(state): State<DashboardState>) ->
COUNT(*) as total_requests,
COALESCE(SUM(total_tokens), 0) as total_tokens,
COALESCE(SUM(cost), 0.0) as total_cost,
COUNT(DISTINCT client_id) as active_clients
COUNT(DISTINCT client_id) as active_clients,
COALESCE(SUM(cache_read_tokens), 0) as total_cache_read,
COALESCE(SUM(cache_write_tokens), 0) as total_cache_write
FROM llm_requests
"#,
)
@@ -64,6 +66,8 @@ pub(super) async fn handle_usage_summary(State(state): State<DashboardState>) ->
let total_tokens: i64 = t.get("total_tokens");
let total_cost: f64 = t.get("total_cost");
let active_clients: i64 = t.get("active_clients");
let total_cache_read: i64 = t.get("total_cache_read");
let total_cache_write: i64 = t.get("total_cache_write");
let today_requests: i64 = d.get("today_requests");
let today_cost: f64 = d.get("today_cost");
@@ -87,6 +91,8 @@ pub(super) async fn handle_usage_summary(State(state): State<DashboardState>) ->
"today_cost": today_cost,
"error_rate": error_rate,
"avg_response_time": avg_response_time,
"total_cache_read_tokens": total_cache_read,
"total_cache_write_tokens": total_cache_write,
})))
}
_ => Json(ApiResponse::error("Failed to fetch usage statistics".to_string())),
@@ -208,7 +214,9 @@ pub(super) async fn handle_providers_usage(
provider,
COUNT(*) as requests,
COALESCE(SUM(total_tokens), 0) as tokens,
COALESCE(SUM(cost), 0.0) as cost
COALESCE(SUM(cost), 0.0) as cost,
COALESCE(SUM(cache_read_tokens), 0) as cache_read,
COALESCE(SUM(cache_write_tokens), 0) as cache_write
FROM llm_requests
GROUP BY provider
ORDER BY requests DESC
@@ -226,12 +234,16 @@ pub(super) async fn handle_providers_usage(
let requests: i64 = row.get("requests");
let tokens: i64 = row.get("tokens");
let cost: f64 = row.get("cost");
let cache_read: i64 = row.get("cache_read");
let cache_write: i64 = row.get("cache_write");
provider_usage.push(serde_json::json!({
"provider": provider,
"requests": requests,
"tokens": tokens,
"cost": cost,
"cache_read_tokens": cache_read,
"cache_write_tokens": cache_write,
}));
}
@@ -256,7 +268,9 @@ pub(super) async fn handle_detailed_usage(State(state): State<DashboardState>) -
model,
COUNT(*) as requests,
COALESCE(SUM(total_tokens), 0) as tokens,
COALESCE(SUM(cost), 0.0) as cost
COALESCE(SUM(cost), 0.0) as cost,
COALESCE(SUM(cache_read_tokens), 0) as cache_read,
COALESCE(SUM(cache_write_tokens), 0) as cache_write
FROM llm_requests
GROUP BY date, client_id, provider, model
ORDER BY date DESC
@@ -279,6 +293,8 @@ pub(super) async fn handle_detailed_usage(State(state): State<DashboardState>) -
"requests": row.get::<i64, _>("requests"),
"tokens": row.get::<i64, _>("tokens"),
"cost": row.get::<f64, _>("cost"),
"cache_read_tokens": row.get::<i64, _>("cache_read"),
"cache_write_tokens": row.get::<i64, _>("cache_write"),
})
})
.collect();