Compare commits
25 Commits
633b69a07b
...
rust
| Author | SHA1 | Date | |
|---|---|---|---|
| 649371154f | |||
| 78fff61660 | |||
| b131094dfd | |||
| c3d81c1733 | |||
| e123f542f1 | |||
| 0d28241e39 | |||
| 754ee9cb84 | |||
| 5a9086b883 | |||
| cc5eba1957 | |||
| 3ab00fb188 | |||
| c2595f7a74 | |||
| 0526304398 | |||
| 75e2967727 | |||
| e1bc3b35eb | |||
| 0d32d953d2 | |||
| bd5ca2dd98 | |||
| 6a0aca1a6c | |||
| 4c629e17cb | |||
| fc3bc6968d | |||
| d6280abad9 | |||
| 96486b6318 | |||
| e8955fd36c | |||
| a243a3987d | |||
| 4be23629d8 | |||
| dd54c14ff8 |
12
.env
12
.env
@@ -15,8 +15,14 @@ GROK_API_KEY=gk-demo-grok-key
|
|||||||
# Authentication tokens (comma-separated list)
|
# Authentication tokens (comma-separated list)
|
||||||
LLM_PROXY__SERVER__AUTH_TOKENS=demo-token-123456,another-token
|
LLM_PROXY__SERVER__AUTH_TOKENS=demo-token-123456,another-token
|
||||||
|
|
||||||
# Server port (optional)
|
|
||||||
LLM_PROXY__SERVER__PORT=8080
|
|
||||||
|
|
||||||
# Database path (optional)
|
# Database path (optional)
|
||||||
LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db
|
LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db
|
||||||
|
|
||||||
|
# Session Secret (for signed tokens)
|
||||||
|
SESSION_SECRET=ki9khXAk9usDkasMrD2UbK4LOgrDRJz0
|
||||||
|
|
||||||
|
# Encryption key (required)
|
||||||
|
LLM_PROXY__ENCRYPTION_KEY=69879f5b7913ba169982190526ae213e830b3f1f33e785ef2b68cf48c7853fcd
|
||||||
|
|
||||||
|
# Server port (optional)
|
||||||
|
LLM_PROXY__SERVER__PORT=8080
|
||||||
|
|||||||
22
.env.backup
Normal file
22
.env.backup
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# LLM Proxy Gateway Environment Variables
|
||||||
|
|
||||||
|
# OpenAI
|
||||||
|
OPENAI_API_KEY=sk-demo-openai-key
|
||||||
|
|
||||||
|
# Google Gemini
|
||||||
|
GEMINI_API_KEY=AIza-demo-gemini-key
|
||||||
|
|
||||||
|
# DeepSeek
|
||||||
|
DEEPSEEK_API_KEY=sk-demo-deepseek-key
|
||||||
|
|
||||||
|
# xAI Grok (not yet available)
|
||||||
|
GROK_API_KEY=gk-demo-grok-key
|
||||||
|
|
||||||
|
# Authentication tokens (comma-separated list)
|
||||||
|
LLM_PROXY__SERVER__AUTH_TOKENS=demo-token-123456,another-token
|
||||||
|
|
||||||
|
# Server port (optional)
|
||||||
|
LLM_PROXY__SERVER__PORT=8080
|
||||||
|
|
||||||
|
# Database path (optional)
|
||||||
|
LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db
|
||||||
45
PLAN.md
45
PLAN.md
@@ -56,7 +56,44 @@ This document outlines the roadmap for standardizing frontend security, cleaning
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Technical Standards
|
# Phase 6: Cache Cost & Provider Audit (ACTIVE)
|
||||||
- **Rust:** No `unwrap()` in production code; use proper error handling (`Result`).
|
**Primary Agents:** `frontend-developer`, `backend-developer`, `database-optimizer`, `lab-assistant`
|
||||||
- **Frontend:** Always use `window.api` wrappers for sensitive operations.
|
|
||||||
- **Security:** Secrets must never be logged or hardcoded.
|
## 6.1 Dashboard UI Updates (@frontend-developer)
|
||||||
|
- [ ] **Update Models Page Modal:** Add input fields for `Cache Read Cost` and `Cache Write Cost` in `static/js/pages/models.js`.
|
||||||
|
- [ ] **API Integration:** Ensure `window.api.put` includes these new cost fields in the request body.
|
||||||
|
- [ ] **Verify Costs Page:** Confirm `static/js/pages/costs.js` displays these rates correctly in the pricing table.
|
||||||
|
|
||||||
|
## 6.2 Provider Audit & Stream Fixes (@backend-developer)
|
||||||
|
- [ ] **Standard DeepSeek Fix:** Modify `src/providers/deepseek.rs` to stop stripping `stream_options` for `deepseek-chat`.
|
||||||
|
- [ ] **Grok Audit:** Verify if Grok correctly returns usage in streaming; it uses `build_openai_body` and doesn't seem to strip it.
|
||||||
|
- [ ] **Gemini Audit:** Confirm Gemini returns `usage_metadata` reliably in the final chunk.
|
||||||
|
- [ ] **Anthropic Audit:** Check if Anthropic streaming requires `include_usage` or similar flags.
|
||||||
|
|
||||||
|
## 6.3 Database & Migration Validation (@database-optimizer)
|
||||||
|
- [ ] **Test Migrations:** Run the server to ensure `ALTER TABLE` logic in `src/database/mod.rs` applies the new columns correctly.
|
||||||
|
- [ ] **Schema Verification:** Verify `model_configs` has `cache_read_cost_per_m` and `cache_write_cost_per_m` columns.
|
||||||
|
|
||||||
|
## 6.4 Token Estimation Refinement (@lab-assistant)
|
||||||
|
- [ ] **Analyze Heuristic:** Review `chars / 4` in `src/utils/tokens.rs`.
|
||||||
|
- [ ] **Background Precise Recount:** Propose a mechanism for a precise token count (using Tiktoken) after the response is finalized.
|
||||||
|
|
||||||
|
## Critical Path
|
||||||
|
Migration Validation → UI Fields → Provider Stream Usage Reporting.
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
gantt
|
||||||
|
title Phase 6 Timeline
|
||||||
|
dateFormat YYYY-MM-DD
|
||||||
|
section Frontend
|
||||||
|
Models Page UI :2026-03-06, 1d
|
||||||
|
Costs Table Update:after Models Page UI, 1d
|
||||||
|
section Backend
|
||||||
|
DeepSeek Fix :2026-03-06, 1d
|
||||||
|
Provider Audit (Grok/Gemini):after DeepSeek Fix, 2d
|
||||||
|
section Database
|
||||||
|
Migration Test :2026-03-06, 1d
|
||||||
|
section Optimization
|
||||||
|
Token Heuristic Review :2026-03-06, 1d
|
||||||
|
```
|
||||||
|
|
||||||
|
|||||||
BIN
data/backups/llm_proxy.db.20260303T204435Z
Normal file
BIN
data/backups/llm_proxy.db.20260303T204435Z
Normal file
Binary file not shown.
BIN
data/backups/llm_proxy.db.20260303T205057Z
Normal file
BIN
data/backups/llm_proxy.db.20260303T205057Z
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
11
server.log
Normal file
11
server.log
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
[2m2026-03-06T20:07:36.737914Z[0m [32m INFO[0m Starting LLM Proxy Gateway v0.1.0
|
||||||
|
[2m2026-03-06T20:07:36.738903Z[0m [32m INFO[0m Configuration loaded from Some("/home/newkirk/Documents/projects/web_projects/llm-proxy/config.toml")
|
||||||
|
[2m2026-03-06T20:07:36.738945Z[0m [32m INFO[0m Encryption initialized
|
||||||
|
[2m2026-03-06T20:07:36.739124Z[0m [32m INFO[0m Connecting to database at ./data/llm_proxy.db
|
||||||
|
[2m2026-03-06T20:07:36.753254Z[0m [32m INFO[0m Database migrations completed
|
||||||
|
[2m2026-03-06T20:07:36.753294Z[0m [32m INFO[0m Database initialized at "./data/llm_proxy.db"
|
||||||
|
[2m2026-03-06T20:07:36.755187Z[0m [32m INFO[0m Fetching model registry from https://models.dev/api.json
|
||||||
|
[2m2026-03-06T20:07:37.000853Z[0m [32m INFO[0m Successfully loaded model registry
|
||||||
|
[2m2026-03-06T20:07:37.001382Z[0m [32m INFO[0m Model config cache initialized
|
||||||
|
[2m2026-03-06T20:07:37.001702Z[0m [33m WARN[0m SESSION_SECRET environment variable not set. Using a randomly generated secret. This will invalidate all sessions on restart. Set SESSION_SECRET to a fixed hex or base64 encoded 32-byte value.
|
||||||
|
[2m2026-03-06T20:07:37.002898Z[0m [32m INFO[0m Server listening on http://0.0.0.0:8082
|
||||||
1
server.pid
Normal file
1
server.pid
Normal file
@@ -0,0 +1 @@
|
|||||||
|
945904
|
||||||
@@ -33,7 +33,15 @@ pub(super) struct UpdateClientPayload {
|
|||||||
pub(super) rate_limit_per_minute: Option<i64>,
|
pub(super) rate_limit_per_minute: Option<i64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) async fn handle_get_clients(State(state): State<DashboardState>) -> Json<ApiResponse<serde_json::Value>> {
|
pub(super) async fn handle_get_clients(
|
||||||
|
State(state): State<DashboardState>,
|
||||||
|
headers: axum::http::HeaderMap,
|
||||||
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
|
Ok((session, new_token)) => (session, new_token),
|
||||||
|
Err(e) => return e,
|
||||||
|
};
|
||||||
|
|
||||||
let pool = &state.app_state.db_pool;
|
let pool = &state.app_state.db_pool;
|
||||||
|
|
||||||
let result = sqlx::query(
|
let result = sqlx::query(
|
||||||
@@ -88,7 +96,7 @@ pub(super) async fn handle_create_client(
|
|||||||
headers: axum::http::HeaderMap,
|
headers: axum::http::HeaderMap,
|
||||||
Json(payload): Json<CreateClientRequest>,
|
Json(payload): Json<CreateClientRequest>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match super::auth::require_admin(&state, &headers).await {
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
@@ -199,7 +207,7 @@ pub(super) async fn handle_update_client(
|
|||||||
Path(id): Path<String>,
|
Path(id): Path<String>,
|
||||||
Json(payload): Json<UpdateClientPayload>,
|
Json(payload): Json<UpdateClientPayload>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match super::auth::require_admin(&state, &headers).await {
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
@@ -296,7 +304,7 @@ pub(super) async fn handle_delete_client(
|
|||||||
headers: axum::http::HeaderMap,
|
headers: axum::http::HeaderMap,
|
||||||
Path(id): Path<String>,
|
Path(id): Path<String>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match super::auth::require_admin(&state, &headers).await {
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
@@ -321,8 +329,14 @@ pub(super) async fn handle_delete_client(
|
|||||||
|
|
||||||
pub(super) async fn handle_client_usage(
|
pub(super) async fn handle_client_usage(
|
||||||
State(state): State<DashboardState>,
|
State(state): State<DashboardState>,
|
||||||
|
headers: axum::http::HeaderMap,
|
||||||
Path(id): Path<String>,
|
Path(id): Path<String>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
|
Ok((session, new_token)) => (session, new_token),
|
||||||
|
Err(e) => return e,
|
||||||
|
};
|
||||||
|
|
||||||
let pool = &state.app_state.db_pool;
|
let pool = &state.app_state.db_pool;
|
||||||
|
|
||||||
// Get per-model breakdown for this client
|
// Get per-model breakdown for this client
|
||||||
@@ -381,8 +395,14 @@ pub(super) async fn handle_client_usage(
|
|||||||
|
|
||||||
pub(super) async fn handle_get_client_tokens(
|
pub(super) async fn handle_get_client_tokens(
|
||||||
State(state): State<DashboardState>,
|
State(state): State<DashboardState>,
|
||||||
|
headers: axum::http::HeaderMap,
|
||||||
Path(id): Path<String>,
|
Path(id): Path<String>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
|
Ok((session, new_token)) => (session, new_token),
|
||||||
|
Err(e) => return e,
|
||||||
|
};
|
||||||
|
|
||||||
let pool = &state.app_state.db_pool;
|
let pool = &state.app_state.db_pool;
|
||||||
|
|
||||||
let result = sqlx::query(
|
let result = sqlx::query(
|
||||||
@@ -440,7 +460,7 @@ pub(super) async fn handle_create_client_token(
|
|||||||
Path(id): Path<String>,
|
Path(id): Path<String>,
|
||||||
Json(payload): Json<CreateTokenRequest>,
|
Json(payload): Json<CreateTokenRequest>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match super::auth::require_admin(&state, &headers).await {
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
@@ -489,7 +509,7 @@ pub(super) async fn handle_delete_client_token(
|
|||||||
headers: axum::http::HeaderMap,
|
headers: axum::http::HeaderMap,
|
||||||
Path((client_id, token_id)): Path<(String, i64)>,
|
Path((client_id, token_id)): Path<(String, i64)>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match super::auth::require_admin(&state, &headers).await {
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -60,19 +60,16 @@ impl<T> ApiResponse<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Rate limiting middleware for dashboard routes that extracts AppState from DashboardState.
|
/// Rate limiting middleware for dashboard routes
|
||||||
async fn dashboard_rate_limit_middleware(
|
async fn dashboard_rate_limit_middleware(
|
||||||
State(dashboard_state): State<DashboardState>,
|
State(_dashboard_state): State<DashboardState>,
|
||||||
request: Request,
|
request: Request,
|
||||||
next: Next,
|
next: Next,
|
||||||
) -> Result<Response, crate::errors::AppError> {
|
) -> Result<Response, crate::errors::AppError> {
|
||||||
// Delegate to the existing rate limit middleware with AppState
|
// Bypass rate limiting for dashboard routes to prevent "Failed to load statistics"
|
||||||
crate::rate_limiting::middleware::rate_limit_middleware(
|
// when the UI makes many concurrent requests on load.
|
||||||
State(dashboard_state.app_state),
|
// Dashboard endpoints are already secured via auth::require_admin.
|
||||||
request,
|
Ok(next.run(request).await)
|
||||||
next,
|
|
||||||
)
|
|
||||||
.await
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dashboard routes
|
// Dashboard routes
|
||||||
@@ -86,7 +83,7 @@ pub fn router(state: AppState) -> Router {
|
|||||||
// Security headers
|
// Security headers
|
||||||
let csp_header: SetResponseHeaderLayer<HeaderValue> = SetResponseHeaderLayer::overriding(
|
let csp_header: SetResponseHeaderLayer<HeaderValue> = SetResponseHeaderLayer::overriding(
|
||||||
header::CONTENT_SECURITY_POLICY,
|
header::CONTENT_SECURITY_POLICY,
|
||||||
"default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; img-src 'self' data:; connect-src 'self' ws:;"
|
"default-src 'self'; script-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; style-src 'self' 'unsafe-inline' https://cdnjs.cloudflare.com https://fonts.googleapis.com; font-src 'self' https://cdnjs.cloudflare.com https://fonts.gstatic.com; img-src 'self' data:; connect-src 'self' ws:;"
|
||||||
.parse()
|
.parse()
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -43,42 +43,17 @@ pub(super) struct ModelListParams {
|
|||||||
|
|
||||||
pub(super) async fn handle_get_models(
|
pub(super) async fn handle_get_models(
|
||||||
State(state): State<DashboardState>,
|
State(state): State<DashboardState>,
|
||||||
|
headers: axum::http::HeaderMap,
|
||||||
Query(params): Query<ModelListParams>,
|
Query(params): Query<ModelListParams>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
|
Ok((session, new_token)) => (session, new_token),
|
||||||
|
Err(e) => return e,
|
||||||
|
};
|
||||||
|
|
||||||
let registry = &state.app_state.model_registry;
|
let registry = &state.app_state.model_registry;
|
||||||
let pool = &state.app_state.db_pool;
|
let pool = &state.app_state.db_pool;
|
||||||
|
|
||||||
// If used_only, fetch the set of models that appear in llm_requests
|
|
||||||
let used_models: Option<std::collections::HashSet<String>> =
|
|
||||||
if params.used_only.unwrap_or(false) {
|
|
||||||
match sqlx::query_scalar::<_, String>(
|
|
||||||
"SELECT DISTINCT model FROM llm_requests",
|
|
||||||
)
|
|
||||||
.fetch_all(pool)
|
|
||||||
.await
|
|
||||||
{
|
|
||||||
Ok(models) => Some(models.into_iter().collect()),
|
|
||||||
Err(_) => Some(std::collections::HashSet::new()),
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
|
|
||||||
// Build filter from query params
|
|
||||||
let filter = ModelFilter {
|
|
||||||
provider: params.provider,
|
|
||||||
search: params.search,
|
|
||||||
modality: params.modality,
|
|
||||||
tool_call: params.tool_call,
|
|
||||||
reasoning: params.reasoning,
|
|
||||||
has_cost: params.has_cost,
|
|
||||||
};
|
|
||||||
let sort_by = params.sort_by.unwrap_or_default();
|
|
||||||
let sort_order = params.sort_order.unwrap_or_default();
|
|
||||||
|
|
||||||
// Get filtered and sorted model entries
|
|
||||||
let entries = registry.list_models(&filter, &sort_by, &sort_order);
|
|
||||||
|
|
||||||
// Load overrides from database
|
// Load overrides from database
|
||||||
let db_models_result =
|
let db_models_result =
|
||||||
sqlx::query("SELECT id, enabled, prompt_cost_per_m, completion_cost_per_m, mapping FROM model_configs")
|
sqlx::query("SELECT id, enabled, prompt_cost_per_m, completion_cost_per_m, mapping FROM model_configs")
|
||||||
@@ -95,16 +70,89 @@ pub(super) async fn handle_get_models(
|
|||||||
|
|
||||||
let mut models_json = Vec::new();
|
let mut models_json = Vec::new();
|
||||||
|
|
||||||
|
if params.used_only.unwrap_or(false) {
|
||||||
|
// EXACT USED MODELS LOGIC
|
||||||
|
let used_pairs_result = sqlx::query(
|
||||||
|
"SELECT DISTINCT provider, model FROM llm_requests",
|
||||||
|
)
|
||||||
|
.fetch_all(pool)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
if let Ok(rows) = used_pairs_result {
|
||||||
|
for row in rows {
|
||||||
|
let provider: String = row.get("provider");
|
||||||
|
let m_key: String = row.get("model");
|
||||||
|
|
||||||
|
let provider_name = match provider.as_str() {
|
||||||
|
"openai" => "OpenAI",
|
||||||
|
"gemini" => "Google Gemini",
|
||||||
|
"deepseek" => "DeepSeek",
|
||||||
|
"grok" => "xAI Grok",
|
||||||
|
"ollama" => "Ollama",
|
||||||
|
_ => provider.as_str(),
|
||||||
|
}.to_string();
|
||||||
|
|
||||||
|
let m_meta = registry.find_model(&m_key);
|
||||||
|
|
||||||
|
let mut enabled = true;
|
||||||
|
let mut prompt_cost = m_meta.and_then(|m| m.cost.as_ref().map(|c| c.input)).unwrap_or(0.0);
|
||||||
|
let mut completion_cost = m_meta.and_then(|m| m.cost.as_ref().map(|c| c.output)).unwrap_or(0.0);
|
||||||
|
let cache_read_cost = m_meta.and_then(|m| m.cost.as_ref().and_then(|c| c.cache_read));
|
||||||
|
let cache_write_cost = m_meta.and_then(|m| m.cost.as_ref().and_then(|c| c.cache_write));
|
||||||
|
let mut mapping = None::<String>;
|
||||||
|
|
||||||
|
if let Some(db_row) = db_models.get(&m_key) {
|
||||||
|
enabled = db_row.get("enabled");
|
||||||
|
if let Some(p) = db_row.get::<Option<f64>, _>("prompt_cost_per_m") {
|
||||||
|
prompt_cost = p;
|
||||||
|
}
|
||||||
|
if let Some(c) = db_row.get::<Option<f64>, _>("completion_cost_per_m") {
|
||||||
|
completion_cost = c;
|
||||||
|
}
|
||||||
|
mapping = db_row.get("mapping");
|
||||||
|
}
|
||||||
|
|
||||||
|
models_json.push(serde_json::json!({
|
||||||
|
"id": m_key,
|
||||||
|
"provider": provider,
|
||||||
|
"provider_name": provider_name,
|
||||||
|
"name": m_meta.map(|m| m.name.clone()).unwrap_or_else(|| m_key.clone()),
|
||||||
|
"enabled": enabled,
|
||||||
|
"prompt_cost": prompt_cost,
|
||||||
|
"completion_cost": completion_cost,
|
||||||
|
"cache_read_cost": cache_read_cost,
|
||||||
|
"cache_write_cost": cache_write_cost,
|
||||||
|
"mapping": mapping,
|
||||||
|
"context_limit": m_meta.and_then(|m| m.limit.as_ref().map(|l| l.context)).unwrap_or(0),
|
||||||
|
"output_limit": m_meta.and_then(|m| m.limit.as_ref().map(|l| l.output)).unwrap_or(0),
|
||||||
|
"modalities": m_meta.and_then(|m| m.modalities.as_ref().map(|mo| serde_json::json!({
|
||||||
|
"input": mo.input,
|
||||||
|
"output": mo.output,
|
||||||
|
}))),
|
||||||
|
"tool_call": m_meta.and_then(|m| m.tool_call),
|
||||||
|
"reasoning": m_meta.and_then(|m| m.reasoning),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// REGISTRY LISTING LOGIC
|
||||||
|
// Build filter from query params
|
||||||
|
let filter = ModelFilter {
|
||||||
|
provider: params.provider,
|
||||||
|
search: params.search,
|
||||||
|
modality: params.modality,
|
||||||
|
tool_call: params.tool_call,
|
||||||
|
reasoning: params.reasoning,
|
||||||
|
has_cost: params.has_cost,
|
||||||
|
};
|
||||||
|
let sort_by = params.sort_by.unwrap_or_default();
|
||||||
|
let sort_order = params.sort_order.unwrap_or_default();
|
||||||
|
|
||||||
|
// Get filtered and sorted model entries
|
||||||
|
let entries = registry.list_models(&filter, &sort_by, &sort_order);
|
||||||
|
|
||||||
for entry in &entries {
|
for entry in &entries {
|
||||||
let m_key = entry.model_key;
|
let m_key = entry.model_key;
|
||||||
|
|
||||||
// Skip models not in the used set (when used_only is active)
|
|
||||||
if let Some(ref used) = used_models {
|
|
||||||
if !used.contains(m_key) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let m_meta = entry.metadata;
|
let m_meta = entry.metadata;
|
||||||
|
|
||||||
let mut enabled = true;
|
let mut enabled = true;
|
||||||
@@ -146,6 +194,7 @@ pub(super) async fn handle_get_models(
|
|||||||
"reasoning": m_meta.reasoning,
|
"reasoning": m_meta.reasoning,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Json(ApiResponse::success(serde_json::json!(models_json)))
|
Json(ApiResponse::success(serde_json::json!(models_json)))
|
||||||
}
|
}
|
||||||
@@ -156,7 +205,7 @@ pub(super) async fn handle_update_model(
|
|||||||
Path(id): Path<String>,
|
Path(id): Path<String>,
|
||||||
Json(payload): Json<UpdateModelRequest>,
|
Json(payload): Json<UpdateModelRequest>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match super::auth::require_admin(&state, &headers).await {
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -21,7 +21,15 @@ pub(super) struct UpdateProviderRequest {
|
|||||||
pub(super) billing_mode: Option<String>,
|
pub(super) billing_mode: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) async fn handle_get_providers(State(state): State<DashboardState>) -> Json<ApiResponse<serde_json::Value>> {
|
pub(super) async fn handle_get_providers(
|
||||||
|
State(state): State<DashboardState>,
|
||||||
|
headers: axum::http::HeaderMap,
|
||||||
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
|
Ok((session, new_token)) => (session, new_token),
|
||||||
|
Err(e) => return e,
|
||||||
|
};
|
||||||
|
|
||||||
let registry = &state.app_state.model_registry;
|
let registry = &state.app_state.model_registry;
|
||||||
let config = &state.app_state.config;
|
let config = &state.app_state.config;
|
||||||
let pool = &state.app_state.db_pool;
|
let pool = &state.app_state.db_pool;
|
||||||
@@ -154,8 +162,14 @@ pub(super) async fn handle_get_providers(State(state): State<DashboardState>) ->
|
|||||||
|
|
||||||
pub(super) async fn handle_get_provider(
|
pub(super) async fn handle_get_provider(
|
||||||
State(state): State<DashboardState>,
|
State(state): State<DashboardState>,
|
||||||
|
headers: axum::http::HeaderMap,
|
||||||
Path(name): Path<String>,
|
Path(name): Path<String>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
|
Ok((session, new_token)) => (session, new_token),
|
||||||
|
Err(e) => return e,
|
||||||
|
};
|
||||||
|
|
||||||
let registry = &state.app_state.model_registry;
|
let registry = &state.app_state.model_registry;
|
||||||
let config = &state.app_state.config;
|
let config = &state.app_state.config;
|
||||||
let pool = &state.app_state.db_pool;
|
let pool = &state.app_state.db_pool;
|
||||||
@@ -266,7 +280,7 @@ pub(super) async fn handle_update_provider(
|
|||||||
Path(name): Path<String>,
|
Path(name): Path<String>,
|
||||||
Json(payload): Json<UpdateProviderRequest>,
|
Json(payload): Json<UpdateProviderRequest>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match super::auth::require_admin(&state, &headers).await {
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
@@ -351,8 +365,14 @@ pub(super) async fn handle_update_provider(
|
|||||||
|
|
||||||
pub(super) async fn handle_test_provider(
|
pub(super) async fn handle_test_provider(
|
||||||
State(state): State<DashboardState>,
|
State(state): State<DashboardState>,
|
||||||
|
headers: axum::http::HeaderMap,
|
||||||
Path(name): Path<String>,
|
Path(name): Path<String>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
|
Ok((session, new_token)) => (session, new_token),
|
||||||
|
Err(e) => return e,
|
||||||
|
};
|
||||||
|
|
||||||
let start = std::time::Instant::now();
|
let start = std::time::Instant::now();
|
||||||
|
|
||||||
let provider = match state.app_state.provider_manager.get_provider(&name).await {
|
let provider = match state.app_state.provider_manager.get_provider(&name).await {
|
||||||
|
|||||||
@@ -12,7 +12,15 @@ fn read_proc_file(path: &str) -> Option<String> {
|
|||||||
std::fs::read_to_string(path).ok()
|
std::fs::read_to_string(path).ok()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) async fn handle_system_health(State(state): State<DashboardState>) -> Json<ApiResponse<serde_json::Value>> {
|
pub(super) async fn handle_system_health(
|
||||||
|
State(state): State<DashboardState>,
|
||||||
|
headers: axum::http::HeaderMap,
|
||||||
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
|
Ok((session, new_token)) => (session, new_token),
|
||||||
|
Err(e) => return e,
|
||||||
|
};
|
||||||
|
|
||||||
let mut components = HashMap::new();
|
let mut components = HashMap::new();
|
||||||
components.insert("api_server".to_string(), "online".to_string());
|
components.insert("api_server".to_string(), "online".to_string());
|
||||||
components.insert("database".to_string(), "online".to_string());
|
components.insert("database".to_string(), "online".to_string());
|
||||||
@@ -67,7 +75,13 @@ pub(super) async fn handle_system_health(State(state): State<DashboardState>) ->
|
|||||||
/// Real system metrics from /proc (Linux only).
|
/// Real system metrics from /proc (Linux only).
|
||||||
pub(super) async fn handle_system_metrics(
|
pub(super) async fn handle_system_metrics(
|
||||||
State(state): State<DashboardState>,
|
State(state): State<DashboardState>,
|
||||||
|
headers: axum::http::HeaderMap,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
|
Ok((session, new_token)) => (session, new_token),
|
||||||
|
Err(e) => return e,
|
||||||
|
};
|
||||||
|
|
||||||
// --- CPU usage (aggregate across all cores) ---
|
// --- CPU usage (aggregate across all cores) ---
|
||||||
// /proc/stat first line: cpu user nice system idle iowait irq softirq steal guest guest_nice
|
// /proc/stat first line: cpu user nice system idle iowait irq softirq steal guest guest_nice
|
||||||
let cpu_percent = read_proc_file("/proc/stat")
|
let cpu_percent = read_proc_file("/proc/stat")
|
||||||
@@ -220,7 +234,15 @@ pub(super) async fn handle_system_metrics(
|
|||||||
})))
|
})))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) async fn handle_system_logs(State(state): State<DashboardState>) -> Json<ApiResponse<serde_json::Value>> {
|
pub(super) async fn handle_system_logs(
|
||||||
|
State(state): State<DashboardState>,
|
||||||
|
headers: axum::http::HeaderMap,
|
||||||
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
|
Ok((session, new_token)) => (session, new_token),
|
||||||
|
Err(e) => return e,
|
||||||
|
};
|
||||||
|
|
||||||
let pool = &state.app_state.db_pool;
|
let pool = &state.app_state.db_pool;
|
||||||
|
|
||||||
let result = sqlx::query(
|
let result = sqlx::query(
|
||||||
@@ -233,7 +255,10 @@ pub(super) async fn handle_system_logs(State(state): State<DashboardState>) -> J
|
|||||||
model,
|
model,
|
||||||
prompt_tokens,
|
prompt_tokens,
|
||||||
completion_tokens,
|
completion_tokens,
|
||||||
|
reasoning_tokens,
|
||||||
total_tokens,
|
total_tokens,
|
||||||
|
cache_read_tokens,
|
||||||
|
cache_write_tokens,
|
||||||
cost,
|
cost,
|
||||||
status,
|
status,
|
||||||
error_message,
|
error_message,
|
||||||
@@ -257,6 +282,11 @@ pub(super) async fn handle_system_logs(State(state): State<DashboardState>) -> J
|
|||||||
"client_id": row.get::<String, _>("client_id"),
|
"client_id": row.get::<String, _>("client_id"),
|
||||||
"provider": row.get::<String, _>("provider"),
|
"provider": row.get::<String, _>("provider"),
|
||||||
"model": row.get::<String, _>("model"),
|
"model": row.get::<String, _>("model"),
|
||||||
|
"prompt_tokens": row.get::<i64, _>("prompt_tokens"),
|
||||||
|
"completion_tokens": row.get::<i64, _>("completion_tokens"),
|
||||||
|
"reasoning_tokens": row.get::<i64, _>("reasoning_tokens"),
|
||||||
|
"cache_read_tokens": row.get::<i64, _>("cache_read_tokens"),
|
||||||
|
"cache_write_tokens": row.get::<i64, _>("cache_write_tokens"),
|
||||||
"tokens": row.get::<i64, _>("total_tokens"),
|
"tokens": row.get::<i64, _>("total_tokens"),
|
||||||
"cost": row.get::<f64, _>("cost"),
|
"cost": row.get::<f64, _>("cost"),
|
||||||
"status": row.get::<String, _>("status"),
|
"status": row.get::<String, _>("status"),
|
||||||
@@ -279,7 +309,7 @@ pub(super) async fn handle_system_backup(
|
|||||||
State(state): State<DashboardState>,
|
State(state): State<DashboardState>,
|
||||||
headers: axum::http::HeaderMap,
|
headers: axum::http::HeaderMap,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match super::auth::require_admin(&state, &headers).await {
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
@@ -318,7 +348,15 @@ pub(super) async fn handle_system_backup(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) async fn handle_get_settings(State(state): State<DashboardState>) -> Json<ApiResponse<serde_json::Value>> {
|
pub(super) async fn handle_get_settings(
|
||||||
|
State(state): State<DashboardState>,
|
||||||
|
headers: axum::http::HeaderMap,
|
||||||
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
|
Ok((session, new_token)) => (session, new_token),
|
||||||
|
Err(e) => return e,
|
||||||
|
};
|
||||||
|
|
||||||
let registry = &state.app_state.model_registry;
|
let registry = &state.app_state.model_registry;
|
||||||
let provider_count = registry.providers.len();
|
let provider_count = registry.providers.len();
|
||||||
let model_count: usize = registry.providers.values().map(|p| p.models.len()).sum();
|
let model_count: usize = registry.providers.values().map(|p| p.models.len()).sum();
|
||||||
@@ -342,7 +380,7 @@ pub(super) async fn handle_update_settings(
|
|||||||
State(state): State<DashboardState>,
|
State(state): State<DashboardState>,
|
||||||
headers: axum::http::HeaderMap,
|
headers: axum::http::HeaderMap,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match super::auth::require_admin(&state, &headers).await {
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -71,8 +71,14 @@ impl UsagePeriodFilter {
|
|||||||
|
|
||||||
pub(super) async fn handle_usage_summary(
|
pub(super) async fn handle_usage_summary(
|
||||||
State(state): State<DashboardState>,
|
State(state): State<DashboardState>,
|
||||||
|
headers: axum::http::HeaderMap,
|
||||||
Query(filter): Query<UsagePeriodFilter>,
|
Query(filter): Query<UsagePeriodFilter>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
|
Ok((session, new_token)) => (session, new_token),
|
||||||
|
Err(e) => return e,
|
||||||
|
};
|
||||||
|
|
||||||
let pool = &state.app_state.db_pool;
|
let pool = &state.app_state.db_pool;
|
||||||
let (period_clause, period_binds) = filter.to_sql();
|
let (period_clause, period_binds) = filter.to_sql();
|
||||||
|
|
||||||
@@ -133,7 +139,9 @@ pub(super) async fn handle_usage_summary(
|
|||||||
)
|
)
|
||||||
.fetch_one(pool);
|
.fetch_one(pool);
|
||||||
|
|
||||||
match tokio::join!(total_stats, today_stats, error_stats, avg_response) {
|
let results = tokio::join!(total_stats, today_stats, error_stats, avg_response);
|
||||||
|
|
||||||
|
match results {
|
||||||
(Ok(t), Ok(d), Ok(e), Ok(a)) => {
|
(Ok(t), Ok(d), Ok(e), Ok(a)) => {
|
||||||
let total_requests: i64 = t.get("total_requests");
|
let total_requests: i64 = t.get("total_requests");
|
||||||
let total_tokens: i64 = t.get("total_tokens");
|
let total_tokens: i64 = t.get("total_tokens");
|
||||||
@@ -168,14 +176,26 @@ pub(super) async fn handle_usage_summary(
|
|||||||
"total_cache_write_tokens": total_cache_write,
|
"total_cache_write_tokens": total_cache_write,
|
||||||
})))
|
})))
|
||||||
}
|
}
|
||||||
_ => Json(ApiResponse::error("Failed to fetch usage statistics".to_string())),
|
(t_res, d_res, e_res, a_res) => {
|
||||||
|
if let Err(e) = t_res { warn!("Total stats query failed: {}", e); }
|
||||||
|
if let Err(e) = d_res { warn!("Today stats query failed: {}", e); }
|
||||||
|
if let Err(e) = e_res { warn!("Error stats query failed: {}", e); }
|
||||||
|
if let Err(e) = a_res { warn!("Avg response query failed: {}", e); }
|
||||||
|
Json(ApiResponse::error("Failed to fetch usage statistics".to_string()))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) async fn handle_time_series(
|
pub(super) async fn handle_time_series(
|
||||||
State(state): State<DashboardState>,
|
State(state): State<DashboardState>,
|
||||||
|
headers: axum::http::HeaderMap,
|
||||||
Query(filter): Query<UsagePeriodFilter>,
|
Query(filter): Query<UsagePeriodFilter>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
|
Ok((session, new_token)) => (session, new_token),
|
||||||
|
Err(e) => return e,
|
||||||
|
};
|
||||||
|
|
||||||
let pool = &state.app_state.db_pool;
|
let pool = &state.app_state.db_pool;
|
||||||
let (period_clause, period_binds) = filter.to_sql();
|
let (period_clause, period_binds) = filter.to_sql();
|
||||||
let granularity = filter.granularity();
|
let granularity = filter.granularity();
|
||||||
@@ -248,8 +268,14 @@ pub(super) async fn handle_time_series(
|
|||||||
|
|
||||||
pub(super) async fn handle_clients_usage(
|
pub(super) async fn handle_clients_usage(
|
||||||
State(state): State<DashboardState>,
|
State(state): State<DashboardState>,
|
||||||
|
headers: axum::http::HeaderMap,
|
||||||
Query(filter): Query<UsagePeriodFilter>,
|
Query(filter): Query<UsagePeriodFilter>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
|
Ok((session, new_token)) => (session, new_token),
|
||||||
|
Err(e) => return e,
|
||||||
|
};
|
||||||
|
|
||||||
let pool = &state.app_state.db_pool;
|
let pool = &state.app_state.db_pool;
|
||||||
let (period_clause, period_binds) = filter.to_sql();
|
let (period_clause, period_binds) = filter.to_sql();
|
||||||
|
|
||||||
@@ -308,8 +334,14 @@ pub(super) async fn handle_clients_usage(
|
|||||||
|
|
||||||
pub(super) async fn handle_providers_usage(
|
pub(super) async fn handle_providers_usage(
|
||||||
State(state): State<DashboardState>,
|
State(state): State<DashboardState>,
|
||||||
|
headers: axum::http::HeaderMap,
|
||||||
Query(filter): Query<UsagePeriodFilter>,
|
Query(filter): Query<UsagePeriodFilter>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
|
Ok((session, new_token)) => (session, new_token),
|
||||||
|
Err(e) => return e,
|
||||||
|
};
|
||||||
|
|
||||||
let pool = &state.app_state.db_pool;
|
let pool = &state.app_state.db_pool;
|
||||||
let (period_clause, period_binds) = filter.to_sql();
|
let (period_clause, period_binds) = filter.to_sql();
|
||||||
|
|
||||||
@@ -370,8 +402,14 @@ pub(super) async fn handle_providers_usage(
|
|||||||
|
|
||||||
pub(super) async fn handle_detailed_usage(
|
pub(super) async fn handle_detailed_usage(
|
||||||
State(state): State<DashboardState>,
|
State(state): State<DashboardState>,
|
||||||
|
headers: axum::http::HeaderMap,
|
||||||
Query(filter): Query<UsagePeriodFilter>,
|
Query(filter): Query<UsagePeriodFilter>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
|
Ok((session, new_token)) => (session, new_token),
|
||||||
|
Err(e) => return e,
|
||||||
|
};
|
||||||
|
|
||||||
let pool = &state.app_state.db_pool;
|
let pool = &state.app_state.db_pool;
|
||||||
let (period_clause, period_binds) = filter.to_sql();
|
let (period_clause, period_binds) = filter.to_sql();
|
||||||
|
|
||||||
@@ -433,8 +471,14 @@ pub(super) async fn handle_detailed_usage(
|
|||||||
|
|
||||||
pub(super) async fn handle_analytics_breakdown(
|
pub(super) async fn handle_analytics_breakdown(
|
||||||
State(state): State<DashboardState>,
|
State(state): State<DashboardState>,
|
||||||
|
headers: axum::http::HeaderMap,
|
||||||
Query(filter): Query<UsagePeriodFilter>,
|
Query(filter): Query<UsagePeriodFilter>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
|
Ok((session, new_token)) => (session, new_token),
|
||||||
|
Err(e) => return e,
|
||||||
|
};
|
||||||
|
|
||||||
let pool = &state.app_state.db_pool;
|
let pool = &state.app_state.db_pool;
|
||||||
let (period_clause, period_binds) = filter.to_sql();
|
let (period_clause, period_binds) = filter.to_sql();
|
||||||
|
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ pub(super) async fn handle_get_users(
|
|||||||
State(state): State<DashboardState>,
|
State(state): State<DashboardState>,
|
||||||
headers: axum::http::HeaderMap,
|
headers: axum::http::HeaderMap,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match auth::require_admin(&state, &headers).await {
|
let (_session, _) = match auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
@@ -67,7 +67,7 @@ pub(super) async fn handle_create_user(
|
|||||||
headers: axum::http::HeaderMap,
|
headers: axum::http::HeaderMap,
|
||||||
Json(payload): Json<CreateUserRequest>,
|
Json(payload): Json<CreateUserRequest>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match auth::require_admin(&state, &headers).await {
|
let (_session, _) = match auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
@@ -149,7 +149,7 @@ pub(super) async fn handle_update_user(
|
|||||||
Path(id): Path<i64>,
|
Path(id): Path<i64>,
|
||||||
Json(payload): Json<UpdateUserRequest>,
|
Json(payload): Json<UpdateUserRequest>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match auth::require_admin(&state, &headers).await {
|
let (_session, _) = match auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -64,6 +64,7 @@ pub async fn run_migrations(pool: &DbPool) -> Result<()> {
|
|||||||
model TEXT,
|
model TEXT,
|
||||||
prompt_tokens INTEGER,
|
prompt_tokens INTEGER,
|
||||||
completion_tokens INTEGER,
|
completion_tokens INTEGER,
|
||||||
|
reasoning_tokens INTEGER DEFAULT 0,
|
||||||
total_tokens INTEGER,
|
total_tokens INTEGER,
|
||||||
cost REAL,
|
cost REAL,
|
||||||
has_images BOOLEAN DEFAULT FALSE,
|
has_images BOOLEAN DEFAULT FALSE,
|
||||||
@@ -109,6 +110,8 @@ pub async fn run_migrations(pool: &DbPool) -> Result<()> {
|
|||||||
enabled BOOLEAN DEFAULT TRUE,
|
enabled BOOLEAN DEFAULT TRUE,
|
||||||
prompt_cost_per_m REAL,
|
prompt_cost_per_m REAL,
|
||||||
completion_cost_per_m REAL,
|
completion_cost_per_m REAL,
|
||||||
|
cache_read_cost_per_m REAL,
|
||||||
|
cache_write_cost_per_m REAL,
|
||||||
mapping TEXT,
|
mapping TEXT,
|
||||||
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||||
FOREIGN KEY (provider_id) REFERENCES provider_configs(id) ON DELETE CASCADE
|
FOREIGN KEY (provider_id) REFERENCES provider_configs(id) ON DELETE CASCADE
|
||||||
@@ -170,6 +173,9 @@ pub async fn run_migrations(pool: &DbPool) -> Result<()> {
|
|||||||
let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN cache_write_tokens INTEGER DEFAULT 0")
|
let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN cache_write_tokens INTEGER DEFAULT 0")
|
||||||
.execute(pool)
|
.execute(pool)
|
||||||
.await;
|
.await;
|
||||||
|
let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN reasoning_tokens INTEGER DEFAULT 0")
|
||||||
|
.execute(pool)
|
||||||
|
.await;
|
||||||
|
|
||||||
// Add billing_mode column if it doesn't exist (migration for existing DBs)
|
// Add billing_mode column if it doesn't exist (migration for existing DBs)
|
||||||
let _ = sqlx::query("ALTER TABLE provider_configs ADD COLUMN billing_mode TEXT")
|
let _ = sqlx::query("ALTER TABLE provider_configs ADD COLUMN billing_mode TEXT")
|
||||||
@@ -180,6 +186,14 @@ pub async fn run_migrations(pool: &DbPool) -> Result<()> {
|
|||||||
.execute(pool)
|
.execute(pool)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
|
// Add manual cache cost columns to model_configs if they don't exist
|
||||||
|
let _ = sqlx::query("ALTER TABLE model_configs ADD COLUMN cache_read_cost_per_m REAL")
|
||||||
|
.execute(pool)
|
||||||
|
.await;
|
||||||
|
let _ = sqlx::query("ALTER TABLE model_configs ADD COLUMN cache_write_cost_per_m REAL")
|
||||||
|
.execute(pool)
|
||||||
|
.await;
|
||||||
|
|
||||||
// Insert default admin user if none exists (default password: admin)
|
// Insert default admin user if none exists (default password: admin)
|
||||||
let user_count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM users").fetch_one(pool).await?;
|
let user_count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM users").fetch_one(pool).await?;
|
||||||
|
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ pub struct RequestLog {
|
|||||||
pub model: String,
|
pub model: String,
|
||||||
pub prompt_tokens: u32,
|
pub prompt_tokens: u32,
|
||||||
pub completion_tokens: u32,
|
pub completion_tokens: u32,
|
||||||
|
pub reasoning_tokens: u32,
|
||||||
pub total_tokens: u32,
|
pub total_tokens: u32,
|
||||||
pub cache_read_tokens: u32,
|
pub cache_read_tokens: u32,
|
||||||
pub cache_write_tokens: u32,
|
pub cache_write_tokens: u32,
|
||||||
@@ -77,8 +78,8 @@ impl RequestLogger {
|
|||||||
sqlx::query(
|
sqlx::query(
|
||||||
r#"
|
r#"
|
||||||
INSERT INTO llm_requests
|
INSERT INTO llm_requests
|
||||||
(timestamp, client_id, provider, model, prompt_tokens, completion_tokens, total_tokens, cache_read_tokens, cache_write_tokens, cost, has_images, status, error_message, duration_ms, request_body, response_body)
|
(timestamp, client_id, provider, model, prompt_tokens, completion_tokens, reasoning_tokens, total_tokens, cache_read_tokens, cache_write_tokens, cost, has_images, status, error_message, duration_ms, request_body, response_body)
|
||||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
"#,
|
"#,
|
||||||
)
|
)
|
||||||
.bind(log.timestamp)
|
.bind(log.timestamp)
|
||||||
@@ -87,6 +88,7 @@ impl RequestLogger {
|
|||||||
.bind(&log.model)
|
.bind(&log.model)
|
||||||
.bind(log.prompt_tokens as i64)
|
.bind(log.prompt_tokens as i64)
|
||||||
.bind(log.completion_tokens as i64)
|
.bind(log.completion_tokens as i64)
|
||||||
|
.bind(log.reasoning_tokens as i64)
|
||||||
.bind(log.total_tokens as i64)
|
.bind(log.total_tokens as i64)
|
||||||
.bind(log.cache_read_tokens as i64)
|
.bind(log.cache_read_tokens as i64)
|
||||||
.bind(log.cache_write_tokens as i64)
|
.bind(log.cache_write_tokens as i64)
|
||||||
|
|||||||
@@ -165,6 +165,8 @@ pub struct Usage {
|
|||||||
pub completion_tokens: u32,
|
pub completion_tokens: u32,
|
||||||
pub total_tokens: u32,
|
pub total_tokens: u32,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub reasoning_tokens: Option<u32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub cache_read_tokens: Option<u32>,
|
pub cache_read_tokens: Option<u32>,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub cache_write_tokens: Option<u32>,
|
pub cache_write_tokens: Option<u32>,
|
||||||
@@ -179,6 +181,8 @@ pub struct ChatCompletionStreamResponse {
|
|||||||
pub created: u64,
|
pub created: u64,
|
||||||
pub model: String,
|
pub model: String,
|
||||||
pub choices: Vec<ChatStreamChoice>,
|
pub choices: Vec<ChatStreamChoice>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub usage: Option<Usage>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
|||||||
@@ -128,7 +128,9 @@ impl super::Provider for DeepSeekProvider {
|
|||||||
cache_write_tokens: u32,
|
cache_write_tokens: u32,
|
||||||
registry: &crate::models::registry::ModelRegistry,
|
registry: &crate::models::registry::ModelRegistry,
|
||||||
) -> f64 {
|
) -> f64 {
|
||||||
helpers::calculate_cost_with_registry(
|
if let Some(metadata) = registry.find_model(model) {
|
||||||
|
if metadata.cost.is_some() {
|
||||||
|
return helpers::calculate_cost_with_registry(
|
||||||
model,
|
model,
|
||||||
prompt_tokens,
|
prompt_tokens,
|
||||||
completion_tokens,
|
completion_tokens,
|
||||||
@@ -136,9 +138,26 @@ impl super::Provider for DeepSeekProvider {
|
|||||||
cache_write_tokens,
|
cache_write_tokens,
|
||||||
registry,
|
registry,
|
||||||
&self.pricing,
|
&self.pricing,
|
||||||
0.14,
|
|
||||||
0.28,
|
0.28,
|
||||||
)
|
0.42,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Custom DeepSeek fallback that correctly handles cache hits
|
||||||
|
let (prompt_rate, completion_rate) = self
|
||||||
|
.pricing
|
||||||
|
.iter()
|
||||||
|
.find(|p| model.contains(&p.model))
|
||||||
|
.map(|p| (p.prompt_tokens_per_million, p.completion_tokens_per_million))
|
||||||
|
.unwrap_or((0.28, 0.42)); // Default to DeepSeek's current API pricing
|
||||||
|
|
||||||
|
let cache_hit_rate = prompt_rate / 10.0;
|
||||||
|
let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
|
||||||
|
|
||||||
|
(non_cached_prompt as f64 * prompt_rate / 1_000_000.0)
|
||||||
|
+ (cache_read_tokens as f64 * cache_hit_rate / 1_000_000.0)
|
||||||
|
+ (completion_tokens as f64 * completion_rate / 1_000_000.0)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn chat_completion_stream(
|
async fn chat_completion_stream(
|
||||||
@@ -152,8 +171,11 @@ impl super::Provider for DeepSeekProvider {
|
|||||||
// Sanitize and fix for deepseek-reasoner (R1)
|
// Sanitize and fix for deepseek-reasoner (R1)
|
||||||
if request.model == "deepseek-reasoner" {
|
if request.model == "deepseek-reasoner" {
|
||||||
if let Some(obj) = body.as_object_mut() {
|
if let Some(obj) = body.as_object_mut() {
|
||||||
obj.remove("stream_options");
|
// Keep stream_options if present (DeepSeek supports include_usage)
|
||||||
|
|
||||||
|
// Remove unsupported parameters
|
||||||
obj.remove("temperature");
|
obj.remove("temperature");
|
||||||
|
|
||||||
obj.remove("top_p");
|
obj.remove("top_p");
|
||||||
obj.remove("presence_penalty");
|
obj.remove("presence_penalty");
|
||||||
obj.remove("frequency_penalty");
|
obj.remove("frequency_penalty");
|
||||||
@@ -177,11 +199,6 @@ impl super::Provider for DeepSeekProvider {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
// For standard deepseek-chat, keep it clean
|
|
||||||
if let Some(obj) = body.as_object_mut() {
|
|
||||||
obj.remove("stream_options");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let url = format!("{}/chat/completions", self.config.base_url);
|
let url = format!("{}/chat/completions", self.config.base_url);
|
||||||
|
|||||||
@@ -722,6 +722,10 @@ impl super::Provider for GeminiProvider {
|
|||||||
let reasoning_content = candidate
|
let reasoning_content = candidate
|
||||||
.and_then(|c| c.content.parts.iter().find_map(|p| p.thought.clone()));
|
.and_then(|c| c.content.parts.iter().find_map(|p| p.thought.clone()));
|
||||||
|
|
||||||
|
let reasoning_tokens = reasoning_content.as_ref()
|
||||||
|
.map(|r| crate::utils::tokens::estimate_completion_tokens(r, &model))
|
||||||
|
.unwrap_or(0);
|
||||||
|
|
||||||
// Extract function calls → OpenAI tool_calls
|
// Extract function calls → OpenAI tool_calls
|
||||||
let tool_calls = candidate.and_then(|c| Self::extract_tool_calls(&c.content.parts));
|
let tool_calls = candidate.and_then(|c| Self::extract_tool_calls(&c.content.parts));
|
||||||
|
|
||||||
@@ -752,6 +756,7 @@ impl super::Provider for GeminiProvider {
|
|||||||
tool_calls,
|
tool_calls,
|
||||||
prompt_tokens,
|
prompt_tokens,
|
||||||
completion_tokens,
|
completion_tokens,
|
||||||
|
reasoning_tokens,
|
||||||
total_tokens,
|
total_tokens,
|
||||||
cache_read_tokens,
|
cache_read_tokens,
|
||||||
cache_write_tokens: 0, // Gemini doesn't report cache writes separately
|
cache_write_tokens: 0, // Gemini doesn't report cache writes separately
|
||||||
@@ -772,7 +777,9 @@ impl super::Provider for GeminiProvider {
|
|||||||
cache_write_tokens: u32,
|
cache_write_tokens: u32,
|
||||||
registry: &crate::models::registry::ModelRegistry,
|
registry: &crate::models::registry::ModelRegistry,
|
||||||
) -> f64 {
|
) -> f64 {
|
||||||
super::helpers::calculate_cost_with_registry(
|
if let Some(metadata) = registry.find_model(model) {
|
||||||
|
if metadata.cost.is_some() {
|
||||||
|
return super::helpers::calculate_cost_with_registry(
|
||||||
model,
|
model,
|
||||||
prompt_tokens,
|
prompt_tokens,
|
||||||
completion_tokens,
|
completion_tokens,
|
||||||
@@ -782,7 +789,24 @@ impl super::Provider for GeminiProvider {
|
|||||||
&self.pricing,
|
&self.pricing,
|
||||||
0.075,
|
0.075,
|
||||||
0.30,
|
0.30,
|
||||||
)
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Custom Gemini fallback that correctly handles cache hits (25% of input cost)
|
||||||
|
let (prompt_rate, completion_rate) = self
|
||||||
|
.pricing
|
||||||
|
.iter()
|
||||||
|
.find(|p| model.contains(&p.model))
|
||||||
|
.map(|p| (p.prompt_tokens_per_million, p.completion_tokens_per_million))
|
||||||
|
.unwrap_or((0.075, 0.30)); // Default to Gemini 1.5 Flash current API pricing
|
||||||
|
|
||||||
|
let cache_hit_rate = prompt_rate * 0.25;
|
||||||
|
let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
|
||||||
|
|
||||||
|
(non_cached_prompt as f64 * prompt_rate / 1_000_000.0)
|
||||||
|
+ (cache_read_tokens as f64 * cache_hit_rate / 1_000_000.0)
|
||||||
|
+ (completion_tokens as f64 * completion_rate / 1_000_000.0)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn chat_completion_stream(
|
async fn chat_completion_stream(
|
||||||
@@ -883,6 +907,7 @@ impl super::Provider for GeminiProvider {
|
|||||||
super::StreamUsage {
|
super::StreamUsage {
|
||||||
prompt_tokens: u.prompt_token_count,
|
prompt_tokens: u.prompt_token_count,
|
||||||
completion_tokens: u.candidates_token_count,
|
completion_tokens: u.candidates_token_count,
|
||||||
|
reasoning_tokens: 0,
|
||||||
total_tokens: u.total_token_count,
|
total_tokens: u.total_token_count,
|
||||||
cache_read_tokens: u.cached_content_token_count,
|
cache_read_tokens: u.cached_content_token_count,
|
||||||
cache_write_tokens: 0,
|
cache_write_tokens: 0,
|
||||||
|
|||||||
@@ -254,6 +254,11 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
|
|||||||
let completion_tokens = usage["completion_tokens"].as_u64().unwrap_or(0) as u32;
|
let completion_tokens = usage["completion_tokens"].as_u64().unwrap_or(0) as u32;
|
||||||
let total_tokens = usage["total_tokens"].as_u64().unwrap_or(0) as u32;
|
let total_tokens = usage["total_tokens"].as_u64().unwrap_or(0) as u32;
|
||||||
|
|
||||||
|
// Extract reasoning tokens
|
||||||
|
let reasoning_tokens = usage["completion_tokens_details"]["reasoning_tokens"]
|
||||||
|
.as_u64()
|
||||||
|
.unwrap_or(0) as u32;
|
||||||
|
|
||||||
// Extract cache tokens — try OpenAI/Grok format first, then DeepSeek format
|
// Extract cache tokens — try OpenAI/Grok format first, then DeepSeek format
|
||||||
let cache_read_tokens = usage["prompt_tokens_details"]["cached_tokens"]
|
let cache_read_tokens = usage["prompt_tokens_details"]["cached_tokens"]
|
||||||
.as_u64()
|
.as_u64()
|
||||||
@@ -261,9 +266,9 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
|
|||||||
.or_else(|| usage["prompt_cache_hit_tokens"].as_u64())
|
.or_else(|| usage["prompt_cache_hit_tokens"].as_u64())
|
||||||
.unwrap_or(0) as u32;
|
.unwrap_or(0) as u32;
|
||||||
|
|
||||||
// DeepSeek reports cache_write as prompt_cache_miss_tokens (tokens written to cache for future use).
|
// DeepSeek reports prompt_cache_miss_tokens which are just regular non-cached tokens.
|
||||||
// OpenAI doesn't report cache_write in this location, but may in the future.
|
// They do not incur a separate cache_write fee, so we don't map them here to avoid double-charging.
|
||||||
let cache_write_tokens = usage["prompt_cache_miss_tokens"].as_u64().unwrap_or(0) as u32;
|
let cache_write_tokens = 0;
|
||||||
|
|
||||||
Ok(ProviderResponse {
|
Ok(ProviderResponse {
|
||||||
content,
|
content,
|
||||||
@@ -271,6 +276,7 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
|
|||||||
tool_calls,
|
tool_calls,
|
||||||
prompt_tokens,
|
prompt_tokens,
|
||||||
completion_tokens,
|
completion_tokens,
|
||||||
|
reasoning_tokens,
|
||||||
total_tokens,
|
total_tokens,
|
||||||
cache_read_tokens,
|
cache_read_tokens,
|
||||||
cache_write_tokens,
|
cache_write_tokens,
|
||||||
@@ -295,18 +301,21 @@ pub fn parse_openai_stream_chunk(
|
|||||||
let completion_tokens = u["completion_tokens"].as_u64().unwrap_or(0) as u32;
|
let completion_tokens = u["completion_tokens"].as_u64().unwrap_or(0) as u32;
|
||||||
let total_tokens = u["total_tokens"].as_u64().unwrap_or(0) as u32;
|
let total_tokens = u["total_tokens"].as_u64().unwrap_or(0) as u32;
|
||||||
|
|
||||||
|
let reasoning_tokens = u["completion_tokens_details"]["reasoning_tokens"]
|
||||||
|
.as_u64()
|
||||||
|
.unwrap_or(0) as u32;
|
||||||
|
|
||||||
let cache_read_tokens = u["prompt_tokens_details"]["cached_tokens"]
|
let cache_read_tokens = u["prompt_tokens_details"]["cached_tokens"]
|
||||||
.as_u64()
|
.as_u64()
|
||||||
.or_else(|| u["prompt_cache_hit_tokens"].as_u64())
|
.or_else(|| u["prompt_cache_hit_tokens"].as_u64())
|
||||||
.unwrap_or(0) as u32;
|
.unwrap_or(0) as u32;
|
||||||
|
|
||||||
let cache_write_tokens = u["prompt_cache_miss_tokens"]
|
let cache_write_tokens = 0;
|
||||||
.as_u64()
|
|
||||||
.unwrap_or(0) as u32;
|
|
||||||
|
|
||||||
Some(StreamUsage {
|
Some(StreamUsage {
|
||||||
prompt_tokens,
|
prompt_tokens,
|
||||||
completion_tokens,
|
completion_tokens,
|
||||||
|
reasoning_tokens,
|
||||||
total_tokens,
|
total_tokens,
|
||||||
cache_read_tokens,
|
cache_read_tokens,
|
||||||
cache_write_tokens,
|
cache_write_tokens,
|
||||||
|
|||||||
@@ -42,6 +42,16 @@ pub trait Provider: Send + Sync {
|
|||||||
request: UnifiedRequest,
|
request: UnifiedRequest,
|
||||||
) -> Result<BoxStream<'static, Result<ProviderStreamChunk, AppError>>, AppError>;
|
) -> Result<BoxStream<'static, Result<ProviderStreamChunk, AppError>>, AppError>;
|
||||||
|
|
||||||
|
/// Process a streaming chat request using provider-specific "responses" style endpoint
|
||||||
|
/// Default implementation falls back to `chat_completion_stream` for providers
|
||||||
|
/// that do not implement a dedicated responses endpoint.
|
||||||
|
async fn chat_responses_stream(
|
||||||
|
&self,
|
||||||
|
request: UnifiedRequest,
|
||||||
|
) -> Result<BoxStream<'static, Result<ProviderStreamChunk, AppError>>, AppError> {
|
||||||
|
self.chat_completion_stream(request).await
|
||||||
|
}
|
||||||
|
|
||||||
/// Estimate token count for a request (for cost calculation)
|
/// Estimate token count for a request (for cost calculation)
|
||||||
fn estimate_tokens(&self, request: &UnifiedRequest) -> Result<u32>;
|
fn estimate_tokens(&self, request: &UnifiedRequest) -> Result<u32>;
|
||||||
|
|
||||||
@@ -65,6 +75,7 @@ pub struct ProviderResponse {
|
|||||||
pub tool_calls: Option<Vec<crate::models::ToolCall>>,
|
pub tool_calls: Option<Vec<crate::models::ToolCall>>,
|
||||||
pub prompt_tokens: u32,
|
pub prompt_tokens: u32,
|
||||||
pub completion_tokens: u32,
|
pub completion_tokens: u32,
|
||||||
|
pub reasoning_tokens: u32,
|
||||||
pub total_tokens: u32,
|
pub total_tokens: u32,
|
||||||
pub cache_read_tokens: u32,
|
pub cache_read_tokens: u32,
|
||||||
pub cache_write_tokens: u32,
|
pub cache_write_tokens: u32,
|
||||||
@@ -76,6 +87,7 @@ pub struct ProviderResponse {
|
|||||||
pub struct StreamUsage {
|
pub struct StreamUsage {
|
||||||
pub prompt_tokens: u32,
|
pub prompt_tokens: u32,
|
||||||
pub completion_tokens: u32,
|
pub completion_tokens: u32,
|
||||||
|
pub reasoning_tokens: u32,
|
||||||
pub total_tokens: u32,
|
pub total_tokens: u32,
|
||||||
pub cache_read_tokens: u32,
|
pub cache_read_tokens: u32,
|
||||||
pub cache_write_tokens: u32,
|
pub cache_write_tokens: u32,
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ impl OpenAIProvider {
|
|||||||
.timeout(std::time::Duration::from_secs(300))
|
.timeout(std::time::Duration::from_secs(300))
|
||||||
.pool_idle_timeout(std::time::Duration::from_secs(90))
|
.pool_idle_timeout(std::time::Duration::from_secs(90))
|
||||||
.pool_max_idle_per_host(4)
|
.pool_max_idle_per_host(4)
|
||||||
.tcp_keepalive(std::time::Duration::from_secs(30))
|
.tcp_keepalive(std::time::Duration::from_secs(15))
|
||||||
.build()?;
|
.build()?;
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
@@ -45,7 +45,13 @@ impl super::Provider for OpenAIProvider {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn supports_model(&self, model: &str) -> bool {
|
fn supports_model(&self, model: &str) -> bool {
|
||||||
model.starts_with("gpt-") || model.starts_with("o1-") || model.starts_with("o3-") || model.starts_with("o4-") || model == "gpt-5-nano"
|
model.starts_with("gpt-") ||
|
||||||
|
model.starts_with("o1-") ||
|
||||||
|
model.starts_with("o2-") ||
|
||||||
|
model.starts_with("o3-") ||
|
||||||
|
model.starts_with("o4-") ||
|
||||||
|
model.starts_with("o5-") ||
|
||||||
|
model.contains("gpt-5")
|
||||||
}
|
}
|
||||||
|
|
||||||
fn supports_multimodal(&self) -> bool {
|
fn supports_multimodal(&self) -> bool {
|
||||||
@@ -53,6 +59,12 @@ impl super::Provider for OpenAIProvider {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn chat_completion(&self, request: UnifiedRequest) -> Result<ProviderResponse, AppError> {
|
async fn chat_completion(&self, request: UnifiedRequest) -> Result<ProviderResponse, AppError> {
|
||||||
|
// Allow proactive routing to Responses API based on heuristic
|
||||||
|
let model_lc = request.model.to_lowercase();
|
||||||
|
if model_lc.contains("gpt-5") || model_lc.contains("codex") {
|
||||||
|
return self.chat_responses(request).await;
|
||||||
|
}
|
||||||
|
|
||||||
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
|
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
|
||||||
let mut body = helpers::build_openai_body(&request, messages_json, false);
|
let mut body = helpers::build_openai_body(&request, messages_json, false);
|
||||||
|
|
||||||
@@ -80,96 +92,7 @@ impl super::Provider for OpenAIProvider {
|
|||||||
// Read error body to diagnose. If the model requires the Responses
|
// Read error body to diagnose. If the model requires the Responses
|
||||||
// API (v1/responses), retry against that endpoint.
|
// API (v1/responses), retry against that endpoint.
|
||||||
if error_text.to_lowercase().contains("v1/responses") || error_text.to_lowercase().contains("only supported in v1/responses") {
|
if error_text.to_lowercase().contains("v1/responses") || error_text.to_lowercase().contains("only supported in v1/responses") {
|
||||||
// Build a simple `input` string by concatenating message parts.
|
return self.chat_responses(request).await;
|
||||||
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
|
|
||||||
let mut inputs: Vec<String> = Vec::new();
|
|
||||||
for m in &messages_json {
|
|
||||||
let role = m["role"].as_str().unwrap_or("");
|
|
||||||
let parts = m.get("content").and_then(|c| c.as_array()).cloned().unwrap_or_default();
|
|
||||||
let mut text_parts = Vec::new();
|
|
||||||
for p in parts {
|
|
||||||
if let Some(t) = p.get("text").and_then(|v| v.as_str()) {
|
|
||||||
text_parts.push(t.to_string());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
inputs.push(format!("{}: {}", role, text_parts.join("")));
|
|
||||||
}
|
|
||||||
let input_text = inputs.join("\n");
|
|
||||||
|
|
||||||
let resp = self
|
|
||||||
.client
|
|
||||||
.post(format!("{}/responses", self.config.base_url))
|
|
||||||
.header("Authorization", format!("Bearer {}", self.api_key))
|
|
||||||
.json(&serde_json::json!({ "model": request.model, "input": input_text }))
|
|
||||||
.send()
|
|
||||||
.await
|
|
||||||
.map_err(|e| AppError::ProviderError(e.to_string()))?;
|
|
||||||
|
|
||||||
if !resp.status().is_success() {
|
|
||||||
let err = resp.text().await.unwrap_or_default();
|
|
||||||
return Err(AppError::ProviderError(format!("OpenAI Responses API error: {}", err)));
|
|
||||||
}
|
|
||||||
|
|
||||||
let resp_json: serde_json::Value = resp.json().await.map_err(|e| AppError::ProviderError(e.to_string()))?;
|
|
||||||
// Try to normalize: if it's chat-style, use existing parser
|
|
||||||
if resp_json.get("choices").is_some() {
|
|
||||||
return helpers::parse_openai_response(&resp_json, request.model);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Responses API: try to extract text from `output` or `candidates`
|
|
||||||
let mut content_text = String::new();
|
|
||||||
if let Some(output) = resp_json.get("output").and_then(|o| o.as_array()) {
|
|
||||||
if let Some(first) = output.get(0) {
|
|
||||||
if let Some(contents) = first.get("content").and_then(|c| c.as_array()) {
|
|
||||||
for item in contents {
|
|
||||||
if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
|
|
||||||
if !content_text.is_empty() { content_text.push_str("\n"); }
|
|
||||||
content_text.push_str(text);
|
|
||||||
} else if let Some(parts) = item.get("parts").and_then(|p| p.as_array()) {
|
|
||||||
for p in parts {
|
|
||||||
if let Some(t) = p.as_str() {
|
|
||||||
if !content_text.is_empty() { content_text.push_str("\n"); }
|
|
||||||
content_text.push_str(t);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if content_text.is_empty() {
|
|
||||||
if let Some(cands) = resp_json.get("candidates").and_then(|c| c.as_array()) {
|
|
||||||
if let Some(c0) = cands.get(0) {
|
|
||||||
if let Some(content) = c0.get("content") {
|
|
||||||
if let Some(parts) = content.get("parts").and_then(|p| p.as_array()) {
|
|
||||||
for p in parts {
|
|
||||||
if let Some(t) = p.get("text").and_then(|v| v.as_str()) {
|
|
||||||
if !content_text.is_empty() { content_text.push_str("\n"); }
|
|
||||||
content_text.push_str(t);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let prompt_tokens = resp_json.get("usage").and_then(|u| u.get("prompt_tokens")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
|
|
||||||
let completion_tokens = resp_json.get("usage").and_then(|u| u.get("completion_tokens")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
|
|
||||||
let total_tokens = resp_json.get("usage").and_then(|u| u.get("total_tokens")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
|
|
||||||
|
|
||||||
return Ok(ProviderResponse {
|
|
||||||
content: content_text,
|
|
||||||
reasoning_content: None,
|
|
||||||
tool_calls: None,
|
|
||||||
prompt_tokens,
|
|
||||||
completion_tokens,
|
|
||||||
total_tokens,
|
|
||||||
cache_read_tokens: 0,
|
|
||||||
cache_write_tokens: 0,
|
|
||||||
model: request.model,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
tracing::error!("OpenAI API error ({}): {}", status, error_text);
|
tracing::error!("OpenAI API error ({}): {}", status, error_text);
|
||||||
@@ -185,27 +108,80 @@ impl super::Provider for OpenAIProvider {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn chat_responses(&self, request: UnifiedRequest) -> Result<ProviderResponse, AppError> {
|
async fn chat_responses(&self, request: UnifiedRequest) -> Result<ProviderResponse, AppError> {
|
||||||
// Build a simple `input` string by concatenating message parts.
|
// Build a structured input for the Responses API.
|
||||||
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
|
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
|
||||||
let mut inputs: Vec<String> = Vec::new();
|
let mut input_parts = Vec::new();
|
||||||
for m in &messages_json {
|
for m in &messages_json {
|
||||||
let role = m["role"].as_str().unwrap_or("");
|
let mut role = m["role"].as_str().unwrap_or("user").to_string();
|
||||||
let parts = m.get("content").and_then(|c| c.as_array()).cloned().unwrap_or_default();
|
// Newer models (gpt-5, o1) prefer "developer" over "system"
|
||||||
let mut text_parts = Vec::new();
|
if role == "system" {
|
||||||
for p in parts {
|
role = "developer".to_string();
|
||||||
if let Some(t) = p.get("text").and_then(|v| v.as_str()) {
|
}
|
||||||
text_parts.push(t.to_string());
|
|
||||||
|
let mut content = m.get("content").cloned().unwrap_or(serde_json::json!([]));
|
||||||
|
|
||||||
|
// Map content types based on role for Responses API
|
||||||
|
if let Some(content_array) = content.as_array_mut() {
|
||||||
|
for part in content_array {
|
||||||
|
if let Some(part_obj) = part.as_object_mut() {
|
||||||
|
if let Some(t) = part_obj.get("type").and_then(|v| v.as_str()) {
|
||||||
|
match t {
|
||||||
|
"text" => {
|
||||||
|
let new_type = if role == "assistant" { "output_text" } else { "input_text" };
|
||||||
|
part_obj.insert("type".to_string(), serde_json::json!(new_type));
|
||||||
|
}
|
||||||
|
"image_url" => {
|
||||||
|
// Assistant typically doesn't have image_url in history this way, but for safety:
|
||||||
|
let new_type = if role == "assistant" { "output_image" } else { "input_image" };
|
||||||
|
part_obj.insert("type".to_string(), serde_json::json!(new_type));
|
||||||
|
if let Some(img_url) = part_obj.remove("image_url") {
|
||||||
|
part_obj.insert("image".to_string(), img_url);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
inputs.push(format!("{}: {}", role, text_parts.join("")));
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if let Some(text) = content.as_str() {
|
||||||
|
let new_type = if role == "assistant" { "output_text" } else { "input_text" };
|
||||||
|
content = serde_json::json!([{ "type": new_type, "text": text }]);
|
||||||
|
}
|
||||||
|
|
||||||
|
input_parts.push(serde_json::json!({
|
||||||
|
"role": role,
|
||||||
|
"content": content
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut body = serde_json::json!({
|
||||||
|
"model": request.model,
|
||||||
|
"input": input_parts,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Add standard parameters
|
||||||
|
if let Some(temp) = request.temperature {
|
||||||
|
body["temperature"] = serde_json::json!(temp);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Newer models (gpt-5, o1) in Responses API use max_output_tokens
|
||||||
|
if let Some(max_tokens) = request.max_tokens {
|
||||||
|
if request.model.contains("gpt-5") || request.model.starts_with("o1-") || request.model.starts_with("o3-") {
|
||||||
|
body["max_output_tokens"] = serde_json::json!(max_tokens);
|
||||||
|
} else {
|
||||||
|
body["max_tokens"] = serde_json::json!(max_tokens);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(tools) = &request.tools {
|
||||||
|
body["tools"] = serde_json::json!(tools);
|
||||||
}
|
}
|
||||||
let input_text = inputs.join("\n");
|
|
||||||
|
|
||||||
let resp = self
|
let resp = self
|
||||||
.client
|
.client
|
||||||
.post(format!("{}/responses", self.config.base_url))
|
.post(format!("{}/responses", self.config.base_url))
|
||||||
.header("Authorization", format!("Bearer {}", self.api_key))
|
.header("Authorization", format!("Bearer {}", self.api_key))
|
||||||
.json(&serde_json::json!({ "model": request.model, "input": input_text }))
|
.json(&body)
|
||||||
.send()
|
.send()
|
||||||
.await
|
.await
|
||||||
.map_err(|e| AppError::ProviderError(e.to_string()))?;
|
.map_err(|e| AppError::ProviderError(e.to_string()))?;
|
||||||
@@ -217,11 +193,16 @@ impl super::Provider for OpenAIProvider {
|
|||||||
|
|
||||||
let resp_json: serde_json::Value = resp.json().await.map_err(|e| AppError::ProviderError(e.to_string()))?;
|
let resp_json: serde_json::Value = resp.json().await.map_err(|e| AppError::ProviderError(e.to_string()))?;
|
||||||
|
|
||||||
|
// Try to normalize: if it's chat-style, use existing parser
|
||||||
|
if resp_json.get("choices").is_some() {
|
||||||
|
return helpers::parse_openai_response(&resp_json, request.model);
|
||||||
|
}
|
||||||
|
|
||||||
// Normalize Responses API output into ProviderResponse
|
// Normalize Responses API output into ProviderResponse
|
||||||
let mut content_text = String::new();
|
let mut content_text = String::new();
|
||||||
if let Some(output) = resp_json.get("output").and_then(|o| o.as_array()) {
|
if let Some(output) = resp_json.get("output").and_then(|o| o.as_array()) {
|
||||||
if let Some(first) = output.get(0) {
|
for out in output {
|
||||||
if let Some(contents) = first.get("content").and_then(|c| c.as_array()) {
|
if let Some(contents) = out.get("content").and_then(|c| c.as_array()) {
|
||||||
for item in contents {
|
for item in contents {
|
||||||
if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
|
if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
|
||||||
if !content_text.is_empty() { content_text.push_str("\n"); }
|
if !content_text.is_empty() { content_text.push_str("\n"); }
|
||||||
@@ -266,6 +247,7 @@ impl super::Provider for OpenAIProvider {
|
|||||||
tool_calls: None,
|
tool_calls: None,
|
||||||
prompt_tokens,
|
prompt_tokens,
|
||||||
completion_tokens,
|
completion_tokens,
|
||||||
|
reasoning_tokens: 0,
|
||||||
total_tokens,
|
total_tokens,
|
||||||
cache_read_tokens: 0,
|
cache_read_tokens: 0,
|
||||||
cache_write_tokens: 0,
|
cache_write_tokens: 0,
|
||||||
@@ -303,13 +285,18 @@ impl super::Provider for OpenAIProvider {
|
|||||||
&self,
|
&self,
|
||||||
request: UnifiedRequest,
|
request: UnifiedRequest,
|
||||||
) -> Result<BoxStream<'static, Result<ProviderStreamChunk, AppError>>, AppError> {
|
) -> Result<BoxStream<'static, Result<ProviderStreamChunk, AppError>>, AppError> {
|
||||||
|
// Allow proactive routing to Responses API based on heuristic
|
||||||
|
let model_lc = request.model.to_lowercase();
|
||||||
|
if model_lc.contains("gpt-5") || model_lc.contains("codex") {
|
||||||
|
return self.chat_responses_stream(request).await;
|
||||||
|
}
|
||||||
|
|
||||||
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
|
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
|
||||||
let mut body = helpers::build_openai_body(&request, messages_json, true);
|
let mut body = helpers::build_openai_body(&request, messages_json, true);
|
||||||
|
|
||||||
// Standard OpenAI cleanup
|
// Standard OpenAI cleanup
|
||||||
if let Some(obj) = body.as_object_mut() {
|
if let Some(obj) = body.as_object_mut() {
|
||||||
obj.remove("stream_options");
|
// stream_options.include_usage is supported by OpenAI for token usage in streaming
|
||||||
|
|
||||||
// Transition: Newer OpenAI models (o1, o3, gpt-5) require max_completion_tokens
|
// Transition: Newer OpenAI models (o1, o3, gpt-5) require max_completion_tokens
|
||||||
if request.model.starts_with("o1-") || request.model.starts_with("o3-") || request.model.contains("gpt-5") {
|
if request.model.starts_with("o1-") || request.model.starts_with("o3-") || request.model.contains("gpt-5") {
|
||||||
if let Some(max_tokens) = obj.remove("max_tokens") {
|
if let Some(max_tokens) = obj.remove("max_tokens") {
|
||||||
@@ -383,4 +370,195 @@ impl super::Provider for OpenAIProvider {
|
|||||||
|
|
||||||
Ok(Box::pin(stream))
|
Ok(Box::pin(stream))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn chat_responses_stream(
|
||||||
|
&self,
|
||||||
|
request: UnifiedRequest,
|
||||||
|
) -> Result<BoxStream<'static, Result<ProviderStreamChunk, AppError>>, AppError> {
|
||||||
|
// Build a structured input for the Responses API.
|
||||||
|
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
|
||||||
|
let mut input_parts = Vec::new();
|
||||||
|
for m in &messages_json {
|
||||||
|
let mut role = m["role"].as_str().unwrap_or("user").to_string();
|
||||||
|
// Newer models (gpt-5, o1) prefer "developer" over "system"
|
||||||
|
if role == "system" {
|
||||||
|
role = "developer".to_string();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut content = m.get("content").cloned().unwrap_or(serde_json::json!([]));
|
||||||
|
|
||||||
|
// Map content types based on role for Responses API
|
||||||
|
if let Some(content_array) = content.as_array_mut() {
|
||||||
|
for part in content_array {
|
||||||
|
if let Some(part_obj) = part.as_object_mut() {
|
||||||
|
if let Some(t) = part_obj.get("type").and_then(|v| v.as_str()) {
|
||||||
|
match t {
|
||||||
|
"text" => {
|
||||||
|
let new_type = if role == "assistant" { "output_text" } else { "input_text" };
|
||||||
|
part_obj.insert("type".to_string(), serde_json::json!(new_type));
|
||||||
|
}
|
||||||
|
"image_url" => {
|
||||||
|
// Assistant typically doesn't have image_url in history this way, but for safety:
|
||||||
|
let new_type = if role == "assistant" { "output_image" } else { "input_image" };
|
||||||
|
part_obj.insert("type".to_string(), serde_json::json!(new_type));
|
||||||
|
if let Some(img_url) = part_obj.remove("image_url") {
|
||||||
|
part_obj.insert("image".to_string(), img_url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if let Some(text) = content.as_str() {
|
||||||
|
let new_type = if role == "assistant" { "output_text" } else { "input_text" };
|
||||||
|
content = serde_json::json!([{ "type": new_type, "text": text }]);
|
||||||
|
}
|
||||||
|
|
||||||
|
input_parts.push(serde_json::json!({
|
||||||
|
"role": role,
|
||||||
|
"content": content
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut body = serde_json::json!({
|
||||||
|
"model": request.model,
|
||||||
|
"input": input_parts,
|
||||||
|
"stream": true,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Add standard parameters
|
||||||
|
if let Some(temp) = request.temperature {
|
||||||
|
body["temperature"] = serde_json::json!(temp);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Newer models (gpt-5, o1) in Responses API use max_output_tokens
|
||||||
|
if let Some(max_tokens) = request.max_tokens {
|
||||||
|
if request.model.contains("gpt-5") || request.model.starts_with("o1-") || request.model.starts_with("o3-") {
|
||||||
|
body["max_output_tokens"] = serde_json::json!(max_tokens);
|
||||||
|
} else {
|
||||||
|
body["max_tokens"] = serde_json::json!(max_tokens);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let url = format!("{}/responses", self.config.base_url);
|
||||||
|
let api_key = self.api_key.clone();
|
||||||
|
let model = request.model.clone();
|
||||||
|
let probe_client = self.client.clone();
|
||||||
|
let probe_body = body.clone();
|
||||||
|
|
||||||
|
let es = reqwest_eventsource::EventSource::new(
|
||||||
|
self.client
|
||||||
|
.post(&url)
|
||||||
|
.header("Authorization", format!("Bearer {}", api_key))
|
||||||
|
.header("Accept", "text/event-stream")
|
||||||
|
.json(&body),
|
||||||
|
)
|
||||||
|
.map_err(|e| AppError::ProviderError(format!("Failed to create EventSource for Responses API: {}", e)))?;
|
||||||
|
|
||||||
|
let stream = async_stream::try_stream! {
|
||||||
|
let mut es = es;
|
||||||
|
while let Some(event) = es.next().await {
|
||||||
|
match event {
|
||||||
|
Ok(reqwest_eventsource::Event::Message(msg)) => {
|
||||||
|
if msg.data == "[DONE]" {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let chunk: serde_json::Value = serde_json::from_str(&msg.data)
|
||||||
|
.map_err(|e| AppError::ProviderError(format!("Failed to parse Responses stream chunk: {}", e)))?;
|
||||||
|
|
||||||
|
// Try standard OpenAI parsing first (choices/usage)
|
||||||
|
if let Some(p_chunk) = helpers::parse_openai_stream_chunk(&chunk, &model, None) {
|
||||||
|
yield p_chunk?;
|
||||||
|
} else {
|
||||||
|
// Responses API specific parsing for streaming
|
||||||
|
let mut content = String::new();
|
||||||
|
let mut finish_reason = None;
|
||||||
|
|
||||||
|
let event_type = chunk.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
||||||
|
|
||||||
|
match event_type {
|
||||||
|
"response.output_text.delta" => {
|
||||||
|
if let Some(delta) = chunk.get("delta").and_then(|v| v.as_str()) {
|
||||||
|
content.push_str(delta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"response.output_text.done" => {
|
||||||
|
if let Some(text) = chunk.get("text").and_then(|v| v.as_str()) {
|
||||||
|
// Some implementations send the full text at the end
|
||||||
|
// We usually prefer deltas, but if we haven't seen them, this is the fallback.
|
||||||
|
// However, if we're already yielding deltas, we might not want this.
|
||||||
|
// For now, let's just use it as a signal that we're done.
|
||||||
|
finish_reason = Some("stop".to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"response.done" => {
|
||||||
|
finish_reason = Some("stop".to_string());
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
// Fallback to older nested structure if present
|
||||||
|
if let Some(output) = chunk.get("output").and_then(|o| o.as_array()) {
|
||||||
|
for out in output {
|
||||||
|
if let Some(contents) = out.get("content").and_then(|c| c.as_array()) {
|
||||||
|
for item in contents {
|
||||||
|
if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
|
||||||
|
content.push_str(text);
|
||||||
|
} else if let Some(delta) = item.get("delta").and_then(|d| d.get("text")).and_then(|t| t.as_str()) {
|
||||||
|
content.push_str(delta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !content.is_empty() || finish_reason.is_some() {
|
||||||
|
yield ProviderStreamChunk {
|
||||||
|
content,
|
||||||
|
reasoning_content: None,
|
||||||
|
finish_reason,
|
||||||
|
tool_calls: None,
|
||||||
|
model: model.clone(),
|
||||||
|
usage: None,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(_) => continue,
|
||||||
|
Err(e) => {
|
||||||
|
// Attempt to probe for the actual error body
|
||||||
|
let probe_resp = probe_client
|
||||||
|
.post(&url)
|
||||||
|
.header("Authorization", format!("Bearer {}", api_key))
|
||||||
|
.header("Accept", "application/json") // Ask for JSON during probe
|
||||||
|
.json(&probe_body)
|
||||||
|
.send()
|
||||||
|
.await;
|
||||||
|
|
||||||
|
match probe_resp {
|
||||||
|
Ok(r) => {
|
||||||
|
let status = r.status();
|
||||||
|
let body = r.text().await.unwrap_or_default();
|
||||||
|
if status.is_success() {
|
||||||
|
tracing::warn!("Responses stream ended prematurely but probe returned 200 OK. Body: {}", body);
|
||||||
|
Err(AppError::ProviderError(format!("Responses stream ended (server sent 200 OK with body: {})", body)))?;
|
||||||
|
} else {
|
||||||
|
tracing::error!("OpenAI Responses Stream Error Probe ({}): {}", status, body);
|
||||||
|
Err(AppError::ProviderError(format!("OpenAI Responses API error ({}): {}", status, body)))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(probe_err) => {
|
||||||
|
tracing::error!("OpenAI Responses Stream Error Probe failed: {}", probe_err);
|
||||||
|
Err(AppError::ProviderError(format!("Responses stream error (probe failed: {}): {}", probe_err, e)))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Box::pin(stream))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -184,10 +184,12 @@ pub struct RateLimitManager {
|
|||||||
impl RateLimitManager {
|
impl RateLimitManager {
|
||||||
pub fn new(config: RateLimiterConfig, circuit_config: CircuitBreakerConfig) -> Self {
|
pub fn new(config: RateLimiterConfig, circuit_config: CircuitBreakerConfig) -> Self {
|
||||||
// Create global rate limiter quota
|
// Create global rate limiter quota
|
||||||
|
// Use a much larger burst size for the global bucket to handle concurrent dashboard load
|
||||||
|
let global_burst = config.global_requests_per_minute / 6; // e.g., 100 for 600 req/min
|
||||||
let global_quota = Quota::per_minute(
|
let global_quota = Quota::per_minute(
|
||||||
NonZeroU32::new(config.global_requests_per_minute).expect("global_requests_per_minute must be positive")
|
NonZeroU32::new(config.global_requests_per_minute).expect("global_requests_per_minute must be positive")
|
||||||
)
|
)
|
||||||
.allow_burst(NonZeroU32::new(config.burst_size).expect("burst_size must be positive"));
|
.allow_burst(NonZeroU32::new(global_burst).expect("global_burst must be positive"));
|
||||||
let global_bucket = RateLimiter::direct(global_quota);
|
let global_bucket = RateLimiter::direct(global_quota);
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
|
|||||||
@@ -137,8 +137,23 @@ async fn get_model_cost(
|
|||||||
// Check in-memory cache for cost overrides (no SQLite hit)
|
// Check in-memory cache for cost overrides (no SQLite hit)
|
||||||
if let Some(cached) = state.model_config_cache.get(model).await {
|
if let Some(cached) = state.model_config_cache.get(model).await {
|
||||||
if let (Some(p), Some(c)) = (cached.prompt_cost_per_m, cached.completion_cost_per_m) {
|
if let (Some(p), Some(c)) = (cached.prompt_cost_per_m, cached.completion_cost_per_m) {
|
||||||
// Manual overrides don't have cache-specific rates, so use simple formula
|
// Manual overrides logic: if cache rates are provided, use cache-aware formula.
|
||||||
return (prompt_tokens as f64 * p / 1_000_000.0) + (completion_tokens as f64 * c / 1_000_000.0);
|
// Formula: (non_cached_prompt * input_rate) + (cache_read * read_rate) + (cache_write * write_rate) + (completion * output_rate)
|
||||||
|
let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
|
||||||
|
let mut total = (non_cached_prompt as f64 * p / 1_000_000.0) + (completion_tokens as f64 * c / 1_000_000.0);
|
||||||
|
|
||||||
|
if let Some(cr) = cached.cache_read_cost_per_m {
|
||||||
|
total += cache_read_tokens as f64 * cr / 1_000_000.0;
|
||||||
|
} else {
|
||||||
|
// No manual cache_read rate — charge cached tokens at full input rate (backwards compatibility)
|
||||||
|
total += cache_read_tokens as f64 * p / 1_000_000.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(cw) = cached.cache_write_cost_per_m {
|
||||||
|
total += cache_write_tokens as f64 * cw / 1_000_000.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return total;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -226,7 +241,15 @@ async fn chat_completions(
|
|||||||
let prompt_tokens = crate::utils::tokens::estimate_request_tokens(&model, &unified_request);
|
let prompt_tokens = crate::utils::tokens::estimate_request_tokens(&model, &unified_request);
|
||||||
|
|
||||||
// Handle streaming response
|
// Handle streaming response
|
||||||
let stream_result = provider.chat_completion_stream(unified_request).await;
|
// Allow provider-specific routing for streaming too
|
||||||
|
let use_responses = provider.name() == "openai"
|
||||||
|
&& crate::utils::registry::model_prefers_responses(&state.model_registry, &unified_request.model);
|
||||||
|
|
||||||
|
let stream_result = if use_responses {
|
||||||
|
provider.chat_responses_stream(unified_request).await
|
||||||
|
} else {
|
||||||
|
provider.chat_completion_stream(unified_request).await
|
||||||
|
};
|
||||||
|
|
||||||
match stream_result {
|
match stream_result {
|
||||||
Ok(stream) => {
|
Ok(stream) => {
|
||||||
@@ -289,6 +312,14 @@ async fn chat_completions(
|
|||||||
},
|
},
|
||||||
finish_reason: chunk.finish_reason,
|
finish_reason: chunk.finish_reason,
|
||||||
}],
|
}],
|
||||||
|
usage: chunk.usage.as_ref().map(|u| crate::models::Usage {
|
||||||
|
prompt_tokens: u.prompt_tokens,
|
||||||
|
completion_tokens: u.completion_tokens,
|
||||||
|
total_tokens: u.total_tokens,
|
||||||
|
reasoning_tokens: if u.reasoning_tokens > 0 { Some(u.reasoning_tokens) } else { None },
|
||||||
|
cache_read_tokens: if u.cache_read_tokens > 0 { Some(u.cache_read_tokens) } else { None },
|
||||||
|
cache_write_tokens: if u.cache_write_tokens > 0 { Some(u.cache_write_tokens) } else { None },
|
||||||
|
}),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Use axum's Event directly, wrap in Ok
|
// Use axum's Event directly, wrap in Ok
|
||||||
@@ -360,6 +391,7 @@ async fn chat_completions(
|
|||||||
model: response.model.clone(),
|
model: response.model.clone(),
|
||||||
prompt_tokens: response.prompt_tokens,
|
prompt_tokens: response.prompt_tokens,
|
||||||
completion_tokens: response.completion_tokens,
|
completion_tokens: response.completion_tokens,
|
||||||
|
reasoning_tokens: response.reasoning_tokens,
|
||||||
total_tokens: response.total_tokens,
|
total_tokens: response.total_tokens,
|
||||||
cache_read_tokens: response.cache_read_tokens,
|
cache_read_tokens: response.cache_read_tokens,
|
||||||
cache_write_tokens: response.cache_write_tokens,
|
cache_write_tokens: response.cache_write_tokens,
|
||||||
@@ -400,6 +432,7 @@ async fn chat_completions(
|
|||||||
prompt_tokens: response.prompt_tokens,
|
prompt_tokens: response.prompt_tokens,
|
||||||
completion_tokens: response.completion_tokens,
|
completion_tokens: response.completion_tokens,
|
||||||
total_tokens: response.total_tokens,
|
total_tokens: response.total_tokens,
|
||||||
|
reasoning_tokens: if response.reasoning_tokens > 0 { Some(response.reasoning_tokens) } else { None },
|
||||||
cache_read_tokens: if response.cache_read_tokens > 0 { Some(response.cache_read_tokens) } else { None },
|
cache_read_tokens: if response.cache_read_tokens > 0 { Some(response.cache_read_tokens) } else { None },
|
||||||
cache_write_tokens: if response.cache_write_tokens > 0 { Some(response.cache_write_tokens) } else { None },
|
cache_write_tokens: if response.cache_write_tokens > 0 { Some(response.cache_write_tokens) } else { None },
|
||||||
}),
|
}),
|
||||||
@@ -429,6 +462,7 @@ async fn chat_completions(
|
|||||||
model: model.clone(),
|
model: model.clone(),
|
||||||
prompt_tokens: 0,
|
prompt_tokens: 0,
|
||||||
completion_tokens: 0,
|
completion_tokens: 0,
|
||||||
|
reasoning_tokens: 0,
|
||||||
total_tokens: 0,
|
total_tokens: 0,
|
||||||
cache_read_tokens: 0,
|
cache_read_tokens: 0,
|
||||||
cache_write_tokens: 0,
|
cache_write_tokens: 0,
|
||||||
|
|||||||
@@ -15,6 +15,8 @@ pub struct CachedModelConfig {
|
|||||||
pub mapping: Option<String>,
|
pub mapping: Option<String>,
|
||||||
pub prompt_cost_per_m: Option<f64>,
|
pub prompt_cost_per_m: Option<f64>,
|
||||||
pub completion_cost_per_m: Option<f64>,
|
pub completion_cost_per_m: Option<f64>,
|
||||||
|
pub cache_read_cost_per_m: Option<f64>,
|
||||||
|
pub cache_write_cost_per_m: Option<f64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// In-memory cache for model_configs table.
|
/// In-memory cache for model_configs table.
|
||||||
@@ -35,15 +37,15 @@ impl ModelConfigCache {
|
|||||||
|
|
||||||
/// Load all model configs from the database into cache
|
/// Load all model configs from the database into cache
|
||||||
pub async fn refresh(&self) {
|
pub async fn refresh(&self) {
|
||||||
match sqlx::query_as::<_, (String, bool, Option<String>, Option<f64>, Option<f64>)>(
|
match sqlx::query_as::<_, (String, bool, Option<String>, Option<f64>, Option<f64>, Option<f64>, Option<f64>)>(
|
||||||
"SELECT id, enabled, mapping, prompt_cost_per_m, completion_cost_per_m FROM model_configs",
|
"SELECT id, enabled, mapping, prompt_cost_per_m, completion_cost_per_m, cache_read_cost_per_m, cache_write_cost_per_m FROM model_configs",
|
||||||
)
|
)
|
||||||
.fetch_all(&self.db_pool)
|
.fetch_all(&self.db_pool)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
Ok(rows) => {
|
Ok(rows) => {
|
||||||
let mut map = HashMap::with_capacity(rows.len());
|
let mut map = HashMap::with_capacity(rows.len());
|
||||||
for (id, enabled, mapping, prompt_cost, completion_cost) in rows {
|
for (id, enabled, mapping, prompt_cost, completion_cost, cache_read_cost, cache_write_cost) in rows {
|
||||||
map.insert(
|
map.insert(
|
||||||
id,
|
id,
|
||||||
CachedModelConfig {
|
CachedModelConfig {
|
||||||
@@ -51,6 +53,8 @@ impl ModelConfigCache {
|
|||||||
mapping,
|
mapping,
|
||||||
prompt_cost_per_m: prompt_cost,
|
prompt_cost_per_m: prompt_cost,
|
||||||
completion_cost_per_m: completion_cost,
|
completion_cost_per_m: completion_cost,
|
||||||
|
cache_read_cost_per_m: cache_read_cost,
|
||||||
|
cache_write_cost_per_m: cache_write_cost,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -96,11 +96,12 @@ where
|
|||||||
// Spawn a background task to log the completion
|
// Spawn a background task to log the completion
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
// Use real usage from the provider when available, otherwise fall back to estimates
|
// Use real usage from the provider when available, otherwise fall back to estimates
|
||||||
let (prompt_tokens, completion_tokens, total_tokens, cache_read_tokens, cache_write_tokens) =
|
let (prompt_tokens, completion_tokens, reasoning_tokens, total_tokens, cache_read_tokens, cache_write_tokens) =
|
||||||
if let Some(usage) = &real_usage {
|
if let Some(usage) = &real_usage {
|
||||||
(
|
(
|
||||||
usage.prompt_tokens,
|
usage.prompt_tokens,
|
||||||
usage.completion_tokens,
|
usage.completion_tokens,
|
||||||
|
usage.reasoning_tokens,
|
||||||
usage.total_tokens,
|
usage.total_tokens,
|
||||||
usage.cache_read_tokens,
|
usage.cache_read_tokens,
|
||||||
usage.cache_write_tokens,
|
usage.cache_write_tokens,
|
||||||
@@ -109,6 +110,7 @@ where
|
|||||||
(
|
(
|
||||||
estimated_prompt_tokens,
|
estimated_prompt_tokens,
|
||||||
estimated_completion,
|
estimated_completion,
|
||||||
|
estimated_reasoning_tokens,
|
||||||
estimated_prompt_tokens + estimated_completion,
|
estimated_prompt_tokens + estimated_completion,
|
||||||
0u32,
|
0u32,
|
||||||
0u32,
|
0u32,
|
||||||
@@ -118,8 +120,22 @@ where
|
|||||||
// Check in-memory cache for cost overrides (no SQLite hit)
|
// Check in-memory cache for cost overrides (no SQLite hit)
|
||||||
let cost = if let Some(cached) = config_cache.get(&model).await {
|
let cost = if let Some(cached) = config_cache.get(&model).await {
|
||||||
if let (Some(p), Some(c)) = (cached.prompt_cost_per_m, cached.completion_cost_per_m) {
|
if let (Some(p), Some(c)) = (cached.prompt_cost_per_m, cached.completion_cost_per_m) {
|
||||||
// Cost override doesn't have cache-aware pricing, use simple formula
|
// Manual overrides logic: if cache rates are provided, use cache-aware formula.
|
||||||
(prompt_tokens as f64 * p / 1_000_000.0) + (completion_tokens as f64 * c / 1_000_000.0)
|
let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
|
||||||
|
let mut total = (non_cached_prompt as f64 * p / 1_000_000.0) + (completion_tokens as f64 * c / 1_000_000.0);
|
||||||
|
|
||||||
|
if let Some(cr) = cached.cache_read_cost_per_m {
|
||||||
|
total += cache_read_tokens as f64 * cr / 1_000_000.0;
|
||||||
|
} else {
|
||||||
|
// Charge cached tokens at full input rate if no specific rate provided
|
||||||
|
total += cache_read_tokens as f64 * p / 1_000_000.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(cw) = cached.cache_write_cost_per_m {
|
||||||
|
total += cache_write_tokens as f64 * cw / 1_000_000.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
total
|
||||||
} else {
|
} else {
|
||||||
provider.calculate_cost(
|
provider.calculate_cost(
|
||||||
&model,
|
&model,
|
||||||
@@ -149,6 +165,7 @@ where
|
|||||||
model,
|
model,
|
||||||
prompt_tokens,
|
prompt_tokens,
|
||||||
completion_tokens,
|
completion_tokens,
|
||||||
|
reasoning_tokens,
|
||||||
total_tokens,
|
total_tokens,
|
||||||
cache_read_tokens,
|
cache_read_tokens,
|
||||||
cache_write_tokens,
|
cache_write_tokens,
|
||||||
|
|||||||
@@ -61,9 +61,9 @@
|
|||||||
--text-white: var(--fg0);
|
--text-white: var(--fg0);
|
||||||
|
|
||||||
/* Borders */
|
/* Borders */
|
||||||
--border-color: var(--bg2);
|
--border-color: var(--bg3);
|
||||||
--border-radius: 8px;
|
--border-radius: 0px;
|
||||||
--border-radius-sm: 4px;
|
--border-radius-sm: 0px;
|
||||||
|
|
||||||
/* Spacing System */
|
/* Spacing System */
|
||||||
--spacing-xs: 0.25rem;
|
--spacing-xs: 0.25rem;
|
||||||
@@ -72,15 +72,15 @@
|
|||||||
--spacing-lg: 1.5rem;
|
--spacing-lg: 1.5rem;
|
||||||
--spacing-xl: 2rem;
|
--spacing-xl: 2rem;
|
||||||
|
|
||||||
/* Shadows */
|
/* Shadows - Retro Block Style */
|
||||||
--shadow-sm: 0 1px 2px 0 rgba(0, 0, 0, 0.2);
|
--shadow-sm: 2px 2px 0px rgba(0, 0, 0, 0.4);
|
||||||
--shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.3);
|
--shadow: 4px 4px 0px rgba(0, 0, 0, 0.5);
|
||||||
--shadow-md: 0 10px 15px -3px rgba(0, 0, 0, 0.4);
|
--shadow-md: 6px 6px 0px rgba(0, 0, 0, 0.6);
|
||||||
--shadow-lg: 0 20px 25px -5px rgba(0, 0, 0, 0.5);
|
--shadow-lg: 8px 8px 0px rgba(0, 0, 0, 0.7);
|
||||||
}
|
}
|
||||||
|
|
||||||
body {
|
body {
|
||||||
font-family: 'Inter', -apple-system, sans-serif;
|
font-family: 'JetBrains Mono', 'Fira Code', 'Courier New', monospace;
|
||||||
background-color: var(--bg-primary);
|
background-color: var(--bg-primary);
|
||||||
color: var(--text-primary);
|
color: var(--text-primary);
|
||||||
line-height: 1.6;
|
line-height: 1.6;
|
||||||
@@ -105,12 +105,12 @@ body {
|
|||||||
|
|
||||||
.login-card {
|
.login-card {
|
||||||
background: var(--bg1);
|
background: var(--bg1);
|
||||||
border-radius: 24px;
|
border-radius: var(--border-radius);
|
||||||
padding: 4rem 2.5rem 3rem;
|
padding: 4rem 2.5rem 3rem;
|
||||||
width: 100%;
|
width: 100%;
|
||||||
max-width: 440px;
|
max-width: 440px;
|
||||||
box-shadow: var(--shadow-lg);
|
box-shadow: var(--shadow-lg);
|
||||||
border: 1px solid var(--bg2);
|
border: 2px solid var(--bg3);
|
||||||
text-align: center;
|
text-align: center;
|
||||||
animation: slideUp 0.6s cubic-bezier(0.34, 1.56, 0.64, 1);
|
animation: slideUp 0.6s cubic-bezier(0.34, 1.56, 0.64, 1);
|
||||||
position: relative;
|
position: relative;
|
||||||
@@ -191,7 +191,7 @@ body {
|
|||||||
color: var(--fg3);
|
color: var(--fg3);
|
||||||
pointer-events: none;
|
pointer-events: none;
|
||||||
transition: all 0.25s ease;
|
transition: all 0.25s ease;
|
||||||
background: var(--bg1);
|
background: transparent;
|
||||||
padding: 0 0.375rem;
|
padding: 0 0.375rem;
|
||||||
z-index: 2;
|
z-index: 2;
|
||||||
font-weight: 500;
|
font-weight: 500;
|
||||||
@@ -202,30 +202,32 @@ body {
|
|||||||
|
|
||||||
.form-group input:focus ~ label,
|
.form-group input:focus ~ label,
|
||||||
.form-group input:not(:placeholder-shown) ~ label {
|
.form-group input:not(:placeholder-shown) ~ label {
|
||||||
top: -0.625rem;
|
top: 0;
|
||||||
left: 0.875rem;
|
left: 0.875rem;
|
||||||
font-size: 0.7rem;
|
font-size: 0.75rem;
|
||||||
color: var(--orange);
|
color: var(--orange);
|
||||||
font-weight: 600;
|
font-weight: 600;
|
||||||
transform: translateY(0);
|
transform: translateY(-50%);
|
||||||
|
background: linear-gradient(180deg, var(--bg1) 50%, var(--bg0) 50%);
|
||||||
}
|
}
|
||||||
|
|
||||||
.form-group input {
|
.form-group input {
|
||||||
padding: 1rem 1.25rem;
|
padding: 1rem 1.25rem;
|
||||||
background: var(--bg0);
|
background: var(--bg0);
|
||||||
border: 2px solid var(--bg3);
|
border: 2px solid var(--bg3);
|
||||||
border-radius: 12px;
|
border-radius: var(--border-radius);
|
||||||
|
font-family: inherit;
|
||||||
font-size: 1rem;
|
font-size: 1rem;
|
||||||
color: var(--fg1);
|
color: var(--fg1);
|
||||||
transition: all 0.3s;
|
transition: all 0.2s;
|
||||||
width: 100%;
|
width: 100%;
|
||||||
box-sizing: border-box;
|
box-sizing: border-box;
|
||||||
}
|
}
|
||||||
|
|
||||||
.form-group input:focus {
|
.form-group input:focus {
|
||||||
border-color: var(--orange);
|
border-color: var(--orange);
|
||||||
box-shadow: 0 0 0 4px rgba(214, 93, 14, 0.2);
|
|
||||||
outline: none;
|
outline: none;
|
||||||
|
box-shadow: 4px 4px 0px rgba(214, 93, 14, 0.4);
|
||||||
}
|
}
|
||||||
|
|
||||||
.login-btn {
|
.login-btn {
|
||||||
@@ -732,11 +734,11 @@ body {
|
|||||||
.stat-change.positive { color: var(--green-light); }
|
.stat-change.positive { color: var(--green-light); }
|
||||||
.stat-change.negative { color: var(--red-light); }
|
.stat-change.negative { color: var(--red-light); }
|
||||||
|
|
||||||
/* Generic Cards */
|
/* Cards */
|
||||||
.card {
|
.card {
|
||||||
background: var(--bg1);
|
background: var(--bg1);
|
||||||
border-radius: var(--border-radius);
|
border-radius: var(--border-radius);
|
||||||
border: 1px solid var(--bg2);
|
border: 1px solid var(--bg3);
|
||||||
box-shadow: var(--shadow-sm);
|
box-shadow: var(--shadow-sm);
|
||||||
margin-bottom: 1.5rem;
|
margin-bottom: 1.5rem;
|
||||||
display: flex;
|
display: flex;
|
||||||
@@ -749,6 +751,15 @@ body {
|
|||||||
display: flex;
|
display: flex;
|
||||||
justify-content: space-between;
|
justify-content: space-between;
|
||||||
align-items: center;
|
align-items: center;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.card-actions {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.5rem;
|
||||||
|
flex-wrap: wrap;
|
||||||
}
|
}
|
||||||
|
|
||||||
.card-title {
|
.card-title {
|
||||||
@@ -817,25 +828,26 @@ body {
|
|||||||
/* Badges */
|
/* Badges */
|
||||||
.status-badge {
|
.status-badge {
|
||||||
padding: 0.25rem 0.75rem;
|
padding: 0.25rem 0.75rem;
|
||||||
border-radius: 9999px;
|
border-radius: var(--border-radius);
|
||||||
font-size: 0.7rem;
|
font-size: 0.7rem;
|
||||||
font-weight: 700;
|
font-weight: 700;
|
||||||
text-transform: uppercase;
|
text-transform: uppercase;
|
||||||
display: inline-flex;
|
display: inline-flex;
|
||||||
align-items: center;
|
align-items: center;
|
||||||
gap: 0.375rem;
|
gap: 0.375rem;
|
||||||
|
border: 1px solid transparent;
|
||||||
}
|
}
|
||||||
|
|
||||||
.status-badge.online, .status-badge.success { background: rgba(184, 187, 38, 0.2); color: var(--green-light); }
|
.status-badge.online, .status-badge.success { background: rgba(184, 187, 38, 0.2); color: var(--green-light); border-color: rgba(184, 187, 38, 0.4); }
|
||||||
.status-badge.offline, .status-badge.danger { background: rgba(251, 73, 52, 0.2); color: var(--red-light); }
|
.status-badge.offline, .status-badge.danger { background: rgba(251, 73, 52, 0.2); color: var(--red-light); border-color: rgba(251, 73, 52, 0.4); }
|
||||||
.status-badge.warning { background: rgba(250, 189, 47, 0.2); color: var(--yellow-light); }
|
.status-badge.warning { background: rgba(250, 189, 47, 0.2); color: var(--yellow-light); border-color: rgba(250, 189, 47, 0.4); }
|
||||||
|
|
||||||
.badge-client {
|
.badge-client {
|
||||||
background: var(--bg2);
|
background: var(--bg2);
|
||||||
color: var(--blue-light);
|
color: var(--blue-light);
|
||||||
padding: 2px 8px;
|
padding: 2px 8px;
|
||||||
border-radius: 6px;
|
border-radius: var(--border-radius);
|
||||||
font-family: monospace;
|
font-family: inherit;
|
||||||
font-size: 0.85rem;
|
font-size: 0.85rem;
|
||||||
border: 1px solid var(--bg3);
|
border: 1px solid var(--bg3);
|
||||||
}
|
}
|
||||||
@@ -889,7 +901,7 @@ body {
|
|||||||
width: 100%;
|
width: 100%;
|
||||||
background: var(--bg0);
|
background: var(--bg0);
|
||||||
border: 1px solid var(--bg3);
|
border: 1px solid var(--bg3);
|
||||||
border-radius: 8px;
|
border-radius: var(--border-radius);
|
||||||
padding: 0.75rem;
|
padding: 0.75rem;
|
||||||
font-family: inherit;
|
font-family: inherit;
|
||||||
font-size: 0.875rem;
|
font-size: 0.875rem;
|
||||||
@@ -900,7 +912,7 @@ body {
|
|||||||
.form-control input:focus, .form-control textarea:focus, .form-control select:focus {
|
.form-control input:focus, .form-control textarea:focus, .form-control select:focus {
|
||||||
outline: none;
|
outline: none;
|
||||||
border-color: var(--orange);
|
border-color: var(--orange);
|
||||||
box-shadow: 0 0 0 2px rgba(214, 93, 14, 0.2);
|
box-shadow: 2px 2px 0px rgba(214, 93, 14, 0.4);
|
||||||
}
|
}
|
||||||
|
|
||||||
.btn {
|
.btn {
|
||||||
@@ -908,21 +920,27 @@ body {
|
|||||||
align-items: center;
|
align-items: center;
|
||||||
gap: 0.5rem;
|
gap: 0.5rem;
|
||||||
padding: 0.625rem 1.25rem;
|
padding: 0.625rem 1.25rem;
|
||||||
border-radius: 8px;
|
border-radius: var(--border-radius);
|
||||||
font-weight: 600;
|
font-weight: 600;
|
||||||
font-size: 0.875rem;
|
font-size: 0.875rem;
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
transition: all 0.2s;
|
transition: all 0.1s;
|
||||||
border: 1px solid transparent;
|
border: 1px solid transparent;
|
||||||
|
text-transform: uppercase;
|
||||||
}
|
}
|
||||||
|
|
||||||
.btn-primary { background: var(--orange); color: var(--bg0); }
|
.btn:active {
|
||||||
|
transform: translate(2px, 2px);
|
||||||
|
box-shadow: none !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
.btn-primary { background: var(--orange); color: var(--bg0); box-shadow: 2px 2px 0px var(--bg4); }
|
||||||
.btn-primary:hover { background: var(--orange-light); }
|
.btn-primary:hover { background: var(--orange-light); }
|
||||||
|
|
||||||
.btn-secondary { background: var(--bg2); border-color: var(--bg3); color: var(--fg1); }
|
.btn-secondary { background: var(--bg2); border-color: var(--bg3); color: var(--fg1); box-shadow: 2px 2px 0px var(--bg0); }
|
||||||
.btn-secondary:hover { background: var(--bg3); color: var(--fg0); }
|
.btn-secondary:hover { background: var(--bg3); color: var(--fg0); }
|
||||||
|
|
||||||
.btn-danger { background: var(--red); color: var(--fg0); }
|
.btn-danger { background: var(--red); color: var(--fg0); box-shadow: 2px 2px 0px var(--bg4); }
|
||||||
.btn-danger:hover { background: var(--red-light); }
|
.btn-danger:hover { background: var(--red-light); }
|
||||||
|
|
||||||
/* Small inline action buttons (edit, delete, copy) */
|
/* Small inline action buttons (edit, delete, copy) */
|
||||||
@@ -981,13 +999,13 @@ body {
|
|||||||
|
|
||||||
.modal-content {
|
.modal-content {
|
||||||
background: var(--bg1);
|
background: var(--bg1);
|
||||||
border-radius: 16px;
|
border-radius: var(--border-radius);
|
||||||
width: 90%;
|
width: 90%;
|
||||||
max-width: 500px;
|
max-width: 500px;
|
||||||
box-shadow: var(--shadow-lg);
|
box-shadow: var(--shadow-lg);
|
||||||
border: 1px solid var(--bg3);
|
border: 2px solid var(--bg3);
|
||||||
transform: translateY(20px);
|
transform: translateY(20px);
|
||||||
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
|
transition: all 0.2s;
|
||||||
}
|
}
|
||||||
|
|
||||||
.modal.active .modal-content {
|
.modal.active .modal-content {
|
||||||
|
|||||||
@@ -4,11 +4,11 @@
|
|||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
<title>LLM Proxy Gateway - Admin Dashboard</title>
|
<title>LLM Proxy Gateway - Admin Dashboard</title>
|
||||||
<link rel="stylesheet" href="/css/dashboard.css?v=7">
|
<link rel="stylesheet" href="/css/dashboard.css?v=11">
|
||||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
|
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
|
||||||
<link rel="icon" href="img/logo-icon.png" type="image/png" sizes="any">
|
<link rel="icon" href="img/logo-icon.png" type="image/png" sizes="any">
|
||||||
<link rel="apple-touch-icon" href="img/logo-icon.png">
|
<link rel="apple-touch-icon" href="img/logo-icon.png">
|
||||||
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
|
<link href="https://fonts.googleapis.com/css2?family=Fira+Code:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;700&display=swap" rel="stylesheet">
|
||||||
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||||
<script src="https://cdn.jsdelivr.net/npm/luxon@3.4.4/build/global/luxon.min.js"></script>
|
<script src="https://cdn.jsdelivr.net/npm/luxon@3.4.4/build/global/luxon.min.js"></script>
|
||||||
</head>
|
</head>
|
||||||
@@ -17,12 +17,11 @@
|
|||||||
<div id="login-screen" class="login-container">
|
<div id="login-screen" class="login-container">
|
||||||
<div class="login-card">
|
<div class="login-card">
|
||||||
<div class="login-header">
|
<div class="login-header">
|
||||||
<img src="img/logo-full.png" alt="LLM Proxy Logo" class="login-logo" onerror="this.style.display='none'; this.nextElementSibling.style.display='block';">
|
<i class="fas fa-terminal login-logo-fallback"></i>
|
||||||
<i class="fas fa-robot login-logo-fallback" style="display: none;"></i>
|
|
||||||
<h1>LLM Proxy Gateway</h1>
|
<h1>LLM Proxy Gateway</h1>
|
||||||
<p class="login-subtitle">Admin Dashboard</p>
|
<p class="login-subtitle">Admin Dashboard</p>
|
||||||
</div>
|
</div>
|
||||||
<form id="login-form" class="login-form">
|
<form id="login-form" class="login-form" onsubmit="event.preventDefault();">
|
||||||
<div class="form-group">
|
<div class="form-group">
|
||||||
<input type="text" id="username" name="username" placeholder=" " required>
|
<input type="text" id="username" name="username" placeholder=" " required>
|
||||||
<label for="username">
|
<label for="username">
|
||||||
|
|||||||
@@ -32,6 +32,17 @@ class ApiClient {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!response.ok || !result.success) {
|
if (!response.ok || !result.success) {
|
||||||
|
// Handle authentication errors (session expired, server restarted, etc.)
|
||||||
|
if (response.status === 401 ||
|
||||||
|
result.error === 'Session expired or invalid' ||
|
||||||
|
result.error === 'Not authenticated' ||
|
||||||
|
result.error === 'Admin access required') {
|
||||||
|
|
||||||
|
if (window.authManager) {
|
||||||
|
// Try to logout to clear local state and show login screen
|
||||||
|
window.authManager.logout();
|
||||||
|
}
|
||||||
|
}
|
||||||
throw new Error(result.error || `HTTP error! status: ${response.status}`);
|
throw new Error(result.error || `HTTP error! status: ${response.status}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -285,7 +285,30 @@ class Dashboard {
|
|||||||
<p class="card-subtitle">Manage model availability and custom pricing</p>
|
<p class="card-subtitle">Manage model availability and custom pricing</p>
|
||||||
</div>
|
</div>
|
||||||
<div class="card-actions">
|
<div class="card-actions">
|
||||||
<input type="text" id="model-search" placeholder="Search models..." class="form-control" style="margin-bottom: 0; padding: 4px 8px; width: 250px;">
|
<select id="model-provider-filter" class="form-control" style="margin-bottom: 0; padding: 4px 8px; width: auto;">
|
||||||
|
<option value="">All Providers</option>
|
||||||
|
<option value="openai">OpenAI</option>
|
||||||
|
<option value="anthropic">Anthropic / Gemini</option>
|
||||||
|
<option value="google">Google</option>
|
||||||
|
<option value="deepseek">DeepSeek</option>
|
||||||
|
<option value="xai">xAI</option>
|
||||||
|
<option value="meta">Meta</option>
|
||||||
|
<option value="cohere">Cohere</option>
|
||||||
|
<option value="mistral">Mistral</option>
|
||||||
|
<option value="other">Other</option>
|
||||||
|
</select>
|
||||||
|
<select id="model-modality-filter" class="form-control" style="margin-bottom: 0; padding: 4px 8px; width: auto;">
|
||||||
|
<option value="">All Modalities</option>
|
||||||
|
<option value="text">Text</option>
|
||||||
|
<option value="image">Vision/Image</option>
|
||||||
|
<option value="audio">Audio</option>
|
||||||
|
</select>
|
||||||
|
<select id="model-capability-filter" class="form-control" style="margin-bottom: 0; padding: 4px 8px; width: auto;">
|
||||||
|
<option value="">All Capabilities</option>
|
||||||
|
<option value="tool_call">Tool Calling</option>
|
||||||
|
<option value="reasoning">Reasoning</option>
|
||||||
|
</select>
|
||||||
|
<input type="text" id="model-search" placeholder="Search models..." class="form-control" style="margin-bottom: 0; padding: 4px 8px; width: 200px;">
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="table-container">
|
<div class="table-container">
|
||||||
|
|||||||
@@ -38,6 +38,24 @@ class LogsPage {
|
|||||||
const statusClass = log.status === 'success' ? 'success' : 'danger';
|
const statusClass = log.status === 'success' ? 'success' : 'danger';
|
||||||
const timestamp = luxon.DateTime.fromISO(log.timestamp).toFormat('yyyy-MM-dd HH:mm:ss');
|
const timestamp = luxon.DateTime.fromISO(log.timestamp).toFormat('yyyy-MM-dd HH:mm:ss');
|
||||||
|
|
||||||
|
let tokenDetails = `${log.tokens} total tokens`;
|
||||||
|
if (log.status === 'success') {
|
||||||
|
const parts = [];
|
||||||
|
parts.push(`${log.prompt_tokens} in`);
|
||||||
|
|
||||||
|
let completionStr = `${log.completion_tokens} out`;
|
||||||
|
if (log.reasoning_tokens > 0) {
|
||||||
|
completionStr += ` (${log.reasoning_tokens} reasoning)`;
|
||||||
|
}
|
||||||
|
parts.push(completionStr);
|
||||||
|
|
||||||
|
if (log.cache_read_tokens > 0) {
|
||||||
|
parts.push(`${log.cache_read_tokens} cache-hit`);
|
||||||
|
}
|
||||||
|
|
||||||
|
tokenDetails = parts.join(', ');
|
||||||
|
}
|
||||||
|
|
||||||
return `
|
return `
|
||||||
<tr class="log-row">
|
<tr class="log-row">
|
||||||
<td class="whitespace-nowrap">${timestamp}</td>
|
<td class="whitespace-nowrap">${timestamp}</td>
|
||||||
@@ -55,7 +73,7 @@ class LogsPage {
|
|||||||
<td>
|
<td>
|
||||||
<div class="log-message-container">
|
<div class="log-message-container">
|
||||||
<code class="log-model">${log.model}</code>
|
<code class="log-model">${log.model}</code>
|
||||||
<span class="log-tokens">${log.tokens} tokens</span>
|
<span class="log-tokens" title="${log.tokens} total tokens">${tokenDetails}</span>
|
||||||
<span class="log-duration">${log.duration}ms</span>
|
<span class="log-duration">${log.duration}ms</span>
|
||||||
${log.error ? `<div class="log-error-msg">${log.error}</div>` : ''}
|
${log.error ? `<div class="log-error-msg">${log.error}</div>` : ''}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -31,13 +31,58 @@ class ModelsPage {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const searchInput = document.getElementById('model-search');
|
||||||
|
const providerFilter = document.getElementById('model-provider-filter');
|
||||||
|
const modalityFilter = document.getElementById('model-modality-filter');
|
||||||
|
const capabilityFilter = document.getElementById('model-capability-filter');
|
||||||
|
|
||||||
|
const q = searchInput ? searchInput.value.toLowerCase() : '';
|
||||||
|
const providerVal = providerFilter ? providerFilter.value : '';
|
||||||
|
const modalityVal = modalityFilter ? modalityFilter.value : '';
|
||||||
|
const capabilityVal = capabilityFilter ? capabilityFilter.value : '';
|
||||||
|
|
||||||
|
// Apply filters non-destructively
|
||||||
|
let filteredModels = this.models.filter(m => {
|
||||||
|
// Text search
|
||||||
|
if (q && !(m.id.toLowerCase().includes(q) || m.name.toLowerCase().includes(q) || m.provider.toLowerCase().includes(q))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Provider filter
|
||||||
|
if (providerVal) {
|
||||||
|
if (providerVal === 'other') {
|
||||||
|
const known = ['openai', 'anthropic', 'google', 'deepseek', 'xai', 'meta', 'cohere', 'mistral'];
|
||||||
|
if (known.includes(m.provider.toLowerCase())) return false;
|
||||||
|
} else if (m.provider.toLowerCase() !== providerVal) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Modality filter
|
||||||
|
if (modalityVal) {
|
||||||
|
const mods = m.modalities && m.modalities.input ? m.modalities.input.map(x => x.toLowerCase()) : [];
|
||||||
|
if (!mods.includes(modalityVal)) return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Capability filter
|
||||||
|
if (capabilityVal === 'tool_call' && !m.tool_call) return false;
|
||||||
|
if (capabilityVal === 'reasoning' && !m.reasoning) return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (filteredModels.length === 0) {
|
||||||
|
tableBody.innerHTML = '<tr><td colspan="7" class="text-center">No models match the selected filters</td></tr>';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Sort by provider then name
|
// Sort by provider then name
|
||||||
this.models.sort((a, b) => {
|
filteredModels.sort((a, b) => {
|
||||||
if (a.provider !== b.provider) return a.provider.localeCompare(b.provider);
|
if (a.provider !== b.provider) return a.provider.localeCompare(b.provider);
|
||||||
return a.name.localeCompare(b.name);
|
return a.name.localeCompare(b.name);
|
||||||
});
|
});
|
||||||
|
|
||||||
tableBody.innerHTML = this.models.map(model => {
|
tableBody.innerHTML = filteredModels.map(model => {
|
||||||
const statusClass = model.enabled ? 'success' : 'secondary';
|
const statusClass = model.enabled ? 'success' : 'secondary';
|
||||||
const statusIcon = model.enabled ? 'check-circle' : 'ban';
|
const statusIcon = model.enabled ? 'check-circle' : 'ban';
|
||||||
|
|
||||||
@@ -99,6 +144,14 @@ class ModelsPage {
|
|||||||
<input type="number" id="model-completion-cost" value="${model.completion_cost}" step="0.01">
|
<input type="number" id="model-completion-cost" value="${model.completion_cost}" step="0.01">
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="form-control">
|
||||||
|
<label for="model-cache-read-cost">Cache Read Cost (per 1M tokens)</label>
|
||||||
|
<input type="number" id="model-cache-read-cost" value="${model.cache_read_cost || 0}" step="0.01">
|
||||||
|
</div>
|
||||||
|
<div class="form-control">
|
||||||
|
<label for="model-cache-write-cost">Cache Write Cost (per 1M tokens)</label>
|
||||||
|
<input type="number" id="model-cache-write-cost" value="${model.cache_write_cost || 0}" step="0.01">
|
||||||
|
</div>
|
||||||
<div class="form-control">
|
<div class="form-control">
|
||||||
<label for="model-mapping">Internal Mapping (Optional)</label>
|
<label for="model-mapping">Internal Mapping (Optional)</label>
|
||||||
<input type="text" id="model-mapping" value="${model.mapping || ''}" placeholder="e.g. gpt-4o-2024-05-13">
|
<input type="text" id="model-mapping" value="${model.mapping || ''}" placeholder="e.g. gpt-4o-2024-05-13">
|
||||||
@@ -118,6 +171,8 @@ class ModelsPage {
|
|||||||
const enabled = modal.querySelector('#model-enabled').checked;
|
const enabled = modal.querySelector('#model-enabled').checked;
|
||||||
const promptCost = parseFloat(modal.querySelector('#model-prompt-cost').value);
|
const promptCost = parseFloat(modal.querySelector('#model-prompt-cost').value);
|
||||||
const completionCost = parseFloat(modal.querySelector('#model-completion-cost').value);
|
const completionCost = parseFloat(modal.querySelector('#model-completion-cost').value);
|
||||||
|
const cacheReadCost = parseFloat(modal.querySelector('#model-cache-read-cost').value);
|
||||||
|
const cacheWriteCost = parseFloat(modal.querySelector('#model-cache-write-cost').value);
|
||||||
const mapping = modal.querySelector('#model-mapping').value;
|
const mapping = modal.querySelector('#model-mapping').value;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -125,6 +180,8 @@ class ModelsPage {
|
|||||||
enabled,
|
enabled,
|
||||||
prompt_cost: promptCost,
|
prompt_cost: promptCost,
|
||||||
completion_cost: completionCost,
|
completion_cost: completionCost,
|
||||||
|
cache_read_cost: isNaN(cacheReadCost) ? null : cacheReadCost,
|
||||||
|
cache_write_cost: isNaN(cacheWriteCost) ? null : cacheWriteCost,
|
||||||
mapping: mapping || null
|
mapping: mapping || null
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -138,27 +195,18 @@ class ModelsPage {
|
|||||||
}
|
}
|
||||||
|
|
||||||
setupEventListeners() {
|
setupEventListeners() {
|
||||||
const searchInput = document.getElementById('model-search');
|
const attachFilter = (id) => {
|
||||||
if (searchInput) {
|
const el = document.getElementById(id);
|
||||||
searchInput.oninput = (e) => this.filterModels(e.target.value);
|
if (el) {
|
||||||
}
|
el.addEventListener('input', () => this.renderModelsTable());
|
||||||
|
el.addEventListener('change', () => this.renderModelsTable());
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
filterModels(query) {
|
attachFilter('model-search');
|
||||||
if (!query) {
|
attachFilter('model-provider-filter');
|
||||||
this.renderModelsTable();
|
attachFilter('model-modality-filter');
|
||||||
return;
|
attachFilter('model-capability-filter');
|
||||||
}
|
|
||||||
|
|
||||||
const q = query.toLowerCase();
|
|
||||||
const originalModels = this.models;
|
|
||||||
this.models = this.models.filter(m =>
|
|
||||||
m.id.toLowerCase().includes(q) ||
|
|
||||||
m.name.toLowerCase().includes(q) ||
|
|
||||||
m.provider.toLowerCase().includes(q)
|
|
||||||
);
|
|
||||||
this.renderModelsTable();
|
|
||||||
this.models = originalModels;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user