Compare commits

...

13 Commits

Author SHA1 Message Date
649371154f fix(openai): implement parsing for real-time Responses API streaming format
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-17 19:00:40 +00:00
78fff61660 fix(openai): map system to developer role and enhance stream diagnostics for Responses API
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-17 18:50:55 +00:00
b131094dfd fix(openai): improve Responses API streaming reliability and diagnostics
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-17 18:45:09 +00:00
c3d81c1733 fix(openai): remove unsupported stream_options from Responses API
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-17 18:40:06 +00:00
e123f542f1 fix(openai): use max_output_tokens for Responses API
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-17 18:36:05 +00:00
0d28241e39 fix(openai): enhance Responses API integration with full parameters and improved parsing
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-17 18:31:23 +00:00
754ee9cb84 fix(openai): implement role-based content mapping for Responses API
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-17 18:23:50 +00:00
5a9086b883 fix(openai): map content types for Responses API (v1/responses)
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-17 18:18:23 +00:00
cc5eba1957 feat: implement reasoning_tokens tracking and enhanced usage logging
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-11 17:14:49 +00:00
3ab00fb188 fixed tracking
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-11 16:21:32 +00:00
c2595f7a74 style(dashboard): implement retro terminal gruvbox aesthetic
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
This commit overhauls the dashboard styling to achieve a 'retro terminal' look matching the Gruvbox color palette. Changes include switching the global font to JetBrains Mono/Fira Code, removing rounded corners on all major elements (cards, buttons, inputs, modals, badges), and replacing modern soft shadows with sharp, hard-offset block shadows.
2026-03-07 01:42:01 +00:00
0526304398 style(dashboard): fix bunched buttons in card headers
This commit adds a proper flexbox layout to the '.card-actions' CSS class. Previously, action buttons (like Export and Refresh on the Analytics page) were bunching up because they lacked a flex container with appropriate gap and wrapping rules. It also updates the '.card-header' to wrap gracefully on smaller screens.
2026-03-07 01:37:27 +00:00
75e2967727 style(dashboard): fix login form floating labels rendering
This commit corrects the CSS for the login form's floating labels. Previously, the label floated too high and had a solid background that contrasted poorly with the input box. The label now floats exactly on the border line and uses a linear-gradient to seamlessly blend the card background into the input background, fixing the 'misframed' appearance.
2026-03-07 01:33:45 +00:00
16 changed files with 364 additions and 204 deletions

Binary file not shown.

View File

@@ -255,7 +255,10 @@ pub(super) async fn handle_system_logs(
model, model,
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
reasoning_tokens,
total_tokens, total_tokens,
cache_read_tokens,
cache_write_tokens,
cost, cost,
status, status,
error_message, error_message,
@@ -279,6 +282,11 @@ pub(super) async fn handle_system_logs(
"client_id": row.get::<String, _>("client_id"), "client_id": row.get::<String, _>("client_id"),
"provider": row.get::<String, _>("provider"), "provider": row.get::<String, _>("provider"),
"model": row.get::<String, _>("model"), "model": row.get::<String, _>("model"),
"prompt_tokens": row.get::<i64, _>("prompt_tokens"),
"completion_tokens": row.get::<i64, _>("completion_tokens"),
"reasoning_tokens": row.get::<i64, _>("reasoning_tokens"),
"cache_read_tokens": row.get::<i64, _>("cache_read_tokens"),
"cache_write_tokens": row.get::<i64, _>("cache_write_tokens"),
"tokens": row.get::<i64, _>("total_tokens"), "tokens": row.get::<i64, _>("total_tokens"),
"cost": row.get::<f64, _>("cost"), "cost": row.get::<f64, _>("cost"),
"status": row.get::<String, _>("status"), "status": row.get::<String, _>("status"),

View File

@@ -64,6 +64,7 @@ pub async fn run_migrations(pool: &DbPool) -> Result<()> {
model TEXT, model TEXT,
prompt_tokens INTEGER, prompt_tokens INTEGER,
completion_tokens INTEGER, completion_tokens INTEGER,
reasoning_tokens INTEGER DEFAULT 0,
total_tokens INTEGER, total_tokens INTEGER,
cost REAL, cost REAL,
has_images BOOLEAN DEFAULT FALSE, has_images BOOLEAN DEFAULT FALSE,
@@ -172,6 +173,9 @@ pub async fn run_migrations(pool: &DbPool) -> Result<()> {
let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN cache_write_tokens INTEGER DEFAULT 0") let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN cache_write_tokens INTEGER DEFAULT 0")
.execute(pool) .execute(pool)
.await; .await;
let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN reasoning_tokens INTEGER DEFAULT 0")
.execute(pool)
.await;
// Add billing_mode column if it doesn't exist (migration for existing DBs) // Add billing_mode column if it doesn't exist (migration for existing DBs)
let _ = sqlx::query("ALTER TABLE provider_configs ADD COLUMN billing_mode TEXT") let _ = sqlx::query("ALTER TABLE provider_configs ADD COLUMN billing_mode TEXT")

View File

@@ -15,6 +15,7 @@ pub struct RequestLog {
pub model: String, pub model: String,
pub prompt_tokens: u32, pub prompt_tokens: u32,
pub completion_tokens: u32, pub completion_tokens: u32,
pub reasoning_tokens: u32,
pub total_tokens: u32, pub total_tokens: u32,
pub cache_read_tokens: u32, pub cache_read_tokens: u32,
pub cache_write_tokens: u32, pub cache_write_tokens: u32,
@@ -77,8 +78,8 @@ impl RequestLogger {
sqlx::query( sqlx::query(
r#" r#"
INSERT INTO llm_requests INSERT INTO llm_requests
(timestamp, client_id, provider, model, prompt_tokens, completion_tokens, total_tokens, cache_read_tokens, cache_write_tokens, cost, has_images, status, error_message, duration_ms, request_body, response_body) (timestamp, client_id, provider, model, prompt_tokens, completion_tokens, reasoning_tokens, total_tokens, cache_read_tokens, cache_write_tokens, cost, has_images, status, error_message, duration_ms, request_body, response_body)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"#, "#,
) )
.bind(log.timestamp) .bind(log.timestamp)
@@ -87,6 +88,7 @@ impl RequestLogger {
.bind(&log.model) .bind(&log.model)
.bind(log.prompt_tokens as i64) .bind(log.prompt_tokens as i64)
.bind(log.completion_tokens as i64) .bind(log.completion_tokens as i64)
.bind(log.reasoning_tokens as i64)
.bind(log.total_tokens as i64) .bind(log.total_tokens as i64)
.bind(log.cache_read_tokens as i64) .bind(log.cache_read_tokens as i64)
.bind(log.cache_write_tokens as i64) .bind(log.cache_write_tokens as i64)

View File

@@ -165,6 +165,8 @@ pub struct Usage {
pub completion_tokens: u32, pub completion_tokens: u32,
pub total_tokens: u32, pub total_tokens: u32,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_read_tokens: Option<u32>, pub cache_read_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub cache_write_tokens: Option<u32>, pub cache_write_tokens: Option<u32>,
@@ -179,6 +181,8 @@ pub struct ChatCompletionStreamResponse {
pub created: u64, pub created: u64,
pub model: String, pub model: String,
pub choices: Vec<ChatStreamChoice>, pub choices: Vec<ChatStreamChoice>,
#[serde(skip_serializing_if = "Option::is_none")]
pub usage: Option<Usage>,
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]

View File

@@ -128,17 +128,36 @@ impl super::Provider for DeepSeekProvider {
cache_write_tokens: u32, cache_write_tokens: u32,
registry: &crate::models::registry::ModelRegistry, registry: &crate::models::registry::ModelRegistry,
) -> f64 { ) -> f64 {
helpers::calculate_cost_with_registry( if let Some(metadata) = registry.find_model(model) {
model, if metadata.cost.is_some() {
prompt_tokens, return helpers::calculate_cost_with_registry(
completion_tokens, model,
cache_read_tokens, prompt_tokens,
cache_write_tokens, completion_tokens,
registry, cache_read_tokens,
&self.pricing, cache_write_tokens,
0.14, registry,
0.28, &self.pricing,
) 0.28,
0.42,
);
}
}
// Custom DeepSeek fallback that correctly handles cache hits
let (prompt_rate, completion_rate) = self
.pricing
.iter()
.find(|p| model.contains(&p.model))
.map(|p| (p.prompt_tokens_per_million, p.completion_tokens_per_million))
.unwrap_or((0.28, 0.42)); // Default to DeepSeek's current API pricing
let cache_hit_rate = prompt_rate / 10.0;
let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
(non_cached_prompt as f64 * prompt_rate / 1_000_000.0)
+ (cache_read_tokens as f64 * cache_hit_rate / 1_000_000.0)
+ (completion_tokens as f64 * completion_rate / 1_000_000.0)
} }
async fn chat_completion_stream( async fn chat_completion_stream(

View File

@@ -722,6 +722,10 @@ impl super::Provider for GeminiProvider {
let reasoning_content = candidate let reasoning_content = candidate
.and_then(|c| c.content.parts.iter().find_map(|p| p.thought.clone())); .and_then(|c| c.content.parts.iter().find_map(|p| p.thought.clone()));
let reasoning_tokens = reasoning_content.as_ref()
.map(|r| crate::utils::tokens::estimate_completion_tokens(r, &model))
.unwrap_or(0);
// Extract function calls → OpenAI tool_calls // Extract function calls → OpenAI tool_calls
let tool_calls = candidate.and_then(|c| Self::extract_tool_calls(&c.content.parts)); let tool_calls = candidate.and_then(|c| Self::extract_tool_calls(&c.content.parts));
@@ -752,6 +756,7 @@ impl super::Provider for GeminiProvider {
tool_calls, tool_calls,
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
reasoning_tokens,
total_tokens, total_tokens,
cache_read_tokens, cache_read_tokens,
cache_write_tokens: 0, // Gemini doesn't report cache writes separately cache_write_tokens: 0, // Gemini doesn't report cache writes separately
@@ -772,17 +777,36 @@ impl super::Provider for GeminiProvider {
cache_write_tokens: u32, cache_write_tokens: u32,
registry: &crate::models::registry::ModelRegistry, registry: &crate::models::registry::ModelRegistry,
) -> f64 { ) -> f64 {
super::helpers::calculate_cost_with_registry( if let Some(metadata) = registry.find_model(model) {
model, if metadata.cost.is_some() {
prompt_tokens, return super::helpers::calculate_cost_with_registry(
completion_tokens, model,
cache_read_tokens, prompt_tokens,
cache_write_tokens, completion_tokens,
registry, cache_read_tokens,
&self.pricing, cache_write_tokens,
0.075, registry,
0.30, &self.pricing,
) 0.075,
0.30,
);
}
}
// Custom Gemini fallback that correctly handles cache hits (25% of input cost)
let (prompt_rate, completion_rate) = self
.pricing
.iter()
.find(|p| model.contains(&p.model))
.map(|p| (p.prompt_tokens_per_million, p.completion_tokens_per_million))
.unwrap_or((0.075, 0.30)); // Default to Gemini 1.5 Flash current API pricing
let cache_hit_rate = prompt_rate * 0.25;
let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
(non_cached_prompt as f64 * prompt_rate / 1_000_000.0)
+ (cache_read_tokens as f64 * cache_hit_rate / 1_000_000.0)
+ (completion_tokens as f64 * completion_rate / 1_000_000.0)
} }
async fn chat_completion_stream( async fn chat_completion_stream(
@@ -883,6 +907,7 @@ impl super::Provider for GeminiProvider {
super::StreamUsage { super::StreamUsage {
prompt_tokens: u.prompt_token_count, prompt_tokens: u.prompt_token_count,
completion_tokens: u.candidates_token_count, completion_tokens: u.candidates_token_count,
reasoning_tokens: 0,
total_tokens: u.total_token_count, total_tokens: u.total_token_count,
cache_read_tokens: u.cached_content_token_count, cache_read_tokens: u.cached_content_token_count,
cache_write_tokens: 0, cache_write_tokens: 0,

View File

@@ -254,6 +254,11 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
let completion_tokens = usage["completion_tokens"].as_u64().unwrap_or(0) as u32; let completion_tokens = usage["completion_tokens"].as_u64().unwrap_or(0) as u32;
let total_tokens = usage["total_tokens"].as_u64().unwrap_or(0) as u32; let total_tokens = usage["total_tokens"].as_u64().unwrap_or(0) as u32;
// Extract reasoning tokens
let reasoning_tokens = usage["completion_tokens_details"]["reasoning_tokens"]
.as_u64()
.unwrap_or(0) as u32;
// Extract cache tokens — try OpenAI/Grok format first, then DeepSeek format // Extract cache tokens — try OpenAI/Grok format first, then DeepSeek format
let cache_read_tokens = usage["prompt_tokens_details"]["cached_tokens"] let cache_read_tokens = usage["prompt_tokens_details"]["cached_tokens"]
.as_u64() .as_u64()
@@ -261,9 +266,9 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
.or_else(|| usage["prompt_cache_hit_tokens"].as_u64()) .or_else(|| usage["prompt_cache_hit_tokens"].as_u64())
.unwrap_or(0) as u32; .unwrap_or(0) as u32;
// DeepSeek reports cache_write as prompt_cache_miss_tokens (tokens written to cache for future use). // DeepSeek reports prompt_cache_miss_tokens which are just regular non-cached tokens.
// OpenAI doesn't report cache_write in this location, but may in the future. // They do not incur a separate cache_write fee, so we don't map them here to avoid double-charging.
let cache_write_tokens = usage["prompt_cache_miss_tokens"].as_u64().unwrap_or(0) as u32; let cache_write_tokens = 0;
Ok(ProviderResponse { Ok(ProviderResponse {
content, content,
@@ -271,6 +276,7 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
tool_calls, tool_calls,
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
reasoning_tokens,
total_tokens, total_tokens,
cache_read_tokens, cache_read_tokens,
cache_write_tokens, cache_write_tokens,
@@ -295,18 +301,21 @@ pub fn parse_openai_stream_chunk(
let completion_tokens = u["completion_tokens"].as_u64().unwrap_or(0) as u32; let completion_tokens = u["completion_tokens"].as_u64().unwrap_or(0) as u32;
let total_tokens = u["total_tokens"].as_u64().unwrap_or(0) as u32; let total_tokens = u["total_tokens"].as_u64().unwrap_or(0) as u32;
let reasoning_tokens = u["completion_tokens_details"]["reasoning_tokens"]
.as_u64()
.unwrap_or(0) as u32;
let cache_read_tokens = u["prompt_tokens_details"]["cached_tokens"] let cache_read_tokens = u["prompt_tokens_details"]["cached_tokens"]
.as_u64() .as_u64()
.or_else(|| u["prompt_cache_hit_tokens"].as_u64()) .or_else(|| u["prompt_cache_hit_tokens"].as_u64())
.unwrap_or(0) as u32; .unwrap_or(0) as u32;
let cache_write_tokens = u["prompt_cache_miss_tokens"] let cache_write_tokens = 0;
.as_u64()
.unwrap_or(0) as u32;
Some(StreamUsage { Some(StreamUsage {
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
reasoning_tokens,
total_tokens, total_tokens,
cache_read_tokens, cache_read_tokens,
cache_write_tokens, cache_write_tokens,

View File

@@ -75,6 +75,7 @@ pub struct ProviderResponse {
pub tool_calls: Option<Vec<crate::models::ToolCall>>, pub tool_calls: Option<Vec<crate::models::ToolCall>>,
pub prompt_tokens: u32, pub prompt_tokens: u32,
pub completion_tokens: u32, pub completion_tokens: u32,
pub reasoning_tokens: u32,
pub total_tokens: u32, pub total_tokens: u32,
pub cache_read_tokens: u32, pub cache_read_tokens: u32,
pub cache_write_tokens: u32, pub cache_write_tokens: u32,
@@ -86,6 +87,7 @@ pub struct ProviderResponse {
pub struct StreamUsage { pub struct StreamUsage {
pub prompt_tokens: u32, pub prompt_tokens: u32,
pub completion_tokens: u32, pub completion_tokens: u32,
pub reasoning_tokens: u32,
pub total_tokens: u32, pub total_tokens: u32,
pub cache_read_tokens: u32, pub cache_read_tokens: u32,
pub cache_write_tokens: u32, pub cache_write_tokens: u32,

View File

@@ -26,7 +26,7 @@ impl OpenAIProvider {
.timeout(std::time::Duration::from_secs(300)) .timeout(std::time::Duration::from_secs(300))
.pool_idle_timeout(std::time::Duration::from_secs(90)) .pool_idle_timeout(std::time::Duration::from_secs(90))
.pool_max_idle_per_host(4) .pool_max_idle_per_host(4)
.tcp_keepalive(std::time::Duration::from_secs(30)) .tcp_keepalive(std::time::Duration::from_secs(15))
.build()?; .build()?;
Ok(Self { Ok(Self {
@@ -92,96 +92,7 @@ impl super::Provider for OpenAIProvider {
// Read error body to diagnose. If the model requires the Responses // Read error body to diagnose. If the model requires the Responses
// API (v1/responses), retry against that endpoint. // API (v1/responses), retry against that endpoint.
if error_text.to_lowercase().contains("v1/responses") || error_text.to_lowercase().contains("only supported in v1/responses") { if error_text.to_lowercase().contains("v1/responses") || error_text.to_lowercase().contains("only supported in v1/responses") {
// Build a simple `input` string by concatenating message parts. return self.chat_responses(request).await;
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
let mut inputs: Vec<String> = Vec::new();
for m in &messages_json {
let role = m["role"].as_str().unwrap_or("");
let parts = m.get("content").and_then(|c| c.as_array()).cloned().unwrap_or_default();
let mut text_parts = Vec::new();
for p in parts {
if let Some(t) = p.get("text").and_then(|v| v.as_str()) {
text_parts.push(t.to_string());
}
}
inputs.push(format!("{}: {}", role, text_parts.join("")));
}
let input_text = inputs.join("\n");
let resp = self
.client
.post(format!("{}/responses", self.config.base_url))
.header("Authorization", format!("Bearer {}", self.api_key))
.json(&serde_json::json!({ "model": request.model, "input": input_text }))
.send()
.await
.map_err(|e| AppError::ProviderError(e.to_string()))?;
if !resp.status().is_success() {
let err = resp.text().await.unwrap_or_default();
return Err(AppError::ProviderError(format!("OpenAI Responses API error: {}", err)));
}
let resp_json: serde_json::Value = resp.json().await.map_err(|e| AppError::ProviderError(e.to_string()))?;
// Try to normalize: if it's chat-style, use existing parser
if resp_json.get("choices").is_some() {
return helpers::parse_openai_response(&resp_json, request.model);
}
// Responses API: try to extract text from `output` or `candidates`
let mut content_text = String::new();
if let Some(output) = resp_json.get("output").and_then(|o| o.as_array()) {
if let Some(first) = output.get(0) {
if let Some(contents) = first.get("content").and_then(|c| c.as_array()) {
for item in contents {
if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
if !content_text.is_empty() { content_text.push_str("\n"); }
content_text.push_str(text);
} else if let Some(parts) = item.get("parts").and_then(|p| p.as_array()) {
for p in parts {
if let Some(t) = p.as_str() {
if !content_text.is_empty() { content_text.push_str("\n"); }
content_text.push_str(t);
}
}
}
}
}
}
}
if content_text.is_empty() {
if let Some(cands) = resp_json.get("candidates").and_then(|c| c.as_array()) {
if let Some(c0) = cands.get(0) {
if let Some(content) = c0.get("content") {
if let Some(parts) = content.get("parts").and_then(|p| p.as_array()) {
for p in parts {
if let Some(t) = p.get("text").and_then(|v| v.as_str()) {
if !content_text.is_empty() { content_text.push_str("\n"); }
content_text.push_str(t);
}
}
}
}
}
}
}
let prompt_tokens = resp_json.get("usage").and_then(|u| u.get("prompt_tokens")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
let completion_tokens = resp_json.get("usage").and_then(|u| u.get("completion_tokens")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
let total_tokens = resp_json.get("usage").and_then(|u| u.get("total_tokens")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
return Ok(ProviderResponse {
content: content_text,
reasoning_content: None,
tool_calls: None,
prompt_tokens,
completion_tokens,
total_tokens,
cache_read_tokens: 0,
cache_write_tokens: 0,
model: request.model,
});
} }
tracing::error!("OpenAI API error ({}): {}", status, error_text); tracing::error!("OpenAI API error ({}): {}", status, error_text);
@@ -201,20 +112,76 @@ impl super::Provider for OpenAIProvider {
let messages_json = helpers::messages_to_openai_json(&request.messages).await?; let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
let mut input_parts = Vec::new(); let mut input_parts = Vec::new();
for m in &messages_json { for m in &messages_json {
let role = m["role"].as_str().unwrap_or("user"); let mut role = m["role"].as_str().unwrap_or("user").to_string();
let content = m.get("content").cloned().unwrap_or(serde_json::json!("")); // Newer models (gpt-5, o1) prefer "developer" over "system"
if role == "system" {
role = "developer".to_string();
}
let mut content = m.get("content").cloned().unwrap_or(serde_json::json!([]));
// Map content types based on role for Responses API
if let Some(content_array) = content.as_array_mut() {
for part in content_array {
if let Some(part_obj) = part.as_object_mut() {
if let Some(t) = part_obj.get("type").and_then(|v| v.as_str()) {
match t {
"text" => {
let new_type = if role == "assistant" { "output_text" } else { "input_text" };
part_obj.insert("type".to_string(), serde_json::json!(new_type));
}
"image_url" => {
// Assistant typically doesn't have image_url in history this way, but for safety:
let new_type = if role == "assistant" { "output_image" } else { "input_image" };
part_obj.insert("type".to_string(), serde_json::json!(new_type));
if let Some(img_url) = part_obj.remove("image_url") {
part_obj.insert("image".to_string(), img_url);
}
}
_ => {}
}
}
}
}
} else if let Some(text) = content.as_str() {
let new_type = if role == "assistant" { "output_text" } else { "input_text" };
content = serde_json::json!([{ "type": new_type, "text": text }]);
}
input_parts.push(serde_json::json!({ input_parts.push(serde_json::json!({
"role": role, "role": role,
"content": content "content": content
})); }));
} }
let mut body = serde_json::json!({
"model": request.model,
"input": input_parts,
});
// Add standard parameters
if let Some(temp) = request.temperature {
body["temperature"] = serde_json::json!(temp);
}
// Newer models (gpt-5, o1) in Responses API use max_output_tokens
if let Some(max_tokens) = request.max_tokens {
if request.model.contains("gpt-5") || request.model.starts_with("o1-") || request.model.starts_with("o3-") {
body["max_output_tokens"] = serde_json::json!(max_tokens);
} else {
body["max_tokens"] = serde_json::json!(max_tokens);
}
}
if let Some(tools) = &request.tools {
body["tools"] = serde_json::json!(tools);
}
let resp = self let resp = self
.client .client
.post(format!("{}/responses", self.config.base_url)) .post(format!("{}/responses", self.config.base_url))
.header("Authorization", format!("Bearer {}", self.api_key)) .header("Authorization", format!("Bearer {}", self.api_key))
.json(&serde_json::json!({ "model": request.model, "input": input_parts })) .json(&body)
.send() .send()
.await .await
.map_err(|e| AppError::ProviderError(e.to_string()))?; .map_err(|e| AppError::ProviderError(e.to_string()))?;
@@ -226,11 +193,16 @@ impl super::Provider for OpenAIProvider {
let resp_json: serde_json::Value = resp.json().await.map_err(|e| AppError::ProviderError(e.to_string()))?; let resp_json: serde_json::Value = resp.json().await.map_err(|e| AppError::ProviderError(e.to_string()))?;
// Try to normalize: if it's chat-style, use existing parser
if resp_json.get("choices").is_some() {
return helpers::parse_openai_response(&resp_json, request.model);
}
// Normalize Responses API output into ProviderResponse // Normalize Responses API output into ProviderResponse
let mut content_text = String::new(); let mut content_text = String::new();
if let Some(output) = resp_json.get("output").and_then(|o| o.as_array()) { if let Some(output) = resp_json.get("output").and_then(|o| o.as_array()) {
if let Some(first) = output.get(0) { for out in output {
if let Some(contents) = first.get("content").and_then(|c| c.as_array()) { if let Some(contents) = out.get("content").and_then(|c| c.as_array()) {
for item in contents { for item in contents {
if let Some(text) = item.get("text").and_then(|t| t.as_str()) { if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
if !content_text.is_empty() { content_text.push_str("\n"); } if !content_text.is_empty() { content_text.push_str("\n"); }
@@ -275,6 +247,7 @@ impl super::Provider for OpenAIProvider {
tool_calls: None, tool_calls: None,
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
reasoning_tokens: 0,
total_tokens, total_tokens,
cache_read_tokens: 0, cache_read_tokens: 0,
cache_write_tokens: 0, cache_write_tokens: 0,
@@ -312,6 +285,12 @@ impl super::Provider for OpenAIProvider {
&self, &self,
request: UnifiedRequest, request: UnifiedRequest,
) -> Result<BoxStream<'static, Result<ProviderStreamChunk, AppError>>, AppError> { ) -> Result<BoxStream<'static, Result<ProviderStreamChunk, AppError>>, AppError> {
// Allow proactive routing to Responses API based on heuristic
let model_lc = request.model.to_lowercase();
if model_lc.contains("gpt-5") || model_lc.contains("codex") {
return self.chat_responses_stream(request).await;
}
let messages_json = helpers::messages_to_openai_json(&request.messages).await?; let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
let mut body = helpers::build_openai_body(&request, messages_json, true); let mut body = helpers::build_openai_body(&request, messages_json, true);
@@ -400,21 +379,68 @@ impl super::Provider for OpenAIProvider {
let messages_json = helpers::messages_to_openai_json(&request.messages).await?; let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
let mut input_parts = Vec::new(); let mut input_parts = Vec::new();
for m in &messages_json { for m in &messages_json {
let role = m["role"].as_str().unwrap_or("user"); let mut role = m["role"].as_str().unwrap_or("user").to_string();
let content = m.get("content").cloned().unwrap_or(serde_json::json!("")); // Newer models (gpt-5, o1) prefer "developer" over "system"
if role == "system" {
role = "developer".to_string();
}
let mut content = m.get("content").cloned().unwrap_or(serde_json::json!([]));
// Map content types based on role for Responses API
if let Some(content_array) = content.as_array_mut() {
for part in content_array {
if let Some(part_obj) = part.as_object_mut() {
if let Some(t) = part_obj.get("type").and_then(|v| v.as_str()) {
match t {
"text" => {
let new_type = if role == "assistant" { "output_text" } else { "input_text" };
part_obj.insert("type".to_string(), serde_json::json!(new_type));
}
"image_url" => {
// Assistant typically doesn't have image_url in history this way, but for safety:
let new_type = if role == "assistant" { "output_image" } else { "input_image" };
part_obj.insert("type".to_string(), serde_json::json!(new_type));
if let Some(img_url) = part_obj.remove("image_url") {
part_obj.insert("image".to_string(), img_url);
}
}
_ => {}
}
}
}
}
} else if let Some(text) = content.as_str() {
let new_type = if role == "assistant" { "output_text" } else { "input_text" };
content = serde_json::json!([{ "type": new_type, "text": text }]);
}
input_parts.push(serde_json::json!({ input_parts.push(serde_json::json!({
"role": role, "role": role,
"content": content "content": content
})); }));
} }
let body = serde_json::json!({ let mut body = serde_json::json!({
"model": request.model, "model": request.model,
"input": input_parts, "input": input_parts,
"stream": true "stream": true,
}); });
// Add standard parameters
if let Some(temp) = request.temperature {
body["temperature"] = serde_json::json!(temp);
}
// Newer models (gpt-5, o1) in Responses API use max_output_tokens
if let Some(max_tokens) = request.max_tokens {
if request.model.contains("gpt-5") || request.model.starts_with("o1-") || request.model.starts_with("o3-") {
body["max_output_tokens"] = serde_json::json!(max_tokens);
} else {
body["max_tokens"] = serde_json::json!(max_tokens);
}
}
let url = format!("{}/responses", self.config.base_url); let url = format!("{}/responses", self.config.base_url);
let api_key = self.api_key.clone(); let api_key = self.api_key.clone();
let model = request.model.clone(); let model = request.model.clone();
@@ -425,6 +451,7 @@ impl super::Provider for OpenAIProvider {
self.client self.client
.post(&url) .post(&url)
.header("Authorization", format!("Bearer {}", api_key)) .header("Authorization", format!("Bearer {}", api_key))
.header("Accept", "text/event-stream")
.json(&body), .json(&body),
) )
.map_err(|e| AppError::ProviderError(format!("Failed to create EventSource for Responses API: {}", e)))?; .map_err(|e| AppError::ProviderError(format!("Failed to create EventSource for Responses API: {}", e)))?;
@@ -441,36 +468,44 @@ impl super::Provider for OpenAIProvider {
let chunk: serde_json::Value = serde_json::from_str(&msg.data) let chunk: serde_json::Value = serde_json::from_str(&msg.data)
.map_err(|e| AppError::ProviderError(format!("Failed to parse Responses stream chunk: {}", e)))?; .map_err(|e| AppError::ProviderError(format!("Failed to parse Responses stream chunk: {}", e)))?;
// Try standard OpenAI parsing first // Try standard OpenAI parsing first (choices/usage)
if let Some(p_chunk) = helpers::parse_openai_stream_chunk(&chunk, &model, None) { if let Some(p_chunk) = helpers::parse_openai_stream_chunk(&chunk, &model, None) {
yield p_chunk?; yield p_chunk?;
} else { } else {
// Responses API specific parsing for streaming // Responses API specific parsing for streaming
// Often it follows a similar structure to the non-streaming response but in chunks
let mut content = String::new(); let mut content = String::new();
let mut finish_reason = None;
// Check for output[0].content[0].text (similar to non-stream) let event_type = chunk.get("type").and_then(|v| v.as_str()).unwrap_or("");
if let Some(output) = chunk.get("output").and_then(|o| o.as_array()) {
if let Some(first) = output.get(0) { match event_type {
if let Some(contents) = first.get("content").and_then(|c| c.as_array()) { "response.output_text.delta" => {
for item in contents { if let Some(delta) = chunk.get("delta").and_then(|v| v.as_str()) {
if let Some(text) = item.get("text").and_then(|t| t.as_str()) { content.push_str(delta);
content.push_str(text);
}
}
} }
} }
} "response.output_text.done" => {
if let Some(text) = chunk.get("text").and_then(|v| v.as_str()) {
// Check for candidates[0].content.parts[0].text (Gemini-like, which OpenAI sometimes uses for v1/responses) // Some implementations send the full text at the end
if content.is_empty() { // We usually prefer deltas, but if we haven't seen them, this is the fallback.
if let Some(cands) = chunk.get("candidates").and_then(|c| c.as_array()) { // However, if we're already yielding deltas, we might not want this.
if let Some(c0) = cands.get(0) { // For now, let's just use it as a signal that we're done.
if let Some(content_obj) = c0.get("content") { finish_reason = Some("stop".to_string());
if let Some(parts) = content_obj.get("parts").and_then(|p| p.as_array()) { }
for p in parts { }
if let Some(t) = p.get("text").and_then(|v| v.as_str()) { "response.done" => {
content.push_str(t); finish_reason = Some("stop".to_string());
}
_ => {
// Fallback to older nested structure if present
if let Some(output) = chunk.get("output").and_then(|o| o.as_array()) {
for out in output {
if let Some(contents) = out.get("content").and_then(|c| c.as_array()) {
for item in contents {
if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
content.push_str(text);
} else if let Some(delta) = item.get("delta").and_then(|d| d.get("text")).and_then(|t| t.as_str()) {
content.push_str(delta);
} }
} }
} }
@@ -479,11 +514,11 @@ impl super::Provider for OpenAIProvider {
} }
} }
if !content.is_empty() { if !content.is_empty() || finish_reason.is_some() {
yield ProviderStreamChunk { yield ProviderStreamChunk {
content, content,
reasoning_content: None, reasoning_content: None,
finish_reason: None, finish_reason,
tool_calls: None, tool_calls: None,
model: model.clone(), model: model.clone(),
usage: None, usage: None,
@@ -497,19 +532,22 @@ impl super::Provider for OpenAIProvider {
let probe_resp = probe_client let probe_resp = probe_client
.post(&url) .post(&url)
.header("Authorization", format!("Bearer {}", api_key)) .header("Authorization", format!("Bearer {}", api_key))
.header("Accept", "application/json") // Ask for JSON during probe
.json(&probe_body) .json(&probe_body)
.send() .send()
.await; .await;
match probe_resp { match probe_resp {
Ok(r) if !r.status().is_success() => { Ok(r) => {
let status = r.status(); let status = r.status();
let error_body = r.text().await.unwrap_or_default(); let body = r.text().await.unwrap_or_default();
tracing::error!("OpenAI Responses Stream Error Probe ({}): {}", status, error_body); if status.is_success() {
Err(AppError::ProviderError(format!("OpenAI Responses API error ({}): {}", status, error_body)))?; tracing::warn!("Responses stream ended prematurely but probe returned 200 OK. Body: {}", body);
} Err(AppError::ProviderError(format!("Responses stream ended (server sent 200 OK with body: {})", body)))?;
Ok(_) => { } else {
Err(AppError::ProviderError(format!("Responses stream error (probe returned 200): {}", e)))?; tracing::error!("OpenAI Responses Stream Error Probe ({}): {}", status, body);
Err(AppError::ProviderError(format!("OpenAI Responses API error ({}): {}", status, body)))?;
}
} }
Err(probe_err) => { Err(probe_err) => {
tracing::error!("OpenAI Responses Stream Error Probe failed: {}", probe_err); tracing::error!("OpenAI Responses Stream Error Probe failed: {}", probe_err);

View File

@@ -312,6 +312,14 @@ async fn chat_completions(
}, },
finish_reason: chunk.finish_reason, finish_reason: chunk.finish_reason,
}], }],
usage: chunk.usage.as_ref().map(|u| crate::models::Usage {
prompt_tokens: u.prompt_tokens,
completion_tokens: u.completion_tokens,
total_tokens: u.total_tokens,
reasoning_tokens: if u.reasoning_tokens > 0 { Some(u.reasoning_tokens) } else { None },
cache_read_tokens: if u.cache_read_tokens > 0 { Some(u.cache_read_tokens) } else { None },
cache_write_tokens: if u.cache_write_tokens > 0 { Some(u.cache_write_tokens) } else { None },
}),
}; };
// Use axum's Event directly, wrap in Ok // Use axum's Event directly, wrap in Ok
@@ -383,6 +391,7 @@ async fn chat_completions(
model: response.model.clone(), model: response.model.clone(),
prompt_tokens: response.prompt_tokens, prompt_tokens: response.prompt_tokens,
completion_tokens: response.completion_tokens, completion_tokens: response.completion_tokens,
reasoning_tokens: response.reasoning_tokens,
total_tokens: response.total_tokens, total_tokens: response.total_tokens,
cache_read_tokens: response.cache_read_tokens, cache_read_tokens: response.cache_read_tokens,
cache_write_tokens: response.cache_write_tokens, cache_write_tokens: response.cache_write_tokens,
@@ -423,6 +432,7 @@ async fn chat_completions(
prompt_tokens: response.prompt_tokens, prompt_tokens: response.prompt_tokens,
completion_tokens: response.completion_tokens, completion_tokens: response.completion_tokens,
total_tokens: response.total_tokens, total_tokens: response.total_tokens,
reasoning_tokens: if response.reasoning_tokens > 0 { Some(response.reasoning_tokens) } else { None },
cache_read_tokens: if response.cache_read_tokens > 0 { Some(response.cache_read_tokens) } else { None }, cache_read_tokens: if response.cache_read_tokens > 0 { Some(response.cache_read_tokens) } else { None },
cache_write_tokens: if response.cache_write_tokens > 0 { Some(response.cache_write_tokens) } else { None }, cache_write_tokens: if response.cache_write_tokens > 0 { Some(response.cache_write_tokens) } else { None },
}), }),
@@ -452,6 +462,7 @@ async fn chat_completions(
model: model.clone(), model: model.clone(),
prompt_tokens: 0, prompt_tokens: 0,
completion_tokens: 0, completion_tokens: 0,
reasoning_tokens: 0,
total_tokens: 0, total_tokens: 0,
cache_read_tokens: 0, cache_read_tokens: 0,
cache_write_tokens: 0, cache_write_tokens: 0,

View File

@@ -96,11 +96,12 @@ where
// Spawn a background task to log the completion // Spawn a background task to log the completion
tokio::spawn(async move { tokio::spawn(async move {
// Use real usage from the provider when available, otherwise fall back to estimates // Use real usage from the provider when available, otherwise fall back to estimates
let (prompt_tokens, completion_tokens, total_tokens, cache_read_tokens, cache_write_tokens) = let (prompt_tokens, completion_tokens, reasoning_tokens, total_tokens, cache_read_tokens, cache_write_tokens) =
if let Some(usage) = &real_usage { if let Some(usage) = &real_usage {
( (
usage.prompt_tokens, usage.prompt_tokens,
usage.completion_tokens, usage.completion_tokens,
usage.reasoning_tokens,
usage.total_tokens, usage.total_tokens,
usage.cache_read_tokens, usage.cache_read_tokens,
usage.cache_write_tokens, usage.cache_write_tokens,
@@ -109,6 +110,7 @@ where
( (
estimated_prompt_tokens, estimated_prompt_tokens,
estimated_completion, estimated_completion,
estimated_reasoning_tokens,
estimated_prompt_tokens + estimated_completion, estimated_prompt_tokens + estimated_completion,
0u32, 0u32,
0u32, 0u32,
@@ -163,6 +165,7 @@ where
model, model,
prompt_tokens, prompt_tokens,
completion_tokens, completion_tokens,
reasoning_tokens,
total_tokens, total_tokens,
cache_read_tokens, cache_read_tokens,
cache_write_tokens, cache_write_tokens,

View File

@@ -61,9 +61,9 @@
--text-white: var(--fg0); --text-white: var(--fg0);
/* Borders */ /* Borders */
--border-color: var(--bg2); --border-color: var(--bg3);
--border-radius: 8px; --border-radius: 0px;
--border-radius-sm: 4px; --border-radius-sm: 0px;
/* Spacing System */ /* Spacing System */
--spacing-xs: 0.25rem; --spacing-xs: 0.25rem;
@@ -72,15 +72,15 @@
--spacing-lg: 1.5rem; --spacing-lg: 1.5rem;
--spacing-xl: 2rem; --spacing-xl: 2rem;
/* Shadows */ /* Shadows - Retro Block Style */
--shadow-sm: 0 1px 2px 0 rgba(0, 0, 0, 0.2); --shadow-sm: 2px 2px 0px rgba(0, 0, 0, 0.4);
--shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.3); --shadow: 4px 4px 0px rgba(0, 0, 0, 0.5);
--shadow-md: 0 10px 15px -3px rgba(0, 0, 0, 0.4); --shadow-md: 6px 6px 0px rgba(0, 0, 0, 0.6);
--shadow-lg: 0 20px 25px -5px rgba(0, 0, 0, 0.5); --shadow-lg: 8px 8px 0px rgba(0, 0, 0, 0.7);
} }
body { body {
font-family: 'Inter', -apple-system, sans-serif; font-family: 'JetBrains Mono', 'Fira Code', 'Courier New', monospace;
background-color: var(--bg-primary); background-color: var(--bg-primary);
color: var(--text-primary); color: var(--text-primary);
line-height: 1.6; line-height: 1.6;
@@ -105,12 +105,12 @@ body {
.login-card { .login-card {
background: var(--bg1); background: var(--bg1);
border-radius: 24px; border-radius: var(--border-radius);
padding: 4rem 2.5rem 3rem; padding: 4rem 2.5rem 3rem;
width: 100%; width: 100%;
max-width: 440px; max-width: 440px;
box-shadow: var(--shadow-lg); box-shadow: var(--shadow-lg);
border: 1px solid var(--bg2); border: 2px solid var(--bg3);
text-align: center; text-align: center;
animation: slideUp 0.6s cubic-bezier(0.34, 1.56, 0.64, 1); animation: slideUp 0.6s cubic-bezier(0.34, 1.56, 0.64, 1);
position: relative; position: relative;
@@ -191,7 +191,7 @@ body {
color: var(--fg3); color: var(--fg3);
pointer-events: none; pointer-events: none;
transition: all 0.25s ease; transition: all 0.25s ease;
background: var(--bg1); background: transparent;
padding: 0 0.375rem; padding: 0 0.375rem;
z-index: 2; z-index: 2;
font-weight: 500; font-weight: 500;
@@ -202,30 +202,32 @@ body {
.form-group input:focus ~ label, .form-group input:focus ~ label,
.form-group input:not(:placeholder-shown) ~ label { .form-group input:not(:placeholder-shown) ~ label {
top: -0.625rem; top: 0;
left: 0.875rem; left: 0.875rem;
font-size: 0.7rem; font-size: 0.75rem;
color: var(--orange); color: var(--orange);
font-weight: 600; font-weight: 600;
transform: translateY(0); transform: translateY(-50%);
background: linear-gradient(180deg, var(--bg1) 50%, var(--bg0) 50%);
} }
.form-group input { .form-group input {
padding: 1rem 1.25rem; padding: 1rem 1.25rem;
background: var(--bg0); background: var(--bg0);
border: 2px solid var(--bg3); border: 2px solid var(--bg3);
border-radius: 12px; border-radius: var(--border-radius);
font-family: inherit;
font-size: 1rem; font-size: 1rem;
color: var(--fg1); color: var(--fg1);
transition: all 0.3s; transition: all 0.2s;
width: 100%; width: 100%;
box-sizing: border-box; box-sizing: border-box;
} }
.form-group input:focus { .form-group input:focus {
border-color: var(--orange); border-color: var(--orange);
box-shadow: 0 0 0 4px rgba(214, 93, 14, 0.2);
outline: none; outline: none;
box-shadow: 4px 4px 0px rgba(214, 93, 14, 0.4);
} }
.login-btn { .login-btn {
@@ -732,11 +734,11 @@ body {
.stat-change.positive { color: var(--green-light); } .stat-change.positive { color: var(--green-light); }
.stat-change.negative { color: var(--red-light); } .stat-change.negative { color: var(--red-light); }
/* Generic Cards */ /* Cards */
.card { .card {
background: var(--bg1); background: var(--bg1);
border-radius: var(--border-radius); border-radius: var(--border-radius);
border: 1px solid var(--bg2); border: 1px solid var(--bg3);
box-shadow: var(--shadow-sm); box-shadow: var(--shadow-sm);
margin-bottom: 1.5rem; margin-bottom: 1.5rem;
display: flex; display: flex;
@@ -749,6 +751,15 @@ body {
display: flex; display: flex;
justify-content: space-between; justify-content: space-between;
align-items: center; align-items: center;
flex-wrap: wrap;
gap: 1rem;
}
.card-actions {
display: flex;
align-items: center;
gap: 0.5rem;
flex-wrap: wrap;
} }
.card-title { .card-title {
@@ -817,25 +828,26 @@ body {
/* Badges */ /* Badges */
.status-badge { .status-badge {
padding: 0.25rem 0.75rem; padding: 0.25rem 0.75rem;
border-radius: 9999px; border-radius: var(--border-radius);
font-size: 0.7rem; font-size: 0.7rem;
font-weight: 700; font-weight: 700;
text-transform: uppercase; text-transform: uppercase;
display: inline-flex; display: inline-flex;
align-items: center; align-items: center;
gap: 0.375rem; gap: 0.375rem;
border: 1px solid transparent;
} }
.status-badge.online, .status-badge.success { background: rgba(184, 187, 38, 0.2); color: var(--green-light); } .status-badge.online, .status-badge.success { background: rgba(184, 187, 38, 0.2); color: var(--green-light); border-color: rgba(184, 187, 38, 0.4); }
.status-badge.offline, .status-badge.danger { background: rgba(251, 73, 52, 0.2); color: var(--red-light); } .status-badge.offline, .status-badge.danger { background: rgba(251, 73, 52, 0.2); color: var(--red-light); border-color: rgba(251, 73, 52, 0.4); }
.status-badge.warning { background: rgba(250, 189, 47, 0.2); color: var(--yellow-light); } .status-badge.warning { background: rgba(250, 189, 47, 0.2); color: var(--yellow-light); border-color: rgba(250, 189, 47, 0.4); }
.badge-client { .badge-client {
background: var(--bg2); background: var(--bg2);
color: var(--blue-light); color: var(--blue-light);
padding: 2px 8px; padding: 2px 8px;
border-radius: 6px; border-radius: var(--border-radius);
font-family: monospace; font-family: inherit;
font-size: 0.85rem; font-size: 0.85rem;
border: 1px solid var(--bg3); border: 1px solid var(--bg3);
} }
@@ -889,7 +901,7 @@ body {
width: 100%; width: 100%;
background: var(--bg0); background: var(--bg0);
border: 1px solid var(--bg3); border: 1px solid var(--bg3);
border-radius: 8px; border-radius: var(--border-radius);
padding: 0.75rem; padding: 0.75rem;
font-family: inherit; font-family: inherit;
font-size: 0.875rem; font-size: 0.875rem;
@@ -900,7 +912,7 @@ body {
.form-control input:focus, .form-control textarea:focus, .form-control select:focus { .form-control input:focus, .form-control textarea:focus, .form-control select:focus {
outline: none; outline: none;
border-color: var(--orange); border-color: var(--orange);
box-shadow: 0 0 0 2px rgba(214, 93, 14, 0.2); box-shadow: 2px 2px 0px rgba(214, 93, 14, 0.4);
} }
.btn { .btn {
@@ -908,21 +920,27 @@ body {
align-items: center; align-items: center;
gap: 0.5rem; gap: 0.5rem;
padding: 0.625rem 1.25rem; padding: 0.625rem 1.25rem;
border-radius: 8px; border-radius: var(--border-radius);
font-weight: 600; font-weight: 600;
font-size: 0.875rem; font-size: 0.875rem;
cursor: pointer; cursor: pointer;
transition: all 0.2s; transition: all 0.1s;
border: 1px solid transparent; border: 1px solid transparent;
text-transform: uppercase;
} }
.btn-primary { background: var(--orange); color: var(--bg0); } .btn:active {
transform: translate(2px, 2px);
box-shadow: none !important;
}
.btn-primary { background: var(--orange); color: var(--bg0); box-shadow: 2px 2px 0px var(--bg4); }
.btn-primary:hover { background: var(--orange-light); } .btn-primary:hover { background: var(--orange-light); }
.btn-secondary { background: var(--bg2); border-color: var(--bg3); color: var(--fg1); } .btn-secondary { background: var(--bg2); border-color: var(--bg3); color: var(--fg1); box-shadow: 2px 2px 0px var(--bg0); }
.btn-secondary:hover { background: var(--bg3); color: var(--fg0); } .btn-secondary:hover { background: var(--bg3); color: var(--fg0); }
.btn-danger { background: var(--red); color: var(--fg0); } .btn-danger { background: var(--red); color: var(--fg0); box-shadow: 2px 2px 0px var(--bg4); }
.btn-danger:hover { background: var(--red-light); } .btn-danger:hover { background: var(--red-light); }
/* Small inline action buttons (edit, delete, copy) */ /* Small inline action buttons (edit, delete, copy) */
@@ -981,13 +999,13 @@ body {
.modal-content { .modal-content {
background: var(--bg1); background: var(--bg1);
border-radius: 16px; border-radius: var(--border-radius);
width: 90%; width: 90%;
max-width: 500px; max-width: 500px;
box-shadow: var(--shadow-lg); box-shadow: var(--shadow-lg);
border: 1px solid var(--bg3); border: 2px solid var(--bg3);
transform: translateY(20px); transform: translateY(20px);
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transition: all 0.2s;
} }
.modal.active .modal-content { .modal.active .modal-content {

View File

@@ -4,11 +4,11 @@
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>LLM Proxy Gateway - Admin Dashboard</title> <title>LLM Proxy Gateway - Admin Dashboard</title>
<link rel="stylesheet" href="/css/dashboard.css?v=7"> <link rel="stylesheet" href="/css/dashboard.css?v=11">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css"> <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<link rel="icon" href="img/logo-icon.png" type="image/png" sizes="any"> <link rel="icon" href="img/logo-icon.png" type="image/png" sizes="any">
<link rel="apple-touch-icon" href="img/logo-icon.png"> <link rel="apple-touch-icon" href="img/logo-icon.png">
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet"> <link href="https://fonts.googleapis.com/css2?family=Fira+Code:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;700&display=swap" rel="stylesheet">
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script> <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script src="https://cdn.jsdelivr.net/npm/luxon@3.4.4/build/global/luxon.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/luxon@3.4.4/build/global/luxon.min.js"></script>
</head> </head>
@@ -17,8 +17,7 @@
<div id="login-screen" class="login-container"> <div id="login-screen" class="login-container">
<div class="login-card"> <div class="login-card">
<div class="login-header"> <div class="login-header">
<img src="img/logo-full.png" alt="LLM Proxy Logo" class="login-logo" onerror="this.style.display='none'; this.nextElementSibling.style.display='block';"> <i class="fas fa-terminal login-logo-fallback"></i>
<i class="fas fa-robot login-logo-fallback" style="display: none;"></i>
<h1>LLM Proxy Gateway</h1> <h1>LLM Proxy Gateway</h1>
<p class="login-subtitle">Admin Dashboard</p> <p class="login-subtitle">Admin Dashboard</p>
</div> </div>

View File

@@ -284,7 +284,7 @@ class Dashboard {
<h3 class="card-title">Model Registry</h3> <h3 class="card-title">Model Registry</h3>
<p class="card-subtitle">Manage model availability and custom pricing</p> <p class="card-subtitle">Manage model availability and custom pricing</p>
</div> </div>
<div class="card-actions" style="display: flex; gap: 0.5rem; align-items: center; flex-wrap: wrap;"> <div class="card-actions">
<select id="model-provider-filter" class="form-control" style="margin-bottom: 0; padding: 4px 8px; width: auto;"> <select id="model-provider-filter" class="form-control" style="margin-bottom: 0; padding: 4px 8px; width: auto;">
<option value="">All Providers</option> <option value="">All Providers</option>
<option value="openai">OpenAI</option> <option value="openai">OpenAI</option>

View File

@@ -38,6 +38,24 @@ class LogsPage {
const statusClass = log.status === 'success' ? 'success' : 'danger'; const statusClass = log.status === 'success' ? 'success' : 'danger';
const timestamp = luxon.DateTime.fromISO(log.timestamp).toFormat('yyyy-MM-dd HH:mm:ss'); const timestamp = luxon.DateTime.fromISO(log.timestamp).toFormat('yyyy-MM-dd HH:mm:ss');
let tokenDetails = `${log.tokens} total tokens`;
if (log.status === 'success') {
const parts = [];
parts.push(`${log.prompt_tokens} in`);
let completionStr = `${log.completion_tokens} out`;
if (log.reasoning_tokens > 0) {
completionStr += ` (${log.reasoning_tokens} reasoning)`;
}
parts.push(completionStr);
if (log.cache_read_tokens > 0) {
parts.push(`${log.cache_read_tokens} cache-hit`);
}
tokenDetails = parts.join(', ');
}
return ` return `
<tr class="log-row"> <tr class="log-row">
<td class="whitespace-nowrap">${timestamp}</td> <td class="whitespace-nowrap">${timestamp}</td>
@@ -55,7 +73,7 @@ class LogsPage {
<td> <td>
<div class="log-message-container"> <div class="log-message-container">
<code class="log-model">${log.model}</code> <code class="log-model">${log.model}</code>
<span class="log-tokens">${log.tokens} tokens</span> <span class="log-tokens" title="${log.tokens} total tokens">${tokenDetails}</span>
<span class="log-duration">${log.duration}ms</span> <span class="log-duration">${log.duration}ms</span>
${log.error ? `<div class="log-error-msg">${log.error}</div>` : ''} ${log.error ? `<div class="log-error-msg">${log.error}</div>` : ''}
</div> </div>