Compare commits

...

13 Commits

Author SHA1 Message Date
649371154f fix(openai): implement parsing for real-time Responses API streaming format
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-17 19:00:40 +00:00
78fff61660 fix(openai): map system to developer role and enhance stream diagnostics for Responses API
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-17 18:50:55 +00:00
b131094dfd fix(openai): improve Responses API streaming reliability and diagnostics
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-17 18:45:09 +00:00
c3d81c1733 fix(openai): remove unsupported stream_options from Responses API
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-17 18:40:06 +00:00
e123f542f1 fix(openai): use max_output_tokens for Responses API
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-17 18:36:05 +00:00
0d28241e39 fix(openai): enhance Responses API integration with full parameters and improved parsing
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-17 18:31:23 +00:00
754ee9cb84 fix(openai): implement role-based content mapping for Responses API
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-17 18:23:50 +00:00
5a9086b883 fix(openai): map content types for Responses API (v1/responses)
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-17 18:18:23 +00:00
cc5eba1957 feat: implement reasoning_tokens tracking and enhanced usage logging
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-11 17:14:49 +00:00
3ab00fb188 fixed tracking
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-11 16:21:32 +00:00
c2595f7a74 style(dashboard): implement retro terminal gruvbox aesthetic
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
This commit overhauls the dashboard styling to achieve a 'retro terminal' look matching the Gruvbox color palette. Changes include switching the global font to JetBrains Mono/Fira Code, removing rounded corners on all major elements (cards, buttons, inputs, modals, badges), and replacing modern soft shadows with sharp, hard-offset block shadows.
2026-03-07 01:42:01 +00:00
0526304398 style(dashboard): fix bunched buttons in card headers
This commit adds a proper flexbox layout to the '.card-actions' CSS class. Previously, action buttons (like Export and Refresh on the Analytics page) were bunching up because they lacked a flex container with appropriate gap and wrapping rules. It also updates the '.card-header' to wrap gracefully on smaller screens.
2026-03-07 01:37:27 +00:00
75e2967727 style(dashboard): fix login form floating labels rendering
This commit corrects the CSS for the login form's floating labels. Previously, the label floated too high and had a solid background that contrasted poorly with the input box. The label now floats exactly on the border line and uses a linear-gradient to seamlessly blend the card background into the input background, fixing the 'misframed' appearance.
2026-03-07 01:33:45 +00:00
16 changed files with 364 additions and 204 deletions

Binary file not shown.

View File

@@ -255,7 +255,10 @@ pub(super) async fn handle_system_logs(
model,
prompt_tokens,
completion_tokens,
reasoning_tokens,
total_tokens,
cache_read_tokens,
cache_write_tokens,
cost,
status,
error_message,
@@ -279,6 +282,11 @@ pub(super) async fn handle_system_logs(
"client_id": row.get::<String, _>("client_id"),
"provider": row.get::<String, _>("provider"),
"model": row.get::<String, _>("model"),
"prompt_tokens": row.get::<i64, _>("prompt_tokens"),
"completion_tokens": row.get::<i64, _>("completion_tokens"),
"reasoning_tokens": row.get::<i64, _>("reasoning_tokens"),
"cache_read_tokens": row.get::<i64, _>("cache_read_tokens"),
"cache_write_tokens": row.get::<i64, _>("cache_write_tokens"),
"tokens": row.get::<i64, _>("total_tokens"),
"cost": row.get::<f64, _>("cost"),
"status": row.get::<String, _>("status"),

View File

@@ -64,6 +64,7 @@ pub async fn run_migrations(pool: &DbPool) -> Result<()> {
model TEXT,
prompt_tokens INTEGER,
completion_tokens INTEGER,
reasoning_tokens INTEGER DEFAULT 0,
total_tokens INTEGER,
cost REAL,
has_images BOOLEAN DEFAULT FALSE,
@@ -172,6 +173,9 @@ pub async fn run_migrations(pool: &DbPool) -> Result<()> {
let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN cache_write_tokens INTEGER DEFAULT 0")
.execute(pool)
.await;
let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN reasoning_tokens INTEGER DEFAULT 0")
.execute(pool)
.await;
// Add billing_mode column if it doesn't exist (migration for existing DBs)
let _ = sqlx::query("ALTER TABLE provider_configs ADD COLUMN billing_mode TEXT")

View File

@@ -15,6 +15,7 @@ pub struct RequestLog {
pub model: String,
pub prompt_tokens: u32,
pub completion_tokens: u32,
pub reasoning_tokens: u32,
pub total_tokens: u32,
pub cache_read_tokens: u32,
pub cache_write_tokens: u32,
@@ -77,8 +78,8 @@ impl RequestLogger {
sqlx::query(
r#"
INSERT INTO llm_requests
(timestamp, client_id, provider, model, prompt_tokens, completion_tokens, total_tokens, cache_read_tokens, cache_write_tokens, cost, has_images, status, error_message, duration_ms, request_body, response_body)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
(timestamp, client_id, provider, model, prompt_tokens, completion_tokens, reasoning_tokens, total_tokens, cache_read_tokens, cache_write_tokens, cost, has_images, status, error_message, duration_ms, request_body, response_body)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"#,
)
.bind(log.timestamp)
@@ -87,6 +88,7 @@ impl RequestLogger {
.bind(&log.model)
.bind(log.prompt_tokens as i64)
.bind(log.completion_tokens as i64)
.bind(log.reasoning_tokens as i64)
.bind(log.total_tokens as i64)
.bind(log.cache_read_tokens as i64)
.bind(log.cache_write_tokens as i64)

View File

@@ -165,6 +165,8 @@ pub struct Usage {
pub completion_tokens: u32,
pub total_tokens: u32,
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_read_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_write_tokens: Option<u32>,
@@ -179,6 +181,8 @@ pub struct ChatCompletionStreamResponse {
pub created: u64,
pub model: String,
pub choices: Vec<ChatStreamChoice>,
#[serde(skip_serializing_if = "Option::is_none")]
pub usage: Option<Usage>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]

View File

@@ -128,7 +128,9 @@ impl super::Provider for DeepSeekProvider {
cache_write_tokens: u32,
registry: &crate::models::registry::ModelRegistry,
) -> f64 {
helpers::calculate_cost_with_registry(
if let Some(metadata) = registry.find_model(model) {
if metadata.cost.is_some() {
return helpers::calculate_cost_with_registry(
model,
prompt_tokens,
completion_tokens,
@@ -136,9 +138,26 @@ impl super::Provider for DeepSeekProvider {
cache_write_tokens,
registry,
&self.pricing,
0.14,
0.28,
)
0.42,
);
}
}
// Custom DeepSeek fallback that correctly handles cache hits
let (prompt_rate, completion_rate) = self
.pricing
.iter()
.find(|p| model.contains(&p.model))
.map(|p| (p.prompt_tokens_per_million, p.completion_tokens_per_million))
.unwrap_or((0.28, 0.42)); // Default to DeepSeek's current API pricing
let cache_hit_rate = prompt_rate / 10.0;
let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
(non_cached_prompt as f64 * prompt_rate / 1_000_000.0)
+ (cache_read_tokens as f64 * cache_hit_rate / 1_000_000.0)
+ (completion_tokens as f64 * completion_rate / 1_000_000.0)
}
async fn chat_completion_stream(

View File

@@ -722,6 +722,10 @@ impl super::Provider for GeminiProvider {
let reasoning_content = candidate
.and_then(|c| c.content.parts.iter().find_map(|p| p.thought.clone()));
let reasoning_tokens = reasoning_content.as_ref()
.map(|r| crate::utils::tokens::estimate_completion_tokens(r, &model))
.unwrap_or(0);
// Extract function calls → OpenAI tool_calls
let tool_calls = candidate.and_then(|c| Self::extract_tool_calls(&c.content.parts));
@@ -752,6 +756,7 @@ impl super::Provider for GeminiProvider {
tool_calls,
prompt_tokens,
completion_tokens,
reasoning_tokens,
total_tokens,
cache_read_tokens,
cache_write_tokens: 0, // Gemini doesn't report cache writes separately
@@ -772,7 +777,9 @@ impl super::Provider for GeminiProvider {
cache_write_tokens: u32,
registry: &crate::models::registry::ModelRegistry,
) -> f64 {
super::helpers::calculate_cost_with_registry(
if let Some(metadata) = registry.find_model(model) {
if metadata.cost.is_some() {
return super::helpers::calculate_cost_with_registry(
model,
prompt_tokens,
completion_tokens,
@@ -782,7 +789,24 @@ impl super::Provider for GeminiProvider {
&self.pricing,
0.075,
0.30,
)
);
}
}
// Custom Gemini fallback that correctly handles cache hits (25% of input cost)
let (prompt_rate, completion_rate) = self
.pricing
.iter()
.find(|p| model.contains(&p.model))
.map(|p| (p.prompt_tokens_per_million, p.completion_tokens_per_million))
.unwrap_or((0.075, 0.30)); // Default to Gemini 1.5 Flash current API pricing
let cache_hit_rate = prompt_rate * 0.25;
let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
(non_cached_prompt as f64 * prompt_rate / 1_000_000.0)
+ (cache_read_tokens as f64 * cache_hit_rate / 1_000_000.0)
+ (completion_tokens as f64 * completion_rate / 1_000_000.0)
}
async fn chat_completion_stream(
@@ -883,6 +907,7 @@ impl super::Provider for GeminiProvider {
super::StreamUsage {
prompt_tokens: u.prompt_token_count,
completion_tokens: u.candidates_token_count,
reasoning_tokens: 0,
total_tokens: u.total_token_count,
cache_read_tokens: u.cached_content_token_count,
cache_write_tokens: 0,

View File

@@ -254,6 +254,11 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
let completion_tokens = usage["completion_tokens"].as_u64().unwrap_or(0) as u32;
let total_tokens = usage["total_tokens"].as_u64().unwrap_or(0) as u32;
// Extract reasoning tokens
let reasoning_tokens = usage["completion_tokens_details"]["reasoning_tokens"]
.as_u64()
.unwrap_or(0) as u32;
// Extract cache tokens — try OpenAI/Grok format first, then DeepSeek format
let cache_read_tokens = usage["prompt_tokens_details"]["cached_tokens"]
.as_u64()
@@ -261,9 +266,9 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
.or_else(|| usage["prompt_cache_hit_tokens"].as_u64())
.unwrap_or(0) as u32;
// DeepSeek reports cache_write as prompt_cache_miss_tokens (tokens written to cache for future use).
// OpenAI doesn't report cache_write in this location, but may in the future.
let cache_write_tokens = usage["prompt_cache_miss_tokens"].as_u64().unwrap_or(0) as u32;
// DeepSeek reports prompt_cache_miss_tokens which are just regular non-cached tokens.
// They do not incur a separate cache_write fee, so we don't map them here to avoid double-charging.
let cache_write_tokens = 0;
Ok(ProviderResponse {
content,
@@ -271,6 +276,7 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
tool_calls,
prompt_tokens,
completion_tokens,
reasoning_tokens,
total_tokens,
cache_read_tokens,
cache_write_tokens,
@@ -295,18 +301,21 @@ pub fn parse_openai_stream_chunk(
let completion_tokens = u["completion_tokens"].as_u64().unwrap_or(0) as u32;
let total_tokens = u["total_tokens"].as_u64().unwrap_or(0) as u32;
let reasoning_tokens = u["completion_tokens_details"]["reasoning_tokens"]
.as_u64()
.unwrap_or(0) as u32;
let cache_read_tokens = u["prompt_tokens_details"]["cached_tokens"]
.as_u64()
.or_else(|| u["prompt_cache_hit_tokens"].as_u64())
.unwrap_or(0) as u32;
let cache_write_tokens = u["prompt_cache_miss_tokens"]
.as_u64()
.unwrap_or(0) as u32;
let cache_write_tokens = 0;
Some(StreamUsage {
prompt_tokens,
completion_tokens,
reasoning_tokens,
total_tokens,
cache_read_tokens,
cache_write_tokens,

View File

@@ -75,6 +75,7 @@ pub struct ProviderResponse {
pub tool_calls: Option<Vec<crate::models::ToolCall>>,
pub prompt_tokens: u32,
pub completion_tokens: u32,
pub reasoning_tokens: u32,
pub total_tokens: u32,
pub cache_read_tokens: u32,
pub cache_write_tokens: u32,
@@ -86,6 +87,7 @@ pub struct ProviderResponse {
pub struct StreamUsage {
pub prompt_tokens: u32,
pub completion_tokens: u32,
pub reasoning_tokens: u32,
pub total_tokens: u32,
pub cache_read_tokens: u32,
pub cache_write_tokens: u32,

View File

@@ -26,7 +26,7 @@ impl OpenAIProvider {
.timeout(std::time::Duration::from_secs(300))
.pool_idle_timeout(std::time::Duration::from_secs(90))
.pool_max_idle_per_host(4)
.tcp_keepalive(std::time::Duration::from_secs(30))
.tcp_keepalive(std::time::Duration::from_secs(15))
.build()?;
Ok(Self {
@@ -92,96 +92,7 @@ impl super::Provider for OpenAIProvider {
// Read error body to diagnose. If the model requires the Responses
// API (v1/responses), retry against that endpoint.
if error_text.to_lowercase().contains("v1/responses") || error_text.to_lowercase().contains("only supported in v1/responses") {
// Build a simple `input` string by concatenating message parts.
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
let mut inputs: Vec<String> = Vec::new();
for m in &messages_json {
let role = m["role"].as_str().unwrap_or("");
let parts = m.get("content").and_then(|c| c.as_array()).cloned().unwrap_or_default();
let mut text_parts = Vec::new();
for p in parts {
if let Some(t) = p.get("text").and_then(|v| v.as_str()) {
text_parts.push(t.to_string());
}
}
inputs.push(format!("{}: {}", role, text_parts.join("")));
}
let input_text = inputs.join("\n");
let resp = self
.client
.post(format!("{}/responses", self.config.base_url))
.header("Authorization", format!("Bearer {}", self.api_key))
.json(&serde_json::json!({ "model": request.model, "input": input_text }))
.send()
.await
.map_err(|e| AppError::ProviderError(e.to_string()))?;
if !resp.status().is_success() {
let err = resp.text().await.unwrap_or_default();
return Err(AppError::ProviderError(format!("OpenAI Responses API error: {}", err)));
}
let resp_json: serde_json::Value = resp.json().await.map_err(|e| AppError::ProviderError(e.to_string()))?;
// Try to normalize: if it's chat-style, use existing parser
if resp_json.get("choices").is_some() {
return helpers::parse_openai_response(&resp_json, request.model);
}
// Responses API: try to extract text from `output` or `candidates`
let mut content_text = String::new();
if let Some(output) = resp_json.get("output").and_then(|o| o.as_array()) {
if let Some(first) = output.get(0) {
if let Some(contents) = first.get("content").and_then(|c| c.as_array()) {
for item in contents {
if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
if !content_text.is_empty() { content_text.push_str("\n"); }
content_text.push_str(text);
} else if let Some(parts) = item.get("parts").and_then(|p| p.as_array()) {
for p in parts {
if let Some(t) = p.as_str() {
if !content_text.is_empty() { content_text.push_str("\n"); }
content_text.push_str(t);
}
}
}
}
}
}
}
if content_text.is_empty() {
if let Some(cands) = resp_json.get("candidates").and_then(|c| c.as_array()) {
if let Some(c0) = cands.get(0) {
if let Some(content) = c0.get("content") {
if let Some(parts) = content.get("parts").and_then(|p| p.as_array()) {
for p in parts {
if let Some(t) = p.get("text").and_then(|v| v.as_str()) {
if !content_text.is_empty() { content_text.push_str("\n"); }
content_text.push_str(t);
}
}
}
}
}
}
}
let prompt_tokens = resp_json.get("usage").and_then(|u| u.get("prompt_tokens")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
let completion_tokens = resp_json.get("usage").and_then(|u| u.get("completion_tokens")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
let total_tokens = resp_json.get("usage").and_then(|u| u.get("total_tokens")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
return Ok(ProviderResponse {
content: content_text,
reasoning_content: None,
tool_calls: None,
prompt_tokens,
completion_tokens,
total_tokens,
cache_read_tokens: 0,
cache_write_tokens: 0,
model: request.model,
});
return self.chat_responses(request).await;
}
tracing::error!("OpenAI API error ({}): {}", status, error_text);
@@ -201,8 +112,41 @@ impl super::Provider for OpenAIProvider {
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
let mut input_parts = Vec::new();
for m in &messages_json {
let role = m["role"].as_str().unwrap_or("user");
let content = m.get("content").cloned().unwrap_or(serde_json::json!(""));
let mut role = m["role"].as_str().unwrap_or("user").to_string();
// Newer models (gpt-5, o1) prefer "developer" over "system"
if role == "system" {
role = "developer".to_string();
}
let mut content = m.get("content").cloned().unwrap_or(serde_json::json!([]));
// Map content types based on role for Responses API
if let Some(content_array) = content.as_array_mut() {
for part in content_array {
if let Some(part_obj) = part.as_object_mut() {
if let Some(t) = part_obj.get("type").and_then(|v| v.as_str()) {
match t {
"text" => {
let new_type = if role == "assistant" { "output_text" } else { "input_text" };
part_obj.insert("type".to_string(), serde_json::json!(new_type));
}
"image_url" => {
// Assistant typically doesn't have image_url in history this way, but for safety:
let new_type = if role == "assistant" { "output_image" } else { "input_image" };
part_obj.insert("type".to_string(), serde_json::json!(new_type));
if let Some(img_url) = part_obj.remove("image_url") {
part_obj.insert("image".to_string(), img_url);
}
}
_ => {}
}
}
}
}
} else if let Some(text) = content.as_str() {
let new_type = if role == "assistant" { "output_text" } else { "input_text" };
content = serde_json::json!([{ "type": new_type, "text": text }]);
}
input_parts.push(serde_json::json!({
"role": role,
@@ -210,11 +154,34 @@ impl super::Provider for OpenAIProvider {
}));
}
let mut body = serde_json::json!({
"model": request.model,
"input": input_parts,
});
// Add standard parameters
if let Some(temp) = request.temperature {
body["temperature"] = serde_json::json!(temp);
}
// Newer models (gpt-5, o1) in Responses API use max_output_tokens
if let Some(max_tokens) = request.max_tokens {
if request.model.contains("gpt-5") || request.model.starts_with("o1-") || request.model.starts_with("o3-") {
body["max_output_tokens"] = serde_json::json!(max_tokens);
} else {
body["max_tokens"] = serde_json::json!(max_tokens);
}
}
if let Some(tools) = &request.tools {
body["tools"] = serde_json::json!(tools);
}
let resp = self
.client
.post(format!("{}/responses", self.config.base_url))
.header("Authorization", format!("Bearer {}", self.api_key))
.json(&serde_json::json!({ "model": request.model, "input": input_parts }))
.json(&body)
.send()
.await
.map_err(|e| AppError::ProviderError(e.to_string()))?;
@@ -226,11 +193,16 @@ impl super::Provider for OpenAIProvider {
let resp_json: serde_json::Value = resp.json().await.map_err(|e| AppError::ProviderError(e.to_string()))?;
// Try to normalize: if it's chat-style, use existing parser
if resp_json.get("choices").is_some() {
return helpers::parse_openai_response(&resp_json, request.model);
}
// Normalize Responses API output into ProviderResponse
let mut content_text = String::new();
if let Some(output) = resp_json.get("output").and_then(|o| o.as_array()) {
if let Some(first) = output.get(0) {
if let Some(contents) = first.get("content").and_then(|c| c.as_array()) {
for out in output {
if let Some(contents) = out.get("content").and_then(|c| c.as_array()) {
for item in contents {
if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
if !content_text.is_empty() { content_text.push_str("\n"); }
@@ -275,6 +247,7 @@ impl super::Provider for OpenAIProvider {
tool_calls: None,
prompt_tokens,
completion_tokens,
reasoning_tokens: 0,
total_tokens,
cache_read_tokens: 0,
cache_write_tokens: 0,
@@ -312,6 +285,12 @@ impl super::Provider for OpenAIProvider {
&self,
request: UnifiedRequest,
) -> Result<BoxStream<'static, Result<ProviderStreamChunk, AppError>>, AppError> {
// Allow proactive routing to Responses API based on heuristic
let model_lc = request.model.to_lowercase();
if model_lc.contains("gpt-5") || model_lc.contains("codex") {
return self.chat_responses_stream(request).await;
}
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
let mut body = helpers::build_openai_body(&request, messages_json, true);
@@ -400,8 +379,41 @@ impl super::Provider for OpenAIProvider {
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
let mut input_parts = Vec::new();
for m in &messages_json {
let role = m["role"].as_str().unwrap_or("user");
let content = m.get("content").cloned().unwrap_or(serde_json::json!(""));
let mut role = m["role"].as_str().unwrap_or("user").to_string();
// Newer models (gpt-5, o1) prefer "developer" over "system"
if role == "system" {
role = "developer".to_string();
}
let mut content = m.get("content").cloned().unwrap_or(serde_json::json!([]));
// Map content types based on role for Responses API
if let Some(content_array) = content.as_array_mut() {
for part in content_array {
if let Some(part_obj) = part.as_object_mut() {
if let Some(t) = part_obj.get("type").and_then(|v| v.as_str()) {
match t {
"text" => {
let new_type = if role == "assistant" { "output_text" } else { "input_text" };
part_obj.insert("type".to_string(), serde_json::json!(new_type));
}
"image_url" => {
// Assistant typically doesn't have image_url in history this way, but for safety:
let new_type = if role == "assistant" { "output_image" } else { "input_image" };
part_obj.insert("type".to_string(), serde_json::json!(new_type));
if let Some(img_url) = part_obj.remove("image_url") {
part_obj.insert("image".to_string(), img_url);
}
}
_ => {}
}
}
}
}
} else if let Some(text) = content.as_str() {
let new_type = if role == "assistant" { "output_text" } else { "input_text" };
content = serde_json::json!([{ "type": new_type, "text": text }]);
}
input_parts.push(serde_json::json!({
"role": role,
@@ -409,12 +421,26 @@ impl super::Provider for OpenAIProvider {
}));
}
let body = serde_json::json!({
let mut body = serde_json::json!({
"model": request.model,
"input": input_parts,
"stream": true
"stream": true,
});
// Add standard parameters
if let Some(temp) = request.temperature {
body["temperature"] = serde_json::json!(temp);
}
// Newer models (gpt-5, o1) in Responses API use max_output_tokens
if let Some(max_tokens) = request.max_tokens {
if request.model.contains("gpt-5") || request.model.starts_with("o1-") || request.model.starts_with("o3-") {
body["max_output_tokens"] = serde_json::json!(max_tokens);
} else {
body["max_tokens"] = serde_json::json!(max_tokens);
}
}
let url = format!("{}/responses", self.config.base_url);
let api_key = self.api_key.clone();
let model = request.model.clone();
@@ -425,6 +451,7 @@ impl super::Provider for OpenAIProvider {
self.client
.post(&url)
.header("Authorization", format!("Bearer {}", api_key))
.header("Accept", "text/event-stream")
.json(&body),
)
.map_err(|e| AppError::ProviderError(format!("Failed to create EventSource for Responses API: {}", e)))?;
@@ -441,36 +468,44 @@ impl super::Provider for OpenAIProvider {
let chunk: serde_json::Value = serde_json::from_str(&msg.data)
.map_err(|e| AppError::ProviderError(format!("Failed to parse Responses stream chunk: {}", e)))?;
// Try standard OpenAI parsing first
// Try standard OpenAI parsing first (choices/usage)
if let Some(p_chunk) = helpers::parse_openai_stream_chunk(&chunk, &model, None) {
yield p_chunk?;
} else {
// Responses API specific parsing for streaming
// Often it follows a similar structure to the non-streaming response but in chunks
let mut content = String::new();
let mut finish_reason = None;
// Check for output[0].content[0].text (similar to non-stream)
let event_type = chunk.get("type").and_then(|v| v.as_str()).unwrap_or("");
match event_type {
"response.output_text.delta" => {
if let Some(delta) = chunk.get("delta").and_then(|v| v.as_str()) {
content.push_str(delta);
}
}
"response.output_text.done" => {
if let Some(text) = chunk.get("text").and_then(|v| v.as_str()) {
// Some implementations send the full text at the end
// We usually prefer deltas, but if we haven't seen them, this is the fallback.
// However, if we're already yielding deltas, we might not want this.
// For now, let's just use it as a signal that we're done.
finish_reason = Some("stop".to_string());
}
}
"response.done" => {
finish_reason = Some("stop".to_string());
}
_ => {
// Fallback to older nested structure if present
if let Some(output) = chunk.get("output").and_then(|o| o.as_array()) {
if let Some(first) = output.get(0) {
if let Some(contents) = first.get("content").and_then(|c| c.as_array()) {
for out in output {
if let Some(contents) = out.get("content").and_then(|c| c.as_array()) {
for item in contents {
if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
content.push_str(text);
}
}
}
}
}
// Check for candidates[0].content.parts[0].text (Gemini-like, which OpenAI sometimes uses for v1/responses)
if content.is_empty() {
if let Some(cands) = chunk.get("candidates").and_then(|c| c.as_array()) {
if let Some(c0) = cands.get(0) {
if let Some(content_obj) = c0.get("content") {
if let Some(parts) = content_obj.get("parts").and_then(|p| p.as_array()) {
for p in parts {
if let Some(t) = p.get("text").and_then(|v| v.as_str()) {
content.push_str(t);
} else if let Some(delta) = item.get("delta").and_then(|d| d.get("text")).and_then(|t| t.as_str()) {
content.push_str(delta);
}
}
}
@@ -479,11 +514,11 @@ impl super::Provider for OpenAIProvider {
}
}
if !content.is_empty() {
if !content.is_empty() || finish_reason.is_some() {
yield ProviderStreamChunk {
content,
reasoning_content: None,
finish_reason: None,
finish_reason,
tool_calls: None,
model: model.clone(),
usage: None,
@@ -497,19 +532,22 @@ impl super::Provider for OpenAIProvider {
let probe_resp = probe_client
.post(&url)
.header("Authorization", format!("Bearer {}", api_key))
.header("Accept", "application/json") // Ask for JSON during probe
.json(&probe_body)
.send()
.await;
match probe_resp {
Ok(r) if !r.status().is_success() => {
Ok(r) => {
let status = r.status();
let error_body = r.text().await.unwrap_or_default();
tracing::error!("OpenAI Responses Stream Error Probe ({}): {}", status, error_body);
Err(AppError::ProviderError(format!("OpenAI Responses API error ({}): {}", status, error_body)))?;
let body = r.text().await.unwrap_or_default();
if status.is_success() {
tracing::warn!("Responses stream ended prematurely but probe returned 200 OK. Body: {}", body);
Err(AppError::ProviderError(format!("Responses stream ended (server sent 200 OK with body: {})", body)))?;
} else {
tracing::error!("OpenAI Responses Stream Error Probe ({}): {}", status, body);
Err(AppError::ProviderError(format!("OpenAI Responses API error ({}): {}", status, body)))?;
}
Ok(_) => {
Err(AppError::ProviderError(format!("Responses stream error (probe returned 200): {}", e)))?;
}
Err(probe_err) => {
tracing::error!("OpenAI Responses Stream Error Probe failed: {}", probe_err);

View File

@@ -312,6 +312,14 @@ async fn chat_completions(
},
finish_reason: chunk.finish_reason,
}],
usage: chunk.usage.as_ref().map(|u| crate::models::Usage {
prompt_tokens: u.prompt_tokens,
completion_tokens: u.completion_tokens,
total_tokens: u.total_tokens,
reasoning_tokens: if u.reasoning_tokens > 0 { Some(u.reasoning_tokens) } else { None },
cache_read_tokens: if u.cache_read_tokens > 0 { Some(u.cache_read_tokens) } else { None },
cache_write_tokens: if u.cache_write_tokens > 0 { Some(u.cache_write_tokens) } else { None },
}),
};
// Use axum's Event directly, wrap in Ok
@@ -383,6 +391,7 @@ async fn chat_completions(
model: response.model.clone(),
prompt_tokens: response.prompt_tokens,
completion_tokens: response.completion_tokens,
reasoning_tokens: response.reasoning_tokens,
total_tokens: response.total_tokens,
cache_read_tokens: response.cache_read_tokens,
cache_write_tokens: response.cache_write_tokens,
@@ -423,6 +432,7 @@ async fn chat_completions(
prompt_tokens: response.prompt_tokens,
completion_tokens: response.completion_tokens,
total_tokens: response.total_tokens,
reasoning_tokens: if response.reasoning_tokens > 0 { Some(response.reasoning_tokens) } else { None },
cache_read_tokens: if response.cache_read_tokens > 0 { Some(response.cache_read_tokens) } else { None },
cache_write_tokens: if response.cache_write_tokens > 0 { Some(response.cache_write_tokens) } else { None },
}),
@@ -452,6 +462,7 @@ async fn chat_completions(
model: model.clone(),
prompt_tokens: 0,
completion_tokens: 0,
reasoning_tokens: 0,
total_tokens: 0,
cache_read_tokens: 0,
cache_write_tokens: 0,

View File

@@ -96,11 +96,12 @@ where
// Spawn a background task to log the completion
tokio::spawn(async move {
// Use real usage from the provider when available, otherwise fall back to estimates
let (prompt_tokens, completion_tokens, total_tokens, cache_read_tokens, cache_write_tokens) =
let (prompt_tokens, completion_tokens, reasoning_tokens, total_tokens, cache_read_tokens, cache_write_tokens) =
if let Some(usage) = &real_usage {
(
usage.prompt_tokens,
usage.completion_tokens,
usage.reasoning_tokens,
usage.total_tokens,
usage.cache_read_tokens,
usage.cache_write_tokens,
@@ -109,6 +110,7 @@ where
(
estimated_prompt_tokens,
estimated_completion,
estimated_reasoning_tokens,
estimated_prompt_tokens + estimated_completion,
0u32,
0u32,
@@ -163,6 +165,7 @@ where
model,
prompt_tokens,
completion_tokens,
reasoning_tokens,
total_tokens,
cache_read_tokens,
cache_write_tokens,

View File

@@ -61,9 +61,9 @@
--text-white: var(--fg0);
/* Borders */
--border-color: var(--bg2);
--border-radius: 8px;
--border-radius-sm: 4px;
--border-color: var(--bg3);
--border-radius: 0px;
--border-radius-sm: 0px;
/* Spacing System */
--spacing-xs: 0.25rem;
@@ -72,15 +72,15 @@
--spacing-lg: 1.5rem;
--spacing-xl: 2rem;
/* Shadows */
--shadow-sm: 0 1px 2px 0 rgba(0, 0, 0, 0.2);
--shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.3);
--shadow-md: 0 10px 15px -3px rgba(0, 0, 0, 0.4);
--shadow-lg: 0 20px 25px -5px rgba(0, 0, 0, 0.5);
/* Shadows - Retro Block Style */
--shadow-sm: 2px 2px 0px rgba(0, 0, 0, 0.4);
--shadow: 4px 4px 0px rgba(0, 0, 0, 0.5);
--shadow-md: 6px 6px 0px rgba(0, 0, 0, 0.6);
--shadow-lg: 8px 8px 0px rgba(0, 0, 0, 0.7);
}
body {
font-family: 'Inter', -apple-system, sans-serif;
font-family: 'JetBrains Mono', 'Fira Code', 'Courier New', monospace;
background-color: var(--bg-primary);
color: var(--text-primary);
line-height: 1.6;
@@ -105,12 +105,12 @@ body {
.login-card {
background: var(--bg1);
border-radius: 24px;
border-radius: var(--border-radius);
padding: 4rem 2.5rem 3rem;
width: 100%;
max-width: 440px;
box-shadow: var(--shadow-lg);
border: 1px solid var(--bg2);
border: 2px solid var(--bg3);
text-align: center;
animation: slideUp 0.6s cubic-bezier(0.34, 1.56, 0.64, 1);
position: relative;
@@ -191,7 +191,7 @@ body {
color: var(--fg3);
pointer-events: none;
transition: all 0.25s ease;
background: var(--bg1);
background: transparent;
padding: 0 0.375rem;
z-index: 2;
font-weight: 500;
@@ -202,30 +202,32 @@ body {
.form-group input:focus ~ label,
.form-group input:not(:placeholder-shown) ~ label {
top: -0.625rem;
top: 0;
left: 0.875rem;
font-size: 0.7rem;
font-size: 0.75rem;
color: var(--orange);
font-weight: 600;
transform: translateY(0);
transform: translateY(-50%);
background: linear-gradient(180deg, var(--bg1) 50%, var(--bg0) 50%);
}
.form-group input {
padding: 1rem 1.25rem;
background: var(--bg0);
border: 2px solid var(--bg3);
border-radius: 12px;
border-radius: var(--border-radius);
font-family: inherit;
font-size: 1rem;
color: var(--fg1);
transition: all 0.3s;
transition: all 0.2s;
width: 100%;
box-sizing: border-box;
}
.form-group input:focus {
border-color: var(--orange);
box-shadow: 0 0 0 4px rgba(214, 93, 14, 0.2);
outline: none;
box-shadow: 4px 4px 0px rgba(214, 93, 14, 0.4);
}
.login-btn {
@@ -732,11 +734,11 @@ body {
.stat-change.positive { color: var(--green-light); }
.stat-change.negative { color: var(--red-light); }
/* Generic Cards */
/* Cards */
.card {
background: var(--bg1);
border-radius: var(--border-radius);
border: 1px solid var(--bg2);
border: 1px solid var(--bg3);
box-shadow: var(--shadow-sm);
margin-bottom: 1.5rem;
display: flex;
@@ -749,6 +751,15 @@ body {
display: flex;
justify-content: space-between;
align-items: center;
flex-wrap: wrap;
gap: 1rem;
}
.card-actions {
display: flex;
align-items: center;
gap: 0.5rem;
flex-wrap: wrap;
}
.card-title {
@@ -817,25 +828,26 @@ body {
/* Badges */
.status-badge {
padding: 0.25rem 0.75rem;
border-radius: 9999px;
border-radius: var(--border-radius);
font-size: 0.7rem;
font-weight: 700;
text-transform: uppercase;
display: inline-flex;
align-items: center;
gap: 0.375rem;
border: 1px solid transparent;
}
.status-badge.online, .status-badge.success { background: rgba(184, 187, 38, 0.2); color: var(--green-light); }
.status-badge.offline, .status-badge.danger { background: rgba(251, 73, 52, 0.2); color: var(--red-light); }
.status-badge.warning { background: rgba(250, 189, 47, 0.2); color: var(--yellow-light); }
.status-badge.online, .status-badge.success { background: rgba(184, 187, 38, 0.2); color: var(--green-light); border-color: rgba(184, 187, 38, 0.4); }
.status-badge.offline, .status-badge.danger { background: rgba(251, 73, 52, 0.2); color: var(--red-light); border-color: rgba(251, 73, 52, 0.4); }
.status-badge.warning { background: rgba(250, 189, 47, 0.2); color: var(--yellow-light); border-color: rgba(250, 189, 47, 0.4); }
.badge-client {
background: var(--bg2);
color: var(--blue-light);
padding: 2px 8px;
border-radius: 6px;
font-family: monospace;
border-radius: var(--border-radius);
font-family: inherit;
font-size: 0.85rem;
border: 1px solid var(--bg3);
}
@@ -889,7 +901,7 @@ body {
width: 100%;
background: var(--bg0);
border: 1px solid var(--bg3);
border-radius: 8px;
border-radius: var(--border-radius);
padding: 0.75rem;
font-family: inherit;
font-size: 0.875rem;
@@ -900,7 +912,7 @@ body {
.form-control input:focus, .form-control textarea:focus, .form-control select:focus {
outline: none;
border-color: var(--orange);
box-shadow: 0 0 0 2px rgba(214, 93, 14, 0.2);
box-shadow: 2px 2px 0px rgba(214, 93, 14, 0.4);
}
.btn {
@@ -908,21 +920,27 @@ body {
align-items: center;
gap: 0.5rem;
padding: 0.625rem 1.25rem;
border-radius: 8px;
border-radius: var(--border-radius);
font-weight: 600;
font-size: 0.875rem;
cursor: pointer;
transition: all 0.2s;
transition: all 0.1s;
border: 1px solid transparent;
text-transform: uppercase;
}
.btn-primary { background: var(--orange); color: var(--bg0); }
.btn:active {
transform: translate(2px, 2px);
box-shadow: none !important;
}
.btn-primary { background: var(--orange); color: var(--bg0); box-shadow: 2px 2px 0px var(--bg4); }
.btn-primary:hover { background: var(--orange-light); }
.btn-secondary { background: var(--bg2); border-color: var(--bg3); color: var(--fg1); }
.btn-secondary { background: var(--bg2); border-color: var(--bg3); color: var(--fg1); box-shadow: 2px 2px 0px var(--bg0); }
.btn-secondary:hover { background: var(--bg3); color: var(--fg0); }
.btn-danger { background: var(--red); color: var(--fg0); }
.btn-danger { background: var(--red); color: var(--fg0); box-shadow: 2px 2px 0px var(--bg4); }
.btn-danger:hover { background: var(--red-light); }
/* Small inline action buttons (edit, delete, copy) */
@@ -981,13 +999,13 @@ body {
.modal-content {
background: var(--bg1);
border-radius: 16px;
border-radius: var(--border-radius);
width: 90%;
max-width: 500px;
box-shadow: var(--shadow-lg);
border: 1px solid var(--bg3);
border: 2px solid var(--bg3);
transform: translateY(20px);
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
transition: all 0.2s;
}
.modal.active .modal-content {

View File

@@ -4,11 +4,11 @@
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>LLM Proxy Gateway - Admin Dashboard</title>
<link rel="stylesheet" href="/css/dashboard.css?v=7">
<link rel="stylesheet" href="/css/dashboard.css?v=11">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<link rel="icon" href="img/logo-icon.png" type="image/png" sizes="any">
<link rel="apple-touch-icon" href="img/logo-icon.png">
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
<link href="https://fonts.googleapis.com/css2?family=Fira+Code:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;700&display=swap" rel="stylesheet">
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script src="https://cdn.jsdelivr.net/npm/luxon@3.4.4/build/global/luxon.min.js"></script>
</head>
@@ -17,8 +17,7 @@
<div id="login-screen" class="login-container">
<div class="login-card">
<div class="login-header">
<img src="img/logo-full.png" alt="LLM Proxy Logo" class="login-logo" onerror="this.style.display='none'; this.nextElementSibling.style.display='block';">
<i class="fas fa-robot login-logo-fallback" style="display: none;"></i>
<i class="fas fa-terminal login-logo-fallback"></i>
<h1>LLM Proxy Gateway</h1>
<p class="login-subtitle">Admin Dashboard</p>
</div>

View File

@@ -284,7 +284,7 @@ class Dashboard {
<h3 class="card-title">Model Registry</h3>
<p class="card-subtitle">Manage model availability and custom pricing</p>
</div>
<div class="card-actions" style="display: flex; gap: 0.5rem; align-items: center; flex-wrap: wrap;">
<div class="card-actions">
<select id="model-provider-filter" class="form-control" style="margin-bottom: 0; padding: 4px 8px; width: auto;">
<option value="">All Providers</option>
<option value="openai">OpenAI</option>

View File

@@ -38,6 +38,24 @@ class LogsPage {
const statusClass = log.status === 'success' ? 'success' : 'danger';
const timestamp = luxon.DateTime.fromISO(log.timestamp).toFormat('yyyy-MM-dd HH:mm:ss');
let tokenDetails = `${log.tokens} total tokens`;
if (log.status === 'success') {
const parts = [];
parts.push(`${log.prompt_tokens} in`);
let completionStr = `${log.completion_tokens} out`;
if (log.reasoning_tokens > 0) {
completionStr += ` (${log.reasoning_tokens} reasoning)`;
}
parts.push(completionStr);
if (log.cache_read_tokens > 0) {
parts.push(`${log.cache_read_tokens} cache-hit`);
}
tokenDetails = parts.join(', ');
}
return `
<tr class="log-row">
<td class="whitespace-nowrap">${timestamp}</td>
@@ -55,7 +73,7 @@ class LogsPage {
<td>
<div class="log-message-container">
<code class="log-model">${log.model}</code>
<span class="log-tokens">${log.tokens} tokens</span>
<span class="log-tokens" title="${log.tokens} total tokens">${tokenDetails}</span>
<span class="log-duration">${log.duration}ms</span>
${log.error ? `<div class="log-error-msg">${log.error}</div>` : ''}
</div>