Compare commits
10 Commits
c2595f7a74
...
rust
| Author | SHA1 | Date | |
|---|---|---|---|
| 649371154f | |||
| 78fff61660 | |||
| b131094dfd | |||
| c3d81c1733 | |||
| e123f542f1 | |||
| 0d28241e39 | |||
| 754ee9cb84 | |||
| 5a9086b883 | |||
| cc5eba1957 | |||
| 3ab00fb188 |
Binary file not shown.
@@ -255,7 +255,10 @@ pub(super) async fn handle_system_logs(
|
|||||||
model,
|
model,
|
||||||
prompt_tokens,
|
prompt_tokens,
|
||||||
completion_tokens,
|
completion_tokens,
|
||||||
|
reasoning_tokens,
|
||||||
total_tokens,
|
total_tokens,
|
||||||
|
cache_read_tokens,
|
||||||
|
cache_write_tokens,
|
||||||
cost,
|
cost,
|
||||||
status,
|
status,
|
||||||
error_message,
|
error_message,
|
||||||
@@ -279,6 +282,11 @@ pub(super) async fn handle_system_logs(
|
|||||||
"client_id": row.get::<String, _>("client_id"),
|
"client_id": row.get::<String, _>("client_id"),
|
||||||
"provider": row.get::<String, _>("provider"),
|
"provider": row.get::<String, _>("provider"),
|
||||||
"model": row.get::<String, _>("model"),
|
"model": row.get::<String, _>("model"),
|
||||||
|
"prompt_tokens": row.get::<i64, _>("prompt_tokens"),
|
||||||
|
"completion_tokens": row.get::<i64, _>("completion_tokens"),
|
||||||
|
"reasoning_tokens": row.get::<i64, _>("reasoning_tokens"),
|
||||||
|
"cache_read_tokens": row.get::<i64, _>("cache_read_tokens"),
|
||||||
|
"cache_write_tokens": row.get::<i64, _>("cache_write_tokens"),
|
||||||
"tokens": row.get::<i64, _>("total_tokens"),
|
"tokens": row.get::<i64, _>("total_tokens"),
|
||||||
"cost": row.get::<f64, _>("cost"),
|
"cost": row.get::<f64, _>("cost"),
|
||||||
"status": row.get::<String, _>("status"),
|
"status": row.get::<String, _>("status"),
|
||||||
|
|||||||
@@ -64,6 +64,7 @@ pub async fn run_migrations(pool: &DbPool) -> Result<()> {
|
|||||||
model TEXT,
|
model TEXT,
|
||||||
prompt_tokens INTEGER,
|
prompt_tokens INTEGER,
|
||||||
completion_tokens INTEGER,
|
completion_tokens INTEGER,
|
||||||
|
reasoning_tokens INTEGER DEFAULT 0,
|
||||||
total_tokens INTEGER,
|
total_tokens INTEGER,
|
||||||
cost REAL,
|
cost REAL,
|
||||||
has_images BOOLEAN DEFAULT FALSE,
|
has_images BOOLEAN DEFAULT FALSE,
|
||||||
@@ -172,6 +173,9 @@ pub async fn run_migrations(pool: &DbPool) -> Result<()> {
|
|||||||
let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN cache_write_tokens INTEGER DEFAULT 0")
|
let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN cache_write_tokens INTEGER DEFAULT 0")
|
||||||
.execute(pool)
|
.execute(pool)
|
||||||
.await;
|
.await;
|
||||||
|
let _ = sqlx::query("ALTER TABLE llm_requests ADD COLUMN reasoning_tokens INTEGER DEFAULT 0")
|
||||||
|
.execute(pool)
|
||||||
|
.await;
|
||||||
|
|
||||||
// Add billing_mode column if it doesn't exist (migration for existing DBs)
|
// Add billing_mode column if it doesn't exist (migration for existing DBs)
|
||||||
let _ = sqlx::query("ALTER TABLE provider_configs ADD COLUMN billing_mode TEXT")
|
let _ = sqlx::query("ALTER TABLE provider_configs ADD COLUMN billing_mode TEXT")
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ pub struct RequestLog {
|
|||||||
pub model: String,
|
pub model: String,
|
||||||
pub prompt_tokens: u32,
|
pub prompt_tokens: u32,
|
||||||
pub completion_tokens: u32,
|
pub completion_tokens: u32,
|
||||||
|
pub reasoning_tokens: u32,
|
||||||
pub total_tokens: u32,
|
pub total_tokens: u32,
|
||||||
pub cache_read_tokens: u32,
|
pub cache_read_tokens: u32,
|
||||||
pub cache_write_tokens: u32,
|
pub cache_write_tokens: u32,
|
||||||
@@ -77,8 +78,8 @@ impl RequestLogger {
|
|||||||
sqlx::query(
|
sqlx::query(
|
||||||
r#"
|
r#"
|
||||||
INSERT INTO llm_requests
|
INSERT INTO llm_requests
|
||||||
(timestamp, client_id, provider, model, prompt_tokens, completion_tokens, total_tokens, cache_read_tokens, cache_write_tokens, cost, has_images, status, error_message, duration_ms, request_body, response_body)
|
(timestamp, client_id, provider, model, prompt_tokens, completion_tokens, reasoning_tokens, total_tokens, cache_read_tokens, cache_write_tokens, cost, has_images, status, error_message, duration_ms, request_body, response_body)
|
||||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
"#,
|
"#,
|
||||||
)
|
)
|
||||||
.bind(log.timestamp)
|
.bind(log.timestamp)
|
||||||
@@ -87,6 +88,7 @@ impl RequestLogger {
|
|||||||
.bind(&log.model)
|
.bind(&log.model)
|
||||||
.bind(log.prompt_tokens as i64)
|
.bind(log.prompt_tokens as i64)
|
||||||
.bind(log.completion_tokens as i64)
|
.bind(log.completion_tokens as i64)
|
||||||
|
.bind(log.reasoning_tokens as i64)
|
||||||
.bind(log.total_tokens as i64)
|
.bind(log.total_tokens as i64)
|
||||||
.bind(log.cache_read_tokens as i64)
|
.bind(log.cache_read_tokens as i64)
|
||||||
.bind(log.cache_write_tokens as i64)
|
.bind(log.cache_write_tokens as i64)
|
||||||
|
|||||||
@@ -165,6 +165,8 @@ pub struct Usage {
|
|||||||
pub completion_tokens: u32,
|
pub completion_tokens: u32,
|
||||||
pub total_tokens: u32,
|
pub total_tokens: u32,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub reasoning_tokens: Option<u32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub cache_read_tokens: Option<u32>,
|
pub cache_read_tokens: Option<u32>,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub cache_write_tokens: Option<u32>,
|
pub cache_write_tokens: Option<u32>,
|
||||||
@@ -179,6 +181,8 @@ pub struct ChatCompletionStreamResponse {
|
|||||||
pub created: u64,
|
pub created: u64,
|
||||||
pub model: String,
|
pub model: String,
|
||||||
pub choices: Vec<ChatStreamChoice>,
|
pub choices: Vec<ChatStreamChoice>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub usage: Option<Usage>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
|||||||
@@ -128,17 +128,36 @@ impl super::Provider for DeepSeekProvider {
|
|||||||
cache_write_tokens: u32,
|
cache_write_tokens: u32,
|
||||||
registry: &crate::models::registry::ModelRegistry,
|
registry: &crate::models::registry::ModelRegistry,
|
||||||
) -> f64 {
|
) -> f64 {
|
||||||
helpers::calculate_cost_with_registry(
|
if let Some(metadata) = registry.find_model(model) {
|
||||||
model,
|
if metadata.cost.is_some() {
|
||||||
prompt_tokens,
|
return helpers::calculate_cost_with_registry(
|
||||||
completion_tokens,
|
model,
|
||||||
cache_read_tokens,
|
prompt_tokens,
|
||||||
cache_write_tokens,
|
completion_tokens,
|
||||||
registry,
|
cache_read_tokens,
|
||||||
&self.pricing,
|
cache_write_tokens,
|
||||||
0.14,
|
registry,
|
||||||
0.28,
|
&self.pricing,
|
||||||
)
|
0.28,
|
||||||
|
0.42,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Custom DeepSeek fallback that correctly handles cache hits
|
||||||
|
let (prompt_rate, completion_rate) = self
|
||||||
|
.pricing
|
||||||
|
.iter()
|
||||||
|
.find(|p| model.contains(&p.model))
|
||||||
|
.map(|p| (p.prompt_tokens_per_million, p.completion_tokens_per_million))
|
||||||
|
.unwrap_or((0.28, 0.42)); // Default to DeepSeek's current API pricing
|
||||||
|
|
||||||
|
let cache_hit_rate = prompt_rate / 10.0;
|
||||||
|
let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
|
||||||
|
|
||||||
|
(non_cached_prompt as f64 * prompt_rate / 1_000_000.0)
|
||||||
|
+ (cache_read_tokens as f64 * cache_hit_rate / 1_000_000.0)
|
||||||
|
+ (completion_tokens as f64 * completion_rate / 1_000_000.0)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn chat_completion_stream(
|
async fn chat_completion_stream(
|
||||||
|
|||||||
@@ -722,6 +722,10 @@ impl super::Provider for GeminiProvider {
|
|||||||
let reasoning_content = candidate
|
let reasoning_content = candidate
|
||||||
.and_then(|c| c.content.parts.iter().find_map(|p| p.thought.clone()));
|
.and_then(|c| c.content.parts.iter().find_map(|p| p.thought.clone()));
|
||||||
|
|
||||||
|
let reasoning_tokens = reasoning_content.as_ref()
|
||||||
|
.map(|r| crate::utils::tokens::estimate_completion_tokens(r, &model))
|
||||||
|
.unwrap_or(0);
|
||||||
|
|
||||||
// Extract function calls → OpenAI tool_calls
|
// Extract function calls → OpenAI tool_calls
|
||||||
let tool_calls = candidate.and_then(|c| Self::extract_tool_calls(&c.content.parts));
|
let tool_calls = candidate.and_then(|c| Self::extract_tool_calls(&c.content.parts));
|
||||||
|
|
||||||
@@ -752,6 +756,7 @@ impl super::Provider for GeminiProvider {
|
|||||||
tool_calls,
|
tool_calls,
|
||||||
prompt_tokens,
|
prompt_tokens,
|
||||||
completion_tokens,
|
completion_tokens,
|
||||||
|
reasoning_tokens,
|
||||||
total_tokens,
|
total_tokens,
|
||||||
cache_read_tokens,
|
cache_read_tokens,
|
||||||
cache_write_tokens: 0, // Gemini doesn't report cache writes separately
|
cache_write_tokens: 0, // Gemini doesn't report cache writes separately
|
||||||
@@ -772,17 +777,36 @@ impl super::Provider for GeminiProvider {
|
|||||||
cache_write_tokens: u32,
|
cache_write_tokens: u32,
|
||||||
registry: &crate::models::registry::ModelRegistry,
|
registry: &crate::models::registry::ModelRegistry,
|
||||||
) -> f64 {
|
) -> f64 {
|
||||||
super::helpers::calculate_cost_with_registry(
|
if let Some(metadata) = registry.find_model(model) {
|
||||||
model,
|
if metadata.cost.is_some() {
|
||||||
prompt_tokens,
|
return super::helpers::calculate_cost_with_registry(
|
||||||
completion_tokens,
|
model,
|
||||||
cache_read_tokens,
|
prompt_tokens,
|
||||||
cache_write_tokens,
|
completion_tokens,
|
||||||
registry,
|
cache_read_tokens,
|
||||||
&self.pricing,
|
cache_write_tokens,
|
||||||
0.075,
|
registry,
|
||||||
0.30,
|
&self.pricing,
|
||||||
)
|
0.075,
|
||||||
|
0.30,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Custom Gemini fallback that correctly handles cache hits (25% of input cost)
|
||||||
|
let (prompt_rate, completion_rate) = self
|
||||||
|
.pricing
|
||||||
|
.iter()
|
||||||
|
.find(|p| model.contains(&p.model))
|
||||||
|
.map(|p| (p.prompt_tokens_per_million, p.completion_tokens_per_million))
|
||||||
|
.unwrap_or((0.075, 0.30)); // Default to Gemini 1.5 Flash current API pricing
|
||||||
|
|
||||||
|
let cache_hit_rate = prompt_rate * 0.25;
|
||||||
|
let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
|
||||||
|
|
||||||
|
(non_cached_prompt as f64 * prompt_rate / 1_000_000.0)
|
||||||
|
+ (cache_read_tokens as f64 * cache_hit_rate / 1_000_000.0)
|
||||||
|
+ (completion_tokens as f64 * completion_rate / 1_000_000.0)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn chat_completion_stream(
|
async fn chat_completion_stream(
|
||||||
@@ -883,6 +907,7 @@ impl super::Provider for GeminiProvider {
|
|||||||
super::StreamUsage {
|
super::StreamUsage {
|
||||||
prompt_tokens: u.prompt_token_count,
|
prompt_tokens: u.prompt_token_count,
|
||||||
completion_tokens: u.candidates_token_count,
|
completion_tokens: u.candidates_token_count,
|
||||||
|
reasoning_tokens: 0,
|
||||||
total_tokens: u.total_token_count,
|
total_tokens: u.total_token_count,
|
||||||
cache_read_tokens: u.cached_content_token_count,
|
cache_read_tokens: u.cached_content_token_count,
|
||||||
cache_write_tokens: 0,
|
cache_write_tokens: 0,
|
||||||
|
|||||||
@@ -254,6 +254,11 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
|
|||||||
let completion_tokens = usage["completion_tokens"].as_u64().unwrap_or(0) as u32;
|
let completion_tokens = usage["completion_tokens"].as_u64().unwrap_or(0) as u32;
|
||||||
let total_tokens = usage["total_tokens"].as_u64().unwrap_or(0) as u32;
|
let total_tokens = usage["total_tokens"].as_u64().unwrap_or(0) as u32;
|
||||||
|
|
||||||
|
// Extract reasoning tokens
|
||||||
|
let reasoning_tokens = usage["completion_tokens_details"]["reasoning_tokens"]
|
||||||
|
.as_u64()
|
||||||
|
.unwrap_or(0) as u32;
|
||||||
|
|
||||||
// Extract cache tokens — try OpenAI/Grok format first, then DeepSeek format
|
// Extract cache tokens — try OpenAI/Grok format first, then DeepSeek format
|
||||||
let cache_read_tokens = usage["prompt_tokens_details"]["cached_tokens"]
|
let cache_read_tokens = usage["prompt_tokens_details"]["cached_tokens"]
|
||||||
.as_u64()
|
.as_u64()
|
||||||
@@ -261,9 +266,9 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
|
|||||||
.or_else(|| usage["prompt_cache_hit_tokens"].as_u64())
|
.or_else(|| usage["prompt_cache_hit_tokens"].as_u64())
|
||||||
.unwrap_or(0) as u32;
|
.unwrap_or(0) as u32;
|
||||||
|
|
||||||
// DeepSeek reports cache_write as prompt_cache_miss_tokens (tokens written to cache for future use).
|
// DeepSeek reports prompt_cache_miss_tokens which are just regular non-cached tokens.
|
||||||
// OpenAI doesn't report cache_write in this location, but may in the future.
|
// They do not incur a separate cache_write fee, so we don't map them here to avoid double-charging.
|
||||||
let cache_write_tokens = usage["prompt_cache_miss_tokens"].as_u64().unwrap_or(0) as u32;
|
let cache_write_tokens = 0;
|
||||||
|
|
||||||
Ok(ProviderResponse {
|
Ok(ProviderResponse {
|
||||||
content,
|
content,
|
||||||
@@ -271,6 +276,7 @@ pub fn parse_openai_response(resp_json: &Value, model: String) -> Result<Provide
|
|||||||
tool_calls,
|
tool_calls,
|
||||||
prompt_tokens,
|
prompt_tokens,
|
||||||
completion_tokens,
|
completion_tokens,
|
||||||
|
reasoning_tokens,
|
||||||
total_tokens,
|
total_tokens,
|
||||||
cache_read_tokens,
|
cache_read_tokens,
|
||||||
cache_write_tokens,
|
cache_write_tokens,
|
||||||
@@ -295,18 +301,21 @@ pub fn parse_openai_stream_chunk(
|
|||||||
let completion_tokens = u["completion_tokens"].as_u64().unwrap_or(0) as u32;
|
let completion_tokens = u["completion_tokens"].as_u64().unwrap_or(0) as u32;
|
||||||
let total_tokens = u["total_tokens"].as_u64().unwrap_or(0) as u32;
|
let total_tokens = u["total_tokens"].as_u64().unwrap_or(0) as u32;
|
||||||
|
|
||||||
|
let reasoning_tokens = u["completion_tokens_details"]["reasoning_tokens"]
|
||||||
|
.as_u64()
|
||||||
|
.unwrap_or(0) as u32;
|
||||||
|
|
||||||
let cache_read_tokens = u["prompt_tokens_details"]["cached_tokens"]
|
let cache_read_tokens = u["prompt_tokens_details"]["cached_tokens"]
|
||||||
.as_u64()
|
.as_u64()
|
||||||
.or_else(|| u["prompt_cache_hit_tokens"].as_u64())
|
.or_else(|| u["prompt_cache_hit_tokens"].as_u64())
|
||||||
.unwrap_or(0) as u32;
|
.unwrap_or(0) as u32;
|
||||||
|
|
||||||
let cache_write_tokens = u["prompt_cache_miss_tokens"]
|
let cache_write_tokens = 0;
|
||||||
.as_u64()
|
|
||||||
.unwrap_or(0) as u32;
|
|
||||||
|
|
||||||
Some(StreamUsage {
|
Some(StreamUsage {
|
||||||
prompt_tokens,
|
prompt_tokens,
|
||||||
completion_tokens,
|
completion_tokens,
|
||||||
|
reasoning_tokens,
|
||||||
total_tokens,
|
total_tokens,
|
||||||
cache_read_tokens,
|
cache_read_tokens,
|
||||||
cache_write_tokens,
|
cache_write_tokens,
|
||||||
|
|||||||
@@ -75,6 +75,7 @@ pub struct ProviderResponse {
|
|||||||
pub tool_calls: Option<Vec<crate::models::ToolCall>>,
|
pub tool_calls: Option<Vec<crate::models::ToolCall>>,
|
||||||
pub prompt_tokens: u32,
|
pub prompt_tokens: u32,
|
||||||
pub completion_tokens: u32,
|
pub completion_tokens: u32,
|
||||||
|
pub reasoning_tokens: u32,
|
||||||
pub total_tokens: u32,
|
pub total_tokens: u32,
|
||||||
pub cache_read_tokens: u32,
|
pub cache_read_tokens: u32,
|
||||||
pub cache_write_tokens: u32,
|
pub cache_write_tokens: u32,
|
||||||
@@ -86,6 +87,7 @@ pub struct ProviderResponse {
|
|||||||
pub struct StreamUsage {
|
pub struct StreamUsage {
|
||||||
pub prompt_tokens: u32,
|
pub prompt_tokens: u32,
|
||||||
pub completion_tokens: u32,
|
pub completion_tokens: u32,
|
||||||
|
pub reasoning_tokens: u32,
|
||||||
pub total_tokens: u32,
|
pub total_tokens: u32,
|
||||||
pub cache_read_tokens: u32,
|
pub cache_read_tokens: u32,
|
||||||
pub cache_write_tokens: u32,
|
pub cache_write_tokens: u32,
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ impl OpenAIProvider {
|
|||||||
.timeout(std::time::Duration::from_secs(300))
|
.timeout(std::time::Duration::from_secs(300))
|
||||||
.pool_idle_timeout(std::time::Duration::from_secs(90))
|
.pool_idle_timeout(std::time::Duration::from_secs(90))
|
||||||
.pool_max_idle_per_host(4)
|
.pool_max_idle_per_host(4)
|
||||||
.tcp_keepalive(std::time::Duration::from_secs(30))
|
.tcp_keepalive(std::time::Duration::from_secs(15))
|
||||||
.build()?;
|
.build()?;
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
@@ -92,96 +92,7 @@ impl super::Provider for OpenAIProvider {
|
|||||||
// Read error body to diagnose. If the model requires the Responses
|
// Read error body to diagnose. If the model requires the Responses
|
||||||
// API (v1/responses), retry against that endpoint.
|
// API (v1/responses), retry against that endpoint.
|
||||||
if error_text.to_lowercase().contains("v1/responses") || error_text.to_lowercase().contains("only supported in v1/responses") {
|
if error_text.to_lowercase().contains("v1/responses") || error_text.to_lowercase().contains("only supported in v1/responses") {
|
||||||
// Build a simple `input` string by concatenating message parts.
|
return self.chat_responses(request).await;
|
||||||
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
|
|
||||||
let mut inputs: Vec<String> = Vec::new();
|
|
||||||
for m in &messages_json {
|
|
||||||
let role = m["role"].as_str().unwrap_or("");
|
|
||||||
let parts = m.get("content").and_then(|c| c.as_array()).cloned().unwrap_or_default();
|
|
||||||
let mut text_parts = Vec::new();
|
|
||||||
for p in parts {
|
|
||||||
if let Some(t) = p.get("text").and_then(|v| v.as_str()) {
|
|
||||||
text_parts.push(t.to_string());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
inputs.push(format!("{}: {}", role, text_parts.join("")));
|
|
||||||
}
|
|
||||||
let input_text = inputs.join("\n");
|
|
||||||
|
|
||||||
let resp = self
|
|
||||||
.client
|
|
||||||
.post(format!("{}/responses", self.config.base_url))
|
|
||||||
.header("Authorization", format!("Bearer {}", self.api_key))
|
|
||||||
.json(&serde_json::json!({ "model": request.model, "input": input_text }))
|
|
||||||
.send()
|
|
||||||
.await
|
|
||||||
.map_err(|e| AppError::ProviderError(e.to_string()))?;
|
|
||||||
|
|
||||||
if !resp.status().is_success() {
|
|
||||||
let err = resp.text().await.unwrap_or_default();
|
|
||||||
return Err(AppError::ProviderError(format!("OpenAI Responses API error: {}", err)));
|
|
||||||
}
|
|
||||||
|
|
||||||
let resp_json: serde_json::Value = resp.json().await.map_err(|e| AppError::ProviderError(e.to_string()))?;
|
|
||||||
// Try to normalize: if it's chat-style, use existing parser
|
|
||||||
if resp_json.get("choices").is_some() {
|
|
||||||
return helpers::parse_openai_response(&resp_json, request.model);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Responses API: try to extract text from `output` or `candidates`
|
|
||||||
let mut content_text = String::new();
|
|
||||||
if let Some(output) = resp_json.get("output").and_then(|o| o.as_array()) {
|
|
||||||
if let Some(first) = output.get(0) {
|
|
||||||
if let Some(contents) = first.get("content").and_then(|c| c.as_array()) {
|
|
||||||
for item in contents {
|
|
||||||
if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
|
|
||||||
if !content_text.is_empty() { content_text.push_str("\n"); }
|
|
||||||
content_text.push_str(text);
|
|
||||||
} else if let Some(parts) = item.get("parts").and_then(|p| p.as_array()) {
|
|
||||||
for p in parts {
|
|
||||||
if let Some(t) = p.as_str() {
|
|
||||||
if !content_text.is_empty() { content_text.push_str("\n"); }
|
|
||||||
content_text.push_str(t);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if content_text.is_empty() {
|
|
||||||
if let Some(cands) = resp_json.get("candidates").and_then(|c| c.as_array()) {
|
|
||||||
if let Some(c0) = cands.get(0) {
|
|
||||||
if let Some(content) = c0.get("content") {
|
|
||||||
if let Some(parts) = content.get("parts").and_then(|p| p.as_array()) {
|
|
||||||
for p in parts {
|
|
||||||
if let Some(t) = p.get("text").and_then(|v| v.as_str()) {
|
|
||||||
if !content_text.is_empty() { content_text.push_str("\n"); }
|
|
||||||
content_text.push_str(t);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let prompt_tokens = resp_json.get("usage").and_then(|u| u.get("prompt_tokens")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
|
|
||||||
let completion_tokens = resp_json.get("usage").and_then(|u| u.get("completion_tokens")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
|
|
||||||
let total_tokens = resp_json.get("usage").and_then(|u| u.get("total_tokens")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
|
|
||||||
|
|
||||||
return Ok(ProviderResponse {
|
|
||||||
content: content_text,
|
|
||||||
reasoning_content: None,
|
|
||||||
tool_calls: None,
|
|
||||||
prompt_tokens,
|
|
||||||
completion_tokens,
|
|
||||||
total_tokens,
|
|
||||||
cache_read_tokens: 0,
|
|
||||||
cache_write_tokens: 0,
|
|
||||||
model: request.model,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
tracing::error!("OpenAI API error ({}): {}", status, error_text);
|
tracing::error!("OpenAI API error ({}): {}", status, error_text);
|
||||||
@@ -201,8 +112,41 @@ impl super::Provider for OpenAIProvider {
|
|||||||
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
|
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
|
||||||
let mut input_parts = Vec::new();
|
let mut input_parts = Vec::new();
|
||||||
for m in &messages_json {
|
for m in &messages_json {
|
||||||
let role = m["role"].as_str().unwrap_or("user");
|
let mut role = m["role"].as_str().unwrap_or("user").to_string();
|
||||||
let content = m.get("content").cloned().unwrap_or(serde_json::json!(""));
|
// Newer models (gpt-5, o1) prefer "developer" over "system"
|
||||||
|
if role == "system" {
|
||||||
|
role = "developer".to_string();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut content = m.get("content").cloned().unwrap_or(serde_json::json!([]));
|
||||||
|
|
||||||
|
// Map content types based on role for Responses API
|
||||||
|
if let Some(content_array) = content.as_array_mut() {
|
||||||
|
for part in content_array {
|
||||||
|
if let Some(part_obj) = part.as_object_mut() {
|
||||||
|
if let Some(t) = part_obj.get("type").and_then(|v| v.as_str()) {
|
||||||
|
match t {
|
||||||
|
"text" => {
|
||||||
|
let new_type = if role == "assistant" { "output_text" } else { "input_text" };
|
||||||
|
part_obj.insert("type".to_string(), serde_json::json!(new_type));
|
||||||
|
}
|
||||||
|
"image_url" => {
|
||||||
|
// Assistant typically doesn't have image_url in history this way, but for safety:
|
||||||
|
let new_type = if role == "assistant" { "output_image" } else { "input_image" };
|
||||||
|
part_obj.insert("type".to_string(), serde_json::json!(new_type));
|
||||||
|
if let Some(img_url) = part_obj.remove("image_url") {
|
||||||
|
part_obj.insert("image".to_string(), img_url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if let Some(text) = content.as_str() {
|
||||||
|
let new_type = if role == "assistant" { "output_text" } else { "input_text" };
|
||||||
|
content = serde_json::json!([{ "type": new_type, "text": text }]);
|
||||||
|
}
|
||||||
|
|
||||||
input_parts.push(serde_json::json!({
|
input_parts.push(serde_json::json!({
|
||||||
"role": role,
|
"role": role,
|
||||||
@@ -210,11 +154,34 @@ impl super::Provider for OpenAIProvider {
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut body = serde_json::json!({
|
||||||
|
"model": request.model,
|
||||||
|
"input": input_parts,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Add standard parameters
|
||||||
|
if let Some(temp) = request.temperature {
|
||||||
|
body["temperature"] = serde_json::json!(temp);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Newer models (gpt-5, o1) in Responses API use max_output_tokens
|
||||||
|
if let Some(max_tokens) = request.max_tokens {
|
||||||
|
if request.model.contains("gpt-5") || request.model.starts_with("o1-") || request.model.starts_with("o3-") {
|
||||||
|
body["max_output_tokens"] = serde_json::json!(max_tokens);
|
||||||
|
} else {
|
||||||
|
body["max_tokens"] = serde_json::json!(max_tokens);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(tools) = &request.tools {
|
||||||
|
body["tools"] = serde_json::json!(tools);
|
||||||
|
}
|
||||||
|
|
||||||
let resp = self
|
let resp = self
|
||||||
.client
|
.client
|
||||||
.post(format!("{}/responses", self.config.base_url))
|
.post(format!("{}/responses", self.config.base_url))
|
||||||
.header("Authorization", format!("Bearer {}", self.api_key))
|
.header("Authorization", format!("Bearer {}", self.api_key))
|
||||||
.json(&serde_json::json!({ "model": request.model, "input": input_parts }))
|
.json(&body)
|
||||||
.send()
|
.send()
|
||||||
.await
|
.await
|
||||||
.map_err(|e| AppError::ProviderError(e.to_string()))?;
|
.map_err(|e| AppError::ProviderError(e.to_string()))?;
|
||||||
@@ -226,11 +193,16 @@ impl super::Provider for OpenAIProvider {
|
|||||||
|
|
||||||
let resp_json: serde_json::Value = resp.json().await.map_err(|e| AppError::ProviderError(e.to_string()))?;
|
let resp_json: serde_json::Value = resp.json().await.map_err(|e| AppError::ProviderError(e.to_string()))?;
|
||||||
|
|
||||||
|
// Try to normalize: if it's chat-style, use existing parser
|
||||||
|
if resp_json.get("choices").is_some() {
|
||||||
|
return helpers::parse_openai_response(&resp_json, request.model);
|
||||||
|
}
|
||||||
|
|
||||||
// Normalize Responses API output into ProviderResponse
|
// Normalize Responses API output into ProviderResponse
|
||||||
let mut content_text = String::new();
|
let mut content_text = String::new();
|
||||||
if let Some(output) = resp_json.get("output").and_then(|o| o.as_array()) {
|
if let Some(output) = resp_json.get("output").and_then(|o| o.as_array()) {
|
||||||
if let Some(first) = output.get(0) {
|
for out in output {
|
||||||
if let Some(contents) = first.get("content").and_then(|c| c.as_array()) {
|
if let Some(contents) = out.get("content").and_then(|c| c.as_array()) {
|
||||||
for item in contents {
|
for item in contents {
|
||||||
if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
|
if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
|
||||||
if !content_text.is_empty() { content_text.push_str("\n"); }
|
if !content_text.is_empty() { content_text.push_str("\n"); }
|
||||||
@@ -275,6 +247,7 @@ impl super::Provider for OpenAIProvider {
|
|||||||
tool_calls: None,
|
tool_calls: None,
|
||||||
prompt_tokens,
|
prompt_tokens,
|
||||||
completion_tokens,
|
completion_tokens,
|
||||||
|
reasoning_tokens: 0,
|
||||||
total_tokens,
|
total_tokens,
|
||||||
cache_read_tokens: 0,
|
cache_read_tokens: 0,
|
||||||
cache_write_tokens: 0,
|
cache_write_tokens: 0,
|
||||||
@@ -312,6 +285,12 @@ impl super::Provider for OpenAIProvider {
|
|||||||
&self,
|
&self,
|
||||||
request: UnifiedRequest,
|
request: UnifiedRequest,
|
||||||
) -> Result<BoxStream<'static, Result<ProviderStreamChunk, AppError>>, AppError> {
|
) -> Result<BoxStream<'static, Result<ProviderStreamChunk, AppError>>, AppError> {
|
||||||
|
// Allow proactive routing to Responses API based on heuristic
|
||||||
|
let model_lc = request.model.to_lowercase();
|
||||||
|
if model_lc.contains("gpt-5") || model_lc.contains("codex") {
|
||||||
|
return self.chat_responses_stream(request).await;
|
||||||
|
}
|
||||||
|
|
||||||
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
|
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
|
||||||
let mut body = helpers::build_openai_body(&request, messages_json, true);
|
let mut body = helpers::build_openai_body(&request, messages_json, true);
|
||||||
|
|
||||||
@@ -400,8 +379,41 @@ impl super::Provider for OpenAIProvider {
|
|||||||
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
|
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
|
||||||
let mut input_parts = Vec::new();
|
let mut input_parts = Vec::new();
|
||||||
for m in &messages_json {
|
for m in &messages_json {
|
||||||
let role = m["role"].as_str().unwrap_or("user");
|
let mut role = m["role"].as_str().unwrap_or("user").to_string();
|
||||||
let content = m.get("content").cloned().unwrap_or(serde_json::json!(""));
|
// Newer models (gpt-5, o1) prefer "developer" over "system"
|
||||||
|
if role == "system" {
|
||||||
|
role = "developer".to_string();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut content = m.get("content").cloned().unwrap_or(serde_json::json!([]));
|
||||||
|
|
||||||
|
// Map content types based on role for Responses API
|
||||||
|
if let Some(content_array) = content.as_array_mut() {
|
||||||
|
for part in content_array {
|
||||||
|
if let Some(part_obj) = part.as_object_mut() {
|
||||||
|
if let Some(t) = part_obj.get("type").and_then(|v| v.as_str()) {
|
||||||
|
match t {
|
||||||
|
"text" => {
|
||||||
|
let new_type = if role == "assistant" { "output_text" } else { "input_text" };
|
||||||
|
part_obj.insert("type".to_string(), serde_json::json!(new_type));
|
||||||
|
}
|
||||||
|
"image_url" => {
|
||||||
|
// Assistant typically doesn't have image_url in history this way, but for safety:
|
||||||
|
let new_type = if role == "assistant" { "output_image" } else { "input_image" };
|
||||||
|
part_obj.insert("type".to_string(), serde_json::json!(new_type));
|
||||||
|
if let Some(img_url) = part_obj.remove("image_url") {
|
||||||
|
part_obj.insert("image".to_string(), img_url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if let Some(text) = content.as_str() {
|
||||||
|
let new_type = if role == "assistant" { "output_text" } else { "input_text" };
|
||||||
|
content = serde_json::json!([{ "type": new_type, "text": text }]);
|
||||||
|
}
|
||||||
|
|
||||||
input_parts.push(serde_json::json!({
|
input_parts.push(serde_json::json!({
|
||||||
"role": role,
|
"role": role,
|
||||||
@@ -409,12 +421,26 @@ impl super::Provider for OpenAIProvider {
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
let body = serde_json::json!({
|
let mut body = serde_json::json!({
|
||||||
"model": request.model,
|
"model": request.model,
|
||||||
"input": input_parts,
|
"input": input_parts,
|
||||||
"stream": true
|
"stream": true,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Add standard parameters
|
||||||
|
if let Some(temp) = request.temperature {
|
||||||
|
body["temperature"] = serde_json::json!(temp);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Newer models (gpt-5, o1) in Responses API use max_output_tokens
|
||||||
|
if let Some(max_tokens) = request.max_tokens {
|
||||||
|
if request.model.contains("gpt-5") || request.model.starts_with("o1-") || request.model.starts_with("o3-") {
|
||||||
|
body["max_output_tokens"] = serde_json::json!(max_tokens);
|
||||||
|
} else {
|
||||||
|
body["max_tokens"] = serde_json::json!(max_tokens);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let url = format!("{}/responses", self.config.base_url);
|
let url = format!("{}/responses", self.config.base_url);
|
||||||
let api_key = self.api_key.clone();
|
let api_key = self.api_key.clone();
|
||||||
let model = request.model.clone();
|
let model = request.model.clone();
|
||||||
@@ -425,6 +451,7 @@ impl super::Provider for OpenAIProvider {
|
|||||||
self.client
|
self.client
|
||||||
.post(&url)
|
.post(&url)
|
||||||
.header("Authorization", format!("Bearer {}", api_key))
|
.header("Authorization", format!("Bearer {}", api_key))
|
||||||
|
.header("Accept", "text/event-stream")
|
||||||
.json(&body),
|
.json(&body),
|
||||||
)
|
)
|
||||||
.map_err(|e| AppError::ProviderError(format!("Failed to create EventSource for Responses API: {}", e)))?;
|
.map_err(|e| AppError::ProviderError(format!("Failed to create EventSource for Responses API: {}", e)))?;
|
||||||
@@ -441,36 +468,44 @@ impl super::Provider for OpenAIProvider {
|
|||||||
let chunk: serde_json::Value = serde_json::from_str(&msg.data)
|
let chunk: serde_json::Value = serde_json::from_str(&msg.data)
|
||||||
.map_err(|e| AppError::ProviderError(format!("Failed to parse Responses stream chunk: {}", e)))?;
|
.map_err(|e| AppError::ProviderError(format!("Failed to parse Responses stream chunk: {}", e)))?;
|
||||||
|
|
||||||
// Try standard OpenAI parsing first
|
// Try standard OpenAI parsing first (choices/usage)
|
||||||
if let Some(p_chunk) = helpers::parse_openai_stream_chunk(&chunk, &model, None) {
|
if let Some(p_chunk) = helpers::parse_openai_stream_chunk(&chunk, &model, None) {
|
||||||
yield p_chunk?;
|
yield p_chunk?;
|
||||||
} else {
|
} else {
|
||||||
// Responses API specific parsing for streaming
|
// Responses API specific parsing for streaming
|
||||||
// Often it follows a similar structure to the non-streaming response but in chunks
|
|
||||||
let mut content = String::new();
|
let mut content = String::new();
|
||||||
|
let mut finish_reason = None;
|
||||||
|
|
||||||
// Check for output[0].content[0].text (similar to non-stream)
|
let event_type = chunk.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
||||||
if let Some(output) = chunk.get("output").and_then(|o| o.as_array()) {
|
|
||||||
if let Some(first) = output.get(0) {
|
match event_type {
|
||||||
if let Some(contents) = first.get("content").and_then(|c| c.as_array()) {
|
"response.output_text.delta" => {
|
||||||
for item in contents {
|
if let Some(delta) = chunk.get("delta").and_then(|v| v.as_str()) {
|
||||||
if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
|
content.push_str(delta);
|
||||||
content.push_str(text);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
"response.output_text.done" => {
|
||||||
|
if let Some(text) = chunk.get("text").and_then(|v| v.as_str()) {
|
||||||
// Check for candidates[0].content.parts[0].text (Gemini-like, which OpenAI sometimes uses for v1/responses)
|
// Some implementations send the full text at the end
|
||||||
if content.is_empty() {
|
// We usually prefer deltas, but if we haven't seen them, this is the fallback.
|
||||||
if let Some(cands) = chunk.get("candidates").and_then(|c| c.as_array()) {
|
// However, if we're already yielding deltas, we might not want this.
|
||||||
if let Some(c0) = cands.get(0) {
|
// For now, let's just use it as a signal that we're done.
|
||||||
if let Some(content_obj) = c0.get("content") {
|
finish_reason = Some("stop".to_string());
|
||||||
if let Some(parts) = content_obj.get("parts").and_then(|p| p.as_array()) {
|
}
|
||||||
for p in parts {
|
}
|
||||||
if let Some(t) = p.get("text").and_then(|v| v.as_str()) {
|
"response.done" => {
|
||||||
content.push_str(t);
|
finish_reason = Some("stop".to_string());
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
// Fallback to older nested structure if present
|
||||||
|
if let Some(output) = chunk.get("output").and_then(|o| o.as_array()) {
|
||||||
|
for out in output {
|
||||||
|
if let Some(contents) = out.get("content").and_then(|c| c.as_array()) {
|
||||||
|
for item in contents {
|
||||||
|
if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
|
||||||
|
content.push_str(text);
|
||||||
|
} else if let Some(delta) = item.get("delta").and_then(|d| d.get("text")).and_then(|t| t.as_str()) {
|
||||||
|
content.push_str(delta);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -479,11 +514,11 @@ impl super::Provider for OpenAIProvider {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !content.is_empty() {
|
if !content.is_empty() || finish_reason.is_some() {
|
||||||
yield ProviderStreamChunk {
|
yield ProviderStreamChunk {
|
||||||
content,
|
content,
|
||||||
reasoning_content: None,
|
reasoning_content: None,
|
||||||
finish_reason: None,
|
finish_reason,
|
||||||
tool_calls: None,
|
tool_calls: None,
|
||||||
model: model.clone(),
|
model: model.clone(),
|
||||||
usage: None,
|
usage: None,
|
||||||
@@ -497,19 +532,22 @@ impl super::Provider for OpenAIProvider {
|
|||||||
let probe_resp = probe_client
|
let probe_resp = probe_client
|
||||||
.post(&url)
|
.post(&url)
|
||||||
.header("Authorization", format!("Bearer {}", api_key))
|
.header("Authorization", format!("Bearer {}", api_key))
|
||||||
|
.header("Accept", "application/json") // Ask for JSON during probe
|
||||||
.json(&probe_body)
|
.json(&probe_body)
|
||||||
.send()
|
.send()
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
match probe_resp {
|
match probe_resp {
|
||||||
Ok(r) if !r.status().is_success() => {
|
Ok(r) => {
|
||||||
let status = r.status();
|
let status = r.status();
|
||||||
let error_body = r.text().await.unwrap_or_default();
|
let body = r.text().await.unwrap_or_default();
|
||||||
tracing::error!("OpenAI Responses Stream Error Probe ({}): {}", status, error_body);
|
if status.is_success() {
|
||||||
Err(AppError::ProviderError(format!("OpenAI Responses API error ({}): {}", status, error_body)))?;
|
tracing::warn!("Responses stream ended prematurely but probe returned 200 OK. Body: {}", body);
|
||||||
}
|
Err(AppError::ProviderError(format!("Responses stream ended (server sent 200 OK with body: {})", body)))?;
|
||||||
Ok(_) => {
|
} else {
|
||||||
Err(AppError::ProviderError(format!("Responses stream error (probe returned 200): {}", e)))?;
|
tracing::error!("OpenAI Responses Stream Error Probe ({}): {}", status, body);
|
||||||
|
Err(AppError::ProviderError(format!("OpenAI Responses API error ({}): {}", status, body)))?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Err(probe_err) => {
|
Err(probe_err) => {
|
||||||
tracing::error!("OpenAI Responses Stream Error Probe failed: {}", probe_err);
|
tracing::error!("OpenAI Responses Stream Error Probe failed: {}", probe_err);
|
||||||
|
|||||||
@@ -312,6 +312,14 @@ async fn chat_completions(
|
|||||||
},
|
},
|
||||||
finish_reason: chunk.finish_reason,
|
finish_reason: chunk.finish_reason,
|
||||||
}],
|
}],
|
||||||
|
usage: chunk.usage.as_ref().map(|u| crate::models::Usage {
|
||||||
|
prompt_tokens: u.prompt_tokens,
|
||||||
|
completion_tokens: u.completion_tokens,
|
||||||
|
total_tokens: u.total_tokens,
|
||||||
|
reasoning_tokens: if u.reasoning_tokens > 0 { Some(u.reasoning_tokens) } else { None },
|
||||||
|
cache_read_tokens: if u.cache_read_tokens > 0 { Some(u.cache_read_tokens) } else { None },
|
||||||
|
cache_write_tokens: if u.cache_write_tokens > 0 { Some(u.cache_write_tokens) } else { None },
|
||||||
|
}),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Use axum's Event directly, wrap in Ok
|
// Use axum's Event directly, wrap in Ok
|
||||||
@@ -383,6 +391,7 @@ async fn chat_completions(
|
|||||||
model: response.model.clone(),
|
model: response.model.clone(),
|
||||||
prompt_tokens: response.prompt_tokens,
|
prompt_tokens: response.prompt_tokens,
|
||||||
completion_tokens: response.completion_tokens,
|
completion_tokens: response.completion_tokens,
|
||||||
|
reasoning_tokens: response.reasoning_tokens,
|
||||||
total_tokens: response.total_tokens,
|
total_tokens: response.total_tokens,
|
||||||
cache_read_tokens: response.cache_read_tokens,
|
cache_read_tokens: response.cache_read_tokens,
|
||||||
cache_write_tokens: response.cache_write_tokens,
|
cache_write_tokens: response.cache_write_tokens,
|
||||||
@@ -423,6 +432,7 @@ async fn chat_completions(
|
|||||||
prompt_tokens: response.prompt_tokens,
|
prompt_tokens: response.prompt_tokens,
|
||||||
completion_tokens: response.completion_tokens,
|
completion_tokens: response.completion_tokens,
|
||||||
total_tokens: response.total_tokens,
|
total_tokens: response.total_tokens,
|
||||||
|
reasoning_tokens: if response.reasoning_tokens > 0 { Some(response.reasoning_tokens) } else { None },
|
||||||
cache_read_tokens: if response.cache_read_tokens > 0 { Some(response.cache_read_tokens) } else { None },
|
cache_read_tokens: if response.cache_read_tokens > 0 { Some(response.cache_read_tokens) } else { None },
|
||||||
cache_write_tokens: if response.cache_write_tokens > 0 { Some(response.cache_write_tokens) } else { None },
|
cache_write_tokens: if response.cache_write_tokens > 0 { Some(response.cache_write_tokens) } else { None },
|
||||||
}),
|
}),
|
||||||
@@ -452,6 +462,7 @@ async fn chat_completions(
|
|||||||
model: model.clone(),
|
model: model.clone(),
|
||||||
prompt_tokens: 0,
|
prompt_tokens: 0,
|
||||||
completion_tokens: 0,
|
completion_tokens: 0,
|
||||||
|
reasoning_tokens: 0,
|
||||||
total_tokens: 0,
|
total_tokens: 0,
|
||||||
cache_read_tokens: 0,
|
cache_read_tokens: 0,
|
||||||
cache_write_tokens: 0,
|
cache_write_tokens: 0,
|
||||||
|
|||||||
@@ -96,11 +96,12 @@ where
|
|||||||
// Spawn a background task to log the completion
|
// Spawn a background task to log the completion
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
// Use real usage from the provider when available, otherwise fall back to estimates
|
// Use real usage from the provider when available, otherwise fall back to estimates
|
||||||
let (prompt_tokens, completion_tokens, total_tokens, cache_read_tokens, cache_write_tokens) =
|
let (prompt_tokens, completion_tokens, reasoning_tokens, total_tokens, cache_read_tokens, cache_write_tokens) =
|
||||||
if let Some(usage) = &real_usage {
|
if let Some(usage) = &real_usage {
|
||||||
(
|
(
|
||||||
usage.prompt_tokens,
|
usage.prompt_tokens,
|
||||||
usage.completion_tokens,
|
usage.completion_tokens,
|
||||||
|
usage.reasoning_tokens,
|
||||||
usage.total_tokens,
|
usage.total_tokens,
|
||||||
usage.cache_read_tokens,
|
usage.cache_read_tokens,
|
||||||
usage.cache_write_tokens,
|
usage.cache_write_tokens,
|
||||||
@@ -109,6 +110,7 @@ where
|
|||||||
(
|
(
|
||||||
estimated_prompt_tokens,
|
estimated_prompt_tokens,
|
||||||
estimated_completion,
|
estimated_completion,
|
||||||
|
estimated_reasoning_tokens,
|
||||||
estimated_prompt_tokens + estimated_completion,
|
estimated_prompt_tokens + estimated_completion,
|
||||||
0u32,
|
0u32,
|
||||||
0u32,
|
0u32,
|
||||||
@@ -163,6 +165,7 @@ where
|
|||||||
model,
|
model,
|
||||||
prompt_tokens,
|
prompt_tokens,
|
||||||
completion_tokens,
|
completion_tokens,
|
||||||
|
reasoning_tokens,
|
||||||
total_tokens,
|
total_tokens,
|
||||||
cache_read_tokens,
|
cache_read_tokens,
|
||||||
cache_write_tokens,
|
cache_write_tokens,
|
||||||
|
|||||||
@@ -38,6 +38,24 @@ class LogsPage {
|
|||||||
const statusClass = log.status === 'success' ? 'success' : 'danger';
|
const statusClass = log.status === 'success' ? 'success' : 'danger';
|
||||||
const timestamp = luxon.DateTime.fromISO(log.timestamp).toFormat('yyyy-MM-dd HH:mm:ss');
|
const timestamp = luxon.DateTime.fromISO(log.timestamp).toFormat('yyyy-MM-dd HH:mm:ss');
|
||||||
|
|
||||||
|
let tokenDetails = `${log.tokens} total tokens`;
|
||||||
|
if (log.status === 'success') {
|
||||||
|
const parts = [];
|
||||||
|
parts.push(`${log.prompt_tokens} in`);
|
||||||
|
|
||||||
|
let completionStr = `${log.completion_tokens} out`;
|
||||||
|
if (log.reasoning_tokens > 0) {
|
||||||
|
completionStr += ` (${log.reasoning_tokens} reasoning)`;
|
||||||
|
}
|
||||||
|
parts.push(completionStr);
|
||||||
|
|
||||||
|
if (log.cache_read_tokens > 0) {
|
||||||
|
parts.push(`${log.cache_read_tokens} cache-hit`);
|
||||||
|
}
|
||||||
|
|
||||||
|
tokenDetails = parts.join(', ');
|
||||||
|
}
|
||||||
|
|
||||||
return `
|
return `
|
||||||
<tr class="log-row">
|
<tr class="log-row">
|
||||||
<td class="whitespace-nowrap">${timestamp}</td>
|
<td class="whitespace-nowrap">${timestamp}</td>
|
||||||
@@ -55,7 +73,7 @@ class LogsPage {
|
|||||||
<td>
|
<td>
|
||||||
<div class="log-message-container">
|
<div class="log-message-container">
|
||||||
<code class="log-model">${log.model}</code>
|
<code class="log-model">${log.model}</code>
|
||||||
<span class="log-tokens">${log.tokens} tokens</span>
|
<span class="log-tokens" title="${log.tokens} total tokens">${tokenDetails}</span>
|
||||||
<span class="log-duration">${log.duration}ms</span>
|
<span class="log-duration">${log.duration}ms</span>
|
||||||
${log.error ? `<div class="log-error-msg">${log.error}</div>` : ''}
|
${log.error ? `<div class="log-error-msg">${log.error}</div>` : ''}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
Reference in New Issue
Block a user