Compare commits
2 Commits
633b69a07b
...
4be23629d8
| Author | SHA1 | Date | |
|---|---|---|---|
| 4be23629d8 | |||
| dd54c14ff8 |
6
.env
6
.env
@@ -19,4 +19,8 @@ LLM_PROXY__SERVER__AUTH_TOKENS=demo-token-123456,another-token
|
|||||||
LLM_PROXY__SERVER__PORT=8080
|
LLM_PROXY__SERVER__PORT=8080
|
||||||
|
|
||||||
# Database path (optional)
|
# Database path (optional)
|
||||||
LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db
|
LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db
|
||||||
|
|
||||||
|
SESSION_SECRET=ki9khXAk9usDkasMrD2UbK4LOgrDRJz0
|
||||||
|
|
||||||
|
LLM_PROXY__ENCRYPTION_KEY=eac0239bfc402c7eb888366dd76c314288a8693efd5b7457819aeaf1fe429ac2
|
||||||
|
|||||||
BIN
data/backups/llm_proxy.db.20260303T204435Z
Normal file
BIN
data/backups/llm_proxy.db.20260303T204435Z
Normal file
Binary file not shown.
BIN
data/backups/llm_proxy.db.20260303T205057Z
Normal file
BIN
data/backups/llm_proxy.db.20260303T205057Z
Normal file
Binary file not shown.
Binary file not shown.
@@ -88,7 +88,7 @@ pub(super) async fn handle_create_client(
|
|||||||
headers: axum::http::HeaderMap,
|
headers: axum::http::HeaderMap,
|
||||||
Json(payload): Json<CreateClientRequest>,
|
Json(payload): Json<CreateClientRequest>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match super::auth::require_admin(&state, &headers).await {
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
@@ -199,7 +199,7 @@ pub(super) async fn handle_update_client(
|
|||||||
Path(id): Path<String>,
|
Path(id): Path<String>,
|
||||||
Json(payload): Json<UpdateClientPayload>,
|
Json(payload): Json<UpdateClientPayload>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match super::auth::require_admin(&state, &headers).await {
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
@@ -296,7 +296,7 @@ pub(super) async fn handle_delete_client(
|
|||||||
headers: axum::http::HeaderMap,
|
headers: axum::http::HeaderMap,
|
||||||
Path(id): Path<String>,
|
Path(id): Path<String>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match super::auth::require_admin(&state, &headers).await {
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
@@ -440,7 +440,7 @@ pub(super) async fn handle_create_client_token(
|
|||||||
Path(id): Path<String>,
|
Path(id): Path<String>,
|
||||||
Json(payload): Json<CreateTokenRequest>,
|
Json(payload): Json<CreateTokenRequest>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match super::auth::require_admin(&state, &headers).await {
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
@@ -489,7 +489,7 @@ pub(super) async fn handle_delete_client_token(
|
|||||||
headers: axum::http::HeaderMap,
|
headers: axum::http::HeaderMap,
|
||||||
Path((client_id, token_id)): Path<(String, i64)>,
|
Path((client_id, token_id)): Path<(String, i64)>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match super::auth::require_admin(&state, &headers).await {
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -156,7 +156,7 @@ pub(super) async fn handle_update_model(
|
|||||||
Path(id): Path<String>,
|
Path(id): Path<String>,
|
||||||
Json(payload): Json<UpdateModelRequest>,
|
Json(payload): Json<UpdateModelRequest>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match super::auth::require_admin(&state, &headers).await {
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -266,7 +266,7 @@ pub(super) async fn handle_update_provider(
|
|||||||
Path(name): Path<String>,
|
Path(name): Path<String>,
|
||||||
Json(payload): Json<UpdateProviderRequest>,
|
Json(payload): Json<UpdateProviderRequest>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match super::auth::require_admin(&state, &headers).await {
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -279,7 +279,7 @@ pub(super) async fn handle_system_backup(
|
|||||||
State(state): State<DashboardState>,
|
State(state): State<DashboardState>,
|
||||||
headers: axum::http::HeaderMap,
|
headers: axum::http::HeaderMap,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match super::auth::require_admin(&state, &headers).await {
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
@@ -342,7 +342,7 @@ pub(super) async fn handle_update_settings(
|
|||||||
State(state): State<DashboardState>,
|
State(state): State<DashboardState>,
|
||||||
headers: axum::http::HeaderMap,
|
headers: axum::http::HeaderMap,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match super::auth::require_admin(&state, &headers).await {
|
let (_session, _) = match super::auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ pub(super) async fn handle_get_users(
|
|||||||
State(state): State<DashboardState>,
|
State(state): State<DashboardState>,
|
||||||
headers: axum::http::HeaderMap,
|
headers: axum::http::HeaderMap,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match auth::require_admin(&state, &headers).await {
|
let (_session, _) = match auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
@@ -67,7 +67,7 @@ pub(super) async fn handle_create_user(
|
|||||||
headers: axum::http::HeaderMap,
|
headers: axum::http::HeaderMap,
|
||||||
Json(payload): Json<CreateUserRequest>,
|
Json(payload): Json<CreateUserRequest>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match auth::require_admin(&state, &headers).await {
|
let (_session, _) = match auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
@@ -149,7 +149,7 @@ pub(super) async fn handle_update_user(
|
|||||||
Path(id): Path<i64>,
|
Path(id): Path<i64>,
|
||||||
Json(payload): Json<UpdateUserRequest>,
|
Json(payload): Json<UpdateUserRequest>,
|
||||||
) -> Json<ApiResponse<serde_json::Value>> {
|
) -> Json<ApiResponse<serde_json::Value>> {
|
||||||
let (session, _) = match auth::require_admin(&state, &headers).await {
|
let (_session, _) = match auth::require_admin(&state, &headers).await {
|
||||||
Ok((session, new_token)) => (session, new_token),
|
Ok((session, new_token)) => (session, new_token),
|
||||||
Err(e) => return e,
|
Err(e) => return e,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -42,6 +42,16 @@ pub trait Provider: Send + Sync {
|
|||||||
request: UnifiedRequest,
|
request: UnifiedRequest,
|
||||||
) -> Result<BoxStream<'static, Result<ProviderStreamChunk, AppError>>, AppError>;
|
) -> Result<BoxStream<'static, Result<ProviderStreamChunk, AppError>>, AppError>;
|
||||||
|
|
||||||
|
/// Process a streaming chat request using provider-specific "responses" style endpoint
|
||||||
|
/// Default implementation falls back to `chat_completion_stream` for providers
|
||||||
|
/// that do not implement a dedicated responses endpoint.
|
||||||
|
async fn chat_responses_stream(
|
||||||
|
&self,
|
||||||
|
request: UnifiedRequest,
|
||||||
|
) -> Result<BoxStream<'static, Result<ProviderStreamChunk, AppError>>, AppError> {
|
||||||
|
self.chat_completion_stream(request).await
|
||||||
|
}
|
||||||
|
|
||||||
/// Estimate token count for a request (for cost calculation)
|
/// Estimate token count for a request (for cost calculation)
|
||||||
fn estimate_tokens(&self, request: &UnifiedRequest) -> Result<u32>;
|
fn estimate_tokens(&self, request: &UnifiedRequest) -> Result<u32>;
|
||||||
|
|
||||||
|
|||||||
@@ -45,7 +45,13 @@ impl super::Provider for OpenAIProvider {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn supports_model(&self, model: &str) -> bool {
|
fn supports_model(&self, model: &str) -> bool {
|
||||||
model.starts_with("gpt-") || model.starts_with("o1-") || model.starts_with("o3-") || model.starts_with("o4-") || model == "gpt-5-nano"
|
model.starts_with("gpt-") ||
|
||||||
|
model.starts_with("o1-") ||
|
||||||
|
model.starts_with("o2-") ||
|
||||||
|
model.starts_with("o3-") ||
|
||||||
|
model.starts_with("o4-") ||
|
||||||
|
model.starts_with("o5-") ||
|
||||||
|
model.contains("gpt-5")
|
||||||
}
|
}
|
||||||
|
|
||||||
fn supports_multimodal(&self) -> bool {
|
fn supports_multimodal(&self) -> bool {
|
||||||
@@ -53,6 +59,12 @@ impl super::Provider for OpenAIProvider {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn chat_completion(&self, request: UnifiedRequest) -> Result<ProviderResponse, AppError> {
|
async fn chat_completion(&self, request: UnifiedRequest) -> Result<ProviderResponse, AppError> {
|
||||||
|
// Allow proactive routing to Responses API based on heuristic
|
||||||
|
let model_lc = request.model.to_lowercase();
|
||||||
|
if model_lc.contains("gpt-5") || model_lc.contains("codex") {
|
||||||
|
return self.chat_responses(request).await;
|
||||||
|
}
|
||||||
|
|
||||||
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
|
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
|
||||||
let mut body = helpers::build_openai_body(&request, messages_json, false);
|
let mut body = helpers::build_openai_body(&request, messages_json, false);
|
||||||
|
|
||||||
@@ -383,4 +395,115 @@ impl super::Provider for OpenAIProvider {
|
|||||||
|
|
||||||
Ok(Box::pin(stream))
|
Ok(Box::pin(stream))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn chat_responses_stream(
|
||||||
|
&self,
|
||||||
|
request: UnifiedRequest,
|
||||||
|
) -> Result<BoxStream<'static, Result<ProviderStreamChunk, AppError>>, AppError> {
|
||||||
|
// Build a simple `input` string by concatenating message parts.
|
||||||
|
let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
|
||||||
|
let mut inputs: Vec<String> = Vec::new();
|
||||||
|
for m in &messages_json {
|
||||||
|
let role = m["role"].as_str().unwrap_or("");
|
||||||
|
let parts = m.get("content").and_then(|c| c.as_array()).cloned().unwrap_or_default();
|
||||||
|
let mut text_parts = Vec::new();
|
||||||
|
for p in parts {
|
||||||
|
if let Some(t) = p.get("text").and_then(|v| v.as_str()) {
|
||||||
|
text_parts.push(t.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
inputs.push(format!("{}: {}", role, text_parts.join("")));
|
||||||
|
}
|
||||||
|
let input_text = inputs.join("\n");
|
||||||
|
|
||||||
|
let body = serde_json::json!({
|
||||||
|
"model": request.model,
|
||||||
|
"input": input_text,
|
||||||
|
"stream": true
|
||||||
|
});
|
||||||
|
|
||||||
|
let url = format!("{}/responses", self.config.base_url);
|
||||||
|
let api_key = self.api_key.clone();
|
||||||
|
let model = request.model.clone();
|
||||||
|
|
||||||
|
let es = reqwest_eventsource::EventSource::new(
|
||||||
|
self.client
|
||||||
|
.post(&url)
|
||||||
|
.header("Authorization", format!("Bearer {}", api_key))
|
||||||
|
.json(&body),
|
||||||
|
)
|
||||||
|
.map_err(|e| AppError::ProviderError(format!("Failed to create EventSource for Responses API: {}", e)))?;
|
||||||
|
|
||||||
|
let stream = async_stream::try_stream! {
|
||||||
|
let mut es = es;
|
||||||
|
while let Some(event) = es.next().await {
|
||||||
|
match event {
|
||||||
|
Ok(reqwest_eventsource::Event::Message(msg)) => {
|
||||||
|
if msg.data == "[DONE]" {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let chunk: serde_json::Value = serde_json::from_str(&msg.data)
|
||||||
|
.map_err(|e| AppError::ProviderError(format!("Failed to parse Responses stream chunk: {}", e)))?;
|
||||||
|
|
||||||
|
// Try standard OpenAI parsing first
|
||||||
|
if let Some(p_chunk) = helpers::parse_openai_stream_chunk(&chunk, &model, None) {
|
||||||
|
yield p_chunk?;
|
||||||
|
} else {
|
||||||
|
// Responses API specific parsing for streaming
|
||||||
|
// Often it follows a similar structure to the non-streaming response but in chunks
|
||||||
|
let mut content = String::new();
|
||||||
|
|
||||||
|
// Check for output[0].content[0].text (similar to non-stream)
|
||||||
|
if let Some(output) = chunk.get("output").and_then(|o| o.as_array()) {
|
||||||
|
if let Some(first) = output.get(0) {
|
||||||
|
if let Some(contents) = first.get("content").and_then(|c| c.as_array()) {
|
||||||
|
for item in contents {
|
||||||
|
if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
|
||||||
|
content.push_str(text);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for candidates[0].content.parts[0].text (Gemini-like, which OpenAI sometimes uses for v1/responses)
|
||||||
|
if content.is_empty() {
|
||||||
|
if let Some(cands) = chunk.get("candidates").and_then(|c| c.as_array()) {
|
||||||
|
if let Some(c0) = cands.get(0) {
|
||||||
|
if let Some(content_obj) = c0.get("content") {
|
||||||
|
if let Some(parts) = content_obj.get("parts").and_then(|p| p.as_array()) {
|
||||||
|
for p in parts {
|
||||||
|
if let Some(t) = p.get("text").and_then(|v| v.as_str()) {
|
||||||
|
content.push_str(t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !content.is_empty() {
|
||||||
|
yield ProviderStreamChunk {
|
||||||
|
content,
|
||||||
|
reasoning_content: None,
|
||||||
|
finish_reason: None,
|
||||||
|
tool_calls: None,
|
||||||
|
model: model.clone(),
|
||||||
|
usage: None,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(_) => continue,
|
||||||
|
Err(e) => {
|
||||||
|
Err(AppError::ProviderError(format!("Responses stream error: {}", e)))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Box::pin(stream))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -226,7 +226,15 @@ async fn chat_completions(
|
|||||||
let prompt_tokens = crate::utils::tokens::estimate_request_tokens(&model, &unified_request);
|
let prompt_tokens = crate::utils::tokens::estimate_request_tokens(&model, &unified_request);
|
||||||
|
|
||||||
// Handle streaming response
|
// Handle streaming response
|
||||||
let stream_result = provider.chat_completion_stream(unified_request).await;
|
// Allow provider-specific routing for streaming too
|
||||||
|
let use_responses = provider.name() == "openai"
|
||||||
|
&& crate::utils::registry::model_prefers_responses(&state.model_registry, &unified_request.model);
|
||||||
|
|
||||||
|
let stream_result = if use_responses {
|
||||||
|
provider.chat_responses_stream(unified_request).await
|
||||||
|
} else {
|
||||||
|
provider.chat_completion_stream(unified_request).await
|
||||||
|
};
|
||||||
|
|
||||||
match stream_result {
|
match stream_result {
|
||||||
Ok(stream) => {
|
Ok(stream) => {
|
||||||
|
|||||||
Reference in New Issue
Block a user