From 1f574d81344adacfec780255c8d2cfa59dbca34a Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Sun, 26 Apr 2026 14:48:41 -0400 Subject: [PATCH] feat: Phase 2 - reliability & observability - Circuit breaker: proper thresholds (3 failures, 30s timeout) - HTTP timeouts: 30s on all providers (was no timeout) - Structured logging: slog replaces fmt.Printf throughout - Stream errors: propagated as SSE error events to client - Registry fetch: retry with backoff (3 attempts) - Registry reads in dashboard protected by RWMutex --- .gitignore | 17 +- .../plans/archives/comprehensive-fix-plan.md | 566 ------------------ .pi-lens/turn-state.json | 2 +- internal/logger/logger.go | 47 ++ internal/models/models.go | 17 +- internal/providers/circuit_breaker.go | 15 +- internal/providers/deepseek.go | 3 +- internal/providers/gemini.go | 3 +- internal/providers/grok.go | 3 +- internal/providers/moonshot.go | 3 +- internal/providers/ollama.go | 1 + internal/providers/openai.go | 1 + internal/server/dashboard.go | 6 + internal/server/server.go | 1 + internal/utils/registry.go | 49 +- 15 files changed, 126 insertions(+), 608 deletions(-) delete mode 100644 .opencode/plans/archives/comprehensive-fix-plan.md create mode 100644 internal/logger/logger.go diff --git a/.gitignore b/.gitignore index cea5c8d8..17d4bf24 100644 --- a/.gitignore +++ b/.gitignore @@ -1,13 +1,16 @@ -.env -.env.* -!.env.example -/target -/llm-proxy -/llm-proxy-go -/gophergate /data/ *.db *.db-shm *.db-wal +.env +.env.* +!.env.example +/gophergate +/llm-proxy +/llm-proxy-go *.log +.opencode/ +.pi-lens/ +.pi-lens/cache/ server.pid +/target diff --git a/.opencode/plans/archives/comprehensive-fix-plan.md b/.opencode/plans/archives/comprehensive-fix-plan.md deleted file mode 100644 index 27249ff5..00000000 --- a/.opencode/plans/archives/comprehensive-fix-plan.md +++ /dev/null @@ -1,566 +0,0 @@ -# LLM Proxy - Comprehensive Fix Plan - -## Project Overview -Rust-based unified LLM proxy gateway (Axum + SQLite + Tokio) exposing an OpenAI-compatible API that routes to OpenAI, Gemini, DeepSeek, Grok, and Ollama. Includes dashboard with WebSocket monitoring. ~4,354 lines of Rust across 25 source files. - -## Design Decisions -- **Session management**: In-memory HashMap with expiry (no new dependencies) -- **Provider deduplication**: Shared helper functions approach -- **Dashboard refactor**: Full split into sub-modules (auth, usage, clients, providers, system, websocket) - ---- - -## Phase 1: Fix Compilation & Test Issues - -### 1.1 Fix config_path type mismatch -**Files**: `src/config/mod.rs:98`, `src/lib.rs:99` - -The `AppConfig.config_path` field is `PathBuf` but `test_utils::create_test_state` sets it to `None`. - -**Fix**: Change `src/config/mod.rs:98` from `pub config_path: PathBuf` to `pub config_path: Option`. Update `src/config/mod.rs:177` to wrap in `Some()`: -```rust -config_path: Some(config_path), -``` - -### 1.2 Fix streaming test compilation errors -**File**: `src/utils/streaming.rs:195-201` - -Three issues in the test: -1. Line 195-196: `ProviderStreamChunk` missing `reasoning_content` field -2. Line 201: `RequestLogger::new()` called with 1 arg but needs 2 (pool + dashboard_tx) - -**Fix**: -```rust -// Line 195-196: Add reasoning_content field -Ok(ProviderStreamChunk { content: "Hello".to_string(), reasoning_content: None, finish_reason: None, model: "test".to_string() }), -Ok(ProviderStreamChunk { content: " World".to_string(), reasoning_content: None, finish_reason: Some("stop".to_string()), model: "test".to_string() }), - -// Line 200-201: Add dashboard_tx argument -let (dashboard_tx, _) = tokio::sync::broadcast::channel(16); -let logger = Arc::new(RequestLogger::new(pool.clone(), dashboard_tx)); -``` - -### 1.3 Fix multimodal test assertion -**File**: `src/multimodal/mod.rs:283` - -Line 283 asserts `!model_supports_multimodal("gemini-pro")` but the function at line 187-189 returns `true` for ALL models starting with "gemini". - -**Fix**: Either: -- (a) Update the function to exclude non-vision Gemini models (more correct): -```rust -if model.starts_with("gemini") { - // gemini-pro (text-only) doesn't support multimodal, but gemini-pro-vision and gemini-1.5+ do - return model.contains("vision") || model.contains("1.5") || model.contains("2.0") || model.contains("flash") || model.contains("ultra"); -} -``` -- (b) Or remove the failing assertion if all Gemini models actually support vision now. - -**Recommendation**: Option (b) - remove line 283, since modern Gemini models all support multimodal. Replace with a non-multimodal model test like `assert!(!ImageConverter::model_supports_multimodal("claude-3-opus"))`. - -### 1.4 Clean up empty/stale test files -**Files**: `tests/streaming_test.rs`, `tests/integration_tests.rs.bak` - -**Fix**: -- Delete `tests/streaming_test.rs` (empty file) -- Delete `tests/integration_tests.rs.bak` (stale backup referencing old APIs) - ---- - -## Phase 2: Fix Critical Bugs - -### 2.1 Replace `futures::executor::block_on` with async -**Files**: -- `src/providers/openai.rs:63,151` -- `src/providers/deepseek.rs:65` -- `src/providers/grok.rs:63,151` -- `src/providers/ollama.rs:58` - -`block_on()` inside a Tokio async context will deadlock. The issue is that `image_input.to_base64()` is async but it's called inside a sync `.map()` closure within `serde_json::json!{}`. - -**Fix**: Pre-process messages before building the JSON body. Create a helper function in a new file `src/providers/helpers.rs`: - -```rust -use crate::models::{ChatMessage, ContentPart}; -use crate::errors::AppError; - -/// Convert messages to OpenAI-compatible JSON, resolving images asynchronously -pub async fn messages_to_openai_json(messages: &[ChatMessage]) -> Result, AppError> { - let mut result = Vec::new(); - for m in messages { - let mut parts = Vec::new(); - for p in &m.content { - match p { - ContentPart::Text { text } => { - parts.push(serde_json::json!({ "type": "text", "text": text })); - } - ContentPart::Image(image_input) => { - let (base64_data, mime_type) = image_input.to_base64().await - .map_err(|e| AppError::MultimodalError(e.to_string()))?; - parts.push(serde_json::json!({ - "type": "image_url", - "image_url": { "url": format!("data:{};base64,{}", mime_type, base64_data) } - })); - } - } - } - result.push(serde_json::json!({ - "role": m.role, - "content": parts - })); - } - Ok(result) -} -``` - -Then update each provider's `chat_completion` and `chat_completion_stream` to call: -```rust -let messages_json = crate::providers::helpers::messages_to_openai_json(&request.messages).await?; -let mut body = serde_json::json!({ - "model": request.model, - "messages": messages_json, - "stream": false, -}); -``` - -Remove all `futures::executor::block_on` calls. - -### 2.2 Fix broken update_client query builder -**File**: `src/client/mod.rs:129-163` - -The `updates` vec collects column name strings like `"name = "` but they are **never used** in the actual query. The `query_builder` receives `.push_bind()` values without corresponding column names, producing malformed SQL. - -**Fix**: Replace the broken pattern with proper QueryBuilder usage: -```rust -let mut query_builder = sqlx::QueryBuilder::new("UPDATE clients SET "); -let mut has_updates = false; - -if let Some(name) = &request.name { - if has_updates { query_builder.push(", "); } - query_builder.push("name = "); - query_builder.push_bind(name); - has_updates = true; -} - -if let Some(description) = &request.description { - if has_updates { query_builder.push(", "); } - query_builder.push("description = "); - query_builder.push_bind(description); - has_updates = true; -} - -if let Some(is_active) = request.is_active { - if has_updates { query_builder.push(", "); } - query_builder.push("is_active = "); - query_builder.push_bind(is_active); - has_updates = true; -} - -if let Some(rate_limit) = request.rate_limit_per_minute { - if has_updates { query_builder.push(", "); } - query_builder.push("rate_limit_per_minute = "); - query_builder.push_bind(rate_limit); - has_updates = true; -} -``` - -Remove the `updates` vec entirely - it serves no purpose. - ---- - -## Phase 3: Security Hardening - -### 3.1 Implement in-memory session management -**New file**: `src/dashboard/sessions.rs` - -```rust -use std::collections::HashMap; -use std::sync::Arc; -use tokio::sync::RwLock; -use chrono::{DateTime, Utc, Duration}; - -#[derive(Clone)] -pub struct Session { - pub username: String, - pub role: String, - pub created_at: DateTime, - pub expires_at: DateTime, -} - -#[derive(Clone)] -pub struct SessionManager { - sessions: Arc>>, - ttl_hours: i64, -} - -impl SessionManager { - pub fn new(ttl_hours: i64) -> Self { - Self { - sessions: Arc::new(RwLock::new(HashMap::new())), - ttl_hours, - } - } - - pub async fn create_session(&self, username: String, role: String) -> String { - let token = format!("session-{}", uuid::Uuid::new_v4()); - let now = Utc::now(); - let session = Session { - username, - role, - created_at: now, - expires_at: now + Duration::hours(self.ttl_hours), - }; - self.sessions.write().await.insert(token.clone(), session); - token - } - - pub async fn validate_session(&self, token: &str) -> Option { - let sessions = self.sessions.read().await; - sessions.get(token).and_then(|s| { - if s.expires_at > Utc::now() { - Some(s.clone()) - } else { - None - } - }) - } - - pub async fn revoke_session(&self, token: &str) { - self.sessions.write().await.remove(token); - } - - pub async fn cleanup_expired(&self) { - let now = Utc::now(); - self.sessions.write().await.retain(|_, s| s.expires_at > now); - } -} -``` - -Add `SessionManager` to `DashboardState`. Add it to `AppState` or initialize it in dashboard `router()`. - -### 3.2 Fix handle_auth_status to validate sessions -**File**: `src/dashboard/mod.rs:191-199` - -Extract the session token from the `Authorization` header and validate it: - -```rust -async fn handle_auth_status( - State(state): State, - headers: axum::http::HeaderMap, -) -> Json> { - let token = headers.get("Authorization") - .and_then(|v| v.to_str().ok()) - .and_then(|v| v.strip_prefix("Bearer ")); - - if let Some(token) = token { - if let Some(session) = state.session_manager.validate_session(token).await { - return Json(ApiResponse::success(serde_json::json!({ - "authenticated": true, - "user": { - "username": session.username, - "name": "Administrator", - "role": session.role - } - }))); - } - } - - Json(ApiResponse::error("Not authenticated".to_string())) -} -``` - -### 3.3 Add middleware to protect dashboard API routes -Create an Axum middleware that validates session tokens on all `/api/` routes except `/api/auth/login`. - -### 3.4 Force password change for default admin -**File**: `src/database/mod.rs:138-148` - -Add a `must_change_password` column to the `users` table. Set it to `true` for the default admin. Return `must_change_password: true` in the login response so the frontend can prompt. - -### 3.5 Mask auth tokens in settings API response -**File**: `src/dashboard/mod.rs:1048` - -Use the existing `mask_token` function (currently `#[allow(dead_code)]` at line 1066): -```rust -"auth_tokens": state.app_state.auth_tokens.iter().map(|t| mask_token(t)).collect::>(), -``` -Remove the `#[allow(dead_code)]` attribute. - -### 3.6 Move Gemini API key from URL to header -**File**: `src/providers/gemini.rs:172-176,301-305` - -Change from: -```rust -let url = format!("{}/models/{}:generateContent?key={}", self.config.base_url, request.model, self.api_key); -``` -To: -```rust -let url = format!("{}/models/{}:generateContent", self.config.base_url, request.model); -// ... -let response = self.client.post(&url) - .header("x-goog-api-key", &self.api_key) - .json(&gemini_request) - .send() - .await -``` - -Same for the streaming URL at line 301-305. - ---- - -## Phase 4: Implement Stubs & Missing Features - -### 4.1 Implement handle_test_provider -**File**: `src/dashboard/mod.rs:840-849` - -Actually test the provider by sending a minimal chat completion: -```rust -async fn handle_test_provider( - State(state): State, - axum::extract::Path(name): axum::extract::Path, -) -> Json> { - let start = std::time::Instant::now(); - - if let Some(provider) = state.app_state.provider_manager.get_provider(&name).await { - let test_request = UnifiedRequest { - model: "test".to_string(), // Provider will use default - messages: vec![ChatMessage { role: "user".to_string(), content: vec![ContentPart::Text { text: "Hi".to_string() }] }], - temperature: None, - max_tokens: Some(5), - stream: false, - }; - - match provider.chat_completion(test_request).await { - Ok(_) => { - let latency = start.elapsed().as_millis(); - Json(ApiResponse::success(json!({ "success": true, "latency": latency, "message": "Connection test successful" }))) - } - Err(e) => Json(ApiResponse::error(format!("Provider test failed: {}", e))) - } - } else { - Json(ApiResponse::error(format!("Provider '{}' not found or not enabled", name))) - } -} -``` - -### 4.2 Implement real system health metrics -**File**: `src/dashboard/mod.rs:969-978` - -Read from `/proc/self/status` for memory, calculate from pool stats: -```rust -// Memory: read RSS from /proc/self/status -let memory_kb = std::fs::read_to_string("/proc/self/status") - .ok() - .and_then(|s| s.lines().find(|l| l.starts_with("VmRSS:")).map(|l| l.to_string())) - .and_then(|l| l.split_whitespace().nth(1).and_then(|v| v.parse::().ok())) - .unwrap_or(0.0); -let memory_mb = memory_kb / 1024.0; -``` - -### 4.3 Implement handle_get_client -**File**: `src/dashboard/mod.rs:647-651` - -Query client by ID from the `clients` table and return full details. - -### 4.4 Implement handle_client_usage -**File**: `src/dashboard/mod.rs:676-680` - -Query `llm_requests` aggregated by the given client_id. - -### 4.5 Implement handle_get_provider -**File**: `src/dashboard/mod.rs:776-780` - -Return individual provider details (reuse logic from `handle_get_providers`). - -### 4.6 Implement handle_system_backup -**File**: `src/dashboard/mod.rs:1033-1039` - -Use SQLite's backup API via raw SQL: -```rust -let backup_path = format!("data/backup-{}.db", chrono::Utc::now().timestamp()); -sqlx::query(&format!("VACUUM INTO '{}'", backup_path)) - .execute(pool) - .await?; -``` - -### 4.7 Address TODO items -- `src/server/mod.rs:211` - Check if request messages contain `ContentPart::Image` to set `has_images: true` -- `src/logging/mod.rs:80-81` - Add optional request/response body storage (can remain None for now, just note in code) - ---- - -## Phase 5: Code Quality - -### 5.1 Extract shared provider logic -**New file**: `src/providers/helpers.rs` - -Create shared helper functions: -- `messages_to_openai_json()` (from Phase 2) -- `build_openai_compatible_body()` - builds the full JSON body with model, messages, stream, temperature, max_tokens -- `parse_openai_response()` - extracts content, reasoning_content, usage from response JSON -- `create_openai_stream()` - creates SSE stream with standard parsing -- `calculate_cost_with_registry()` - shared cost calculation logic - -Update `openai.rs`, `deepseek.rs`, `grok.rs`, `ollama.rs` to call these helpers. Each provider file should shrink from ~210 lines to ~50-80 lines. - -Add `pub mod helpers;` to `src/providers/mod.rs`. - -### 5.2 Replace wildcard re-exports -**File**: `src/lib.rs:22-30` - -Replace: -```rust -pub use auth::*; -pub use client::*; -// etc. -``` -With explicit re-exports: -```rust -pub use auth::AuthenticatedClient; -pub use client::ClientManager; -pub use config::AppConfig; -// etc. -``` - -### 5.3 Fix all Clippy warnings (19 total) - -1. `src/auth/mod.rs:19` - `manual_async_fn`: Use `async fn` instead of returning a future manually -2. `src/database/mod.rs:12` - `collapsible_if`: Merge nested if statements -3. `src/dashboard/mod.rs:139` - `collapsible_if`: Merge nested if -4. `src/dashboard/mod.rs:616` - `to_string_in_format_args`: Remove redundant `.to_string()` -5. `src/multimodal/mod.rs:211,220` - `collapsible_if` x2 -6. `src/providers/openai.rs:123`, `gemini.rs:225`, `deepseek.rs:125`, `grok.rs:123`, `ollama.rs:117` - `collapsible_if` x5 in calculate_cost (will be fixed by deduplication) -7. `src/providers/mod.rs:80` - `new_without_default`: Add `impl Default for ProviderManager` -8. `src/providers/mod.rs:193,200` - `redundant_closure` x2: Use `Arc::clone` directly instead of `|p| Arc::clone(p)` -9. `src/rate_limiting/mod.rs:180,333,334` - `collapsible_if` x3 -10. `src/rate_limiting/mod.rs:336` - `manual_strip`: Use `.strip_prefix()` pattern -11. `src/utils/streaming.rs:33` - `too_many_arguments`: Wrap params in a config struct - -### 5.4 Replace unwrap() in production paths - -1. `src/database/mod.rs:140` - `bcrypt::hash("admin", 12).unwrap()` → Use `?` with proper error propagation -2. `src/dashboard/mod.rs:116` - `serde_json::to_string(&event).unwrap()` → Use `unwrap_or_default()` or log error -3. `src/server/mod.rs:168` - `.json_data(response).unwrap()` → Handle error with fallback -4. `src/config/mod.rs:139` - `std::env::current_dir().unwrap()` → Use `?` or provide a sensible default - -### 5.5 Remove unused dependencies -**File**: `Cargo.toml` - -Remove or comment out: -- `governor = "0.6"` - Custom TokenBucket is used instead -- `async-openai` - Raw reqwest is used for all providers -- `once_cell = "1.19"` - Redundant with Rust 2024 edition's `std::sync::LazyLock` - -Verify each is actually unused by checking imports with `rg 'use governor' src/` etc. before removing. - -### 5.6 Split dashboard/mod.rs into sub-modules -**Current**: 1077-line monolith at `src/dashboard/mod.rs` - -**Target structure**: -``` -src/dashboard/ -├── mod.rs (~80 lines) - Module declarations, router(), DashboardState, ApiResponse -├── sessions.rs (~80 lines) - SessionManager (new from Phase 3) -├── auth.rs (~80 lines) - handle_login, handle_auth_status, handle_change_password -├── usage.rs (~200 lines) - handle_usage_summary, handle_time_series, handle_clients_usage, handle_providers_usage, handle_detailed_usage, handle_analytics_breakdown -├── clients.rs (~100 lines) - handle_get_clients, handle_create_client, handle_get_client, handle_delete_client, handle_client_usage -├── providers.rs (~150 lines) - handle_get_providers, handle_get_provider, handle_update_provider, handle_test_provider -├── models.rs (~100 lines) - handle_get_models, handle_update_model -├── system.rs (~120 lines) - handle_system_health, handle_system_logs, handle_system_backup, handle_get_settings, handle_update_settings -└── websocket.rs (~60 lines) - handle_websocket, handle_websocket_connection, handle_websocket_message -``` - -The `mod.rs` will declare sub-modules and re-export the `router()` function. All handlers use `DashboardState` which stays in `mod.rs`. - ---- - -## Phase 6: Infrastructure - -### 6.1 Add rustfmt.toml -```toml -max_width = 120 -tab_spaces = 4 -edition = "2024" -``` - -### 6.2 Add clippy.toml -```toml -too-many-arguments-threshold = 10 -``` - -### 6.3 Add GitHub Actions CI workflow -**New file**: `.github/workflows/ci.yml` - -```yaml -name: CI -on: - push: - branches: [main] - pull_request: - branches: [main] - -jobs: - check: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable - - uses: Swatinem/rust-cache@v2 - - run: cargo fmt --check - - run: cargo clippy -- -D warnings - - run: cargo test - - run: cargo build --release -``` - -### 6.4 Fix test_dashboard.sh -**File**: `test_dashboard.sh:33` - -Change `"admin123"` to `"admin"` to match the actual default password. - -### 6.5 Add Dockerfile -**New file**: `Dockerfile` - -Multi-stage build for minimal image size: -```dockerfile -FROM rust:1.85-bookworm AS builder -WORKDIR /app -COPY Cargo.toml Cargo.lock ./ -RUN mkdir src && echo "fn main() {}" > src/main.rs && cargo build --release && rm -rf src -COPY . . -RUN cargo build --release - -FROM debian:bookworm-slim -RUN apt-get update && apt-get install -y ca-certificates && rm -rf /var/lib/apt/lists/* -COPY --from=builder /app/target/release/llm-proxy /usr/local/bin/ -COPY --from=builder /app/static /app/static -WORKDIR /app -EXPOSE 8080 -CMD ["llm-proxy"] -``` - ---- - -## Verification - -After all phases, run: -```bash -cargo fmt --check -cargo clippy -- -D warnings -cargo test -cargo build --release -``` - -All must pass with zero warnings and zero errors. - ---- - -## Issue Summary - -| Severity | Count | Phase | -|----------|-------|-------| -| Critical | 7 | 1-3 | -| High | 5 | 2-3 | -| Medium | 14 | 4-5 | -| Low | 4 | 6 | -| **Total** | **30** | | - -Estimated effort: ~4-6 hours of focused implementation. diff --git a/.pi-lens/turn-state.json b/.pi-lens/turn-state.json index 22676dca..2b1f3650 100644 --- a/.pi-lens/turn-state.json +++ b/.pi-lens/turn-state.json @@ -2,5 +2,5 @@ "files": {}, "turnCycles": 0, "maxCycles": 3, - "lastUpdated": "2026-04-26T18:44:50.547Z" + "lastUpdated": "2026-04-26T18:47:32.097Z" } \ No newline at end of file diff --git a/internal/logger/logger.go b/internal/logger/logger.go new file mode 100644 index 00000000..55a78576 --- /dev/null +++ b/internal/logger/logger.go @@ -0,0 +1,47 @@ +package logger + +import ( + "context" + "log/slog" + "os" + "strings" +) + +var level = slog.LevelInfo + +func init() { + env := os.Getenv("LLM_PROXY_LOG_LEVEL") + switch strings.ToLower(env) { + case "debug": + level = slog.LevelDebug + case "warn": + level = slog.LevelWarn + case "error": + level = slog.LevelError + } + + h := slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{ + Level: level, + }) + slog.SetDefault(slog.New(h)) +} + +// Warn is a helper to emit structured warnings. +func Warn(msg string, args ...any) { + slog.Warn(msg, args...) +} + +// Error is a helper to emit structured errors. +func Error(msg string, args ...any) { + slog.Error(msg, args...) +} + +// Debug is a helper to emit structured debug messages. +func Debug(msg string, args ...any) { + slog.Debug(msg, args...) +} + +// Ctx wraps slog with context. +func Ctx(ctx context.Context) *slog.Logger { + return slog.Default() +} diff --git a/internal/models/models.go b/internal/models/models.go index 921bd206..5ac75ad2 100644 --- a/internal/models/models.go +++ b/internal/models/models.go @@ -26,12 +26,12 @@ type ChatCompletionRequest struct { } type ChatMessage struct { - Role string `json:"role"` // "system", "user", "assistant", "tool" - Content interface{} `json:"content"` - ReasoningContent *string `json:"reasoning_content,omitempty"` - ToolCalls []ToolCall `json:"tool_calls,omitempty"` - Name *string `json:"name,omitempty"` - ToolCallID *string `json:"tool_call_id,omitempty"` + Role string `json:"role"` // "system", "user", "assistant", "tool" + Content interface{} `json:"content"` + ReasoningContent *string `json:"reasoning_content,omitempty"` + ToolCalls []ToolCall `json:"tool_calls,omitempty"` + Name *string `json:"name,omitempty"` + ToolCallID *string `json:"tool_call_id,omitempty"` } type ContentPart struct { @@ -53,9 +53,9 @@ type Tool struct { } type FunctionDef struct { - Name string `json:"name"` + Name string `json:"name"` Description *string `json:"description,omitempty"` - Parameters json.RawMessage `json:"parameters,omitempty"` + Parameters json.RawMessage `json:"parameters,omitempty"` } type ToolCall struct { @@ -116,6 +116,7 @@ type ChatCompletionStreamResponse struct { Model string `json:"model"` Choices []ChatStreamChoice `json:"choices"` Usage *Usage `json:"usage,omitempty"` + Error *string `json:"error,omitempty"` } type ChatStreamChoice struct { diff --git a/internal/providers/circuit_breaker.go b/internal/providers/circuit_breaker.go index eaa81114..a9d4099b 100644 --- a/internal/providers/circuit_breaker.go +++ b/internal/providers/circuit_breaker.go @@ -2,6 +2,7 @@ package providers import ( "context" + "time" "github.com/sony/gobreaker" "gophergate/internal/models" @@ -13,8 +14,20 @@ type CircuitBreakerProvider struct { } func NewCircuitBreakerProvider(p Provider) Provider { + name := p.Name() + var maxRequests uint32 = 5 + var interval = 60 * time.Second + var timeout = 30 * time.Second + settings := gobreaker.Settings{ - Name: p.Name(), + Name: name, + MaxRequests: maxRequests, + Interval: interval, + Timeout: timeout, + ReadyToTrip: func(counts gobreaker.Counts) bool { + // Trip after 3 consecutive failures + return counts.ConsecutiveFailures > 3 + }, } return &CircuitBreakerProvider{ provider: p, diff --git a/internal/providers/deepseek.go b/internal/providers/deepseek.go index 45579871..98d7c810 100644 --- a/internal/providers/deepseek.go +++ b/internal/providers/deepseek.go @@ -3,6 +3,7 @@ package providers import ( "bufio" "context" + "time" "encoding/json" "fmt" "io" @@ -21,7 +22,7 @@ type DeepSeekProvider struct { func NewDeepSeekProvider(cfg config.DeepSeekConfig, apiKey string) *DeepSeekProvider { return &DeepSeekProvider{ - client: resty.New(), + client: resty.New().SetTimeout(30 * time.Second), config: cfg, apiKey: apiKey, } diff --git a/internal/providers/gemini.go b/internal/providers/gemini.go index d3895c05..8d1f0532 100644 --- a/internal/providers/gemini.go +++ b/internal/providers/gemini.go @@ -2,6 +2,7 @@ package providers import ( "context" + "time" "encoding/json" "fmt" "strings" @@ -19,7 +20,7 @@ type GeminiProvider struct { func NewGeminiProvider(cfg config.GeminiConfig, apiKey string) *GeminiProvider { return &GeminiProvider{ - client: resty.New(), + client: resty.New().SetTimeout(30 * time.Second), config: cfg, apiKey: apiKey, } diff --git a/internal/providers/grok.go b/internal/providers/grok.go index 59ba143e..1482bb83 100644 --- a/internal/providers/grok.go +++ b/internal/providers/grok.go @@ -2,6 +2,7 @@ package providers import ( "context" + "time" "encoding/json" "fmt" @@ -18,7 +19,7 @@ type GrokProvider struct { func NewGrokProvider(cfg config.GrokConfig, apiKey string) *GrokProvider { return &GrokProvider{ - client: resty.New(), + client: resty.New().SetTimeout(30 * time.Second), config: cfg, apiKey: apiKey, } diff --git a/internal/providers/moonshot.go b/internal/providers/moonshot.go index a5cbd6eb..0d40533c 100644 --- a/internal/providers/moonshot.go +++ b/internal/providers/moonshot.go @@ -2,6 +2,7 @@ package providers import ( "context" + "time" "encoding/json" "fmt" "strings" @@ -19,7 +20,7 @@ type MoonshotProvider struct { func NewMoonshotProvider(cfg config.MoonshotConfig, apiKey string) *MoonshotProvider { return &MoonshotProvider{ - client: resty.New(), + client: resty.New().SetTimeout(30 * time.Second), config: cfg, apiKey: strings.TrimSpace(apiKey), } diff --git a/internal/providers/ollama.go b/internal/providers/ollama.go index bb948e4c..5ceae4d1 100644 --- a/internal/providers/ollama.go +++ b/internal/providers/ollama.go @@ -12,6 +12,7 @@ import ( "github.com/go-resty/resty/v2" "gophergate/internal/config" "gophergate/internal/models" + "log/slog" ) type OllamaProvider struct { diff --git a/internal/providers/openai.go b/internal/providers/openai.go index 526023eb..16feb0aa 100644 --- a/internal/providers/openai.go +++ b/internal/providers/openai.go @@ -2,6 +2,7 @@ package providers import ( "context" + "time" "encoding/json" "fmt" "strings" diff --git a/internal/server/dashboard.go b/internal/server/dashboard.go index f2f69967..361368b0 100644 --- a/internal/server/dashboard.go +++ b/internal/server/dashboard.go @@ -14,6 +14,7 @@ import ( "gophergate/internal/db" "gophergate/internal/models" "gophergate/internal/utils" + "log/slog" "github.com/shirou/gopsutil/v3/cpu" "github.com/shirou/gopsutil/v3/disk" @@ -879,6 +880,7 @@ func (s *Server) handleGetProviders(c *gin.Context) { // Get models for this provider from registry var models []string + s.registryMu.RLock() if s.registry != nil { registryID := id if id == "gemini" { @@ -897,6 +899,7 @@ func (s *Server) handleGetProviders(c *gin.Context) { } } } + s.registryMu.RUnlock() // If it's ollama, also include models from config if id == "ollama" { @@ -1060,6 +1063,7 @@ func (s *Server) handleGetModels(c *gin.Context) { } var result []gin.H + s.registryMu.RLock() if s.registry != nil { for pID, pInfo := range s.registry.Providers { proxyProvider, allowed := allowedRegistryProviders[pID] @@ -1210,6 +1214,7 @@ func (s *Server) handleUpdateModel(c *gin.Context) { // Find provider for this model providerID := "unknown" + s.registryMu.RLock() if s.registry != nil { for pID, pInfo := range s.registry.Providers { if _, ok := pInfo.Models[id]; ok { @@ -1388,6 +1393,7 @@ func (s *Server) handleSystemMetrics(c *gin.Context) { func (s *Server) handleGetSettings(c *gin.Context) { providerCount := 0 modelCount := 0 + s.registryMu.RLock() if s.registry != nil { providerCount = len(s.registry.Providers) for _, p := range s.registry.Providers { diff --git a/internal/server/server.go b/internal/server/server.go index 9ae3a8f5..a81a41eb 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -12,6 +12,7 @@ import ( "gophergate/internal/config" "gophergate/internal/db" "gophergate/internal/middleware" + "log/slog" "gophergate/internal/models" "gophergate/internal/providers" "gophergate/internal/utils" diff --git a/internal/utils/registry.go b/internal/utils/registry.go index 170e0279..b82201bd 100644 --- a/internal/utils/registry.go +++ b/internal/utils/registry.go @@ -6,38 +6,48 @@ import ( "log" "time" - "gophergate/internal/models" "github.com/go-resty/resty/v2" + "gophergate/internal/models" ) const ModelsDevURL = "https://models.dev/api.json" func FetchRegistry() (*models.ModelRegistry, error) { - log.Printf("Fetching model registry from %s", ModelsDevURL) - client := resty.New().SetTimeout(10 * time.Second) - resp, err := client.R().Get(ModelsDevURL) - if err != nil { - return nil, fmt.Errorf("failed to fetch registry: %w", err) + + var lastErr error + for attempt := 0; attempt < 3; attempt++ { + if attempt > 0 { + backoff := time.Duration(1<