refactor: comprehensive audit — fix bugs, harden security, deduplicate providers, add CI/Docker
Phase 1: Fix compilation (config_path Option<PathBuf>, streaming test, stale test cleanup) Phase 2: Fix critical bugs (remove block_on deadlocks in 4 providers, fix broken SQL query builder) Phase 3: Security hardening (session manager, real auth, token masking, Gemini key to header, password policy) Phase 4: Implement stubs (real provider test, /proc health metrics, client/provider/backup endpoints, has_images) Phase 5: Code quality (shared provider helpers, explicit re-exports, all Clippy warnings fixed, unwrap removal, 6 unused deps removed, dashboard split into 7 sub-modules) Phase 6: Infrastructure (GitHub Actions CI, multi-stage Dockerfile, rustfmt.toml, clippy.toml, script fixes)
This commit is contained in:
193
src/dashboard/system.rs
Normal file
193
src/dashboard/system.rs
Normal file
@@ -0,0 +1,193 @@
|
||||
use axum::{extract::State, response::Json};
|
||||
use chrono;
|
||||
use serde_json;
|
||||
use sqlx::Row;
|
||||
use std::collections::HashMap;
|
||||
use tracing::warn;
|
||||
|
||||
use super::{ApiResponse, DashboardState};
|
||||
|
||||
pub(super) async fn handle_system_health(State(state): State<DashboardState>) -> Json<ApiResponse<serde_json::Value>> {
|
||||
let mut components = HashMap::new();
|
||||
components.insert("api_server".to_string(), "online".to_string());
|
||||
components.insert("database".to_string(), "online".to_string());
|
||||
|
||||
// Check provider health via circuit breakers
|
||||
let provider_ids: Vec<String> = state
|
||||
.app_state
|
||||
.provider_manager
|
||||
.get_all_providers()
|
||||
.await
|
||||
.iter()
|
||||
.map(|p| p.name().to_string())
|
||||
.collect();
|
||||
|
||||
for p_id in provider_ids {
|
||||
if state
|
||||
.app_state
|
||||
.rate_limit_manager
|
||||
.check_provider_request(&p_id)
|
||||
.await
|
||||
.unwrap_or(true)
|
||||
{
|
||||
components.insert(p_id, "online".to_string());
|
||||
} else {
|
||||
components.insert(p_id, "degraded".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
// Read real memory usage from /proc/self/status
|
||||
let memory_mb = std::fs::read_to_string("/proc/self/status")
|
||||
.ok()
|
||||
.and_then(|s| s.lines().find(|l| l.starts_with("VmRSS:")).map(|l| l.to_string()))
|
||||
.and_then(|l| l.split_whitespace().nth(1).and_then(|v| v.parse::<f64>().ok()))
|
||||
.map(|kb| kb / 1024.0)
|
||||
.unwrap_or(0.0);
|
||||
|
||||
// Get real database pool stats
|
||||
let db_pool_size = state.app_state.db_pool.size();
|
||||
let db_pool_idle = state.app_state.db_pool.num_idle();
|
||||
|
||||
Json(ApiResponse::success(serde_json::json!({
|
||||
"status": "healthy",
|
||||
"timestamp": chrono::Utc::now().to_rfc3339(),
|
||||
"components": components,
|
||||
"metrics": {
|
||||
"memory_usage_mb": (memory_mb * 10.0).round() / 10.0,
|
||||
"db_connections_active": db_pool_size - db_pool_idle as u32,
|
||||
"db_connections_idle": db_pool_idle,
|
||||
}
|
||||
})))
|
||||
}
|
||||
|
||||
pub(super) async fn handle_system_logs(State(state): State<DashboardState>) -> Json<ApiResponse<serde_json::Value>> {
|
||||
let pool = &state.app_state.db_pool;
|
||||
|
||||
let result = sqlx::query(
|
||||
r#"
|
||||
SELECT
|
||||
id,
|
||||
timestamp,
|
||||
client_id,
|
||||
provider,
|
||||
model,
|
||||
prompt_tokens,
|
||||
completion_tokens,
|
||||
total_tokens,
|
||||
cost,
|
||||
status,
|
||||
error_message,
|
||||
duration_ms
|
||||
FROM llm_requests
|
||||
ORDER BY timestamp DESC
|
||||
LIMIT 100
|
||||
"#,
|
||||
)
|
||||
.fetch_all(pool)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(rows) => {
|
||||
let logs: Vec<serde_json::Value> = rows
|
||||
.into_iter()
|
||||
.map(|row| {
|
||||
serde_json::json!({
|
||||
"id": row.get::<i64, _>("id"),
|
||||
"timestamp": row.get::<chrono::DateTime<chrono::Utc>, _>("timestamp"),
|
||||
"client_id": row.get::<String, _>("client_id"),
|
||||
"provider": row.get::<String, _>("provider"),
|
||||
"model": row.get::<String, _>("model"),
|
||||
"tokens": row.get::<i64, _>("total_tokens"),
|
||||
"cost": row.get::<f64, _>("cost"),
|
||||
"status": row.get::<String, _>("status"),
|
||||
"error": row.get::<Option<String>, _>("error_message"),
|
||||
"duration": row.get::<i64, _>("duration_ms"),
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
Json(ApiResponse::success(serde_json::json!(logs)))
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Failed to fetch system logs: {}", e);
|
||||
Json(ApiResponse::error("Failed to fetch system logs".to_string()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) async fn handle_system_backup(State(state): State<DashboardState>) -> Json<ApiResponse<serde_json::Value>> {
|
||||
let pool = &state.app_state.db_pool;
|
||||
let backup_id = format!("backup-{}", chrono::Utc::now().timestamp());
|
||||
let backup_path = format!("data/{}.db", backup_id);
|
||||
|
||||
// Ensure the data directory exists
|
||||
if let Err(e) = std::fs::create_dir_all("data") {
|
||||
return Json(ApiResponse::error(format!("Failed to create backup directory: {}", e)));
|
||||
}
|
||||
|
||||
// Use SQLite VACUUM INTO for a consistent backup
|
||||
let result = sqlx::query(&format!("VACUUM INTO '{}'", backup_path))
|
||||
.execute(pool)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(_) => {
|
||||
// Get backup file size
|
||||
let size_bytes = std::fs::metadata(&backup_path).map(|m| m.len()).unwrap_or(0);
|
||||
|
||||
Json(ApiResponse::success(serde_json::json!({
|
||||
"success": true,
|
||||
"message": "Backup completed successfully",
|
||||
"backup_id": backup_id,
|
||||
"backup_path": backup_path,
|
||||
"size_bytes": size_bytes,
|
||||
})))
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Database backup failed: {}", e);
|
||||
Json(ApiResponse::error(format!("Backup failed: {}", e)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) async fn handle_get_settings(State(state): State<DashboardState>) -> Json<ApiResponse<serde_json::Value>> {
|
||||
let registry = &state.app_state.model_registry;
|
||||
let provider_count = registry.providers.len();
|
||||
let model_count: usize = registry.providers.values().map(|p| p.models.len()).sum();
|
||||
|
||||
Json(ApiResponse::success(serde_json::json!({
|
||||
"server": {
|
||||
"auth_tokens": state.app_state.auth_tokens.iter().map(|t| mask_token(t)).collect::<Vec<_>>(),
|
||||
"version": env!("CARGO_PKG_VERSION"),
|
||||
},
|
||||
"registry": {
|
||||
"provider_count": provider_count,
|
||||
"model_count": model_count,
|
||||
},
|
||||
"database": {
|
||||
"type": "SQLite",
|
||||
}
|
||||
})))
|
||||
}
|
||||
|
||||
pub(super) async fn handle_update_settings(
|
||||
State(_state): State<DashboardState>,
|
||||
) -> Json<ApiResponse<serde_json::Value>> {
|
||||
Json(ApiResponse::error(
|
||||
"Changing settings at runtime is not yet supported. Please update your config file and restart the server."
|
||||
.to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
/// Mask an auth token for display: tokens of 8 characters or fewer become
/// `"*****"`; longer tokens keep their last 4 characters, preceded by
/// asterisks, with the total masked length capped at 12.
///
/// Operates on `char`s rather than bytes: the original sliced with
/// `&token[token.len() - 4..]`, which panics on a non-char-boundary byte
/// index when the token contains multi-byte UTF-8. Output is identical for
/// ASCII tokens.
fn mask_token(token: &str) -> String {
    let chars: Vec<char> = token.chars().collect();
    if chars.len() <= 8 {
        return "*****".to_string();
    }

    let visible_len = 4;
    // Cap the displayed length at 12 so very long tokens don't leak length.
    let mask_len = chars.len().min(12) - visible_len;
    let visible: String = chars[chars.len() - visible_len..].iter().collect();

    format!("{}{}", "*".repeat(mask_len), visible)
}
|
||||
Reference in New Issue
Block a user