feat(security): implement AES-256-GCM encryption for API keys and HMAC-signed session tokens

This commit introduces:
- AES-256-GCM encryption for LLM provider API keys in the database.
- HMAC-SHA256 signed session tokens with activity-based refresh logic.
- Standardized frontend XSS protection using a global escapeHtml utility.
- Hardened security headers and request body size limits.
- Improved database integrity with foreign key enforcement and atomic transactions.
- Integration tests for the full encrypted key storage and proxy usage lifecycle.
2026-03-06 14:17:56 -05:00
parent 149a7c3a29
commit 9b8483e797
28 changed files with 1260 additions and 227 deletions
--- a/src/rate_limiting/mod.rs
+++ b/src/rate_limiting/mod.rs
@@ -6,12 +6,15 @@
 //! 3. Global rate limiting for overall system protection

 use anyhow::Result;
+use governor::{Quota, RateLimiter, DefaultDirectRateLimiter};
 use std::collections::HashMap;
+use std::num::NonZeroU32;
 use std::sync::Arc;
-use std::time::Instant;
 use tokio::sync::RwLock;
 use tracing::{info, warn};

+type GovRateLimiter = DefaultDirectRateLimiter;
+
 /// Rate limiter configuration
 #[derive(Debug, Clone)]
 pub struct RateLimiterConfig {
@@ -65,45 +68,7 @@ impl Default for CircuitBreakerConfig {
    }
 }

-/// Simple token bucket rate limiter for a single client
-#[derive(Debug)]
-struct TokenBucket {
-    tokens: f64,
-    capacity: f64,
-    refill_rate: f64, // tokens per second
-    last_refill: Instant,
-}

-impl TokenBucket {
-    fn new(capacity: f64, refill_rate: f64) -> Self {
-        Self {
-            tokens: capacity,
-            capacity,
-            refill_rate,
-            last_refill: Instant::now(),
-        }
-    }
-
-    fn refill(&mut self) {
-        let now = Instant::now();
-        let elapsed = now.duration_since(self.last_refill).as_secs_f64();
-        let new_tokens = elapsed * self.refill_rate;
-
-        self.tokens = (self.tokens + new_tokens).min(self.capacity);
-        self.last_refill = now;
-    }
-
-    fn try_acquire(&mut self, tokens: f64) -> bool {
-        self.refill();
-
-        if self.tokens >= tokens {
-            self.tokens -= tokens;
-            true
-        } else {
-            false
-        }
-    }
-}

 /// Circuit breaker for a provider
 #[derive(Debug)]
@@ -209,8 +174,8 @@ impl ProviderCircuitBreaker {
 /// Rate limiting and circuit breaking manager
 #[derive(Debug)]
 pub struct RateLimitManager {
-    client_buckets: Arc<RwLock<HashMap<String, TokenBucket>>>,
-    global_bucket: Arc<RwLock<TokenBucket>>,
+    client_buckets: Arc<RwLock<HashMap<String, GovRateLimiter>>>,
+    global_bucket: Arc<GovRateLimiter>,
    circuit_breakers: Arc<RwLock<HashMap<String, ProviderCircuitBreaker>>>,
    config: RateLimiterConfig,
    circuit_config: CircuitBreakerConfig,
@@ -218,15 +183,16 @@ pub struct RateLimitManager {

 impl RateLimitManager {
    pub fn new(config: RateLimiterConfig, circuit_config: CircuitBreakerConfig) -> Self {
-        // Convert requests per minute to tokens per second
-        let global_refill_rate = config.global_requests_per_minute as f64 / 60.0;
+        // Create global rate limiter quota
+        let global_quota = Quota::per_minute(
+            NonZeroU32::new(config.global_requests_per_minute).expect("global_requests_per_minute must be positive")
+        )
+        .allow_burst(NonZeroU32::new(config.burst_size).expect("burst_size must be positive"));
+        let global_bucket = RateLimiter::direct(global_quota);

        Self {
            client_buckets: Arc::new(RwLock::new(HashMap::new())),
-            global_bucket: Arc::new(RwLock::new(TokenBucket::new(
-                config.burst_size as f64,
-                global_refill_rate,
-            ))),
+            global_bucket: Arc::new(global_bucket),
            circuit_breakers: Arc::new(RwLock::new(HashMap::new())),
            config,
            circuit_config,
@@ -236,24 +202,22 @@ impl RateLimitManager {
    /// Check if a client request is allowed
    pub async fn check_client_request(&self, client_id: &str) -> Result<bool> {
        // Check global rate limit first (1 token per request)
-        {
-            let mut global_bucket = self.global_bucket.write().await;
-            if !global_bucket.try_acquire(1.0) {
-                warn!("Global rate limit exceeded");
-                return Ok(false);
-            }
+        if self.global_bucket.check().is_err() {
+            warn!("Global rate limit exceeded");
+            return Ok(false);
        }

        // Check client-specific rate limit
        let mut buckets = self.client_buckets.write().await;
        let bucket = buckets.entry(client_id.to_string()).or_insert_with(|| {
-            TokenBucket::new(
-                self.config.burst_size as f64,
-                self.config.requests_per_minute as f64 / 60.0,
+            let quota = Quota::per_minute(
+                NonZeroU32::new(self.config.requests_per_minute).expect("requests_per_minute must be positive")
            )
+            .allow_burst(NonZeroU32::new(self.config.burst_size).expect("burst_size must be positive"));
+            RateLimiter::direct(quota)
        });

-        Ok(bucket.try_acquire(1.0))
+        Ok(bucket.check().is_ok())
    }

    /// Check if provider requests are allowed (circuit breaker)