Compare commits

..

2 Commits

Author SHA1 Message Date
1067ceaecd style: remove unused sqlx import in server/mod.rs
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled
2026-03-05 18:45:25 +00:00
fc5d3ed636 refactor: unify authentication state and improve middleware efficiency
- Introduce AuthInfo struct for shared auth state.
- Populate AuthInfo in rate_limit_middleware and store in request extensions.
- Update AuthenticatedClient extractor to use pre-resolved AuthInfo.
- Simplify chat_completions by removing redundant DB lookups.
2026-03-05 18:44:41 +00:00
3 changed files with 57 additions and 49 deletions

View File

@@ -1,33 +1,40 @@
use axum::{extract::FromRequestParts, http::request::Parts}; use axum::{extract::FromRequestParts, http::request::Parts};
use axum_extra::TypedHeader;
use axum_extra::headers::Authorization;
use headers::authorization::Bearer;
use crate::errors::AppError; use crate::errors::AppError;
pub struct AuthenticatedClient { #[derive(Debug, Clone)]
pub struct AuthInfo {
pub token: String, pub token: String,
pub client_id: String, pub client_id: String,
} }
pub struct AuthenticatedClient {
pub info: AuthInfo,
}
impl<S> FromRequestParts<S> for AuthenticatedClient impl<S> FromRequestParts<S> for AuthenticatedClient
where where
S: Send + Sync, S: Send + Sync,
{ {
type Rejection = AppError; type Rejection = AppError;
async fn from_request_parts(parts: &mut Parts, state: &S) -> Result<Self, Self::Rejection> { async fn from_request_parts(parts: &mut Parts, _state: &S) -> Result<Self, Self::Rejection> {
// Extract bearer token from Authorization header // Retrieve AuthInfo from request extensions, where it was placed by rate_limit_middleware
let TypedHeader(Authorization(bearer)) = TypedHeader::<Authorization<Bearer>>::from_request_parts(parts, state) let info = parts
.await .extensions
.map_err(|_| AppError::AuthError("Missing or invalid bearer token".to_string()))?; .get::<AuthInfo>()
.cloned()
.ok_or_else(|| AppError::AuthError("Authentication info not found in request".to_string()))?;
let token = bearer.token().to_string(); Ok(AuthenticatedClient { info })
}
}
// Derive client_id from the token prefix impl std::ops::Deref for AuthenticatedClient {
let client_id = format!("client_{}", &token[..8.min(token.len())]); type Target = AuthInfo;
Ok(AuthenticatedClient { token, client_id }) fn deref(&self) -> &Self::Target {
&self.info
} }
} }

View File

@@ -299,6 +299,7 @@ pub mod middleware {
use super::*; use super::*;
use crate::errors::AppError; use crate::errors::AppError;
use crate::state::AppState; use crate::state::AppState;
use crate::auth::AuthInfo;
use axum::{ use axum::{
extract::{Request, State}, extract::{Request, State},
middleware::Next, middleware::Next,
@@ -309,20 +310,24 @@ pub mod middleware {
/// Rate limiting middleware /// Rate limiting middleware
pub async fn rate_limit_middleware( pub async fn rate_limit_middleware(
State(state): State<AppState>, State(state): State<AppState>,
request: Request, mut request: Request,
next: Next, next: Next,
) -> Result<Response, AppError> { ) -> Result<Response, AppError> {
// Extract token synchronously from headers (avoids holding &Request across await) // Extract token synchronously from headers (avoids holding &Request across await)
let token = extract_bearer_token(&request); let token = extract_bearer_token(&request);
// Resolve client_id: DB token lookup, then prefix fallback // Resolve client_id and populate AuthInfo: DB token lookup, then prefix fallback
let client_id = resolve_client_id(token, &state).await; let auth_info = resolve_auth_info(token, &state).await;
let client_id = auth_info.client_id.clone();
// Check rate limits // Check rate limits
if !state.rate_limit_manager.check_client_request(&client_id).await? { if !state.rate_limit_manager.check_client_request(&client_id).await? {
return Err(AppError::RateLimitError("Rate limit exceeded".to_string())); return Err(AppError::RateLimitError("Rate limit exceeded".to_string()));
} }
// Store AuthInfo in request extensions for extractors and downstream handlers
request.extensions_mut().insert(auth_info);
Ok(next.run(request).await) Ok(next.run(request).await)
} }
@@ -334,26 +339,39 @@ pub mod middleware {
.map(|t| t.to_string()) .map(|t| t.to_string())
} }
/// Resolve client ID: try DB token first, then fall back to token-prefix derivation /// Resolve auth info: try DB token first, then fall back to token-prefix derivation
async fn resolve_client_id(token: Option<String>, state: &AppState) -> String { async fn resolve_auth_info(token: Option<String>, state: &AppState) -> AuthInfo {
if let Some(token) = token { if let Some(token) = token {
// Try DB token lookup first // Try DB token lookup first
if let Ok(Some(cid)) = sqlx::query_scalar::<_, String>( match sqlx::query_scalar::<_, String>(
"SELECT client_id FROM client_tokens WHERE token = ? AND is_active = TRUE", "UPDATE client_tokens SET last_used_at = CURRENT_TIMESTAMP WHERE token = ? AND is_active = TRUE RETURNING client_id",
) )
.bind(&token) .bind(&token)
.fetch_optional(&state.db_pool) .fetch_optional(&state.db_pool)
.await .await
{ {
return cid; Ok(Some(cid)) => {
return AuthInfo {
token,
client_id: cid,
};
}
Err(e) => {
warn!("DB error during token lookup: {}", e);
}
_ => {}
} }
// Fallback to token-prefix derivation (env tokens / permissive mode) // Fallback to token-prefix derivation (env tokens / permissive mode)
return format!("client_{}", &token[..8.min(token.len())]); let client_id = format!("client_{}", &token[..8.min(token.len())]);
return AuthInfo { token, client_id };
} }
// No token — anonymous // No token — anonymous
"anonymous".to_string() AuthInfo {
token: String::new(),
client_id: "anonymous".to_string(),
}
} }
/// Circuit breaker middleware for provider requests /// Circuit breaker middleware for provider requests

View File

@@ -7,7 +7,6 @@ use axum::{
}; };
use futures::StreamExt; use futures::StreamExt;
use sqlx;
use std::sync::Arc; use std::sync::Arc;
use uuid::Uuid; use uuid::Uuid;
use tracing::{info, warn}; use tracing::{info, warn};
@@ -122,32 +121,16 @@ async fn chat_completions(
auth: AuthenticatedClient, auth: AuthenticatedClient,
Json(mut request): Json<ChatCompletionRequest>, Json(mut request): Json<ChatCompletionRequest>,
) -> Result<axum::response::Response, AppError> { ) -> Result<axum::response::Response, AppError> {
// Resolve client_id: try DB token first, then env tokens, then permissive fallback let client_id = auth.client_id.clone();
let db_client_id: Option<String> = sqlx::query_scalar::<_, String>( let token = auth.token.clone();
"SELECT client_id FROM client_tokens WHERE token = ? AND is_active = TRUE",
)
.bind(&auth.token)
.fetch_optional(&state.db_pool)
.await
.unwrap_or(None);
let client_id = if let Some(cid) = db_client_id { // Verify token if env tokens are configured
// Update last_used_at in background (fire-and-forget) if !state.auth_tokens.is_empty() && !state.auth_tokens.contains(&token) {
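let pool = state.db_pool.clone(); // Token is not one of the env tokens: accept it only if the middleware resolved it from the DB. A prefix-derived id always starts with "client_"; a DB-resolved id is assumed not to (TODO confirm). NOTE(review): the "anonymous" id used when no token is sent does not start with "client_" either, so this check does not reject it.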
let token = auth.token.clone(); if client_id.starts_with("client_") {
tokio::spawn(async move { return Err(AppError::AuthError("Invalid authentication token".to_string()));
let _ = sqlx::query("UPDATE client_tokens SET last_used_at = CURRENT_TIMESTAMP WHERE token = ?") }
.bind(&token) }
.execute(&pool)
.await;
});
cid
} else if state.auth_tokens.is_empty() || state.auth_tokens.contains(&auth.token) {
// Env token match or permissive mode (no env tokens configured)
auth.client_id.clone()
} else {
return Err(AppError::AuthError("Invalid authentication token".to_string()));
};
let start_time = std::time::Instant::now(); let start_time = std::time::Instant::now();
let model = request.model.clone(); let model = request.model.clone();