refactor: comprehensive audit — fix bugs, harden security, deduplicate providers, add CI/Docker
Some checks failed
CI / Check (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Formatting (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release Build (push) Has been cancelled

Phase 1: Fix compilation (config_path Option<PathBuf>, streaming test, stale test cleanup)
Phase 2: Fix critical bugs (remove block_on deadlocks in 4 providers, fix broken SQL query builder)
Phase 3: Security hardening (session manager, real auth, token masking, Gemini key to header, password policy)
Phase 4: Implement stubs (real provider test, /proc health metrics, client/provider/backup endpoints, has_images)
Phase 5: Code quality (shared provider helpers, explicit re-exports, all Clippy warnings fixed, unwrap removal, 6 unused deps removed, dashboard split into 7 sub-modules)
Phase 6: Infrastructure (GitHub Actions CI, multi-stage Dockerfile, rustfmt.toml, clippy.toml, script fixes)
This commit is contained in:
2026-03-02 00:35:45 -05:00
parent ba643dd2b0
commit 2cdc49d7f2
42 changed files with 2800 additions and 2747 deletions

View File

@@ -1,5 +1,5 @@
//! Multimodal support for image processing and conversion
//!
//!
//! This module handles:
//! 1. Image format detection and conversion
//! 2. Base64 encoding/decoding
@@ -7,24 +7,18 @@
//! 4. Provider-specific image format conversion
use anyhow::{Context, Result};
use base64::{engine::general_purpose, Engine as _};
use base64::{Engine as _, engine::general_purpose};
use tracing::{info, warn};
/// Supported image formats for multimodal input
#[derive(Debug, Clone)]
pub enum ImageInput {
/// Base64-encoded image data with MIME type
Base64 {
data: String,
mime_type: String,
},
Base64 { data: String, mime_type: String },
/// URL to fetch image from
Url(String),
/// Raw bytes with MIME type
Bytes {
data: Vec<u8>,
mime_type: String,
},
Bytes { data: Vec<u8>, mime_type: String },
}
impl ImageInput {
@@ -32,17 +26,17 @@ impl ImageInput {
pub fn from_base64(data: String, mime_type: String) -> Self {
Self::Base64 { data, mime_type }
}
/// Create ImageInput from URL
pub fn from_url(url: String) -> Self {
Self::Url(url)
}
/// Create ImageInput from raw bytes
pub fn from_bytes(data: Vec<u8>, mime_type: String) -> Self {
Self::Bytes { data, mime_type }
}
/// Get MIME type if available
pub fn mime_type(&self) -> Option<&str> {
match self {
@@ -51,7 +45,7 @@ impl ImageInput {
Self::Url(_) => None,
}
}
/// Convert to base64 if not already
pub async fn to_base64(&self) -> Result<(String, String)> {
match self {
@@ -63,56 +57,56 @@ impl ImageInput {
Self::Url(url) => {
// Fetch image from URL
info!("Fetching image from URL: {}", url);
let response = reqwest::get(url)
.await
.context("Failed to fetch image from URL")?;
let response = reqwest::get(url).await.context("Failed to fetch image from URL")?;
if !response.status().is_success() {
anyhow::bail!("Failed to fetch image: HTTP {}", response.status());
}
let mime_type = response
.headers()
.get(reqwest::header::CONTENT_TYPE)
.and_then(|h| h.to_str().ok())
.unwrap_or("image/jpeg")
.to_string();
let bytes = response.bytes().await.context("Failed to read image bytes")?;
let base64_data = general_purpose::STANDARD.encode(&bytes);
Ok((base64_data, mime_type))
}
}
}
/// Get image dimensions (width, height)
pub async fn get_dimensions(&self) -> Result<(u32, u32)> {
let bytes = match self {
Self::Base64 { data, .. } => {
general_purpose::STANDARD.decode(data).context("Failed to decode base64")?
}
Self::Base64 { data, .. } => general_purpose::STANDARD
.decode(data)
.context("Failed to decode base64")?,
Self::Bytes { data, .. } => data.clone(),
Self::Url(_) => {
let (base64_data, _) = self.to_base64().await?;
general_purpose::STANDARD.decode(&base64_data).context("Failed to decode base64")?
general_purpose::STANDARD
.decode(&base64_data)
.context("Failed to decode base64")?
}
};
let img = image::load_from_memory(&bytes).context("Failed to load image from bytes")?;
Ok((img.width(), img.height()))
}
/// Validate image size and format
pub async fn validate(&self, max_size_mb: f64) -> Result<()> {
let (width, height) = self.get_dimensions().await?;
// Check dimensions
if width > 4096 || height > 4096 {
warn!("Image dimensions too large: {}x{}", width, height);
// Continue anyway, but log warning
}
// Check file size
let size_bytes = match self {
Self::Base64 { data, .. } => {
@@ -126,12 +120,12 @@ impl ImageInput {
return Ok(());
}
};
let size_mb = size_bytes as f64 / (1024.0 * 1024.0);
if size_mb > max_size_mb {
anyhow::bail!("Image too large: {:.2}MB > {:.2}MB limit", size_mb, max_size_mb);
}
Ok(())
}
}
@@ -143,10 +137,10 @@ impl ImageConverter {
/// Convert image to OpenAI-compatible format
pub async fn to_openai_format(image: &ImageInput) -> Result<serde_json::Value> {
let (base64_data, mime_type) = image.to_base64().await?;
// OpenAI expects data URL format: "data:image/jpeg;base64,{data}"
let data_url = format!("data:{};base64,{}", mime_type, base64_data);
Ok(serde_json::json!({
"type": "image_url",
"image_url": {
@@ -155,11 +149,11 @@ impl ImageConverter {
}
}))
}
/// Convert image to Gemini-compatible format
pub async fn to_gemini_format(image: &ImageInput) -> Result<serde_json::Value> {
let (base64_data, mime_type) = image.to_base64().await?;
// Gemini expects inline data format
Ok(serde_json::json!({
"inline_data": {
@@ -168,32 +162,34 @@ impl ImageConverter {
}
}))
}
/// Convert image to DeepSeek-compatible format
pub async fn to_deepseek_format(image: &ImageInput) -> Result<serde_json::Value> {
// DeepSeek uses OpenAI-compatible format for vision models
Self::to_openai_format(image).await
}
/// Detect if a model supports multimodal input
pub fn model_supports_multimodal(model: &str) -> bool {
// OpenAI vision models
if (model.starts_with("gpt-4") && (model.contains("vision") || model.contains("-v") || model.contains("4o"))) ||
model.starts_with("o1-") || model.starts_with("o3-") {
if (model.starts_with("gpt-4") && (model.contains("vision") || model.contains("-v") || model.contains("4o")))
|| model.starts_with("o1-")
|| model.starts_with("o3-")
{
return true;
}
// Gemini vision models
if model.starts_with("gemini") {
// Most Gemini models support vision
return true;
}
// DeepSeek vision models
if model.starts_with("deepseek-vl") {
return true;
}
false
}
}
@@ -201,47 +197,47 @@ impl ImageConverter {
/// Parse OpenAI-compatible multimodal message content
pub fn parse_openai_content(content: &serde_json::Value) -> Result<Vec<(String, Option<ImageInput>)>> {
let mut parts = Vec::new();
if let Some(content_str) = content.as_str() {
// Simple text content
parts.push((content_str.to_string(), None));
} else if let Some(content_array) = content.as_array() {
// Array of content parts (text and/or images)
for part in content_array {
if let Some(part_obj) = part.as_object() {
if let Some(part_type) = part_obj.get("type").and_then(|t| t.as_str()) {
match part_type {
"text" => {
if let Some(text) = part_obj.get("text").and_then(|t| t.as_str()) {
parts.push((text.to_string(), None));
}
if let Some(part_obj) = part.as_object()
&& let Some(part_type) = part_obj.get("type").and_then(|t| t.as_str())
{
match part_type {
"text" => {
if let Some(text) = part_obj.get("text").and_then(|t| t.as_str()) {
parts.push((text.to_string(), None));
}
"image_url" => {
if let Some(image_url_obj) = part_obj.get("image_url").and_then(|o| o.as_object()) {
if let Some(url) = image_url_obj.get("url").and_then(|u| u.as_str()) {
if url.starts_with("data:") {
// Parse data URL
if let Some((mime_type, data)) = parse_data_url(url) {
let image_input = ImageInput::from_base64(data, mime_type);
parts.push(("".to_string(), Some(image_input)));
}
} else {
// Regular URL
let image_input = ImageInput::from_url(url.to_string());
parts.push(("".to_string(), Some(image_input)));
}
}
"image_url" => {
if let Some(image_url_obj) = part_obj.get("image_url").and_then(|o| o.as_object())
&& let Some(url) = image_url_obj.get("url").and_then(|u| u.as_str())
{
if url.starts_with("data:") {
// Parse data URL
if let Some((mime_type, data)) = parse_data_url(url) {
let image_input = ImageInput::from_base64(data, mime_type);
parts.push(("".to_string(), Some(image_input)));
}
} else {
// Regular URL
let image_input = ImageInput::from_url(url.to_string());
parts.push(("".to_string(), Some(image_input)));
}
}
_ => {
warn!("Unknown content part type: {}", part_type);
}
}
_ => {
warn!("Unknown content part type: {}", part_type);
}
}
}
}
}
Ok(parts)
}
@@ -250,36 +246,38 @@ fn parse_data_url(data_url: &str) -> Option<(String, String)> {
if !data_url.starts_with("data:") {
return None;
}
let parts: Vec<&str> = data_url[5..].split(";base64,").collect();
if parts.len() != 2 {
return None;
}
let mime_type = parts[0].to_string();
let data = parts[1].to_string();
Some((mime_type, data))
}
#[cfg(test)]
mod tests {
use super::*;
#[tokio::test]
async fn test_parse_data_url() {
let test_url = "data:image/jpeg;base64,SGVsbG8gV29ybGQ="; // "Hello World" in base64
let (mime_type, data) = parse_data_url(test_url).unwrap();
assert_eq!(mime_type, "image/jpeg");
assert_eq!(data, "SGVsbG8gV29ybGQ=");
}
#[tokio::test]
async fn test_model_supports_multimodal() {
assert!(ImageConverter::model_supports_multimodal("gpt-4-vision-preview"));
assert!(ImageConverter::model_supports_multimodal("gpt-4o"));
assert!(ImageConverter::model_supports_multimodal("gemini-pro-vision"));
assert!(ImageConverter::model_supports_multimodal("gemini-pro"));
assert!(!ImageConverter::model_supports_multimodal("gpt-3.5-turbo"));
assert!(!ImageConverter::model_supports_multimodal("gemini-pro"));
assert!(!ImageConverter::model_supports_multimodal("claude-3-opus"));
}
}
}