303 lines
13 KiB
Rust
303 lines
13 KiB
Rust
use async_trait::async_trait;
|
|
use anyhow::Result;
|
|
use async_openai::{Client, config::OpenAIConfig};
|
|
use async_openai::types::chat::{CreateChatCompletionRequestArgs, ChatCompletionRequestMessage, ChatCompletionRequestUserMessage, ChatCompletionRequestSystemMessage, ChatCompletionRequestAssistantMessage, ChatCompletionRequestUserMessageContent, ChatCompletionRequestSystemMessageContent, ChatCompletionRequestAssistantMessageContent};
|
|
use futures::stream::{BoxStream, StreamExt};
|
|
|
|
use crate::{
|
|
models::UnifiedRequest,
|
|
errors::AppError,
|
|
config::AppConfig,
|
|
};
|
|
use super::{ProviderResponse, ProviderStreamChunk};
|
|
|
|
pub struct DeepSeekProvider {
|
|
client: Client<OpenAIConfig>, // DeepSeek uses OpenAI-compatible API
|
|
_config: crate::config::DeepSeekConfig,
|
|
pricing: Vec<crate::config::ModelPricing>,
|
|
}
|
|
|
|
impl DeepSeekProvider {
|
|
pub fn new(config: &crate::config::DeepSeekConfig, app_config: &AppConfig) -> Result<Self> {
|
|
let api_key = app_config.get_api_key("deepseek")?;
|
|
|
|
// Create OpenAIConfig with api key and base url
|
|
let openai_config = OpenAIConfig::default()
|
|
.with_api_key(api_key)
|
|
.with_api_base(&config.base_url);
|
|
|
|
let client = Client::with_config(openai_config);
|
|
|
|
Ok(Self {
|
|
client,
|
|
_config: config.clone(),
|
|
pricing: app_config.pricing.deepseek.clone(),
|
|
})
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl super::Provider for DeepSeekProvider {
|
|
fn name(&self) -> &str {
|
|
"deepseek"
|
|
}
|
|
|
|
fn supports_model(&self, model: &str) -> bool {
|
|
model.starts_with("deepseek-") || model.contains("deepseek")
|
|
}
|
|
|
|
fn supports_multimodal(&self) -> bool {
|
|
false // DeepSeek doesn't support general vision (only OCR)
|
|
}
|
|
|
|
async fn chat_completion(
|
|
&self,
|
|
request: UnifiedRequest,
|
|
) -> Result<ProviderResponse, AppError> {
|
|
use async_openai::types::chat::{ChatCompletionRequestUserMessageContentPart, ChatCompletionRequestMessageContentPartText, ChatCompletionRequestMessageContentPartImage, ImageUrl, ImageDetail};
|
|
|
|
// Convert UnifiedRequest messages to OpenAI-compatible messages
|
|
let mut messages = Vec::with_capacity(request.messages.len());
|
|
|
|
for msg in request.messages {
|
|
let mut parts = Vec::with_capacity(msg.content.len());
|
|
|
|
for part in msg.content {
|
|
match part {
|
|
crate::models::ContentPart::Text { text } => {
|
|
parts.push(ChatCompletionRequestUserMessageContentPart::Text(ChatCompletionRequestMessageContentPartText {
|
|
text,
|
|
}));
|
|
}
|
|
crate::models::ContentPart::Image(image_input) => {
|
|
let (base64_data, mime_type) = image_input.to_base64().await
|
|
.map_err(|e| AppError::ProviderError(format!("Failed to convert image: {}", e)))?;
|
|
let data_url = format!("data:{};base64,{}", mime_type, base64_data);
|
|
|
|
parts.push(ChatCompletionRequestUserMessageContentPart::ImageUrl(ChatCompletionRequestMessageContentPartImage {
|
|
image_url: ImageUrl {
|
|
url: data_url,
|
|
detail: Some(ImageDetail::Auto),
|
|
}
|
|
}));
|
|
}
|
|
}
|
|
}
|
|
|
|
let message = match msg.role.as_str() {
|
|
"system" => ChatCompletionRequestMessage::System(
|
|
ChatCompletionRequestSystemMessage {
|
|
content: ChatCompletionRequestSystemMessageContent::Text(
|
|
parts.iter().filter_map(|p| if let ChatCompletionRequestUserMessageContentPart::Text(t) = p { Some(t.text.clone()) } else { None }).collect::<Vec<_>>().join("\n")
|
|
),
|
|
name: None,
|
|
}
|
|
),
|
|
"assistant" => ChatCompletionRequestMessage::Assistant(
|
|
ChatCompletionRequestAssistantMessage {
|
|
content: Some(ChatCompletionRequestAssistantMessageContent::Text(
|
|
parts.iter().filter_map(|p| if let ChatCompletionRequestUserMessageContentPart::Text(t) = p { Some(t.text.clone()) } else { None }).collect::<Vec<_>>().join("\n")
|
|
)),
|
|
name: None,
|
|
tool_calls: None,
|
|
refusal: None,
|
|
audio: None,
|
|
#[allow(deprecated)]
|
|
function_call: None,
|
|
}
|
|
),
|
|
_ => ChatCompletionRequestMessage::User(
|
|
ChatCompletionRequestUserMessage {
|
|
content: ChatCompletionRequestUserMessageContent::Array(parts),
|
|
name: None,
|
|
}
|
|
),
|
|
};
|
|
messages.push(message);
|
|
}
|
|
|
|
if messages.is_empty() {
|
|
return Err(AppError::ProviderError("No valid text messages to send".to_string()));
|
|
}
|
|
|
|
// Build request using builder pattern
|
|
let mut builder = CreateChatCompletionRequestArgs::default();
|
|
builder.model(request.model.clone());
|
|
builder.messages(messages);
|
|
|
|
// Add optional parameters
|
|
if let Some(temp) = request.temperature {
|
|
builder.temperature(temp as f32);
|
|
}
|
|
|
|
if let Some(max_tokens) = request.max_tokens {
|
|
builder.max_tokens(max_tokens as u16);
|
|
}
|
|
|
|
// Execute API call
|
|
let response = self.client
|
|
.chat()
|
|
.create(builder.build().map_err(|e| AppError::ProviderError(e.to_string()))?)
|
|
.await
|
|
.map_err(|e| AppError::ProviderError(e.to_string()))?;
|
|
|
|
// Extract content from response
|
|
let content = response
|
|
.choices
|
|
.first()
|
|
.and_then(|choice| choice.message.content.clone())
|
|
.unwrap_or_default();
|
|
|
|
// Extract token usage
|
|
let prompt_tokens = response.usage.as_ref().map(|u| u.prompt_tokens).unwrap_or(0) as u32;
|
|
let completion_tokens = response.usage.as_ref().map(|u| u.completion_tokens).unwrap_or(0) as u32;
|
|
let total_tokens = response.usage.as_ref().map(|u| u.total_tokens).unwrap_or(0) as u32;
|
|
|
|
Ok(ProviderResponse {
|
|
content,
|
|
prompt_tokens,
|
|
completion_tokens,
|
|
total_tokens,
|
|
model: request.model,
|
|
})
|
|
}
|
|
|
|
fn estimate_tokens(&self, request: &UnifiedRequest) -> Result<u32> {
|
|
Ok(crate::utils::tokens::estimate_request_tokens(&request.model, request))
|
|
}
|
|
|
|
fn calculate_cost(&self, model: &str, prompt_tokens: u32, completion_tokens: u32, registry: &crate::models::registry::ModelRegistry) -> f64 {
|
|
if let Some(metadata) = registry.find_model(model) {
|
|
if let Some(cost) = &metadata.cost {
|
|
return (prompt_tokens as f64 * cost.input / 1_000_000.0) +
|
|
(completion_tokens as f64 * cost.output / 1_000_000.0);
|
|
}
|
|
}
|
|
|
|
let (prompt_rate, completion_rate) = self.pricing.iter()
|
|
.find(|p| model.contains(&p.model))
|
|
.map(|p| (p.prompt_tokens_per_million, p.completion_tokens_per_million))
|
|
.unwrap_or((0.14, 0.28)); // Default to DeepSeek V3 price if not found
|
|
|
|
(prompt_tokens as f64 * prompt_rate / 1_000_000.0) + (completion_tokens as f64 * completion_rate / 1_000_000.0)
|
|
}
|
|
|
|
async fn chat_completion_stream(
|
|
&self,
|
|
request: UnifiedRequest,
|
|
) -> Result<BoxStream<'static, Result<ProviderStreamChunk, AppError>>, AppError> {
|
|
use async_openai::types::chat::{ChatCompletionRequestUserMessageContentPart, ChatCompletionRequestMessageContentPartText, ChatCompletionRequestMessageContentPartImage, ImageUrl, ImageDetail};
|
|
|
|
// Convert UnifiedRequest messages to OpenAI-compatible messages
|
|
let mut messages = Vec::with_capacity(request.messages.len());
|
|
|
|
for msg in request.messages {
|
|
let mut parts = Vec::with_capacity(msg.content.len());
|
|
|
|
for part in msg.content {
|
|
match part {
|
|
crate::models::ContentPart::Text { text } => {
|
|
parts.push(ChatCompletionRequestUserMessageContentPart::Text(ChatCompletionRequestMessageContentPartText {
|
|
text,
|
|
}));
|
|
}
|
|
crate::models::ContentPart::Image(image_input) => {
|
|
let (base64_data, mime_type) = image_input.to_base64().await
|
|
.map_err(|e| AppError::ProviderError(format!("Failed to convert image: {}", e)))?;
|
|
let data_url = format!("data:{};base64,{}", mime_type, base64_data);
|
|
|
|
parts.push(ChatCompletionRequestUserMessageContentPart::ImageUrl(ChatCompletionRequestMessageContentPartImage {
|
|
image_url: ImageUrl {
|
|
url: data_url,
|
|
detail: Some(ImageDetail::Auto),
|
|
}
|
|
}));
|
|
}
|
|
}
|
|
}
|
|
|
|
let message = match msg.role.as_str() {
|
|
"system" => ChatCompletionRequestMessage::System(
|
|
ChatCompletionRequestSystemMessage {
|
|
content: ChatCompletionRequestSystemMessageContent::Text(
|
|
parts.iter().filter_map(|p| if let ChatCompletionRequestUserMessageContentPart::Text(t) = p { Some(t.text.clone()) } else { None }).collect::<Vec<_>>().join("\n")
|
|
),
|
|
name: None,
|
|
}
|
|
),
|
|
"assistant" => ChatCompletionRequestMessage::Assistant(
|
|
ChatCompletionRequestAssistantMessage {
|
|
content: Some(ChatCompletionRequestAssistantMessageContent::Text(
|
|
parts.iter().filter_map(|p| if let ChatCompletionRequestUserMessageContentPart::Text(t) = p { Some(t.text.clone()) } else { None }).collect::<Vec<_>>().join("\n")
|
|
)),
|
|
name: None,
|
|
tool_calls: None,
|
|
refusal: None,
|
|
audio: None,
|
|
#[allow(deprecated)]
|
|
function_call: None,
|
|
}
|
|
),
|
|
_ => ChatCompletionRequestMessage::User(
|
|
ChatCompletionRequestUserMessage {
|
|
content: ChatCompletionRequestUserMessageContent::Array(parts),
|
|
name: None,
|
|
}
|
|
),
|
|
};
|
|
messages.push(message);
|
|
}
|
|
|
|
if messages.is_empty() {
|
|
return Err(AppError::ProviderError("No valid text messages to send".to_string()));
|
|
}
|
|
|
|
// Build request using builder pattern
|
|
let mut builder = CreateChatCompletionRequestArgs::default();
|
|
builder.model(request.model.clone());
|
|
builder.messages(messages);
|
|
builder.stream(true); // Enable streaming
|
|
|
|
// Add optional parameters
|
|
if let Some(temp) = request.temperature {
|
|
builder.temperature(temp as f32);
|
|
}
|
|
|
|
if let Some(max_tokens) = request.max_tokens {
|
|
builder.max_tokens(max_tokens as u16);
|
|
}
|
|
|
|
// Execute streaming API call
|
|
let stream = self.client
|
|
.chat()
|
|
.create_stream(builder.build().map_err(|e| AppError::ProviderError(e.to_string()))?)
|
|
.await
|
|
.map_err(|e| AppError::ProviderError(e.to_string()))?;
|
|
|
|
// Convert OpenAI stream to our stream format
|
|
let model = request.model.clone();
|
|
let stream = stream.map(move |chunk_result| {
|
|
match chunk_result {
|
|
Ok(chunk) => {
|
|
// Extract content from chunk
|
|
let content = chunk.choices.first()
|
|
.and_then(|choice| choice.delta.content.clone())
|
|
.unwrap_or_default();
|
|
|
|
let finish_reason = chunk.choices.first()
|
|
.and_then(|choice| choice.finish_reason.clone())
|
|
.map(|reason| format!("{:?}", reason));
|
|
|
|
Ok(ProviderStreamChunk {
|
|
content,
|
|
finish_reason,
|
|
model: model.clone(),
|
|
})
|
|
}
|
|
Err(e) => Err(AppError::ProviderError(e.to_string())),
|
|
}
|
|
});
|
|
|
|
Ok(Box::pin(stream))
|
|
}
|
|
} |