477a811999
The 40-character truncation of tool call IDs in helper.go caused collisions when models (like deepseek-v4-flash) generated longer IDs, leading to "Duplicate value for 'tool_call_id'" errors. Removed the limit to allow full unique IDs. DeepSeek: updated reasoning_content injection to use an empty string instead of a space, better matching provider expectations for history. Improved API error reporting across all providers by capturing raw body content when response parsing fails or returns empty strings.
467 lines
12 KiB
Go
467 lines
12 KiB
Go
package providers
|
|
|
|
import (
|
|
"bufio"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"strings"
|
|
|
|
"gophergate/internal/models"
|
|
)
|
|
|
|
// MessagesToOpenAIJSON converts unified messages to OpenAI-compatible JSON, including tools and images.
|
|
func MessagesToOpenAIJSON(messages []models.UnifiedMessage) ([]interface{}, error) {
|
|
var result []interface{}
|
|
for _, m := range messages {
|
|
role := strings.ToLower(m.Role)
|
|
if role == "model" {
|
|
role = "assistant"
|
|
}
|
|
|
|
if role == "tool" || role == "function" {
|
|
text := ""
|
|
if len(m.Content) > 0 {
|
|
text = m.Content[0].Text
|
|
}
|
|
msg := map[string]interface{}{
|
|
"role": "tool",
|
|
"content": text,
|
|
}
|
|
id := "unknown"
|
|
if m.ToolCallID != nil {
|
|
id = *m.ToolCallID
|
|
}
|
|
msg["tool_call_id"] = id
|
|
|
|
if m.Name != nil {
|
|
msg["name"] = *m.Name
|
|
}
|
|
result = append(result, msg)
|
|
continue
|
|
}
|
|
|
|
var parts []interface{}
|
|
for _, p := range m.Content {
|
|
if p.Type == "text" {
|
|
parts = append(parts, map[string]interface{}{
|
|
"type": "text",
|
|
"text": p.Text,
|
|
})
|
|
} else if p.Image != nil {
|
|
base64Data, mimeType, err := p.Image.ToBase64()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to convert image to base64: %w", err)
|
|
}
|
|
parts = append(parts, map[string]interface{}{
|
|
"type": "image_url",
|
|
"image_url": map[string]interface{}{
|
|
"url": fmt.Sprintf("data:%s;base64,%s", mimeType, base64Data),
|
|
},
|
|
})
|
|
}
|
|
}
|
|
|
|
var finalContent interface{}
|
|
if len(parts) == 0 {
|
|
finalContent = nil
|
|
} else if len(parts) == 1 {
|
|
if p, ok := parts[0].(map[string]interface{}); ok && p["type"] == "text" {
|
|
finalContent = p["text"]
|
|
} else {
|
|
finalContent = parts
|
|
}
|
|
} else {
|
|
finalContent = parts
|
|
}
|
|
|
|
msg := map[string]interface{}{
|
|
"role": role,
|
|
"content": finalContent,
|
|
}
|
|
|
|
if m.ReasoningContent != nil {
|
|
msg["reasoning_content"] = *m.ReasoningContent
|
|
}
|
|
|
|
if len(m.ToolCalls) > 0 {
|
|
sanitizedCalls := make([]models.ToolCall, len(m.ToolCalls))
|
|
copy(sanitizedCalls, m.ToolCalls)
|
|
for i := range sanitizedCalls {
|
|
if sanitizedCalls[i].Type == "" {
|
|
sanitizedCalls[i].Type = "function"
|
|
}
|
|
}
|
|
msg["tool_calls"] = sanitizedCalls
|
|
msg["content"] = "" // OpenAI requirement: content must be string if tool_calls present
|
|
}
|
|
|
|
if m.Name != nil {
|
|
msg["name"] = *m.Name
|
|
}
|
|
result = append(result, msg)
|
|
}
|
|
return result, nil
|
|
}
|
|
|
|
func BuildOpenAIBody(request *models.UnifiedRequest, messagesJSON []interface{}, stream bool) map[string]interface{} {
|
|
body := map[string]interface{}{
|
|
"model": request.Model,
|
|
"messages": messagesJSON,
|
|
"stream": stream,
|
|
}
|
|
|
|
if stream {
|
|
body["stream_options"] = map[string]interface{}{
|
|
"include_usage": true,
|
|
}
|
|
}
|
|
|
|
if request.Temperature != nil {
|
|
body["temperature"] = *request.Temperature
|
|
}
|
|
if request.MaxTokens != nil {
|
|
body["max_tokens"] = *request.MaxTokens
|
|
}
|
|
if len(request.Tools) > 0 {
|
|
body["tools"] = request.Tools
|
|
}
|
|
if request.ToolChoice != nil {
|
|
var toolChoice interface{}
|
|
if err := json.Unmarshal(request.ToolChoice, &toolChoice); err == nil {
|
|
body["tool_choice"] = toolChoice
|
|
}
|
|
}
|
|
|
|
return body
|
|
}
|
|
|
|
// BuildOpenAIResponsesBody builds the request body for the Responses API endpoint.
|
|
func BuildOpenAIResponsesBody(req *models.ResponsesRequest, stream bool) map[string]interface{} {
|
|
body := map[string]interface{}{
|
|
"model": req.Model,
|
|
"stream": stream,
|
|
}
|
|
|
|
// The input field can be a string or a structured array.
|
|
// Try to preserve the original format.
|
|
if req.Input != nil {
|
|
// Try as string first
|
|
var inputStr string
|
|
if err := json.Unmarshal(req.Input, &inputStr); err == nil {
|
|
body["input"] = inputStr
|
|
} else {
|
|
// Try as array of messages
|
|
var inputArr []interface{}
|
|
if err := json.Unmarshal(req.Input, &inputArr); err == nil {
|
|
body["input"] = inputArr
|
|
}
|
|
}
|
|
}
|
|
|
|
if req.Instructions != "" {
|
|
body["instructions"] = req.Instructions
|
|
}
|
|
if req.Temperature != nil {
|
|
body["temperature"] = *req.Temperature
|
|
}
|
|
if req.MaxOutputTokens != nil {
|
|
body["max_output_tokens"] = *req.MaxOutputTokens
|
|
}
|
|
if req.TopP != nil {
|
|
body["top_p"] = *req.TopP
|
|
}
|
|
if req.Tools != nil {
|
|
var tools interface{}
|
|
if err := json.Unmarshal(req.Tools, &tools); err == nil {
|
|
body["tools"] = tools
|
|
}
|
|
}
|
|
if req.ToolChoice != nil {
|
|
var toolChoice interface{}
|
|
if err := json.Unmarshal(req.ToolChoice, &toolChoice); err == nil {
|
|
body["tool_choice"] = toolChoice
|
|
}
|
|
}
|
|
if req.Store != nil {
|
|
body["store"] = *req.Store
|
|
}
|
|
|
|
if stream {
|
|
body["stream_options"] = map[string]interface{}{
|
|
"include_usage": true,
|
|
}
|
|
}
|
|
|
|
return body
|
|
}
|
|
|
|
// ParseOpenAIResponsesResponse parses a raw JSON map into a ResponsesResponse.
|
|
func ParseOpenAIResponsesResponse(respJSON map[string]interface{}, model string) (*models.ResponsesResponse, error) {
|
|
data, err := json.Marshal(respJSON)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var resp models.ResponsesResponse
|
|
if err := json.Unmarshal(data, &resp); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Re-parse usage with the detailed tokens
|
|
if usageData, ok := respJSON["usage"]; ok {
|
|
var responsesUsage models.ResponsesUsage
|
|
usageBytes, _ := json.Marshal(usageData)
|
|
if err := json.Unmarshal(usageBytes, &responsesUsage); err == nil {
|
|
resp.Usage = &responsesUsage
|
|
}
|
|
}
|
|
|
|
return &resp, nil
|
|
}
|
|
|
|
// ParseOpenAIResponsesStreamChunk parses a single SSE line into a ResponsesStreamChunk.
|
|
// Returns the chunk, whether this is the [DONE] signal, and any error.
|
|
func ParseOpenAIResponsesStreamChunk(line string) (*models.ResponsesStreamChunk, bool, error) {
|
|
if line == "" {
|
|
return nil, false, nil
|
|
}
|
|
if !strings.HasPrefix(line, "data: ") {
|
|
return nil, false, nil
|
|
}
|
|
|
|
data := strings.TrimPrefix(line, "data: ")
|
|
if data == "[DONE]" {
|
|
return nil, true, nil
|
|
}
|
|
|
|
var chunk models.ResponsesStreamChunk
|
|
if err := json.Unmarshal([]byte(data), &chunk); err != nil {
|
|
return nil, false, fmt.Errorf("failed to unmarshal responses stream chunk: %w", err)
|
|
}
|
|
|
|
return &chunk, false, nil
|
|
}
|
|
|
|
// StreamOpenAIResponses reads SSE chunks from the body and sends them to the channel.
|
|
func StreamOpenAIResponses(ctx io.ReadCloser, ch chan<- *models.ResponsesStreamChunk) error {
|
|
defer ctx.Close()
|
|
scanner := bufio.NewScanner(ctx)
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
chunk, done, err := ParseOpenAIResponsesStreamChunk(line)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if done {
|
|
break
|
|
}
|
|
if chunk != nil {
|
|
ch <- chunk
|
|
}
|
|
}
|
|
return scanner.Err()
|
|
}
|
|
|
|
// openAIUsage is the decoding target for the "usage" object of
// OpenAI-compatible responses, including the optional detail objects that
// break down prompt and completion tokens.
type openAIUsage struct {
	// Top-level token counters.
	PromptTokens     uint32 `json:"prompt_tokens"`
	CompletionTokens uint32 `json:"completion_tokens"`
	TotalTokens      uint32 `json:"total_tokens"`

	// PromptTokensDetails stays nil when the response has no
	// "prompt_tokens_details" object.
	PromptTokensDetails *struct {
		CachedTokens uint32 `json:"cached_tokens"`
	} `json:"prompt_tokens_details"`

	// CompletionTokensDetails stays nil when the response has no
	// "completion_tokens_details" object.
	CompletionTokensDetails *struct {
		ReasoningTokens uint32 `json:"reasoning_tokens"`
	} `json:"completion_tokens_details"`
}
|
|
|
|
func (u *openAIUsage) ToUnified() *models.Usage {
|
|
usage := &models.Usage{
|
|
PromptTokens: u.PromptTokens,
|
|
CompletionTokens: u.CompletionTokens,
|
|
TotalTokens: u.TotalTokens,
|
|
}
|
|
if u.PromptTokensDetails != nil && u.PromptTokensDetails.CachedTokens > 0 {
|
|
usage.CacheReadTokens = &u.PromptTokensDetails.CachedTokens
|
|
}
|
|
if u.CompletionTokensDetails != nil && u.CompletionTokensDetails.ReasoningTokens > 0 {
|
|
usage.ReasoningTokens = &u.CompletionTokensDetails.ReasoningTokens
|
|
}
|
|
return usage
|
|
}
|
|
|
|
func ParseOpenAIResponse(respJSON map[string]interface{}, model string) (*models.ChatCompletionResponse, error) {
|
|
data, err := json.Marshal(respJSON)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var resp models.ChatCompletionResponse
|
|
if err := json.Unmarshal(data, &resp); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Manually fix usage because ChatCompletionResponse uses the unified Usage struct
|
|
// but the provider might have returned more details.
|
|
if usageData, ok := respJSON["usage"]; ok {
|
|
var oUsage openAIUsage
|
|
usageBytes, _ := json.Marshal(usageData)
|
|
if err := json.Unmarshal(usageBytes, &oUsage); err == nil {
|
|
resp.Usage = oUsage.ToUnified()
|
|
}
|
|
}
|
|
|
|
return &resp, nil
|
|
}
|
|
|
|
// Streaming support
|
|
|
|
func ParseOpenAIStreamChunk(line string) (*models.ChatCompletionStreamResponse, bool, error) {
|
|
if line == "" {
|
|
return nil, false, nil
|
|
}
|
|
if !strings.HasPrefix(line, "data: ") {
|
|
return nil, false, nil
|
|
}
|
|
|
|
data := strings.TrimPrefix(line, "data: ")
|
|
if data == "[DONE]" {
|
|
return nil, true, nil
|
|
}
|
|
|
|
var chunk models.ChatCompletionStreamResponse
|
|
if err := json.Unmarshal([]byte(data), &chunk); err != nil {
|
|
return nil, false, fmt.Errorf("failed to unmarshal stream chunk: %w", err)
|
|
}
|
|
|
|
// Handle specialized usage in stream chunks
|
|
var rawChunk struct {
|
|
Usage *openAIUsage `json:"usage"`
|
|
}
|
|
if err := json.Unmarshal([]byte(data), &rawChunk); err == nil && rawChunk.Usage != nil {
|
|
chunk.Usage = rawChunk.Usage.ToUnified()
|
|
}
|
|
|
|
return &chunk, false, nil
|
|
}
|
|
|
|
func StreamOpenAI(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamResponse) error {
|
|
defer ctx.Close()
|
|
scanner := bufio.NewScanner(ctx)
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
chunk, done, err := ParseOpenAIStreamChunk(line)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if done {
|
|
break
|
|
}
|
|
if chunk != nil {
|
|
ch <- chunk
|
|
}
|
|
}
|
|
return scanner.Err()
|
|
}
|
|
|
|
func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamResponse, model string) error {
|
|
defer ctx.Close()
|
|
|
|
dec := json.NewDecoder(ctx)
|
|
|
|
t, err := dec.Token()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if delim, ok := t.(json.Delim); ok && delim == '[' {
|
|
for dec.More() {
|
|
var geminiChunk struct {
|
|
Candidates []struct {
|
|
Content struct {
|
|
Parts []struct {
|
|
Text string `json:"text,omitempty"`
|
|
Thought string `json:"thought,omitempty"`
|
|
} `json:"parts"`
|
|
} `json:"content"`
|
|
FinishReason string `json:"finishReason"`
|
|
} `json:"candidates"`
|
|
UsageMetadata struct {
|
|
PromptTokenCount uint32 `json:"promptTokenCount"`
|
|
CandidatesTokenCount uint32 `json:"candidatesTokenCount"`
|
|
TotalTokenCount uint32 `json:"totalTokenCount"`
|
|
CachedContentTokenCount uint32 `json:"cachedContentTokenCount"`
|
|
} `json:"usageMetadata"`
|
|
}
|
|
|
|
if err := dec.Decode(&geminiChunk); err != nil {
|
|
return err
|
|
}
|
|
|
|
if len(geminiChunk.Candidates) > 0 || geminiChunk.UsageMetadata.TotalTokenCount > 0 {
|
|
content := ""
|
|
var reasoning *string
|
|
if len(geminiChunk.Candidates) > 0 {
|
|
for _, p := range geminiChunk.Candidates[0].Content.Parts {
|
|
if p.Text != "" {
|
|
content += p.Text
|
|
}
|
|
if p.Thought != "" {
|
|
if reasoning == nil {
|
|
reasoning = new(string)
|
|
}
|
|
*reasoning += p.Thought
|
|
}
|
|
}
|
|
}
|
|
|
|
var finishReason *string
|
|
if len(geminiChunk.Candidates) > 0 {
|
|
fr := strings.ToLower(geminiChunk.Candidates[0].FinishReason)
|
|
finishReason = &fr
|
|
}
|
|
|
|
ch <- &models.ChatCompletionStreamResponse{
|
|
ID: "gemini-stream",
|
|
Object: "chat.completion.chunk",
|
|
Created: 0,
|
|
Model: model,
|
|
Choices: []models.ChatStreamChoice{
|
|
{
|
|
Index: 0,
|
|
Delta: models.ChatStreamDelta{
|
|
Content: &content,
|
|
ReasoningContent: reasoning,
|
|
},
|
|
FinishReason: finishReason,
|
|
},
|
|
},
|
|
Usage: &models.Usage{
|
|
PromptTokens: geminiChunk.UsageMetadata.PromptTokenCount,
|
|
CompletionTokens: geminiChunk.UsageMetadata.CandidatesTokenCount,
|
|
TotalTokens: geminiChunk.UsageMetadata.TotalTokenCount,
|
|
CacheReadTokens: uint32Ptr(geminiChunk.UsageMetadata.CachedContentTokenCount),
|
|
},
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// ensureEnglish injects a system message instructing the model to respond in
|
|
// English when no system prompt is already present. Some providers (e.g. DeepSeek)
|
|
// default to Chinese for certain prompts.
|
|
func ensureEnglish(req *models.UnifiedRequest) {
|
|
if len(req.Messages) > 0 && req.Messages[0].Role == "system" {
|
|
return // already has a system prompt, don't interfere
|
|
}
|
|
enMsg := models.UnifiedMessage{
|
|
Role: "system",
|
|
Content: []models.UnifiedContentPart{
|
|
{Type: "text", Text: "You are a helpful assistant. Always respond in English."},
|
|
},
|
|
}
|
|
req.Messages = append([]models.UnifiedMessage{enMsg}, req.Messages...)
|
|
}
|