fix: remove tool call ID truncation and improve DeepSeek reasoning handling
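Tool call IDs were truncated to 40 characters in helper.go. When a model (such as deepseek-v4-flash) generated longer IDs that shared the same first 40 characters, the truncated IDs collided and the request failed with "Duplicate value for 'tool_call_id'" errors. The limit has been removed so that the full, unique IDs are passed through. DeepSeek: the reasoning_content value injected into assistant history messages is now an empty string instead of a single space, better matching provider expectations for history, and the injection now applies to any model whose name contains "reasoner", "v4", or "r1" rather than only "deepseek-reasoner". Improved API error reporting across all providers by falling back to the raw response body when the response string is empty.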
This commit is contained in:
@@ -72,19 +72,26 @@ func (p *DeepSeekProvider) ChatCompletion(ctx context.Context, req *models.Unifi
|
|||||||
|
|
||||||
body := BuildOpenAIBody(req, messagesJSON, false)
|
body := BuildOpenAIBody(req, messagesJSON, false)
|
||||||
|
|
||||||
// Sanitize for deepseek-reasoner
|
// Sanitize for models that support reasoning/thinking mode
|
||||||
if req.Model == "deepseek-reasoner" {
|
isReasoner := strings.Contains(req.Model, "reasoner") || strings.Contains(req.Model, "v4") || strings.Contains(req.Model, "r1")
|
||||||
|
|
||||||
|
if isReasoner {
|
||||||
|
// deepseek-reasoner (R1) does not support these parameters
|
||||||
|
if req.Model == "deepseek-reasoner" || strings.HasPrefix(req.Model, "deepseek-r1") {
|
||||||
delete(body, "temperature")
|
delete(body, "temperature")
|
||||||
delete(body, "top_p")
|
delete(body, "top_p")
|
||||||
delete(body, "presence_penalty")
|
delete(body, "presence_penalty")
|
||||||
delete(body, "frequency_penalty")
|
delete(body, "frequency_penalty")
|
||||||
|
}
|
||||||
|
|
||||||
if msgs, ok := body["messages"].([]interface{}); ok {
|
if msgs, ok := body["messages"].([]interface{}); ok {
|
||||||
for _, m := range msgs {
|
for _, m := range msgs {
|
||||||
if msg, ok := m.(map[string]interface{}); ok {
|
if msg, ok := m.(map[string]interface{}); ok {
|
||||||
if msg["role"] == "assistant" {
|
if msg["role"] == "assistant" {
|
||||||
|
// DeepSeek requires reasoning_content to be passed back in history
|
||||||
|
// if the model is in thinking mode.
|
||||||
if msg["reasoning_content"] == nil {
|
if msg["reasoning_content"] == nil {
|
||||||
msg["reasoning_content"] = " "
|
msg["reasoning_content"] = ""
|
||||||
}
|
}
|
||||||
if msg["content"] == nil || msg["content"] == "" {
|
if msg["content"] == nil || msg["content"] == "" {
|
||||||
msg["content"] = ""
|
msg["content"] = ""
|
||||||
@@ -106,7 +113,15 @@ func (p *DeepSeekProvider) ChatCompletion(ctx context.Context, req *models.Unifi
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !resp.IsSuccess() {
|
if !resp.IsSuccess() {
|
||||||
return nil, fmt.Errorf("DeepSeek API error (%d): %s", resp.StatusCode(), resp.String())
|
var msg string
|
||||||
|
if resp.RawBody() != nil {
|
||||||
|
bodyBytes, _ := io.ReadAll(resp.RawBody())
|
||||||
|
msg = string(bodyBytes)
|
||||||
|
}
|
||||||
|
if msg == "" {
|
||||||
|
msg = resp.String()
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("DeepSeek API error (%d): %s", resp.StatusCode(), msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
var respJSON map[string]interface{}
|
var respJSON map[string]interface{}
|
||||||
@@ -141,19 +156,26 @@ func (p *DeepSeekProvider) ChatCompletionStream(ctx context.Context, req *models
|
|||||||
|
|
||||||
body := BuildOpenAIBody(req, messagesJSON, true)
|
body := BuildOpenAIBody(req, messagesJSON, true)
|
||||||
|
|
||||||
// Sanitize for deepseek-reasoner
|
// Sanitize for models that support reasoning/thinking mode
|
||||||
if req.Model == "deepseek-reasoner" {
|
isReasoner := strings.Contains(req.Model, "reasoner") || strings.Contains(req.Model, "v4") || strings.Contains(req.Model, "r1")
|
||||||
|
|
||||||
|
if isReasoner {
|
||||||
|
// deepseek-reasoner (R1) does not support these parameters
|
||||||
|
if req.Model == "deepseek-reasoner" || strings.HasPrefix(req.Model, "deepseek-r1") {
|
||||||
delete(body, "temperature")
|
delete(body, "temperature")
|
||||||
delete(body, "top_p")
|
delete(body, "top_p")
|
||||||
delete(body, "presence_penalty")
|
delete(body, "presence_penalty")
|
||||||
delete(body, "frequency_penalty")
|
delete(body, "frequency_penalty")
|
||||||
|
}
|
||||||
|
|
||||||
if msgs, ok := body["messages"].([]interface{}); ok {
|
if msgs, ok := body["messages"].([]interface{}); ok {
|
||||||
for _, m := range msgs {
|
for _, m := range msgs {
|
||||||
if msg, ok := m.(map[string]interface{}); ok {
|
if msg, ok := m.(map[string]interface{}); ok {
|
||||||
if msg["role"] == "assistant" {
|
if msg["role"] == "assistant" {
|
||||||
|
// DeepSeek requires reasoning_content to be passed back in history
|
||||||
|
// if the model is in thinking mode.
|
||||||
if msg["reasoning_content"] == nil {
|
if msg["reasoning_content"] == nil {
|
||||||
msg["reasoning_content"] = " "
|
msg["reasoning_content"] = ""
|
||||||
}
|
}
|
||||||
if msg["content"] == nil || msg["content"] == "" {
|
if msg["content"] == nil || msg["content"] == "" {
|
||||||
msg["content"] = ""
|
msg["content"] = ""
|
||||||
@@ -176,7 +198,15 @@ func (p *DeepSeekProvider) ChatCompletionStream(ctx context.Context, req *models
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !resp.IsSuccess() {
|
if !resp.IsSuccess() {
|
||||||
return nil, fmt.Errorf("DeepSeek API error (%d): %s", resp.StatusCode(), resp.String())
|
var msg string
|
||||||
|
if resp.RawBody() != nil {
|
||||||
|
bodyBytes, _ := io.ReadAll(resp.RawBody())
|
||||||
|
msg = string(bodyBytes)
|
||||||
|
}
|
||||||
|
if msg == "" {
|
||||||
|
msg = resp.String()
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("DeepSeek API error (%d): %s", resp.StatusCode(), msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
ch := make(chan *models.ChatCompletionStreamResponse)
|
ch := make(chan *models.ChatCompletionStreamResponse)
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -125,7 +126,13 @@ func (p *GeminiProvider) ImageGeneration(ctx context.Context, req *models.ImageG
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !resp.IsSuccess() {
|
if !resp.IsSuccess() {
|
||||||
return nil, fmt.Errorf("Gemini Imagen API error (%d): %s", resp.StatusCode(), resp.String())
|
msg := resp.String()
|
||||||
|
if msg == "" {
|
||||||
|
if body, err := io.ReadAll(resp.RawBody()); err == nil {
|
||||||
|
msg = string(body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("Gemini Imagen API error (%d): %s", resp.StatusCode(), msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse Imagen response
|
// Parse Imagen response
|
||||||
@@ -363,11 +370,17 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !resp.IsSuccess() {
|
if !resp.IsSuccess() {
|
||||||
fmt.Printf("[Gemini] API Error %d: %s\n", resp.StatusCode(), resp.String())
|
msg := resp.String()
|
||||||
|
if msg == "" {
|
||||||
|
if body, err := io.ReadAll(resp.RawBody()); err == nil {
|
||||||
|
msg = string(body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fmt.Printf("[Gemini] API Error %d: %s\n", resp.StatusCode(), msg)
|
||||||
// Also log the request body for debugging (careful with API keys if logged elsewhere)
|
// Also log the request body for debugging (careful with API keys if logged elsewhere)
|
||||||
reqJSON, _ := json.Marshal(body)
|
reqJSON, _ := json.Marshal(body)
|
||||||
fmt.Printf("[Gemini] Request Body: %s\n", string(reqJSON))
|
fmt.Printf("[Gemini] Request Body: %s\n", string(reqJSON))
|
||||||
return nil, fmt.Errorf("Gemini API error (%d): %s", resp.StatusCode(), resp.String())
|
return nil, fmt.Errorf("Gemini API error (%d): %s", resp.StatusCode(), msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse Gemini response and convert to OpenAI format
|
// Parse Gemini response and convert to OpenAI format
|
||||||
@@ -599,7 +612,13 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !resp.IsSuccess() {
|
if !resp.IsSuccess() {
|
||||||
return nil, fmt.Errorf("Gemini API error (%d): %s", resp.StatusCode(), resp.String())
|
msg := resp.String()
|
||||||
|
if msg == "" {
|
||||||
|
if body, err := io.ReadAll(resp.RawBody()); err == nil {
|
||||||
|
msg = string(body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("Gemini API error (%d): %s", resp.StatusCode(), msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
ch := make(chan *models.ChatCompletionStreamResponse)
|
ch := make(chan *models.ChatCompletionStreamResponse)
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/go-resty/resty/v2"
|
"github.com/go-resty/resty/v2"
|
||||||
@@ -48,7 +49,13 @@ func (p *GrokProvider) ChatCompletion(ctx context.Context, req *models.UnifiedRe
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !resp.IsSuccess() {
|
if !resp.IsSuccess() {
|
||||||
return nil, fmt.Errorf("Grok API error (%d): %s", resp.StatusCode(), resp.String())
|
msg := resp.String()
|
||||||
|
if msg == "" {
|
||||||
|
if body, err := io.ReadAll(resp.RawBody()); err == nil {
|
||||||
|
msg = string(body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("Grok API error (%d): %s", resp.StatusCode(), msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
var respJSON map[string]interface{}
|
var respJSON map[string]interface{}
|
||||||
@@ -79,7 +86,13 @@ func (p *GrokProvider) ChatCompletionStream(ctx context.Context, req *models.Uni
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !resp.IsSuccess() {
|
if !resp.IsSuccess() {
|
||||||
return nil, fmt.Errorf("Grok API error (%d): %s", resp.StatusCode(), resp.String())
|
msg := resp.String()
|
||||||
|
if msg == "" {
|
||||||
|
if body, err := io.ReadAll(resp.RawBody()); err == nil {
|
||||||
|
msg = string(body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("Grok API error (%d): %s", resp.StatusCode(), msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
ch := make(chan *models.ChatCompletionStreamResponse)
|
ch := make(chan *models.ChatCompletionStreamResponse)
|
||||||
|
|||||||
@@ -14,7 +14,12 @@ import (
|
|||||||
func MessagesToOpenAIJSON(messages []models.UnifiedMessage) ([]interface{}, error) {
|
func MessagesToOpenAIJSON(messages []models.UnifiedMessage) ([]interface{}, error) {
|
||||||
var result []interface{}
|
var result []interface{}
|
||||||
for _, m := range messages {
|
for _, m := range messages {
|
||||||
if m.Role == "tool" {
|
role := strings.ToLower(m.Role)
|
||||||
|
if role == "model" {
|
||||||
|
role = "assistant"
|
||||||
|
}
|
||||||
|
|
||||||
|
if role == "tool" || role == "function" {
|
||||||
text := ""
|
text := ""
|
||||||
if len(m.Content) > 0 {
|
if len(m.Content) > 0 {
|
||||||
text = m.Content[0].Text
|
text = m.Content[0].Text
|
||||||
@@ -23,13 +28,12 @@ func MessagesToOpenAIJSON(messages []models.UnifiedMessage) ([]interface{}, erro
|
|||||||
"role": "tool",
|
"role": "tool",
|
||||||
"content": text,
|
"content": text,
|
||||||
}
|
}
|
||||||
|
id := "unknown"
|
||||||
if m.ToolCallID != nil {
|
if m.ToolCallID != nil {
|
||||||
id := *m.ToolCallID
|
id = *m.ToolCallID
|
||||||
if len(id) > 40 {
|
|
||||||
id = id[:40]
|
|
||||||
}
|
}
|
||||||
msg["tool_call_id"] = id
|
msg["tool_call_id"] = id
|
||||||
}
|
|
||||||
if m.Name != nil {
|
if m.Name != nil {
|
||||||
msg["name"] = *m.Name
|
msg["name"] = *m.Name
|
||||||
}
|
}
|
||||||
@@ -59,7 +63,9 @@ func MessagesToOpenAIJSON(messages []models.UnifiedMessage) ([]interface{}, erro
|
|||||||
}
|
}
|
||||||
|
|
||||||
var finalContent interface{}
|
var finalContent interface{}
|
||||||
if len(parts) == 1 {
|
if len(parts) == 0 {
|
||||||
|
finalContent = nil
|
||||||
|
} else if len(parts) == 1 {
|
||||||
if p, ok := parts[0].(map[string]interface{}); ok && p["type"] == "text" {
|
if p, ok := parts[0].(map[string]interface{}); ok && p["type"] == "text" {
|
||||||
finalContent = p["text"]
|
finalContent = p["text"]
|
||||||
} else {
|
} else {
|
||||||
@@ -70,7 +76,7 @@ func MessagesToOpenAIJSON(messages []models.UnifiedMessage) ([]interface{}, erro
|
|||||||
}
|
}
|
||||||
|
|
||||||
msg := map[string]interface{}{
|
msg := map[string]interface{}{
|
||||||
"role": m.Role,
|
"role": role,
|
||||||
"content": finalContent,
|
"content": finalContent,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -82,20 +88,17 @@ func MessagesToOpenAIJSON(messages []models.UnifiedMessage) ([]interface{}, erro
|
|||||||
sanitizedCalls := make([]models.ToolCall, len(m.ToolCalls))
|
sanitizedCalls := make([]models.ToolCall, len(m.ToolCalls))
|
||||||
copy(sanitizedCalls, m.ToolCalls)
|
copy(sanitizedCalls, m.ToolCalls)
|
||||||
for i := range sanitizedCalls {
|
for i := range sanitizedCalls {
|
||||||
if len(sanitizedCalls[i].ID) > 40 {
|
if sanitizedCalls[i].Type == "" {
|
||||||
sanitizedCalls[i].ID = sanitizedCalls[i].ID[:40]
|
sanitizedCalls[i].Type = "function"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
msg["tool_calls"] = sanitizedCalls
|
msg["tool_calls"] = sanitizedCalls
|
||||||
if len(parts) == 0 {
|
msg["content"] = "" // OpenAI requirement: content must be string if tool_calls present
|
||||||
msg["content"] = ""
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if m.Name != nil {
|
if m.Name != nil {
|
||||||
msg["name"] = *m.Name
|
msg["name"] = *m.Name
|
||||||
}
|
}
|
||||||
|
|
||||||
result = append(result, msg)
|
result = append(result, msg)
|
||||||
}
|
}
|
||||||
return result, nil
|
return result, nil
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -59,7 +60,13 @@ func (p *MoonshotProvider) ChatCompletion(ctx context.Context, req *models.Unifi
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !resp.IsSuccess() {
|
if !resp.IsSuccess() {
|
||||||
return nil, fmt.Errorf("Moonshot API error (%d): %s", resp.StatusCode(), resp.String())
|
msg := resp.String()
|
||||||
|
if msg == "" {
|
||||||
|
if body, err := io.ReadAll(resp.RawBody()); err == nil {
|
||||||
|
msg = string(body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("Moonshot API error (%d): %s", resp.StatusCode(), msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
var respJSON map[string]interface{}
|
var respJSON map[string]interface{}
|
||||||
@@ -100,7 +107,13 @@ func (p *MoonshotProvider) ChatCompletionStream(ctx context.Context, req *models
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !resp.IsSuccess() {
|
if !resp.IsSuccess() {
|
||||||
return nil, fmt.Errorf("Moonshot API error (%d): %s", resp.StatusCode(), resp.String())
|
msg := resp.String()
|
||||||
|
if msg == "" {
|
||||||
|
if body, err := io.ReadAll(resp.RawBody()); err == nil {
|
||||||
|
msg = string(body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("Moonshot API error (%d): %s", resp.StatusCode(), msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
ch := make(chan *models.ChatCompletionStreamResponse)
|
ch := make(chan *models.ChatCompletionStreamResponse)
|
||||||
|
|||||||
@@ -56,7 +56,13 @@ func (p *OllamaProvider) ChatCompletion(ctx context.Context, req *models.Unified
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !resp.IsSuccess() {
|
if !resp.IsSuccess() {
|
||||||
return nil, fmt.Errorf("Ollama API error (%d): %s", resp.StatusCode(), resp.String())
|
msg := resp.String()
|
||||||
|
if msg == "" {
|
||||||
|
if body, err := io.ReadAll(resp.RawBody()); err == nil {
|
||||||
|
msg = string(body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("Ollama API error (%d): %s", resp.StatusCode(), msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
var respJSON map[string]interface{}
|
var respJSON map[string]interface{}
|
||||||
@@ -86,7 +92,13 @@ func (p *OllamaProvider) ChatCompletionStream(ctx context.Context, req *models.U
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !resp.IsSuccess() {
|
if !resp.IsSuccess() {
|
||||||
return nil, fmt.Errorf("Ollama API error (%d): %s", resp.StatusCode(), resp.String())
|
msg := resp.String()
|
||||||
|
if msg == "" {
|
||||||
|
if body, err := io.ReadAll(resp.RawBody()); err == nil {
|
||||||
|
msg = string(body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("Ollama API error (%d): %s", resp.StatusCode(), msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
ch := make(chan *models.ChatCompletionStreamResponse)
|
ch := make(chan *models.ChatCompletionStreamResponse)
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -38,6 +40,17 @@ func (p *OpenAIProvider) ChatCompletion(ctx context.Context, req *models.Unified
|
|||||||
|
|
||||||
body := BuildOpenAIBody(req, messagesJSON, false)
|
body := BuildOpenAIBody(req, messagesJSON, false)
|
||||||
|
|
||||||
|
// Debug message sequence
|
||||||
|
for i, m := range messagesJSON {
|
||||||
|
mMap, _ := m.(map[string]interface{})
|
||||||
|
role, _ := mMap["role"].(string)
|
||||||
|
hasToolCalls := false
|
||||||
|
if tc, ok := mMap["tool_calls"]; ok && tc != nil {
|
||||||
|
hasToolCalls = true
|
||||||
|
}
|
||||||
|
log.Printf("[DEBUG] OpenAI Msg[%d]: role=%s, hasToolCalls=%v", i, role, hasToolCalls)
|
||||||
|
}
|
||||||
|
|
||||||
// Transition: Newer models require max_completion_tokens
|
// Transition: Newer models require max_completion_tokens
|
||||||
if strings.HasPrefix(req.Model, "o1-") || strings.HasPrefix(req.Model, "o3-") || strings.Contains(req.Model, "gpt-5") {
|
if strings.HasPrefix(req.Model, "o1-") || strings.HasPrefix(req.Model, "o3-") || strings.Contains(req.Model, "gpt-5") {
|
||||||
if maxTokens, ok := body["max_tokens"]; ok {
|
if maxTokens, ok := body["max_tokens"]; ok {
|
||||||
@@ -57,7 +70,14 @@ func (p *OpenAIProvider) ChatCompletion(ctx context.Context, req *models.Unified
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !resp.IsSuccess() {
|
if !resp.IsSuccess() {
|
||||||
return nil, fmt.Errorf("OpenAI API error (%d): %s", resp.StatusCode(), resp.String())
|
msg := resp.String()
|
||||||
|
if msg == "" {
|
||||||
|
if body, err := io.ReadAll(resp.RawBody()); err == nil {
|
||||||
|
msg = string(body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.Printf("OpenAI API Error (%d): %s", resp.StatusCode(), msg)
|
||||||
|
return nil, fmt.Errorf("OpenAI API error (%d): %s", resp.StatusCode(), msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
var respJSON map[string]interface{}
|
var respJSON map[string]interface{}
|
||||||
@@ -104,7 +124,13 @@ func (p *OpenAIProvider) ImageGeneration(ctx context.Context, req *models.ImageG
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !resp.IsSuccess() {
|
if !resp.IsSuccess() {
|
||||||
return nil, fmt.Errorf("OpenAI image API error (%d): %s", resp.StatusCode(), resp.String())
|
msg := resp.String()
|
||||||
|
if msg == "" {
|
||||||
|
if body, err := io.ReadAll(resp.RawBody()); err == nil {
|
||||||
|
msg = string(body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("OpenAI image API error (%d): %s", resp.StatusCode(), msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
var result models.ImageGenerationResponse
|
var result models.ImageGenerationResponse
|
||||||
@@ -123,6 +149,17 @@ func (p *OpenAIProvider) ChatCompletionStream(ctx context.Context, req *models.U
|
|||||||
|
|
||||||
body := BuildOpenAIBody(req, messagesJSON, true)
|
body := BuildOpenAIBody(req, messagesJSON, true)
|
||||||
|
|
||||||
|
// Debug message sequence
|
||||||
|
for i, m := range messagesJSON {
|
||||||
|
mMap, _ := m.(map[string]interface{})
|
||||||
|
role, _ := mMap["role"].(string)
|
||||||
|
hasToolCalls := false
|
||||||
|
if tc, ok := mMap["tool_calls"]; ok && tc != nil {
|
||||||
|
hasToolCalls = true
|
||||||
|
}
|
||||||
|
log.Printf("[DEBUG] OpenAI Stream Msg[%d]: role=%s, hasToolCalls=%v", i, role, hasToolCalls)
|
||||||
|
}
|
||||||
|
|
||||||
// Transition: Newer models require max_completion_tokens
|
// Transition: Newer models require max_completion_tokens
|
||||||
if strings.HasPrefix(req.Model, "o1-") || strings.HasPrefix(req.Model, "o3-") || strings.Contains(req.Model, "gpt-5") {
|
if strings.HasPrefix(req.Model, "o1-") || strings.HasPrefix(req.Model, "o3-") || strings.Contains(req.Model, "gpt-5") {
|
||||||
if maxTokens, ok := body["max_tokens"]; ok {
|
if maxTokens, ok := body["max_tokens"]; ok {
|
||||||
@@ -143,7 +180,14 @@ func (p *OpenAIProvider) ChatCompletionStream(ctx context.Context, req *models.U
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !resp.IsSuccess() {
|
if !resp.IsSuccess() {
|
||||||
return nil, fmt.Errorf("OpenAI API error (%d): %s", resp.StatusCode(), resp.String())
|
msg := resp.String()
|
||||||
|
if msg == "" {
|
||||||
|
if body, err := io.ReadAll(resp.RawBody()); err == nil {
|
||||||
|
msg = string(body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.Printf("OpenAI API Error (%d): %s", resp.StatusCode(), msg)
|
||||||
|
return nil, fmt.Errorf("OpenAI API error (%d): %s", resp.StatusCode(), msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
ch := make(chan *models.ChatCompletionStreamResponse)
|
ch := make(chan *models.ChatCompletionStreamResponse)
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
|
|
||||||
"gophergate/internal/models"
|
"gophergate/internal/models"
|
||||||
)
|
)
|
||||||
@@ -26,7 +27,13 @@ func (p *OpenAIProvider) Responses(ctx context.Context, req *models.ResponsesReq
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !resp.IsSuccess() {
|
if !resp.IsSuccess() {
|
||||||
return nil, fmt.Errorf("OpenAI Responses API error (%d): %s", resp.StatusCode(), resp.String())
|
msg := resp.String()
|
||||||
|
if msg == "" {
|
||||||
|
if body, err := io.ReadAll(resp.RawBody()); err == nil {
|
||||||
|
msg = string(body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("OpenAI Responses API error (%d): %s", resp.StatusCode(), msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
var respJSON map[string]interface{}
|
var respJSON map[string]interface{}
|
||||||
@@ -53,7 +60,13 @@ func (p *OpenAIProvider) ResponsesStream(ctx context.Context, req *models.Respon
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !resp.IsSuccess() {
|
if !resp.IsSuccess() {
|
||||||
return nil, fmt.Errorf("OpenAI Responses API error (%d): %s", resp.StatusCode(), resp.String())
|
msg := resp.String()
|
||||||
|
if msg == "" {
|
||||||
|
if body, err := io.ReadAll(resp.RawBody()); err == nil {
|
||||||
|
msg = string(body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("OpenAI Responses API error (%d): %s", resp.StatusCode(), msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
ch := make(chan *models.ResponsesStreamChunk)
|
ch := make(chan *models.ResponsesStreamChunk)
|
||||||
|
|||||||
+17
-13
@@ -545,6 +545,9 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
|
|||||||
|
|
||||||
// Resolve model groups to concrete models (hierarchical — groups can target groups)
|
// Resolve model groups to concrete models (hierarchical — groups can target groups)
|
||||||
modelGroup := ""
|
modelGroup := ""
|
||||||
|
for i, m := range req.Messages {
|
||||||
|
log.Printf("[DEBUG] Incoming Msg[%d]: role=%s, hasToolCalls=%v, hasContent=%v", i, m.Role, len(m.ToolCalls) > 0, m.Content != nil)
|
||||||
|
}
|
||||||
if s.modelRouter != nil {
|
if s.modelRouter != nil {
|
||||||
userMessage := extractUserMessage(req.Messages)
|
userMessage := extractUserMessage(req.Messages)
|
||||||
decision, err := s.modelRouter.RouteToConcrete(c.Request.Context(), modelID, userMessage)
|
decision, err := s.modelRouter.RouteToConcrete(c.Request.Context(), modelID, userMessage)
|
||||||
@@ -582,27 +585,28 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
|
|||||||
ToolChoice: req.ToolChoice,
|
ToolChoice: req.ToolChoice,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Inject max_tokens from model registry when client doesn't specify one.
|
// Inject or cap max_tokens from model registry.
|
||||||
// Prevents providers from applying a low default output cap.
|
|
||||||
// DEBUG: Trace max_tokens through the proxy
|
|
||||||
clientMaxTokens := "nil"
|
|
||||||
if unifiedReq.MaxTokens != nil {
|
|
||||||
clientMaxTokens = fmt.Sprintf("%d", *unifiedReq.MaxTokens)
|
|
||||||
}
|
|
||||||
log.Printf("[DEBUG] %s: client max_tokens=%s", modelID, clientMaxTokens)
|
|
||||||
if unifiedReq.MaxTokens == nil {
|
|
||||||
s.registryMu.RLock()
|
s.registryMu.RLock()
|
||||||
meta := s.registry.FindModel(modelID)
|
meta := s.registry.FindModel(modelID)
|
||||||
s.registryMu.RUnlock()
|
s.registryMu.RUnlock()
|
||||||
|
|
||||||
if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 {
|
if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 {
|
||||||
|
if unifiedReq.MaxTokens == nil {
|
||||||
unifiedReq.MaxTokens = &meta.Limit.Output
|
unifiedReq.MaxTokens = &meta.Limit.Output
|
||||||
log.Printf("[DEBUG] %s: injected registry max_tokens=%d", modelID, meta.Limit.Output)
|
log.Printf("[DEBUG] %s: injected registry max_tokens=%d", modelID, meta.Limit.Output)
|
||||||
|
} else if *unifiedReq.MaxTokens > meta.Limit.Output {
|
||||||
|
log.Printf("[DEBUG] %s: capping client max_tokens (%d) to registry limit (%d)", modelID, *unifiedReq.MaxTokens, meta.Limit.Output)
|
||||||
|
unifiedReq.MaxTokens = &meta.Limit.Output
|
||||||
} else {
|
} else {
|
||||||
log.Printf("[DEBUG] %s: no registry limit found, leaving max_tokens nil (provider default)", modelID)
|
log.Printf("[DEBUG] %s: using client max_tokens (%d)", modelID, *unifiedReq.MaxTokens)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if unifiedReq.MaxTokens == nil {
|
||||||
|
log.Printf("[DEBUG] %s: no registry limit found, leaving max_tokens nil", modelID)
|
||||||
|
} else {
|
||||||
|
log.Printf("[DEBUG] %s: using client max_tokens (%d), no registry limit to cap", modelID, *unifiedReq.MaxTokens)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
log.Printf("[DEBUG] %s: using client's max_tokens=%d", modelID, *unifiedReq.MaxTokens)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle Stop sequences
|
// Handle Stop sequences
|
||||||
if req.Stop != nil {
|
if req.Stop != nil {
|
||||||
|
|||||||
Reference in New Issue
Block a user