fix: remove tool call ID truncation and improve DeepSeek reasoning handling

The 40-character truncation of tool call IDs in helper.go caused collisions when models (such as deepseek-v4-flash) generated longer IDs: two distinct IDs that share the same first 40 characters became identical after truncation, leading to "Duplicate value for 'tool_call_id'" errors. The limit has been removed so that full, unique IDs are passed through. For DeepSeek, the reasoning_content injected into assistant messages in the history is now an empty string instead of a single space, which better matches the provider's expectations for history. API error reporting has also been improved across all providers by capturing the raw body content when response parsing fails or returns an empty string.
2026-05-11 03:12:38 +00:00
parent d2b9da89d9
commit aeffeb8c03
11 changed files with 72990 additions and 60 deletions
@@ -72,17 +72,24 @@ func (p *DeepSeekProvider) ChatCompletion(ctx context.Context, req *models.Unifi
 	body := BuildOpenAIBody(req, messagesJSON, false)
-	// Sanitize for deepseek-reasoner
+	// Sanitize for models that support reasoning/thinking mode
-	if req.Model == "deepseek-reasoner" {
+	isReasoner := strings.Contains(req.Model, "reasoner") || strings.Contains(req.Model, "v4") || strings.Contains(req.Model, "r1")
 	if isReasoner {
 		// deepseek-reasoner (R1) does not support these parameters
 		if req.Model == "deepseek-reasoner" || strings.HasPrefix(req.Model, "deepseek-r1") {
 			delete(body, "temperature")
 			delete(body, "top_p")
 			delete(body, "presence_penalty")
 			delete(body, "frequency_penalty")
 		}
 		if msgs, ok := body["messages"].([]interface{}); ok {
 			for _, m := range msgs {
 				if msg, ok := m.(map[string]interface{}); ok {
 					if msg["role"] == "assistant" {
 						// DeepSeek requires reasoning_content to be passed back in history
 						// if the model is in thinking mode.
 						if msg["reasoning_content"] == nil {
 							msg["reasoning_content"] = ""
 						}
@@ -106,7 +113,15 @@ func (p *DeepSeekProvider) ChatCompletion(ctx context.Context, req *models.Unifi
 	}
 	if !resp.IsSuccess() {
-		return nil, fmt.Errorf("DeepSeek API error (%d): %s", resp.StatusCode(), resp.String())
+		var msg string
 		if resp.RawBody() != nil {
 			bodyBytes, _ := io.ReadAll(resp.RawBody())
 			msg = string(bodyBytes)
 		}
 		if msg == "" {
 			msg = resp.String()
 		}
 		return nil, fmt.Errorf("DeepSeek API error (%d): %s", resp.StatusCode(), msg)
 	}
 	var respJSON map[string]interface{}
@@ -141,17 +156,24 @@ func (p *DeepSeekProvider) ChatCompletionStream(ctx context.Context, req *models
 	body := BuildOpenAIBody(req, messagesJSON, true)
-	// Sanitize for deepseek-reasoner
+	// Sanitize for models that support reasoning/thinking mode
-	if req.Model == "deepseek-reasoner" {
+	isReasoner := strings.Contains(req.Model, "reasoner") || strings.Contains(req.Model, "v4") || strings.Contains(req.Model, "r1")
 	if isReasoner {
 		// deepseek-reasoner (R1) does not support these parameters
 		if req.Model == "deepseek-reasoner" || strings.HasPrefix(req.Model, "deepseek-r1") {
 			delete(body, "temperature")
 			delete(body, "top_p")
 			delete(body, "presence_penalty")
 			delete(body, "frequency_penalty")
 		}
 		if msgs, ok := body["messages"].([]interface{}); ok {
 			for _, m := range msgs {
 				if msg, ok := m.(map[string]interface{}); ok {
 					if msg["role"] == "assistant" {
 						// DeepSeek requires reasoning_content to be passed back in history
 						// if the model is in thinking mode.
 						if msg["reasoning_content"] == nil {
 							msg["reasoning_content"] = ""
 						}
@@ -176,7 +198,15 @@ func (p *DeepSeekProvider) ChatCompletionStream(ctx context.Context, req *models
 	}
 	if !resp.IsSuccess() {
-		return nil, fmt.Errorf("DeepSeek API error (%d): %s", resp.StatusCode(), resp.String())
+		var msg string
 		if resp.RawBody() != nil {
 			bodyBytes, _ := io.ReadAll(resp.RawBody())
 			msg = string(bodyBytes)
 		}
 		if msg == "" {
 			msg = resp.String()
 		}
 		return nil, fmt.Errorf("DeepSeek API error (%d): %s", resp.StatusCode(), msg)
 	}
 	ch := make(chan *models.ChatCompletionStreamResponse)
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"io"
 	"strings"
 	"time"
@@ -125,7 +126,13 @@ func (p *GeminiProvider) ImageGeneration(ctx context.Context, req *models.ImageG
 	}
 	if !resp.IsSuccess() {
-		return nil, fmt.Errorf("Gemini Imagen API error (%d): %s", resp.StatusCode(), resp.String())
+		msg := resp.String()
 		if msg == "" {
 			if body, err := io.ReadAll(resp.RawBody()); err == nil {
 				msg = string(body)
 			}
 		}
 		return nil, fmt.Errorf("Gemini Imagen API error (%d): %s", resp.StatusCode(), msg)
 	}
 	// Parse Imagen response
@@ -363,11 +370,17 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
 	}
 	if !resp.IsSuccess() {
-		fmt.Printf("[Gemini] API Error %d: %s\n", resp.StatusCode(), resp.String())
+		msg := resp.String()
 		if msg == "" {
 			if body, err := io.ReadAll(resp.RawBody()); err == nil {
 				msg = string(body)
 			}
 		}
 		fmt.Printf("[Gemini] API Error %d: %s\n", resp.StatusCode(), msg)
 		// Also log the request body for debugging (careful with API keys if logged elsewhere)
 		reqJSON, _ := json.Marshal(body)
 		fmt.Printf("[Gemini] Request Body: %s\n", string(reqJSON))
-		return nil, fmt.Errorf("Gemini API error (%d): %s", resp.StatusCode(), resp.String())
+		return nil, fmt.Errorf("Gemini API error (%d): %s", resp.StatusCode(), msg)
 	}
 	// Parse Gemini response and convert to OpenAI format
@@ -599,7 +612,13 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
 	}
 	if !resp.IsSuccess() {
-		return nil, fmt.Errorf("Gemini API error (%d): %s", resp.StatusCode(), resp.String())
+		msg := resp.String()
 		if msg == "" {
 			if body, err := io.ReadAll(resp.RawBody()); err == nil {
 				msg = string(body)
 			}
 		}
 		return nil, fmt.Errorf("Gemini API error (%d): %s", resp.StatusCode(), msg)
 	}
 	ch := make(chan *models.ChatCompletionStreamResponse)
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"io"
 	"time"
 	"github.com/go-resty/resty/v2"
@@ -48,7 +49,13 @@ func (p *GrokProvider) ChatCompletion(ctx context.Context, req *models.UnifiedRe
 	}
 	if !resp.IsSuccess() {
-		return nil, fmt.Errorf("Grok API error (%d): %s", resp.StatusCode(), resp.String())
+		msg := resp.String()
 		if msg == "" {
 			if body, err := io.ReadAll(resp.RawBody()); err == nil {
 				msg = string(body)
 			}
 		}
 		return nil, fmt.Errorf("Grok API error (%d): %s", resp.StatusCode(), msg)
 	}
 	var respJSON map[string]interface{}
@@ -79,7 +86,13 @@ func (p *GrokProvider) ChatCompletionStream(ctx context.Context, req *models.Uni
 	}
 	if !resp.IsSuccess() {
-		return nil, fmt.Errorf("Grok API error (%d): %s", resp.StatusCode(), resp.String())
+		msg := resp.String()
 		if msg == "" {
 			if body, err := io.ReadAll(resp.RawBody()); err == nil {
 				msg = string(body)
 			}
 		}
 		return nil, fmt.Errorf("Grok API error (%d): %s", resp.StatusCode(), msg)
 	}
 	ch := make(chan *models.ChatCompletionStreamResponse)
@@ -14,7 +14,12 @@ import (
 func MessagesToOpenAIJSON(messages []models.UnifiedMessage) ([]interface{}, error) {
 	var result []interface{}
 	for _, m := range messages {
-		if m.Role == "tool" {
+		role := strings.ToLower(m.Role)
 		if role == "model" {
 			role = "assistant"
 		}
 		if role == "tool" || role == "function" {
 			text := ""
 			if len(m.Content) > 0 {
 				text = m.Content[0].Text
@@ -23,13 +28,12 @@ func MessagesToOpenAIJSON(messages []models.UnifiedMessage) ([]interface{}, erro
 				"role":    "tool",
 				"content": text,
 			}
 			id := "unknown"
 			if m.ToolCallID != nil {
-				id := *m.ToolCallID
+				id = *m.ToolCallID
 				if len(id) > 40 {
 					id = id[:40]
 			}
 			msg["tool_call_id"] = id
-			}
+
 			if m.Name != nil {
 				msg["name"] = *m.Name
 			}
@@ -59,7 +63,9 @@ func MessagesToOpenAIJSON(messages []models.UnifiedMessage) ([]interface{}, erro
 		}
 		var finalContent interface{}
-		if len(parts) == 1 {
+		if len(parts) == 0 {
 			finalContent = nil
 		} else if len(parts) == 1 {
 			if p, ok := parts[0].(map[string]interface{}); ok && p["type"] == "text" {
 				finalContent = p["text"]
 			} else {
@@ -70,7 +76,7 @@ func MessagesToOpenAIJSON(messages []models.UnifiedMessage) ([]interface{}, erro
 		}
 		msg := map[string]interface{}{
-			"role":    m.Role,
+			"role":    role,
 			"content": finalContent,
 		}
@@ -82,20 +88,17 @@ func MessagesToOpenAIJSON(messages []models.UnifiedMessage) ([]interface{}, erro
 			sanitizedCalls := make([]models.ToolCall, len(m.ToolCalls))
 			copy(sanitizedCalls, m.ToolCalls)
 			for i := range sanitizedCalls {
-				if len(sanitizedCalls[i].ID) > 40 {
+				if sanitizedCalls[i].Type == "" {
-					sanitizedCalls[i].ID = sanitizedCalls[i].ID[:40]
+					sanitizedCalls[i].Type = "function"
 				}
 			}
 			msg["tool_calls"] = sanitizedCalls
-			if len(parts) == 0 {
+			msg["content"] = "" // OpenAI requirement: content must be string if tool_calls present
 				msg["content"] = ""
 			}
 		}
 		if m.Name != nil {
 			msg["name"] = *m.Name
 		}
 		result = append(result, msg)
 	}
 	return result, nil
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"io"
 	"strings"
 	"time"
@@ -59,7 +60,13 @@ func (p *MoonshotProvider) ChatCompletion(ctx context.Context, req *models.Unifi
 	}
 	if !resp.IsSuccess() {
-		return nil, fmt.Errorf("Moonshot API error (%d): %s", resp.StatusCode(), resp.String())
+		msg := resp.String()
 		if msg == "" {
 			if body, err := io.ReadAll(resp.RawBody()); err == nil {
 				msg = string(body)
 			}
 		}
 		return nil, fmt.Errorf("Moonshot API error (%d): %s", resp.StatusCode(), msg)
 	}
 	var respJSON map[string]interface{}
@@ -100,7 +107,13 @@ func (p *MoonshotProvider) ChatCompletionStream(ctx context.Context, req *models
 	}
 	if !resp.IsSuccess() {
-		return nil, fmt.Errorf("Moonshot API error (%d): %s", resp.StatusCode(), resp.String())
+		msg := resp.String()
 		if msg == "" {
 			if body, err := io.ReadAll(resp.RawBody()); err == nil {
 				msg = string(body)
 			}
 		}
 		return nil, fmt.Errorf("Moonshot API error (%d): %s", resp.StatusCode(), msg)
 	}
 	ch := make(chan *models.ChatCompletionStreamResponse)
@@ -56,7 +56,13 @@ func (p *OllamaProvider) ChatCompletion(ctx context.Context, req *models.Unified
 	}
 	if !resp.IsSuccess() {
-		return nil, fmt.Errorf("Ollama API error (%d): %s", resp.StatusCode(), resp.String())
+		msg := resp.String()
 		if msg == "" {
 			if body, err := io.ReadAll(resp.RawBody()); err == nil {
 				msg = string(body)
 			}
 		}
 		return nil, fmt.Errorf("Ollama API error (%d): %s", resp.StatusCode(), msg)
 	}
 	var respJSON map[string]interface{}
@@ -86,7 +92,13 @@ func (p *OllamaProvider) ChatCompletionStream(ctx context.Context, req *models.U
 	}
 	if !resp.IsSuccess() {
-		return nil, fmt.Errorf("Ollama API error (%d): %s", resp.StatusCode(), resp.String())
+		msg := resp.String()
 		if msg == "" {
 			if body, err := io.ReadAll(resp.RawBody()); err == nil {
 				msg = string(body)
 			}
 		}
 		return nil, fmt.Errorf("Ollama API error (%d): %s", resp.StatusCode(), msg)
 	}
 	ch := make(chan *models.ChatCompletionStreamResponse)
@@ -4,6 +4,8 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"io"
 	"log"
 	"strings"
 	"time"
@@ -38,6 +40,17 @@ func (p *OpenAIProvider) ChatCompletion(ctx context.Context, req *models.Unified
 	body := BuildOpenAIBody(req, messagesJSON, false)
 	// Debug message sequence
 	for i, m := range messagesJSON {
 		mMap, _ := m.(map[string]interface{})
 		role, _ := mMap["role"].(string)
 		hasToolCalls := false
 		if tc, ok := mMap["tool_calls"]; ok && tc != nil {
 			hasToolCalls = true
 		}
 		log.Printf("[DEBUG] OpenAI Msg[%d]: role=%s, hasToolCalls=%v", i, role, hasToolCalls)
 	}
 	// Transition: Newer models require max_completion_tokens
 	if strings.HasPrefix(req.Model, "o1-") || strings.HasPrefix(req.Model, "o3-") || strings.Contains(req.Model, "gpt-5") {
 		if maxTokens, ok := body["max_tokens"]; ok {
@@ -57,7 +70,14 @@ func (p *OpenAIProvider) ChatCompletion(ctx context.Context, req *models.Unified
 	}
 	if !resp.IsSuccess() {
-		return nil, fmt.Errorf("OpenAI API error (%d): %s", resp.StatusCode(), resp.String())
+		msg := resp.String()
 		if msg == "" {
 			if body, err := io.ReadAll(resp.RawBody()); err == nil {
 				msg = string(body)
 			}
 		}
 		log.Printf("OpenAI API Error (%d): %s", resp.StatusCode(), msg)
 		return nil, fmt.Errorf("OpenAI API error (%d): %s", resp.StatusCode(), msg)
 	}
 	var respJSON map[string]interface{}
@@ -104,7 +124,13 @@ func (p *OpenAIProvider) ImageGeneration(ctx context.Context, req *models.ImageG
 	}
 	if !resp.IsSuccess() {
-		return nil, fmt.Errorf("OpenAI image API error (%d): %s", resp.StatusCode(), resp.String())
+		msg := resp.String()
 		if msg == "" {
 			if body, err := io.ReadAll(resp.RawBody()); err == nil {
 				msg = string(body)
 			}
 		}
 		return nil, fmt.Errorf("OpenAI image API error (%d): %s", resp.StatusCode(), msg)
 	}
 	var result models.ImageGenerationResponse
@@ -123,6 +149,17 @@ func (p *OpenAIProvider) ChatCompletionStream(ctx context.Context, req *models.U
 	body := BuildOpenAIBody(req, messagesJSON, true)
 	// Debug message sequence
 	for i, m := range messagesJSON {
 		mMap, _ := m.(map[string]interface{})
 		role, _ := mMap["role"].(string)
 		hasToolCalls := false
 		if tc, ok := mMap["tool_calls"]; ok && tc != nil {
 			hasToolCalls = true
 		}
 		log.Printf("[DEBUG] OpenAI Stream Msg[%d]: role=%s, hasToolCalls=%v", i, role, hasToolCalls)
 	}
 	// Transition: Newer models require max_completion_tokens
 	if strings.HasPrefix(req.Model, "o1-") || strings.HasPrefix(req.Model, "o3-") || strings.Contains(req.Model, "gpt-5") {
 		if maxTokens, ok := body["max_tokens"]; ok {
@@ -143,7 +180,14 @@ func (p *OpenAIProvider) ChatCompletionStream(ctx context.Context, req *models.U
 	}
 	if !resp.IsSuccess() {
-		return nil, fmt.Errorf("OpenAI API error (%d): %s", resp.StatusCode(), resp.String())
+		msg := resp.String()
 		if msg == "" {
 			if body, err := io.ReadAll(resp.RawBody()); err == nil {
 				msg = string(body)
 			}
 		}
 		log.Printf("OpenAI API Error (%d): %s", resp.StatusCode(), msg)
 		return nil, fmt.Errorf("OpenAI API error (%d): %s", resp.StatusCode(), msg)
 	}
 	ch := make(chan *models.ChatCompletionStreamResponse)
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"io"
 	"gophergate/internal/models"
 )
@@ -26,7 +27,13 @@ func (p *OpenAIProvider) Responses(ctx context.Context, req *models.ResponsesReq
 	}
 	if !resp.IsSuccess() {
-		return nil, fmt.Errorf("OpenAI Responses API error (%d): %s", resp.StatusCode(), resp.String())
+		msg := resp.String()
 		if msg == "" {
 			if body, err := io.ReadAll(resp.RawBody()); err == nil {
 				msg = string(body)
 			}
 		}
 		return nil, fmt.Errorf("OpenAI Responses API error (%d): %s", resp.StatusCode(), msg)
 	}
 	var respJSON map[string]interface{}
@@ -53,7 +60,13 @@ func (p *OpenAIProvider) ResponsesStream(ctx context.Context, req *models.Respon
 	}
 	if !resp.IsSuccess() {
-		return nil, fmt.Errorf("OpenAI Responses API error (%d): %s", resp.StatusCode(), resp.String())
+		msg := resp.String()
 		if msg == "" {
 			if body, err := io.ReadAll(resp.RawBody()); err == nil {
 				msg = string(body)
 			}
 		}
 		return nil, fmt.Errorf("OpenAI Responses API error (%d): %s", resp.StatusCode(), msg)
 	}
 	ch := make(chan *models.ResponsesStreamChunk)
@@ -545,6 +545,9 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
 	// Resolve model groups to concrete models (hierarchical — groups can target groups)
 	modelGroup := ""
 	for i, m := range req.Messages {
 		log.Printf("[DEBUG] Incoming Msg[%d]: role=%s, hasToolCalls=%v, hasContent=%v", i, m.Role, len(m.ToolCalls) > 0, m.Content != nil)
 	}
 	if s.modelRouter != nil {
 		userMessage := extractUserMessage(req.Messages)
 		decision, err := s.modelRouter.RouteToConcrete(c.Request.Context(), modelID, userMessage)
@@ -582,26 +585,27 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
 	ToolChoice:       req.ToolChoice,
 }
-// Inject max_tokens from model registry when client doesn't specify one.
+	// Inject or cap max_tokens from model registry.
 // Prevents providers from applying a low default output cap.
 // DEBUG: Trace max_tokens through the proxy
 clientMaxTokens := "nil"
 if unifiedReq.MaxTokens != nil {
 	clientMaxTokens = fmt.Sprintf("%d", *unifiedReq.MaxTokens)
 }
 log.Printf("[DEBUG] %s: client max_tokens=%s", modelID, clientMaxTokens)
 if unifiedReq.MaxTokens == nil {
 	s.registryMu.RLock()
 	meta := s.registry.FindModel(modelID)
 	s.registryMu.RUnlock()
 	if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 {
 		if unifiedReq.MaxTokens == nil {
 			unifiedReq.MaxTokens = &meta.Limit.Output
 			log.Printf("[DEBUG] %s: injected registry max_tokens=%d", modelID, meta.Limit.Output)
 		} else if *unifiedReq.MaxTokens > meta.Limit.Output {
 			log.Printf("[DEBUG] %s: capping client max_tokens (%d) to registry limit (%d)", modelID, *unifiedReq.MaxTokens, meta.Limit.Output)
 			unifiedReq.MaxTokens = &meta.Limit.Output
 		} else {
-		log.Printf("[DEBUG] %s: no registry limit found, leaving max_tokens nil (provider default)", modelID)
+			log.Printf("[DEBUG] %s: using client max_tokens (%d)", modelID, *unifiedReq.MaxTokens)
 		}
 	} else {
-	log.Printf("[DEBUG] %s: using client's max_tokens=%d", modelID, *unifiedReq.MaxTokens)
+		if unifiedReq.MaxTokens == nil {
 			log.Printf("[DEBUG] %s: no registry limit found, leaving max_tokens nil", modelID)
 		} else {
 			log.Printf("[DEBUG] %s: using client max_tokens (%d), no registry limit to cap", modelID, *unifiedReq.MaxTokens)
 		}
 	}
 	// Handle Stop sequences