fix: remove tool call ID truncation and improve DeepSeek reasoning handling

The 40-character truncation of tool call IDs in helper.go caused collisions when models (such as deepseek-v4-flash) generated longer IDs, leading to "Duplicate value for 'tool_call_id'" errors. The limit has been removed so that full, unique IDs are preserved. For DeepSeek, the reasoning_content injection now uses an empty string instead of a single space, which better matches the provider's expectations for message history. API error reporting has also been improved across all providers: the raw response body is now captured when response parsing fails or yields an empty string.
2026-05-11 03:12:38 +00:00
parent d2b9da89d9
commit aeffeb8c03
11 changed files with 72990 additions and 60 deletions
@@ -545,6 +545,9 @@ func (s *Server) handleChatCompletions(c *gin.Context) {

 	// Resolve model groups to concrete models (hierarchical — groups can target groups)
 	modelGroup := ""
+	for i, m := range req.Messages {
+		log.Printf("[DEBUG] Incoming Msg[%d]: role=%s, hasToolCalls=%v, hasContent=%v", i, m.Role, len(m.ToolCalls) > 0, m.Content != nil)
+	}
 	if s.modelRouter != nil {
 		userMessage := extractUserMessage(req.Messages)
 		decision, err := s.modelRouter.RouteToConcrete(c.Request.Context(), modelID, userMessage)
@@ -582,27 +585,28 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
 	ToolChoice:       req.ToolChoice,
 }

-// Inject max_tokens from model registry when client doesn't specify one.
-// Prevents providers from applying a low default output cap.
-// DEBUG: Trace max_tokens through the proxy
-clientMaxTokens := "nil"
-if unifiedReq.MaxTokens != nil {
-	clientMaxTokens = fmt.Sprintf("%d", *unifiedReq.MaxTokens)
-}
-log.Printf("[DEBUG] %s: client max_tokens=%s", modelID, clientMaxTokens)
-if unifiedReq.MaxTokens == nil {
+	// Inject or cap max_tokens from model registry.
 	s.registryMu.RLock()
 	meta := s.registry.FindModel(modelID)
 	s.registryMu.RUnlock()
+
 	if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 {
-		unifiedReq.MaxTokens = &meta.Limit.Output
-		log.Printf("[DEBUG] %s: injected registry max_tokens=%d", modelID, meta.Limit.Output)
+		if unifiedReq.MaxTokens == nil {
+			unifiedReq.MaxTokens = &meta.Limit.Output
+			log.Printf("[DEBUG] %s: injected registry max_tokens=%d", modelID, meta.Limit.Output)
+		} else if *unifiedReq.MaxTokens > meta.Limit.Output {
+			log.Printf("[DEBUG] %s: capping client max_tokens (%d) to registry limit (%d)", modelID, *unifiedReq.MaxTokens, meta.Limit.Output)
+			unifiedReq.MaxTokens = &meta.Limit.Output
+		} else {
+			log.Printf("[DEBUG] %s: using client max_tokens (%d)", modelID, *unifiedReq.MaxTokens)
+		}
 	} else {
-		log.Printf("[DEBUG] %s: no registry limit found, leaving max_tokens nil (provider default)", modelID)
+		if unifiedReq.MaxTokens == nil {
+			log.Printf("[DEBUG] %s: no registry limit found, leaving max_tokens nil", modelID)
+		} else {
+			log.Printf("[DEBUG] %s: using client max_tokens (%d), no registry limit to cap", modelID, *unifiedReq.MaxTokens)
+		}
 	}
-} else {
-	log.Printf("[DEBUG] %s: using client's max_tokens=%d", modelID, *unifiedReq.MaxTokens)
-}

 	// Handle Stop sequences
 	if req.Stop != nil {