fix: remove tool call ID truncation and improve DeepSeek reasoning handling
The 40-character truncation of tool call IDs in helper.go caused collisions when models (like deepseek-v4-flash) generated longer IDs, leading to "Duplicate value for 'tool_call_id'" errors. Removed the limit so that full, unique IDs are preserved. For DeepSeek, updated the reasoning_content injection to use an empty string instead of a space, which better matches the provider's expectations for message history. Improved API error reporting across all providers by capturing the raw body content when response parsing fails or returns empty strings.
This commit is contained in:
+19
-15
@@ -545,6 +545,9 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
|
||||
|
||||
// Resolve model groups to concrete models (hierarchical — groups can target groups)
|
||||
modelGroup := ""
|
||||
for i, m := range req.Messages {
|
||||
log.Printf("[DEBUG] Incoming Msg[%d]: role=%s, hasToolCalls=%v, hasContent=%v", i, m.Role, len(m.ToolCalls) > 0, m.Content != nil)
|
||||
}
|
||||
if s.modelRouter != nil {
|
||||
userMessage := extractUserMessage(req.Messages)
|
||||
decision, err := s.modelRouter.RouteToConcrete(c.Request.Context(), modelID, userMessage)
|
||||
@@ -582,27 +585,28 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
|
||||
ToolChoice: req.ToolChoice,
|
||||
}
|
||||
|
||||
// Inject max_tokens from model registry when client doesn't specify one.
|
||||
// Prevents providers from applying a low default output cap.
|
||||
// DEBUG: Trace max_tokens through the proxy
|
||||
clientMaxTokens := "nil"
|
||||
if unifiedReq.MaxTokens != nil {
|
||||
clientMaxTokens = fmt.Sprintf("%d", *unifiedReq.MaxTokens)
|
||||
}
|
||||
log.Printf("[DEBUG] %s: client max_tokens=%s", modelID, clientMaxTokens)
|
||||
if unifiedReq.MaxTokens == nil {
|
||||
// Inject or cap max_tokens from model registry.
|
||||
s.registryMu.RLock()
|
||||
meta := s.registry.FindModel(modelID)
|
||||
s.registryMu.RUnlock()
|
||||
|
||||
if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 {
|
||||
unifiedReq.MaxTokens = &meta.Limit.Output
|
||||
log.Printf("[DEBUG] %s: injected registry max_tokens=%d", modelID, meta.Limit.Output)
|
||||
if unifiedReq.MaxTokens == nil {
|
||||
unifiedReq.MaxTokens = &meta.Limit.Output
|
||||
log.Printf("[DEBUG] %s: injected registry max_tokens=%d", modelID, meta.Limit.Output)
|
||||
} else if *unifiedReq.MaxTokens > meta.Limit.Output {
|
||||
log.Printf("[DEBUG] %s: capping client max_tokens (%d) to registry limit (%d)", modelID, *unifiedReq.MaxTokens, meta.Limit.Output)
|
||||
unifiedReq.MaxTokens = &meta.Limit.Output
|
||||
} else {
|
||||
log.Printf("[DEBUG] %s: using client max_tokens (%d)", modelID, *unifiedReq.MaxTokens)
|
||||
}
|
||||
} else {
|
||||
log.Printf("[DEBUG] %s: no registry limit found, leaving max_tokens nil (provider default)", modelID)
|
||||
if unifiedReq.MaxTokens == nil {
|
||||
log.Printf("[DEBUG] %s: no registry limit found, leaving max_tokens nil", modelID)
|
||||
} else {
|
||||
log.Printf("[DEBUG] %s: using client max_tokens (%d), no registry limit to cap", modelID, *unifiedReq.MaxTokens)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log.Printf("[DEBUG] %s: using client's max_tokens=%d", modelID, *unifiedReq.MaxTokens)
|
||||
}
|
||||
|
||||
// Handle Stop sequences
|
||||
if req.Stop != nil {
|
||||
|
||||
Reference in New Issue
Block a user