fix: remove tool call ID truncation and improve DeepSeek reasoning handling
CI / Lint (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Build (push) Has been cancelled

The 40-character truncation of tool call IDs in helper.go caused collisions
when models (like deepseek-v4-flash) generated longer IDs, leading to
"Duplicate value for 'tool_call_id'" errors. Removed the limit so that IDs
are kept in full and stay unique.

DeepSeek: updated reasoning_content injection to use an empty string
instead of a space, better matching provider expectations for history.

Improved API error reporting across all providers by capturing the raw
response body when response parsing fails or yields an empty string.
This commit is contained in:
2026-05-11 03:12:38 +00:00
parent d2b9da89d9
commit aeffeb8c03
11 changed files with 72990 additions and 60 deletions
+19 -15
View File
@@ -545,6 +545,9 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
// Resolve model groups to concrete models (hierarchical — groups can target groups)
modelGroup := ""
for i, m := range req.Messages {
log.Printf("[DEBUG] Incoming Msg[%d]: role=%s, hasToolCalls=%v, hasContent=%v", i, m.Role, len(m.ToolCalls) > 0, m.Content != nil)
}
if s.modelRouter != nil {
userMessage := extractUserMessage(req.Messages)
decision, err := s.modelRouter.RouteToConcrete(c.Request.Context(), modelID, userMessage)
@@ -582,27 +585,28 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
ToolChoice: req.ToolChoice,
}
// Inject max_tokens from model registry when client doesn't specify one.
// Prevents providers from applying a low default output cap.
// DEBUG: Trace max_tokens through the proxy
clientMaxTokens := "nil"
if unifiedReq.MaxTokens != nil {
clientMaxTokens = fmt.Sprintf("%d", *unifiedReq.MaxTokens)
}
log.Printf("[DEBUG] %s: client max_tokens=%s", modelID, clientMaxTokens)
if unifiedReq.MaxTokens == nil {
// Inject or cap max_tokens from model registry.
s.registryMu.RLock()
meta := s.registry.FindModel(modelID)
s.registryMu.RUnlock()
if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 {
unifiedReq.MaxTokens = &meta.Limit.Output
log.Printf("[DEBUG] %s: injected registry max_tokens=%d", modelID, meta.Limit.Output)
if unifiedReq.MaxTokens == nil {
unifiedReq.MaxTokens = &meta.Limit.Output
log.Printf("[DEBUG] %s: injected registry max_tokens=%d", modelID, meta.Limit.Output)
} else if *unifiedReq.MaxTokens > meta.Limit.Output {
log.Printf("[DEBUG] %s: capping client max_tokens (%d) to registry limit (%d)", modelID, *unifiedReq.MaxTokens, meta.Limit.Output)
unifiedReq.MaxTokens = &meta.Limit.Output
} else {
log.Printf("[DEBUG] %s: using client max_tokens (%d)", modelID, *unifiedReq.MaxTokens)
}
} else {
log.Printf("[DEBUG] %s: no registry limit found, leaving max_tokens nil (provider default)", modelID)
if unifiedReq.MaxTokens == nil {
log.Printf("[DEBUG] %s: no registry limit found, leaving max_tokens nil", modelID)
} else {
log.Printf("[DEBUG] %s: using client max_tokens (%d), no registry limit to cap", modelID, *unifiedReq.MaxTokens)
}
}
} else {
log.Printf("[DEBUG] %s: using client's max_tokens=%d", modelID, *unifiedReq.MaxTokens)
}
// Handle Stop sequences
if req.Stop != nil {