debug: add max_tokens trace logging to chat completions handler
Logs the max_tokens value the client sends, whether gophergate injects one from the model registry, and the value ultimately forwarded to the provider. Helps trace output-truncation issues.
This commit is contained in:
+28
-16
@@ -4,6 +4,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -373,24 +374,35 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
|
||||
TopP: req.TopP,
|
||||
TopK: req.TopK,
|
||||
N: req.N,
|
||||
MaxTokens: req.MaxTokens,
|
||||
PresencePenalty: req.PresencePenalty,
|
||||
FrequencyPenalty: req.FrequencyPenalty,
|
||||
Stream: req.Stream != nil && *req.Stream,
|
||||
Tools: req.Tools,
|
||||
ToolChoice: req.ToolChoice,
|
||||
}
|
||||
MaxTokens: req.MaxTokens,
|
||||
PresencePenalty: req.PresencePenalty,
|
||||
FrequencyPenalty: req.FrequencyPenalty,
|
||||
Stream: req.Stream != nil && *req.Stream,
|
||||
Tools: req.Tools,
|
||||
ToolChoice: req.ToolChoice,
|
||||
}
|
||||
|
||||
// Inject max_tokens from model registry when client doesn't specify one.
|
||||
// Prevents providers from applying a low default output cap.
|
||||
if unifiedReq.MaxTokens == nil {
|
||||
s.registryMu.RLock()
|
||||
meta := s.registry.FindModel(modelID)
|
||||
s.registryMu.RUnlock()
|
||||
if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 {
|
||||
unifiedReq.MaxTokens = &meta.Limit.Output
|
||||
}
|
||||
// Inject max_tokens from model registry when client doesn't specify one.
|
||||
// Prevents providers from applying a low default output cap.
|
||||
// DEBUG: Trace max_tokens through the proxy
|
||||
clientMaxTokens := "nil"
|
||||
if unifiedReq.MaxTokens != nil {
|
||||
clientMaxTokens = fmt.Sprintf("%d", *unifiedReq.MaxTokens)
|
||||
}
|
||||
log.Printf("[DEBUG] %s: client max_tokens=%s", modelID, clientMaxTokens)
|
||||
if unifiedReq.MaxTokens == nil {
|
||||
s.registryMu.RLock()
|
||||
meta := s.registry.FindModel(modelID)
|
||||
s.registryMu.RUnlock()
|
||||
if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 {
|
||||
unifiedReq.MaxTokens = &meta.Limit.Output
|
||||
log.Printf("[DEBUG] %s: injected registry max_tokens=%d", modelID, meta.Limit.Output)
|
||||
} else {
|
||||
log.Printf("[DEBUG] %s: no registry limit found, leaving max_tokens nil (provider default)", modelID)
|
||||
}
|
||||
} else {
|
||||
log.Printf("[DEBUG] %s: using client's max_tokens=%d", modelID, *unifiedReq.MaxTokens)
|
||||
}
|
||||
|
||||
// Handle Stop sequences
|
||||
if req.Stop != nil {
|
||||
|
||||
Reference in New Issue
Block a user