feat: inject max_tokens from models.dev registry when not specified in request
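When a client omits max_tokens, some providers (e.g. DeepSeek) fall back to a low server-side default output cap, which can silently truncate responses. gophergate now looks up the model in the models.dev registry and, if the registry lists a positive output limit for it, injects that limit as max_tokens; requests that already set max_tokens are left unchanged.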
This commit is contained in:
@@ -381,6 +381,17 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
|
||||
ToolChoice: req.ToolChoice,
|
||||
}
|
||||
|
||||
// Inject max_tokens from model registry when client doesn't specify one.
|
||||
// Prevents providers from applying a low default output cap.
|
||||
if unifiedReq.MaxTokens == nil {
|
||||
s.registryMu.RLock()
|
||||
meta := s.registry.FindModel(modelID)
|
||||
s.registryMu.RUnlock()
|
||||
if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 {
|
||||
unifiedReq.MaxTokens = &meta.Limit.Output
|
||||
}
|
||||
}
|
||||
|
||||
// Handle Stop sequences
|
||||
if req.Stop != nil {
|
||||
var stop []string
|
||||
|
||||
Reference in New Issue
Block a user