feat: inject max_tokens from models.dev registry when not specified in request
CI / Lint (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Build (push) Has been cancelled

When a client omits max_tokens, providers (DeepSeek, etc.) apply
a low server-side default output cap. Now gophergate looks up the
model in the models.dev registry and injects the model's output
limit, preventing silent truncation.
This commit is contained in:
2026-04-28 15:36:06 -04:00
parent 7446f3463d
commit d46a333249
+11
View File
@@ -381,6 +381,17 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
ToolChoice: req.ToolChoice,
}
// Inject max_tokens from model registry when client doesn't specify one.
// Prevents providers from applying a low default output cap.
if unifiedReq.MaxTokens == nil {
s.registryMu.RLock()
meta := s.registry.FindModel(modelID)
s.registryMu.RUnlock()
if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 {
unifiedReq.MaxTokens = &meta.Limit.Output
}
}
// Handle Stop sequences
if req.Stop != nil {
var stop []string