diff --git a/internal/server/server.go b/internal/server/server.go
index ced8c25e..d695088b 100644
--- a/internal/server/server.go
+++ b/internal/server/server.go
@@ -381,6 +381,17 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
 		ToolChoice: req.ToolChoice,
 	}
 
+	// Inject max_tokens from the model registry when the client doesn't specify
+	// one, so providers don't apply a low default output cap.
+	if unifiedReq.MaxTokens == nil {
+		s.registryMu.RLock()
+		if meta := s.registry.FindModel(modelID); meta != nil && meta.Limit != nil && meta.Limit.Output > 0 {
+			maxOutput := meta.Limit.Output // copy under the lock so the request doesn't alias the registry entry
+			unifiedReq.MaxTokens = &maxOutput
+		}
+		s.registryMu.RUnlock()
+	}
+
 	// Handle Stop sequences
 	if req.Stop != nil {
 		var stop []string