From d46a333249b5ea9eb19f0bb2763f89117123b06b Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Tue, 28 Apr 2026 15:36:06 -0400 Subject: [PATCH] feat: inject max_tokens from models.dev registry when not specified in request When a client omits max_tokens, providers (DeepSeek, etc.) apply a low server-side default output cap. Now gophergate looks up the model in the models.dev registry and injects the model's output limit, preventing silent truncation. --- internal/server/server.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/internal/server/server.go b/internal/server/server.go index ced8c25e..d695088b 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -381,6 +381,17 @@ func (s *Server) handleChatCompletions(c *gin.Context) { ToolChoice: req.ToolChoice, } + // Inject max_tokens from model registry when client doesn't specify one. + // Prevents providers from applying a low default output cap. + if unifiedReq.MaxTokens == nil { + s.registryMu.RLock() + meta := s.registry.FindModel(modelID) + s.registryMu.RUnlock() + if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 { + unifiedReq.MaxTokens = &meta.Limit.Output + } + } + // Handle Stop sequences if req.Stop != nil { var stop []string