feat: inject max_tokens from models.dev registry when not specified in request

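When a client omits max_tokens, some providers (e.g., DeepSeek) apply a low server-side default output cap, which can silently truncate responses. gophergate now looks up the requested model in the models.dev registry and, when the registry lists a positive output limit for it, injects that limit as max_tokens. Requests that already specify max_tokens, and models with no known output limit, are left unchanged.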
2026-04-28 15:36:06 -04:00
parent 7446f3463d
commit d46a333249
1 changed file with 11 additions and 0 deletions
@@ -381,6 +381,17 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
 		ToolChoice:       req.ToolChoice,
 	}

+	// Inject max_tokens from model registry when client doesn't specify one.
+	// Prevents providers from applying a low default output cap.
+	if unifiedReq.MaxTokens == nil {
+		s.registryMu.RLock()
+		meta := s.registry.FindModel(modelID)
+		s.registryMu.RUnlock()
+		if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 {
+			unifiedReq.MaxTokens = &meta.Limit.Output
+		}
+	}
+
 	// Handle Stop sequences
 	if req.Stop != nil {
 		var stop []string