feat: inject max_tokens from models.dev registry when not specified in request

When a client omits max_tokens, providers (e.g. DeepSeek) apply a low server-side default output cap. gophergate now looks up the model in the models.dev registry and, when the registry entry has a positive output limit, injects that limit as max_tokens, preventing silent truncation.
2026-04-28 15:36:06 -04:00
parent 7446f3463d
commit d46a333249
1 changed files with 11 additions and 0 deletions
@@ -381,6 +381,17 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
 		ToolChoice:       req.ToolChoice,
 	}
 	// Inject max_tokens from model registry when client doesn't specify one.
 	// Prevents providers from applying a low default output cap.
 	if unifiedReq.MaxTokens == nil {
 		s.registryMu.RLock()
 		meta := s.registry.FindModel(modelID)
 		s.registryMu.RUnlock()
 		if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 {
 			unifiedReq.MaxTokens = &meta.Limit.Output
 		}
 	}
 	// Handle Stop sequences
 	if req.Stop != nil {
 		var stop []string