feat: inject max_tokens from models.dev registry when not specified in request
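When a client omits max_tokens, providers (DeepSeek, etc.) apply a low server-side default output cap. For chat completion requests, gophergate now looks up the model in the models.dev registry and, if the registry lists a positive output limit for it, injects that limit as max_tokens so responses are no longer silently truncated at the provider's default.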
This commit is contained in:
@@ -381,6 +381,17 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
|
|||||||
ToolChoice: req.ToolChoice,
|
ToolChoice: req.ToolChoice,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Inject max_tokens from model registry when client doesn't specify one.
|
||||||
|
// Prevents providers from applying a low default output cap.
|
||||||
|
if unifiedReq.MaxTokens == nil {
|
||||||
|
s.registryMu.RLock()
|
||||||
|
meta := s.registry.FindModel(modelID)
|
||||||
|
s.registryMu.RUnlock()
|
||||||
|
if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 {
|
||||||
|
unifiedReq.MaxTokens = &meta.Limit.Output
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Handle Stop sequences
|
// Handle Stop sequences
|
||||||
if req.Stop != nil {
|
if req.Stop != nil {
|
||||||
var stop []string
|
var stop []string
|
||||||
|
|||||||
Reference in New Issue
Block a user