fix(ollama): map max_tokens to num_predict and increase context window
CI / Lint (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Build (push) Has been cancelled

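- Map MaxTokens to num_predict in the Ollama options map (defaulting to 4096 for common models when unset), and mirror temperature, top_p and top_k into options as well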
- Set default num_ctx to 8192 for common models (model names containing gemma, llama, mistral, or qwen)
- This ensures Ollama doesn't cut off responses early due to default limits
This commit is contained in:
2026-04-07 13:44:17 +00:00
parent dbbf48cb14
commit fdbb068a6c
+17
View File
@@ -115,24 +115,41 @@ func BuildOllamaBody(request *models.UnifiedRequest, messagesJSON []interface{},
"stream": stream, "stream": stream,
} }
options := make(map[string]interface{})
// Context window size (8192 for common models)
if strings.Contains(request.Model, "gemma") || strings.Contains(request.Model, "llama") || strings.Contains(request.Model, "mistral") || strings.Contains(request.Model, "qwen") {
options["num_ctx"] = 8192
}
if request.Temperature != nil { if request.Temperature != nil {
body["temperature"] = *request.Temperature body["temperature"] = *request.Temperature
options["temperature"] = *request.Temperature
} }
if request.MaxTokens != nil { if request.MaxTokens != nil {
body["max_tokens"] = *request.MaxTokens body["max_tokens"] = *request.MaxTokens
options["num_predict"] = *request.MaxTokens
} else if strings.Contains(request.Model, "gemma") || strings.Contains(request.Model, "llama") || strings.Contains(request.Model, "mistral") || strings.Contains(request.Model, "qwen") { } else if strings.Contains(request.Model, "gemma") || strings.Contains(request.Model, "llama") || strings.Contains(request.Model, "mistral") || strings.Contains(request.Model, "qwen") {
// Default to 4096 for common Ollama models if not specified, // Default to 4096 for common Ollama models if not specified,
// as Ollama's compatibility layer sometimes defaults to 128 // as Ollama's compatibility layer sometimes defaults to 128
body["max_tokens"] = 4096 body["max_tokens"] = 4096
options["num_predict"] = 4096
} }
if request.TopP != nil { if request.TopP != nil {
body["top_p"] = *request.TopP body["top_p"] = *request.TopP
options["top_p"] = *request.TopP
} }
if request.TopK != nil { if request.TopK != nil {
body["top_k"] = *request.TopK body["top_k"] = *request.TopK
options["top_k"] = *request.TopK
} }
if len(options) > 0 {
body["options"] = options
}
if len(request.Stop) > 0 { if len(request.Stop) > 0 {
body["stop"] = request.Stop body["stop"] = request.Stop
} }