fix(ollama): map max_tokens to num_predict and increase context window
- Map MaxTokens to num_predict in the options map.
- Set default num_ctx to 8192 for common models (gemma, llama, mistral, qwen).
- This ensures Ollama doesn't cut off responses early due to its default limits.
This commit is contained in:
@@ -115,24 +115,41 @@ func BuildOllamaBody(request *models.UnifiedRequest, messagesJSON []interface{},
|
|||||||
"stream": stream,
|
"stream": stream,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
options := make(map[string]interface{})
|
||||||
|
|
||||||
|
// Context window size (8192 for common models)
|
||||||
|
if strings.Contains(request.Model, "gemma") || strings.Contains(request.Model, "llama") || strings.Contains(request.Model, "mistral") || strings.Contains(request.Model, "qwen") {
|
||||||
|
options["num_ctx"] = 8192
|
||||||
|
}
|
||||||
|
|
||||||
if request.Temperature != nil {
|
if request.Temperature != nil {
|
||||||
body["temperature"] = *request.Temperature
|
body["temperature"] = *request.Temperature
|
||||||
|
options["temperature"] = *request.Temperature
|
||||||
}
|
}
|
||||||
|
|
||||||
if request.MaxTokens != nil {
|
if request.MaxTokens != nil {
|
||||||
body["max_tokens"] = *request.MaxTokens
|
body["max_tokens"] = *request.MaxTokens
|
||||||
|
options["num_predict"] = *request.MaxTokens
|
||||||
} else if strings.Contains(request.Model, "gemma") || strings.Contains(request.Model, "llama") || strings.Contains(request.Model, "mistral") || strings.Contains(request.Model, "qwen") {
|
} else if strings.Contains(request.Model, "gemma") || strings.Contains(request.Model, "llama") || strings.Contains(request.Model, "mistral") || strings.Contains(request.Model, "qwen") {
|
||||||
// Default to 4096 for common Ollama models if not specified,
|
// Default to 4096 for common Ollama models if not specified,
|
||||||
// as Ollama's compatibility layer sometimes defaults to 128
|
// as Ollama's compatibility layer sometimes defaults to 128
|
||||||
body["max_tokens"] = 4096
|
body["max_tokens"] = 4096
|
||||||
|
options["num_predict"] = 4096
|
||||||
}
|
}
|
||||||
|
|
||||||
if request.TopP != nil {
|
if request.TopP != nil {
|
||||||
body["top_p"] = *request.TopP
|
body["top_p"] = *request.TopP
|
||||||
|
options["top_p"] = *request.TopP
|
||||||
}
|
}
|
||||||
if request.TopK != nil {
|
if request.TopK != nil {
|
||||||
body["top_k"] = *request.TopK
|
body["top_k"] = *request.TopK
|
||||||
|
options["top_k"] = *request.TopK
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if len(options) > 0 {
|
||||||
|
body["options"] = options
|
||||||
|
}
|
||||||
|
|
||||||
if len(request.Stop) > 0 {
|
if len(request.Stop) > 0 {
|
||||||
body["stop"] = request.Stop
|
body["stop"] = request.Stop
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user