fix(ollama): improve tool-calling support and restore gemma/llama context limits
- Explicitly set tool_choice: auto when tools are present and no choice is specified, to aid gemma/llama models
- Sync stop sequences into the options map for broader compatibility
- Restore gemma/llama to the high-context (32k) optimization list
This commit is contained in:
@@ -121,13 +121,14 @@ func BuildOllamaBody(request *models.UnifiedRequest, messagesJSON []interface{},
|
||||
|
||||
// Context window size (default 8k for all, 32k+ for modern large-context models)
|
||||
ctxSize := 8192
|
||||
if strings.Contains(modelLower, "llama3") ||
|
||||
if strings.Contains(modelLower, "llama") ||
|
||||
strings.Contains(modelLower, "gemma") ||
|
||||
strings.Contains(modelLower, "mistral") ||
|
||||
strings.Contains(modelLower, "mixtral") ||
|
||||
strings.Contains(modelLower, "qwen") ||
|
||||
strings.Contains(modelLower, "deepseek") ||
|
||||
strings.Contains(modelLower, "command-r") ||
|
||||
strings.Contains(modelLower, "phi3") {
|
||||
strings.Contains(modelLower, "phi") {
|
||||
ctxSize = 32768
|
||||
}
|
||||
options["num_ctx"] = ctxSize
|
||||
@@ -157,15 +158,21 @@ func BuildOllamaBody(request *models.UnifiedRequest, messagesJSON []interface{},
|
||||
options["top_k"] = *request.TopK
|
||||
}
|
||||
|
||||
if len(request.Stop) > 0 {
|
||||
body["stop"] = request.Stop
|
||||
options["stop"] = request.Stop
|
||||
}
|
||||
|
||||
if len(options) > 0 {
|
||||
body["options"] = options
|
||||
}
|
||||
|
||||
if len(request.Stop) > 0 {
|
||||
body["stop"] = request.Stop
|
||||
}
|
||||
if len(request.Tools) > 0 {
|
||||
body["tools"] = request.Tools
|
||||
// Explicitly set tool_choice to auto if tools are present but choice is not specified
|
||||
if request.ToolChoice == nil {
|
||||
body["tool_choice"] = "auto"
|
||||
}
|
||||
}
|
||||
if request.ToolChoice != nil {
|
||||
var toolChoice interface{}
|
||||
|
||||
Reference in New Issue
Block a user