From 4095c68822167225a002be0b334ad408d6b128ad Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Tue, 7 Apr 2026 14:05:21 +0000 Subject: [PATCH] fix(ollama): improve model detection and ensure robust token/context limits - Use case-insensitive matching for model names and routing - Default max_tokens/num_predict to 8192 for all Ollama models to prevent truncation - Increase default context window and add more large-context model families - Ensure DeepSeek routing handles Ollama-hosted variants correctly --- internal/providers/ollama.go | 22 ++++++++++++++++------ internal/server/server.go | 21 ++++++++++++++++----- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/internal/providers/ollama.go b/internal/providers/ollama.go index 589aa4a8..2d9985e8 100644 --- a/internal/providers/ollama.go +++ b/internal/providers/ollama.go @@ -117,11 +117,20 @@ func BuildOllamaBody(request *models.UnifiedRequest, messagesJSON []interface{}, } options := make(map[string]interface{}) + modelLower := strings.ToLower(request.Model) - // Context window size (32k for modern models to avoid truncation) - if strings.Contains(request.Model, "gemma") || strings.Contains(request.Model, "llama") || strings.Contains(request.Model, "mistral") || strings.Contains(request.Model, "qwen") { - options["num_ctx"] = 32768 + // Context window size (default 8k for all, 32k+ for modern large-context models) + ctxSize := 8192 + if strings.Contains(modelLower, "llama3") || + strings.Contains(modelLower, "mistral") || + strings.Contains(modelLower, "mixtral") || + strings.Contains(modelLower, "qwen") || + strings.Contains(modelLower, "deepseek") || + strings.Contains(modelLower, "command-r") || + strings.Contains(modelLower, "phi3") { + ctxSize = 32768 } + options["num_ctx"] = ctxSize if request.Temperature != nil { body["temperature"] = *request.Temperature @@ -131,9 +140,10 @@ func BuildOllamaBody(request *models.UnifiedRequest, messagesJSON []interface{}, if request.MaxTokens != nil { body["max_tokens"] = *request.MaxTokens options["num_predict"] = *request.MaxTokens - } else if strings.Contains(request.Model, "gemma") || strings.Contains(request.Model, "llama") || strings.Contains(request.Model, "mistral") || strings.Contains(request.Model, "qwen") { - // Default to 8192 for common Ollama models if not specified, - // as Ollama's compatibility layer sometimes defaults to 128 + } else { + // Default to 8192 for all Ollama models if not specified, + // as Ollama's compatibility layer defaults to 128 if neither + // max_tokens nor num_predict are provided. body["max_tokens"] = 8192 options["num_predict"] = 8192 } diff --git a/internal/server/server.go b/internal/server/server.go index 4bec9944..9d429c22 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -310,15 +310,26 @@ func (s *Server) handleChatCompletions(c *gin.Context) { // Select provider based on model name providerName := "openai" // default - if strings.HasPrefix(req.Model, "gemini/") || strings.Contains(req.Model, "gemini") || strings.HasPrefix(req.Model, "google/") { + modelLower := strings.ToLower(req.Model) + if strings.HasPrefix(modelLower, "gemini/") || strings.Contains(modelLower, "gemini") || strings.HasPrefix(modelLower, "google/") { providerName = "gemini" - } else if strings.HasPrefix(req.Model, "deepseek/") || strings.Contains(req.Model, "deepseek") { + } else if strings.HasPrefix(modelLower, "deepseek/") || (strings.Contains(modelLower, "deepseek") && !strings.Contains(modelLower, "ollama")) { + // Only use deepseek provider if it's not explicitly tagged for ollama providerName = "deepseek" - } else if strings.HasPrefix(req.Model, "moonshot/") || strings.Contains(req.Model, "kimi") || strings.Contains(req.Model, "moonshot") { + } else if strings.HasPrefix(modelLower, "moonshot/") || strings.Contains(modelLower, "kimi") || strings.Contains(modelLower, "moonshot") { providerName = "moonshot" - } else if strings.HasPrefix(req.Model, "grok/") || strings.Contains(req.Model, "grok") { + } else if strings.HasPrefix(modelLower, "grok/") || strings.Contains(modelLower, "grok") { providerName = "grok" - } else if strings.HasPrefix(req.Model, "ollama/") || strings.Contains(req.Model, "glm-") || strings.Contains(req.Model, "qwen") || strings.Contains(req.Model, "gemma") || strings.Contains(req.Model, "llama") || strings.Contains(req.Model, "mistral") || strings.Contains(req.Model, "codellama") { + } else if strings.HasPrefix(modelLower, "ollama/") || + strings.Contains(modelLower, "glm-") || + strings.Contains(modelLower, "qwen") || + strings.Contains(modelLower, "gemma") || + strings.Contains(modelLower, "llama") || + strings.Contains(modelLower, "mistral") || + strings.Contains(modelLower, "phi") || + strings.Contains(modelLower, "yi") || + strings.Contains(modelLower, "codellama") || + strings.Contains(modelLower, "command-r") { providerName = "ollama" }