From dbbf48cb14c562caae7a74f333672a3587b3ad00 Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Tue, 7 Apr 2026 13:42:10 +0000 Subject: [PATCH] fix(ollama): increase timeout and add default max_tokens for large models - Increase Ollama timeout to 5m for larger models (e.g. gemma4) - Set default max_tokens to 4096 for common Ollama models - Expand stream scanner buffer to 10MB to prevent truncation - Improve model routing and prefix stripping in server --- internal/providers/ollama.go | 15 +++++++++++++-- internal/server/server.go | 20 +++++++++++--------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/internal/providers/ollama.go b/internal/providers/ollama.go index 3b1b414f..182c4ae2 100644 --- a/internal/providers/ollama.go +++ b/internal/providers/ollama.go @@ -21,8 +21,8 @@ type OllamaProvider struct { func NewOllamaProvider(cfg config.OllamaConfig) *OllamaProvider { client := resty.New() - // Set reasonable timeouts for local Ollama server - client.SetTimeout(30 * time.Second) + // Set reasonable timeouts for local Ollama server (longer for larger models) + client.SetTimeout(5 * time.Minute) client.SetRetryCount(2) client.SetRetryWaitTime(1 * time.Second) @@ -118,9 +118,15 @@ func BuildOllamaBody(request *models.UnifiedRequest, messagesJSON []interface{}, if request.Temperature != nil { body["temperature"] = *request.Temperature } + if request.MaxTokens != nil { body["max_tokens"] = *request.MaxTokens + } else if strings.Contains(request.Model, "gemma") || strings.Contains(request.Model, "llama") || strings.Contains(request.Model, "mistral") || strings.Contains(request.Model, "qwen") { + // Default to 4096 for common Ollama models if not specified, + // as Ollama's compatibility layer sometimes defaults to 128 + body["max_tokens"] = 4096 } + if request.TopP != nil { body["top_p"] = *request.TopP } @@ -196,6 +202,11 @@ func ParseOllamaStreamChunk(line string) (*models.ChatCompletionStreamResponse, func StreamOllama(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamResponse, model string) error { defer ctx.Close() scanner := bufio.NewScanner(ctx) + // Set a larger buffer for scanning to handle large chunks if they occur + const maxCapacity = 10 * 1024 * 1024 // 10MB + buf := make([]byte, 64*1024) + scanner.Buffer(buf, maxCapacity) + for scanner.Scan() { line := scanner.Text() chunk, done, err := ParseOllamaStreamChunk(line) diff --git a/internal/server/server.go b/internal/server/server.go index dccb3af3..4bec9944 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -310,15 +310,15 @@ func (s *Server) handleChatCompletions(c *gin.Context) { // Select provider based on model name providerName := "openai" // default - if strings.Contains(req.Model, "gemini") { + if strings.HasPrefix(req.Model, "gemini/") || strings.Contains(req.Model, "gemini") || strings.HasPrefix(req.Model, "google/") { providerName = "gemini" - } else if strings.Contains(req.Model, "deepseek") { + } else if strings.HasPrefix(req.Model, "deepseek/") || strings.Contains(req.Model, "deepseek") { providerName = "deepseek" - } else if strings.Contains(req.Model, "kimi") || strings.Contains(req.Model, "moonshot") { + } else if strings.HasPrefix(req.Model, "moonshot/") || strings.Contains(req.Model, "kimi") || strings.Contains(req.Model, "moonshot") { providerName = "moonshot" - } else if strings.Contains(req.Model, "grok") { + } else if strings.HasPrefix(req.Model, "grok/") || strings.Contains(req.Model, "grok") { providerName = "grok" - } else if strings.Contains(req.Model, "glm-") || strings.Contains(req.Model, "qwen") || strings.Contains(req.Model, "gemma") || strings.Contains(req.Model, "llama") || strings.Contains(req.Model, "mistral") || strings.Contains(req.Model, "codellama") { + } else if strings.HasPrefix(req.Model, "ollama/") || strings.Contains(req.Model, "glm-") || strings.Contains(req.Model, "qwen") || strings.Contains(req.Model, "gemma") || strings.Contains(req.Model, "llama") || strings.Contains(req.Model, "mistral") || strings.Contains(req.Model, "codellama") { providerName = "ollama" } @@ -330,10 +330,12 @@ func (s *Server) handleChatCompletions(c *gin.Context) { // Strip common prefixes modelID := req.Model - if strings.HasPrefix(modelID, "gemini/") { - modelID = strings.TrimPrefix(modelID, "gemini/") - } else if strings.HasPrefix(modelID, "google/") { - modelID = strings.TrimPrefix(modelID, "google/") + prefixes := []string{"gemini/", "google/", "openai/", "deepseek/", "moonshot/", "grok/", "ollama/"} + for _, p := range prefixes { + if strings.HasPrefix(modelID, p) { + modelID = strings.TrimPrefix(modelID, p) + break + } } // Convert ChatCompletionRequest to UnifiedRequest