fix(ollama): increase timeout and add default max_tokens for large models
CI / Lint (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Build (push) Has been cancelled

- Increase Ollama timeout to 5m for larger models (e.g. gemma4)
- Default max_tokens to 4096 for gemma, llama, mistral, and qwen models when the request does not set it
- Raise the stream scanner's maximum line size to 10MB (initial buffer 64KB) so oversized chunks no longer abort the stream
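- Route on explicit provider prefixes (gemini/, google/, deepseek/, moonshot/, grok/, ollama/) and strip them, plus openai/, from the model ID in the server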
This commit is contained in:
2026-04-07 13:42:10 +00:00
parent 1e13b0376b
commit dbbf48cb14
2 changed files with 24 additions and 11 deletions
+13 -2
View File
@@ -21,8 +21,8 @@ type OllamaProvider struct {
func NewOllamaProvider(cfg config.OllamaConfig) *OllamaProvider { func NewOllamaProvider(cfg config.OllamaConfig) *OllamaProvider {
client := resty.New() client := resty.New()
// Set reasonable timeouts for local Ollama server // Set reasonable timeouts for local Ollama server (longer for larger models)
client.SetTimeout(30 * time.Second) client.SetTimeout(5 * time.Minute)
client.SetRetryCount(2) client.SetRetryCount(2)
client.SetRetryWaitTime(1 * time.Second) client.SetRetryWaitTime(1 * time.Second)
@@ -118,9 +118,15 @@ func BuildOllamaBody(request *models.UnifiedRequest, messagesJSON []interface{},
if request.Temperature != nil { if request.Temperature != nil {
body["temperature"] = *request.Temperature body["temperature"] = *request.Temperature
} }
if request.MaxTokens != nil { if request.MaxTokens != nil {
body["max_tokens"] = *request.MaxTokens body["max_tokens"] = *request.MaxTokens
} else if strings.Contains(request.Model, "gemma") || strings.Contains(request.Model, "llama") || strings.Contains(request.Model, "mistral") || strings.Contains(request.Model, "qwen") {
// Default to 4096 for common Ollama models if not specified,
// as Ollama's compatibility layer sometimes defaults to 128
body["max_tokens"] = 4096
} }
if request.TopP != nil { if request.TopP != nil {
body["top_p"] = *request.TopP body["top_p"] = *request.TopP
} }
@@ -196,6 +202,11 @@ func ParseOllamaStreamChunk(line string) (*models.ChatCompletionStreamResponse,
func StreamOllama(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamResponse, model string) error { func StreamOllama(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamResponse, model string) error {
defer ctx.Close() defer ctx.Close()
scanner := bufio.NewScanner(ctx) scanner := bufio.NewScanner(ctx)
// Set a larger buffer for scanning to handle large chunks if they occur
const maxCapacity = 10 * 1024 * 1024 // 10MB
buf := make([]byte, 64*1024)
scanner.Buffer(buf, maxCapacity)
for scanner.Scan() { for scanner.Scan() {
line := scanner.Text() line := scanner.Text()
chunk, done, err := ParseOllamaStreamChunk(line) chunk, done, err := ParseOllamaStreamChunk(line)
+11 -9
View File
@@ -310,15 +310,15 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
// Select provider based on model name // Select provider based on model name
providerName := "openai" // default providerName := "openai" // default
if strings.Contains(req.Model, "gemini") { if strings.HasPrefix(req.Model, "gemini/") || strings.Contains(req.Model, "gemini") || strings.HasPrefix(req.Model, "google/") {
providerName = "gemini" providerName = "gemini"
} else if strings.Contains(req.Model, "deepseek") { } else if strings.HasPrefix(req.Model, "deepseek/") || strings.Contains(req.Model, "deepseek") {
providerName = "deepseek" providerName = "deepseek"
} else if strings.Contains(req.Model, "kimi") || strings.Contains(req.Model, "moonshot") { } else if strings.HasPrefix(req.Model, "moonshot/") || strings.Contains(req.Model, "kimi") || strings.Contains(req.Model, "moonshot") {
providerName = "moonshot" providerName = "moonshot"
} else if strings.Contains(req.Model, "grok") { } else if strings.HasPrefix(req.Model, "grok/") || strings.Contains(req.Model, "grok") {
providerName = "grok" providerName = "grok"
} else if strings.Contains(req.Model, "glm-") || strings.Contains(req.Model, "qwen") || strings.Contains(req.Model, "gemma") || strings.Contains(req.Model, "llama") || strings.Contains(req.Model, "mistral") || strings.Contains(req.Model, "codellama") { } else if strings.HasPrefix(req.Model, "ollama/") || strings.Contains(req.Model, "glm-") || strings.Contains(req.Model, "qwen") || strings.Contains(req.Model, "gemma") || strings.Contains(req.Model, "llama") || strings.Contains(req.Model, "mistral") || strings.Contains(req.Model, "codellama") {
providerName = "ollama" providerName = "ollama"
} }
@@ -330,10 +330,12 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
// Strip common prefixes // Strip common prefixes
modelID := req.Model modelID := req.Model
if strings.HasPrefix(modelID, "gemini/") { prefixes := []string{"gemini/", "google/", "openai/", "deepseek/", "moonshot/", "grok/", "ollama/"}
modelID = strings.TrimPrefix(modelID, "gemini/") for _, p := range prefixes {
} else if strings.HasPrefix(modelID, "google/") { if strings.HasPrefix(modelID, p) {
modelID = strings.TrimPrefix(modelID, "google/") modelID = strings.TrimPrefix(modelID, p)
break
}
} }
// Convert ChatCompletionRequest to UnifiedRequest // Convert ChatCompletionRequest to UnifiedRequest