fix(ollama): increase timeout and add default max_tokens for large models
- Increase Ollama timeout to 5m for larger models (e.g. gemma4)
- Set default max_tokens to 4096 for common Ollama models
- Expand stream scanner buffer to 10MB to prevent truncation
- Improve model routing and prefix stripping in server
This commit is contained in:
@@ -310,15 +310,15 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
|
||||
|
||||
// Select provider based on model name
|
||||
providerName := "openai" // default
|
||||
if strings.Contains(req.Model, "gemini") {
|
||||
if strings.HasPrefix(req.Model, "gemini/") || strings.Contains(req.Model, "gemini") || strings.HasPrefix(req.Model, "google/") {
|
||||
providerName = "gemini"
|
||||
} else if strings.Contains(req.Model, "deepseek") {
|
||||
} else if strings.HasPrefix(req.Model, "deepseek/") || strings.Contains(req.Model, "deepseek") {
|
||||
providerName = "deepseek"
|
||||
} else if strings.Contains(req.Model, "kimi") || strings.Contains(req.Model, "moonshot") {
|
||||
} else if strings.HasPrefix(req.Model, "moonshot/") || strings.Contains(req.Model, "kimi") || strings.Contains(req.Model, "moonshot") {
|
||||
providerName = "moonshot"
|
||||
} else if strings.Contains(req.Model, "grok") {
|
||||
} else if strings.HasPrefix(req.Model, "grok/") || strings.Contains(req.Model, "grok") {
|
||||
providerName = "grok"
|
||||
} else if strings.Contains(req.Model, "glm-") || strings.Contains(req.Model, "qwen") || strings.Contains(req.Model, "gemma") || strings.Contains(req.Model, "llama") || strings.Contains(req.Model, "mistral") || strings.Contains(req.Model, "codellama") {
|
||||
} else if strings.HasPrefix(req.Model, "ollama/") || strings.Contains(req.Model, "glm-") || strings.Contains(req.Model, "qwen") || strings.Contains(req.Model, "gemma") || strings.Contains(req.Model, "llama") || strings.Contains(req.Model, "mistral") || strings.Contains(req.Model, "codellama") {
|
||||
providerName = "ollama"
|
||||
}
|
||||
|
||||
@@ -330,10 +330,12 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
|
||||
|
||||
// Strip common prefixes
|
||||
modelID := req.Model
|
||||
if strings.HasPrefix(modelID, "gemini/") {
|
||||
modelID = strings.TrimPrefix(modelID, "gemini/")
|
||||
} else if strings.HasPrefix(modelID, "google/") {
|
||||
modelID = strings.TrimPrefix(modelID, "google/")
|
||||
prefixes := []string{"gemini/", "google/", "openai/", "deepseek/", "moonshot/", "grok/", "ollama/"}
|
||||
for _, p := range prefixes {
|
||||
if strings.HasPrefix(modelID, p) {
|
||||
modelID = strings.TrimPrefix(modelID, p)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Convert ChatCompletionRequest to UnifiedRequest
|
||||
|
||||
Reference in New Issue
Block a user