diff --git a/internal/providers/deepseek.go b/internal/providers/deepseek.go index 098a5cbe..9cbb86e8 100644 --- a/internal/providers/deepseek.go +++ b/internal/providers/deepseek.go @@ -72,19 +72,26 @@ func (p *DeepSeekProvider) ChatCompletion(ctx context.Context, req *models.Unifi body := BuildOpenAIBody(req, messagesJSON, false) - // Sanitize for deepseek-reasoner - if req.Model == "deepseek-reasoner" { - delete(body, "temperature") - delete(body, "top_p") - delete(body, "presence_penalty") - delete(body, "frequency_penalty") + // Sanitize for models that support reasoning/thinking mode + isReasoner := strings.Contains(req.Model, "reasoner") || strings.Contains(req.Model, "v4") || strings.Contains(req.Model, "r1") + + if isReasoner { + // deepseek-reasoner (R1) does not support these parameters + if req.Model == "deepseek-reasoner" || strings.HasPrefix(req.Model, "deepseek-r1") { + delete(body, "temperature") + delete(body, "top_p") + delete(body, "presence_penalty") + delete(body, "frequency_penalty") + } if msgs, ok := body["messages"].([]interface{}); ok { for _, m := range msgs { if msg, ok := m.(map[string]interface{}); ok { if msg["role"] == "assistant" { + // DeepSeek requires reasoning_content to be passed back in history + // if the model is in thinking mode. if msg["reasoning_content"] == nil { - msg["reasoning_content"] = " " + msg["reasoning_content"] = "" } if msg["content"] == nil || msg["content"] == "" { msg["content"] = "" @@ -106,7 +113,15 @@ func (p *DeepSeekProvider) ChatCompletion(ctx context.Context, req *models.Unifi } if !resp.IsSuccess() { - return nil, fmt.Errorf("DeepSeek API error (%d): %s", resp.StatusCode(), resp.String()) + var msg string + if resp.RawBody() != nil { + bodyBytes, _ := io.ReadAll(resp.RawBody()) + msg = string(bodyBytes) + } + if msg == "" { + msg = resp.String() + } + return nil, fmt.Errorf("DeepSeek API error (%d): %s", resp.StatusCode(), msg) } var respJSON map[string]interface{} @@ -141,19 +156,26 @@ func (p *DeepSeekProvider) ChatCompletionStream(ctx context.Context, req *models body := BuildOpenAIBody(req, messagesJSON, true) - // Sanitize for deepseek-reasoner - if req.Model == "deepseek-reasoner" { - delete(body, "temperature") - delete(body, "top_p") - delete(body, "presence_penalty") - delete(body, "frequency_penalty") + // Sanitize for models that support reasoning/thinking mode + isReasoner := strings.Contains(req.Model, "reasoner") || strings.Contains(req.Model, "v4") || strings.Contains(req.Model, "r1") + + if isReasoner { + // deepseek-reasoner (R1) does not support these parameters + if req.Model == "deepseek-reasoner" || strings.HasPrefix(req.Model, "deepseek-r1") { + delete(body, "temperature") + delete(body, "top_p") + delete(body, "presence_penalty") + delete(body, "frequency_penalty") + } if msgs, ok := body["messages"].([]interface{}); ok { for _, m := range msgs { if msg, ok := m.(map[string]interface{}); ok { if msg["role"] == "assistant" { + // DeepSeek requires reasoning_content to be passed back in history + // if the model is in thinking mode. if msg["reasoning_content"] == nil { - msg["reasoning_content"] = " " + msg["reasoning_content"] = "" } if msg["content"] == nil || msg["content"] == "" { msg["content"] = "" @@ -176,7 +198,15 @@ func (p *DeepSeekProvider) ChatCompletionStream(ctx context.Context, req *models } if !resp.IsSuccess() { - return nil, fmt.Errorf("DeepSeek API error (%d): %s", resp.StatusCode(), resp.String()) + var msg string + if resp.RawBody() != nil { + bodyBytes, _ := io.ReadAll(resp.RawBody()) + msg = string(bodyBytes) + } + if msg == "" { + msg = resp.String() + } + return nil, fmt.Errorf("DeepSeek API error (%d): %s", resp.StatusCode(), msg) } ch := make(chan *models.ChatCompletionStreamResponse) diff --git a/internal/providers/gemini.go b/internal/providers/gemini.go index 157f00dd..a2f9ab5b 100644 --- a/internal/providers/gemini.go +++ b/internal/providers/gemini.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "io" "strings" "time" @@ -125,7 +126,13 @@ func (p *GeminiProvider) ImageGeneration(ctx context.Context, req *models.ImageG } if !resp.IsSuccess() { - return nil, fmt.Errorf("Gemini Imagen API error (%d): %s", resp.StatusCode(), resp.String()) + msg := resp.String() + if msg == "" { + if body, err := io.ReadAll(resp.RawBody()); err == nil { + msg = string(body) + } + } + return nil, fmt.Errorf("Gemini Imagen API error (%d): %s", resp.StatusCode(), msg) } // Parse Imagen response @@ -363,11 +370,17 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified } if !resp.IsSuccess() { - fmt.Printf("[Gemini] API Error %d: %s\n", resp.StatusCode(), resp.String()) + msg := resp.String() + if msg == "" { + if body, err := io.ReadAll(resp.RawBody()); err == nil { + msg = string(body) + } + } + fmt.Printf("[Gemini] API Error %d: %s\n", resp.StatusCode(), msg) // Also log the request body for debugging (careful with API keys if logged elsewhere) reqJSON, _ := json.Marshal(body) fmt.Printf("[Gemini] Request Body: %s\n", string(reqJSON)) - return nil, fmt.Errorf("Gemini API error (%d): %s", resp.StatusCode(), resp.String()) + return nil, fmt.Errorf("Gemini API error (%d): %s", resp.StatusCode(), msg) } // Parse Gemini response and convert to OpenAI format @@ -599,7 +612,13 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U } if !resp.IsSuccess() { - return nil, fmt.Errorf("Gemini API error (%d): %s", resp.StatusCode(), resp.String()) + msg := resp.String() + if msg == "" { + if body, err := io.ReadAll(resp.RawBody()); err == nil { + msg = string(body) + } + } + return nil, fmt.Errorf("Gemini API error (%d): %s", resp.StatusCode(), msg) } ch := make(chan *models.ChatCompletionStreamResponse) diff --git a/internal/providers/grok.go b/internal/providers/grok.go index b3a9460f..12933a22 100644 --- a/internal/providers/grok.go +++ b/internal/providers/grok.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "io" "time" "github.com/go-resty/resty/v2" @@ -48,7 +49,13 @@ func (p *GrokProvider) ChatCompletion(ctx context.Context, req *models.UnifiedRe } if !resp.IsSuccess() { - return nil, fmt.Errorf("Grok API error (%d): %s", resp.StatusCode(), resp.String()) + msg := resp.String() + if msg == "" { + if body, err := io.ReadAll(resp.RawBody()); err == nil { + msg = string(body) + } + } + return nil, fmt.Errorf("Grok API error (%d): %s", resp.StatusCode(), msg) } var respJSON map[string]interface{} @@ -79,7 +86,13 @@ func (p *GrokProvider) ChatCompletionStream(ctx context.Context, req *models.Uni } if !resp.IsSuccess() { - return nil, fmt.Errorf("Grok API error (%d): %s", resp.StatusCode(), resp.String()) + msg := resp.String() + if msg == "" { + if body, err := io.ReadAll(resp.RawBody()); err == nil { + msg = string(body) + } + } + return nil, fmt.Errorf("Grok API error (%d): %s", resp.StatusCode(), msg) } ch := make(chan *models.ChatCompletionStreamResponse) diff --git a/internal/providers/helpers.go b/internal/providers/helpers.go index ad8d6c54..d209347e 100644 --- a/internal/providers/helpers.go +++ b/internal/providers/helpers.go @@ -14,7 +14,12 @@ import ( func MessagesToOpenAIJSON(messages []models.UnifiedMessage) ([]interface{}, error) { var result []interface{} for _, m := range messages { - if m.Role == "tool" { + role := strings.ToLower(m.Role) + if role == "model" { + role = "assistant" + } + + if role == "tool" || role == "function" { text := "" if len(m.Content) > 0 { text = m.Content[0].Text @@ -23,13 +28,12 @@ func MessagesToOpenAIJSON(messages []models.UnifiedMessage) ([]interface{}, erro "role": "tool", "content": text, } + id := "unknown" if m.ToolCallID != nil { - id := *m.ToolCallID - if len(id) > 40 { - id = id[:40] - } - msg["tool_call_id"] = id + id = *m.ToolCallID } + msg["tool_call_id"] = id + if m.Name != nil { msg["name"] = *m.Name } @@ -59,7 +63,9 @@ func MessagesToOpenAIJSON(messages []models.UnifiedMessage) ([]interface{}, erro } var finalContent interface{} - if len(parts) == 1 { + if len(parts) == 0 { + finalContent = nil + } else if len(parts) == 1 { if p, ok := parts[0].(map[string]interface{}); ok && p["type"] == "text" { finalContent = p["text"] } else { @@ -70,7 +76,7 @@ func MessagesToOpenAIJSON(messages []models.UnifiedMessage) ([]interface{}, erro } msg := map[string]interface{}{ - "role": m.Role, + "role": role, "content": finalContent, } @@ -82,20 +88,17 @@ func MessagesToOpenAIJSON(messages []models.UnifiedMessage) ([]interface{}, erro sanitizedCalls := make([]models.ToolCall, len(m.ToolCalls)) copy(sanitizedCalls, m.ToolCalls) for i := range sanitizedCalls { - if len(sanitizedCalls[i].ID) > 40 { - sanitizedCalls[i].ID = sanitizedCalls[i].ID[:40] + if sanitizedCalls[i].Type == "" { + sanitizedCalls[i].Type = "function" } } msg["tool_calls"] = sanitizedCalls - if len(parts) == 0 { - msg["content"] = "" - } + msg["content"] = "" // OpenAI requirement: content must be string if tool_calls present } if m.Name != nil { msg["name"] = *m.Name } - result = append(result, msg) } return result, nil diff --git a/internal/providers/moonshot.go b/internal/providers/moonshot.go index cf36a03c..66e90953 100644 --- a/internal/providers/moonshot.go +++ b/internal/providers/moonshot.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "io" "strings" "time" @@ -59,7 +60,13 @@ func (p *MoonshotProvider) ChatCompletion(ctx context.Context, req *models.Unifi } if !resp.IsSuccess() { - return nil, fmt.Errorf("Moonshot API error (%d): %s", resp.StatusCode(), resp.String()) + msg := resp.String() + if msg == "" { + if body, err := io.ReadAll(resp.RawBody()); err == nil { + msg = string(body) + } + } + return nil, fmt.Errorf("Moonshot API error (%d): %s", resp.StatusCode(), msg) } var respJSON map[string]interface{} @@ -100,7 +107,13 @@ func (p *MoonshotProvider) ChatCompletionStream(ctx context.Context, req *models } if !resp.IsSuccess() { - return nil, fmt.Errorf("Moonshot API error (%d): %s", resp.StatusCode(), resp.String()) + msg := resp.String() + if msg == "" { + if body, err := io.ReadAll(resp.RawBody()); err == nil { + msg = string(body) + } + } + return nil, fmt.Errorf("Moonshot API error (%d): %s", resp.StatusCode(), msg) } ch := make(chan *models.ChatCompletionStreamResponse) diff --git a/internal/providers/ollama.go b/internal/providers/ollama.go index 641fb210..5151171b 100644 --- a/internal/providers/ollama.go +++ b/internal/providers/ollama.go @@ -56,7 +56,13 @@ func (p *OllamaProvider) ChatCompletion(ctx context.Context, req *models.Unified } if !resp.IsSuccess() { - return nil, fmt.Errorf("Ollama API error (%d): %s", resp.StatusCode(), resp.String()) + msg := resp.String() + if msg == "" { + if body, err := io.ReadAll(resp.RawBody()); err == nil { + msg = string(body) + } + } + return nil, fmt.Errorf("Ollama API error (%d): %s", resp.StatusCode(), msg) } var respJSON map[string]interface{} @@ -86,7 +92,13 @@ func (p *OllamaProvider) ChatCompletionStream(ctx context.Context, req *models.U } if !resp.IsSuccess() { - return nil, fmt.Errorf("Ollama API error (%d): %s", resp.StatusCode(), resp.String()) + msg := resp.String() + if msg == "" { + if body, err := io.ReadAll(resp.RawBody()); err == nil { + msg = string(body) + } + } + return nil, fmt.Errorf("Ollama API error (%d): %s", resp.StatusCode(), msg) } ch := make(chan *models.ChatCompletionStreamResponse) diff --git a/internal/providers/openai.go b/internal/providers/openai.go index 867b6917..aa51cb4c 100644 --- a/internal/providers/openai.go +++ b/internal/providers/openai.go @@ -4,6 +4,8 @@ import ( "context" "encoding/json" "fmt" + "io" + "log" "strings" "time" @@ -38,6 +40,17 @@ func (p *OpenAIProvider) ChatCompletion(ctx context.Context, req *models.Unified body := BuildOpenAIBody(req, messagesJSON, false) + // Debug message sequence + for i, m := range messagesJSON { + mMap, _ := m.(map[string]interface{}) + role, _ := mMap["role"].(string) + hasToolCalls := false + if tc, ok := mMap["tool_calls"]; ok && tc != nil { + hasToolCalls = true + } + log.Printf("[DEBUG] OpenAI Msg[%d]: role=%s, hasToolCalls=%v", i, role, hasToolCalls) + } + // Transition: Newer models require max_completion_tokens if strings.HasPrefix(req.Model, "o1-") || strings.HasPrefix(req.Model, "o3-") || strings.Contains(req.Model, "gpt-5") { if maxTokens, ok := body["max_tokens"]; ok { @@ -57,7 +70,14 @@ func (p *OpenAIProvider) ChatCompletion(ctx context.Context, req *models.Unified } if !resp.IsSuccess() { - return nil, fmt.Errorf("OpenAI API error (%d): %s", resp.StatusCode(), resp.String()) + msg := resp.String() + if msg == "" { + if body, err := io.ReadAll(resp.RawBody()); err == nil { + msg = string(body) + } + } + log.Printf("OpenAI API Error (%d): %s", resp.StatusCode(), msg) + return nil, fmt.Errorf("OpenAI API error (%d): %s", resp.StatusCode(), msg) } var respJSON map[string]interface{} @@ -104,7 +124,13 @@ func (p *OpenAIProvider) ImageGeneration(ctx context.Context, req *models.ImageG } if !resp.IsSuccess() { - return nil, fmt.Errorf("OpenAI image API error (%d): %s", resp.StatusCode(), resp.String()) + msg := resp.String() + if msg == "" { + if body, err := io.ReadAll(resp.RawBody()); err == nil { + msg = string(body) + } + } + return nil, fmt.Errorf("OpenAI image API error (%d): %s", resp.StatusCode(), msg) } var result models.ImageGenerationResponse @@ -123,6 +149,17 @@ func (p *OpenAIProvider) ChatCompletionStream(ctx context.Context, req *models.U body := BuildOpenAIBody(req, messagesJSON, true) + // Debug message sequence + for i, m := range messagesJSON { + mMap, _ := m.(map[string]interface{}) + role, _ := mMap["role"].(string) + hasToolCalls := false + if tc, ok := mMap["tool_calls"]; ok && tc != nil { + hasToolCalls = true + } + log.Printf("[DEBUG] OpenAI Stream Msg[%d]: role=%s, hasToolCalls=%v", i, role, hasToolCalls) + } + // Transition: Newer models require max_completion_tokens if strings.HasPrefix(req.Model, "o1-") || strings.HasPrefix(req.Model, "o3-") || strings.Contains(req.Model, "gpt-5") { if maxTokens, ok := body["max_tokens"]; ok { @@ -143,7 +180,14 @@ func (p *OpenAIProvider) ChatCompletionStream(ctx context.Context, req *models.U } if !resp.IsSuccess() { - return nil, fmt.Errorf("OpenAI API error (%d): %s", resp.StatusCode(), resp.String()) + msg := resp.String() + if msg == "" { + if body, err := io.ReadAll(resp.RawBody()); err == nil { + msg = string(body) + } + } + log.Printf("OpenAI API Error (%d): %s", resp.StatusCode(), msg) + return nil, fmt.Errorf("OpenAI API error (%d): %s", resp.StatusCode(), msg) } ch := make(chan *models.ChatCompletionStreamResponse) diff --git a/internal/providers/openai_responses.go b/internal/providers/openai_responses.go index f8829eb2..8ab19d0c 100644 --- a/internal/providers/openai_responses.go +++ b/internal/providers/openai_responses.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "io" "gophergate/internal/models" ) @@ -26,7 +27,13 @@ func (p *OpenAIProvider) Responses(ctx context.Context, req *models.ResponsesReq } if !resp.IsSuccess() { - return nil, fmt.Errorf("OpenAI Responses API error (%d): %s", resp.StatusCode(), resp.String()) + msg := resp.String() + if msg == "" { + if body, err := io.ReadAll(resp.RawBody()); err == nil { + msg = string(body) + } + } + return nil, fmt.Errorf("OpenAI Responses API error (%d): %s", resp.StatusCode(), msg) } var respJSON map[string]interface{} @@ -53,7 +60,13 @@ func (p *OpenAIProvider) ResponsesStream(ctx context.Context, req *models.Respon } if !resp.IsSuccess() { - return nil, fmt.Errorf("OpenAI Responses API error (%d): %s", resp.StatusCode(), resp.String()) + msg := resp.String() + if msg == "" { + if body, err := io.ReadAll(resp.RawBody()); err == nil { + msg = string(body) + } + } + return nil, fmt.Errorf("OpenAI Responses API error (%d): %s", resp.StatusCode(), msg) } ch := make(chan *models.ResponsesStreamChunk) diff --git a/internal/server/server.go b/internal/server/server.go index 68bb8394..652cc0cd 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -545,6 +545,9 @@ func (s *Server) handleChatCompletions(c *gin.Context) { // Resolve model groups to concrete models (hierarchical — groups can target groups) modelGroup := "" + for i, m := range req.Messages { + log.Printf("[DEBUG] Incoming Msg[%d]: role=%s, hasToolCalls=%v, hasContent=%v", i, m.Role, len(m.ToolCalls) > 0, m.Content != nil) + } if s.modelRouter != nil { userMessage := extractUserMessage(req.Messages) decision, err := s.modelRouter.RouteToConcrete(c.Request.Context(), modelID, userMessage) @@ -582,27 +585,28 @@ func (s *Server) handleChatCompletions(c *gin.Context) { ToolChoice: req.ToolChoice, } -// Inject max_tokens from model registry when client doesn't specify one. -// Prevents providers from applying a low default output cap. -// DEBUG: Trace max_tokens through the proxy -clientMaxTokens := "nil" -if unifiedReq.MaxTokens != nil { - clientMaxTokens = fmt.Sprintf("%d", *unifiedReq.MaxTokens) -} -log.Printf("[DEBUG] %s: client max_tokens=%s", modelID, clientMaxTokens) -if unifiedReq.MaxTokens == nil { + // Inject or cap max_tokens from model registry. s.registryMu.RLock() meta := s.registry.FindModel(modelID) s.registryMu.RUnlock() + if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 { - unifiedReq.MaxTokens = &meta.Limit.Output - log.Printf("[DEBUG] %s: injected registry max_tokens=%d", modelID, meta.Limit.Output) + if unifiedReq.MaxTokens == nil { + unifiedReq.MaxTokens = &meta.Limit.Output + log.Printf("[DEBUG] %s: injected registry max_tokens=%d", modelID, meta.Limit.Output) + } else if *unifiedReq.MaxTokens > meta.Limit.Output { + log.Printf("[DEBUG] %s: capping client max_tokens (%d) to registry limit (%d)", modelID, *unifiedReq.MaxTokens, meta.Limit.Output) + unifiedReq.MaxTokens = &meta.Limit.Output + } else { + log.Printf("[DEBUG] %s: using client max_tokens (%d)", modelID, *unifiedReq.MaxTokens) + } } else { - log.Printf("[DEBUG] %s: no registry limit found, leaving max_tokens nil (provider default)", modelID) + if unifiedReq.MaxTokens == nil { + log.Printf("[DEBUG] %s: no registry limit found, leaving max_tokens nil", modelID) + } else { + log.Printf("[DEBUG] %s: using client max_tokens (%d), no registry limit to cap", modelID, *unifiedReq.MaxTokens) + } } -} else { - log.Printf("[DEBUG] %s: using client's max_tokens=%d", modelID, *unifiedReq.MaxTokens) -} // Handle Stop sequences if req.Stop != nil {