From be4ec3482ab20feb0cad384cec56c7000f0aeb6e Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Tue, 7 Apr 2026 18:50:48 +0000 Subject: [PATCH] fix(gemini): group adjacent tool messages and ensure correct role sequence - Group consecutive 'tool' messages into a single Gemini content message with multiple 'functionResponse' parts - Ensure assistant tool calls are properly mapped and sent - Maintain v1beta for preview and newer models - Added debug logging for API errors --- internal/providers/gemini.go | 123 +++++++++++++++++++++-------------- 1 file changed, 73 insertions(+), 50 deletions(-) diff --git a/internal/providers/gemini.go b/internal/providers/gemini.go index d63b7ade..388aa509 100644 --- a/internal/providers/gemini.go +++ b/internal/providers/gemini.go @@ -77,44 +77,48 @@ type GeminiFunctionResponse struct { func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.UnifiedRequest) (*models.ChatCompletionResponse, error) { // Gemini mapping var contents []GeminiContent - for _, msg := range req.Messages { + + // Group tool messages together for Gemini + for i := 0; i < len(req.Messages); i++ { + msg := req.Messages[i] role := "user" if msg.Role == "assistant" { role = "model" } else if msg.Role == "tool" { - role = "function" // Function results use 'function' role in Gemini contents + role = "function" } var parts []GeminiPart - // Handle tool responses if msg.Role == "tool" { - text := "" - if len(msg.Content) > 0 { - text = msg.Content[0].Text - } - - // Gemini expects functionResponse to be an object - name := "unknown_function" - if msg.Name != nil { - name = *msg.Name - } - - // Try to parse text as JSON if it looks like it, Gemini expects an object - var responseObj interface{} - if err := json.Unmarshal([]byte(text), &responseObj); err != nil { - // If not valid JSON, wrap it in an object - responseObj = map[string]interface{}{"result": text} - } + // Check if we can group this with previous tool message + // Actually, it's easier to just collect all current and subsequent tool messages + for j := i; j < len(req.Messages) && req.Messages[j].Role == "tool"; j++ { + m := req.Messages[j] + text := "" + if len(m.Content) > 0 { + text = m.Content[0].Text + } + + name := "unknown_function" + if m.Name != nil { + name = *m.Name + } + + var responseObj interface{} + if err := json.Unmarshal([]byte(text), &responseObj); err != nil { + responseObj = map[string]interface{}{"result": text} + } + respBytes, _ := json.Marshal(responseObj) - respBytes, _ := json.Marshal(responseObj) - - parts = append(parts, GeminiPart{ - FunctionResponse: &GeminiFunctionResponse{ - Name: name, - Response: json.RawMessage(respBytes), - }, - }) + parts = append(parts, GeminiPart{ + FunctionResponse: &GeminiFunctionResponse{ + Name: name, + Response: json.RawMessage(respBytes), + }, + }) + i = j // Advance outer loop + } } else { for _, cp := range msg.Content { if cp.Type == "text" { @@ -208,6 +212,10 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified } if !resp.IsSuccess() { + fmt.Printf("[Gemini] API Error %d: %s\n", resp.StatusCode(), resp.String()) + // Also log the request body for debugging (careful with API keys if logged elsewhere) + reqJSON, _ := json.Marshal(body) + fmt.Printf("[Gemini] Request Body: %s\n", string(reqJSON)) return nil, fmt.Errorf("Gemini API error (%d): %s", resp.StatusCode(), resp.String()) } @@ -292,7 +300,8 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.UnifiedRequest) (<-chan *models.ChatCompletionStreamResponse, error) { // Simplified Gemini mapping var contents []GeminiContent - for _, msg := range req.Messages { + for i := 0; i < len(req.Messages); i++ { + msg := req.Messages[i] role := "user" if msg.Role == "assistant" { role = "model" @@ -303,30 +312,44 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U var parts []GeminiPart if msg.Role == "tool" { - text := "" - if len(msg.Content) > 0 { - text = msg.Content[0].Text - } - name := "unknown" - if msg.Name != nil { - name = *msg.Name - } - - var responseObj interface{} - if err := json.Unmarshal([]byte(text), &responseObj); err != nil { - responseObj = map[string]interface{}{"result": text} - } - respBytes, _ := json.Marshal(responseObj) + for j := i; j < len(req.Messages) && req.Messages[j].Role == "tool"; j++ { + m := req.Messages[j] + text := "" + if len(m.Content) > 0 { + text = m.Content[0].Text + } + name := "unknown_function" + if m.Name != nil { + name = *m.Name + } + + var responseObj interface{} + if err := json.Unmarshal([]byte(text), &responseObj); err != nil { + responseObj = map[string]interface{}{"result": text} + } + respBytes, _ := json.Marshal(responseObj) - parts = append(parts, GeminiPart{ - FunctionResponse: &GeminiFunctionResponse{ - Name: name, - Response: json.RawMessage(respBytes), - }, - }) + parts = append(parts, GeminiPart{ + FunctionResponse: &GeminiFunctionResponse{ + Name: name, + Response: json.RawMessage(respBytes), + }, + }) + i = j + } } else { for _, p := range msg.Content { - parts = append(parts, GeminiPart{Text: p.Text}) + if p.Type == "text" { + parts = append(parts, GeminiPart{Text: p.Text}) + } else if p.Image != nil { + base64Data, mimeType, _ := p.Image.ToBase64() + parts = append(parts, GeminiPart{ + InlineData: &GeminiInlineData{ + MimeType: mimeType, + Data: base64Data, + }, + }) + } } if msg.Role == "assistant" && len(msg.ToolCalls) > 0 { for _, tc := range msg.ToolCalls {