feat: capture Gemini cached content tokens in cost tracking
CI / Lint (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Build (push) Has been cancelled

- Add CachedContentTokenCount to UsageMetadata parsing for both
  streaming (helpers.go) and non-streaming (gemini.go) requests
- CacheReadTokens now populated from Gemini cachedContentTokenCount
- Add uint32Ptr helper that returns nil for a zero count and a pointer to the value otherwise
This commit is contained in:
2026-04-26 21:14:53 -04:00
parent 1c3b1c6fe9
commit 14e26a4323
3 changed files with 43 additions and 33 deletions
+29 -20
View File
@@ -2,14 +2,14 @@ package providers
import (
"context"
"time"
"encoding/json"
"fmt"
"strings"
"time"
"github.com/go-resty/resty/v2"
"gophergate/internal/config"
"gophergate/internal/models"
"github.com/go-resty/resty/v2"
)
type GeminiProvider struct {
@@ -54,10 +54,10 @@ type GeminiContent struct {
}
// GeminiPart is one element of the parts slices that are built while mapping
// request messages to Gemini contents. It can carry plain text, inline data,
// a function call, or a function response. Every field is tagged omitempty,
// so fields left at their zero value are dropped from the encoded JSON.
type GeminiPart struct {
Text string `json:"text,omitempty"`
InlineData *GeminiInlineData `json:"inlineData,omitempty"`
FunctionCall *GeminiFunctionCall `json:"functionCall,omitempty"`
FunctionResponse *GeminiFunctionResponse `json:"functionResponse,omitempty"`
Text string `json:"text,omitempty"`
InlineData *GeminiInlineData `json:"inlineData,omitempty"`
FunctionCall *GeminiFunctionCall `json:"functionCall,omitempty"`
FunctionResponse *GeminiFunctionResponse `json:"functionResponse,omitempty"`
}
type GeminiInlineData struct {
@@ -78,10 +78,10 @@ type GeminiFunctionResponse struct {
func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.UnifiedRequest) (*models.ChatCompletionResponse, error) {
// Gemini mapping
var contents []GeminiContent
for i := 0; i < len(req.Messages); i++ {
msg := req.Messages[i]
if msg.Role == "assistant" && len(msg.ToolCalls) > 0 {
// 1. Add the assistant (model) message with tool calls
parts := []GeminiPart{}
@@ -114,7 +114,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
foundAny := false
for j < len(req.Messages) && req.Messages[j].Role == "tool" {
m := req.Messages[j]
// Try to match by ID or just take them in order if IDs are missing/mismatched
// Gemini is strict: you must respond to EVERY call in the previous message.
text := ""
@@ -125,7 +125,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
if m.Name != nil {
name = *m.Name
}
var responseObj interface{}
if err := json.Unmarshal([]byte(text), &responseObj); err != nil {
responseObj = map[string]interface{}{"result": text}
@@ -147,7 +147,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
i = j - 1 // Advance outer loop past the tool messages we consumed
} else {
// If no tool results found but assistant made calls, Gemini WILL error.
// We should probably skip the calls or provide dummy results,
// We should probably skip the calls or provide dummy results,
// but usually this means the conversation is incomplete.
// For now, don't add a "function" message if none found.
}
@@ -165,7 +165,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
// Skip or map to user? Skipping is safer for API stability.
continue
}
var parts []GeminiPart
for _, cp := range msg.Content {
if cp.Type == "text" && cp.Text != "" {
@@ -265,9 +265,10 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
FinishReason string `json:"finishReason"`
} `json:"candidates"`
UsageMetadata struct {
PromptTokenCount uint32 `json:"promptTokenCount"`
CandidatesTokenCount uint32 `json:"candidatesTokenCount"`
TotalTokenCount uint32 `json:"totalTokenCount"`
PromptTokenCount uint32 `json:"promptTokenCount"`
CandidatesTokenCount uint32 `json:"candidatesTokenCount"`
TotalTokenCount uint32 `json:"totalTokenCount"`
CachedContentTokenCount uint32 `json:"cachedContentTokenCount"`
} `json:"usageMetadata"`
}
@@ -307,7 +308,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
openAIResp := &models.ChatCompletionResponse{
ID: "gemini-" + req.Model,
Object: "chat.completion",
Created: 0,
Created: 0,
Model: req.Model,
Choices: []models.ChatChoice{
{
@@ -324,6 +325,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
PromptTokens: geminiResp.UsageMetadata.PromptTokenCount,
CompletionTokens: geminiResp.UsageMetadata.CandidatesTokenCount,
TotalTokens: geminiResp.UsageMetadata.TotalTokenCount,
CacheReadTokens: uint32Ptr(geminiResp.UsageMetadata.CachedContentTokenCount),
},
}
@@ -335,7 +337,7 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
var contents []GeminiContent
for i := 0; i < len(req.Messages); i++ {
msg := req.Messages[i]
if msg.Role == "assistant" && len(msg.ToolCalls) > 0 {
parts := []GeminiPart{}
for _, cp := range msg.Content {
@@ -366,7 +368,7 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
if m.Name != nil {
name = *m.Name
}
var responseObj interface{}
if err := json.Unmarshal([]byte(text), &responseObj); err != nil {
responseObj = map[string]interface{}{"result": text}
@@ -398,7 +400,7 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
} else if msg.Role == "tool" {
continue
}
var parts []GeminiPart
for _, cp := range msg.Content {
if cp.Type == "text" && cp.Text != "" {
@@ -483,7 +485,7 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
}
ch := make(chan *models.ChatCompletionStreamResponse)
go func() {
defer close(ch)
err := StreamGemini(resp.RawBody(), ch, req.Model)
@@ -494,3 +496,10 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
return ch, nil
}
// uint32Ptr turns a count into an optional value: zero yields nil, and any
// positive value yields a pointer to a private copy of that value.
func uint32Ptr(v uint32) *uint32 {
	if v == 0 {
		// A zero count is reported as "absent" rather than as a pointer to 0.
		return nil
	}
	out := v
	return &out
}
+13 -12
View File
@@ -134,10 +134,10 @@ func BuildOpenAIBody(request *models.UnifiedRequest, messagesJSON []interface{},
}
type openAIUsage struct {
PromptTokens uint32 `json:"prompt_tokens"`
CompletionTokens uint32 `json:"completion_tokens"`
TotalTokens uint32 `json:"total_tokens"`
PromptTokensDetails *struct {
PromptTokens uint32 `json:"prompt_tokens"`
CompletionTokens uint32 `json:"completion_tokens"`
TotalTokens uint32 `json:"total_tokens"`
PromptTokensDetails *struct {
CachedTokens uint32 `json:"cached_tokens"`
} `json:"prompt_tokens_details"`
CompletionTokensDetails *struct {
@@ -165,7 +165,7 @@ func ParseOpenAIResponse(respJSON map[string]interface{}, model string) (*models
if err != nil {
return nil, err
}
var resp models.ChatCompletionResponse
if err := json.Unmarshal(data, &resp); err != nil {
return nil, err
@@ -180,7 +180,7 @@ func ParseOpenAIResponse(respJSON map[string]interface{}, model string) (*models
resp.Usage = oUsage.ToUnified()
}
}
return &resp, nil
}
@@ -236,9 +236,9 @@ func StreamOpenAI(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamRespo
func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamResponse, model string) error {
defer ctx.Close()
dec := json.NewDecoder(ctx)
t, err := dec.Token()
if err != nil {
return err
@@ -261,11 +261,11 @@ func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamRespo
TotalTokenCount uint32 `json:"totalTokenCount"`
} `json:"usageMetadata"`
}
if err := dec.Decode(&geminiChunk); err != nil {
return err
}
if len(geminiChunk.Candidates) > 0 || geminiChunk.UsageMetadata.TotalTokenCount > 0 {
content := ""
var reasoning *string
@@ -282,7 +282,7 @@ func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamRespo
}
}
}
var finishReason *string
if len(geminiChunk.Candidates) > 0 {
fr := strings.ToLower(geminiChunk.Candidates[0].FinishReason)
@@ -308,11 +308,12 @@ func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamRespo
PromptTokens: geminiChunk.UsageMetadata.PromptTokenCount,
CompletionTokens: geminiChunk.UsageMetadata.CandidatesTokenCount,
TotalTokens: geminiChunk.UsageMetadata.TotalTokenCount,
CacheReadTokens: uint32Ptr(geminiChunk.UsageMetadata.CachedContentTokenCount),
},
}
}
}
}
return nil
}