feat: capture Gemini cached content tokens in cost tracking

- Add CachedContentTokenCount to UsageMetadata parsing for both streaming (helpers.go) and non-streaming (gemini.go) requests
- Populate CacheReadTokens from Gemini's cachedContentTokenCount
- Add uint32Ptr helper, which returns nil for a zero count and a pointer to the value otherwise
2026-04-26 21:14:53 -04:00
parent 1c3b1c6fe9
commit 14e26a4323
3 changed files with 43 additions and 33 deletions
@@ -2,14 +2,14 @@ package providers

 import (
 	"context"
-	"time"
 	"encoding/json"
 	"fmt"
 	"strings"
+	"time"

+	"github.com/go-resty/resty/v2"
 	"gophergate/internal/config"
 	"gophergate/internal/models"
-	"github.com/go-resty/resty/v2"
 )

 type GeminiProvider struct {
@@ -54,10 +54,10 @@ type GeminiContent struct {
 }

 type GeminiPart struct {
-	Text             string                   `json:"text,omitempty"`
-	InlineData       *GeminiInlineData        `json:"inlineData,omitempty"`
-	FunctionCall     *GeminiFunctionCall      `json:"functionCall,omitempty"`
-	FunctionResponse *GeminiFunctionResponse  `json:"functionResponse,omitempty"`
+	Text             string                  `json:"text,omitempty"`
+	InlineData       *GeminiInlineData       `json:"inlineData,omitempty"`
+	FunctionCall     *GeminiFunctionCall     `json:"functionCall,omitempty"`
+	FunctionResponse *GeminiFunctionResponse `json:"functionResponse,omitempty"`
 }

 type GeminiInlineData struct {
@@ -78,10 +78,10 @@ type GeminiFunctionResponse struct {
 func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.UnifiedRequest) (*models.ChatCompletionResponse, error) {
 	// Gemini mapping
 	var contents []GeminiContent
-	
+
 	for i := 0; i < len(req.Messages); i++ {
 		msg := req.Messages[i]
-		
+
 		if msg.Role == "assistant" && len(msg.ToolCalls) > 0 {
 			// 1. Add the assistant (model) message with tool calls
 			parts := []GeminiPart{}
@@ -114,7 +114,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
 			foundAny := false
 			for j < len(req.Messages) && req.Messages[j].Role == "tool" {
 				m := req.Messages[j]
-				
+
 				// Try to match by ID or just take them in order if IDs are missing/mismatched
 				// Gemini is strict: you must respond to EVERY call in the previous message.
 				text := ""
@@ -125,7 +125,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
 				if m.Name != nil {
 					name = *m.Name
 				}
-				
+
 				var responseObj interface{}
 				if err := json.Unmarshal([]byte(text), &responseObj); err != nil {
 					responseObj = map[string]interface{}{"result": text}
@@ -147,7 +147,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
 				i = j - 1 // Advance outer loop past the tool messages we consumed
 			} else {
 				// If no tool results found but assistant made calls, Gemini WILL error.
-				// We should probably skip the calls or provide dummy results, 
+				// We should probably skip the calls or provide dummy results,
 				// but usually this means the conversation is incomplete.
 				// For now, don't add a "function" message if none found.
 			}
@@ -165,7 +165,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
 			// Skip or map to user? Skipping is safer for API stability.
 			continue
 		}
-		
+
 		var parts []GeminiPart
 		for _, cp := range msg.Content {
 			if cp.Type == "text" && cp.Text != "" {
@@ -265,9 +265,10 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
 			FinishReason string `json:"finishReason"`
 		} `json:"candidates"`
 		UsageMetadata struct {
-			PromptTokenCount     uint32 `json:"promptTokenCount"`
-			CandidatesTokenCount uint32 `json:"candidatesTokenCount"`
-			TotalTokenCount      uint32 `json:"totalTokenCount"`
+			PromptTokenCount        uint32 `json:"promptTokenCount"`
+			CandidatesTokenCount    uint32 `json:"candidatesTokenCount"`
+			TotalTokenCount         uint32 `json:"totalTokenCount"`
+			CachedContentTokenCount uint32 `json:"cachedContentTokenCount"`
 		} `json:"usageMetadata"`
 	}

@@ -307,7 +308,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
 	openAIResp := &models.ChatCompletionResponse{
 		ID:      "gemini-" + req.Model,
 		Object:  "chat.completion",
-		Created: 0, 
+		Created: 0,
 		Model:   req.Model,
 		Choices: []models.ChatChoice{
 			{
@@ -324,6 +325,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
 			PromptTokens:     geminiResp.UsageMetadata.PromptTokenCount,
 			CompletionTokens: geminiResp.UsageMetadata.CandidatesTokenCount,
 			TotalTokens:      geminiResp.UsageMetadata.TotalTokenCount,
+			CacheReadTokens:  uint32Ptr(geminiResp.UsageMetadata.CachedContentTokenCount),
 		},
 	}

@@ -335,7 +337,7 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
 	var contents []GeminiContent
 	for i := 0; i < len(req.Messages); i++ {
 		msg := req.Messages[i]
-		
+
 		if msg.Role == "assistant" && len(msg.ToolCalls) > 0 {
 			parts := []GeminiPart{}
 			for _, cp := range msg.Content {
@@ -366,7 +368,7 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
 				if m.Name != nil {
 					name = *m.Name
 				}
-				
+
 				var responseObj interface{}
 				if err := json.Unmarshal([]byte(text), &responseObj); err != nil {
 					responseObj = map[string]interface{}{"result": text}
@@ -398,7 +400,7 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
 		} else if msg.Role == "tool" {
 			continue
 		}
-		
+
 		var parts []GeminiPart
 		for _, cp := range msg.Content {
 			if cp.Type == "text" && cp.Text != "" {
@@ -483,7 +485,7 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
 	}

 	ch := make(chan *models.ChatCompletionStreamResponse)
-	
+
 	go func() {
 		defer close(ch)
 		err := StreamGemini(resp.RawBody(), ch, req.Model)
@@ -494,3 +496,10 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U

 	return ch, nil
 }
+
+func uint32Ptr(v uint32) *uint32 { // uint32Ptr returns a pointer to v, or nil when v is zero.
+	if v > 0 {
+		return &v // v is this call's own copy, so the pointer does not alias the caller's variable.
+	}
+	return nil // A zero count is reported as absent (nil) rather than as a pointer to 0.
+}