feat: capture Gemini cached content tokens in cost tracking
- Add CachedContentTokenCount to UsageMetadata parsing for both streaming (helpers.go) and non-streaming (gemini.go) requests
- CacheReadTokens is now populated from Gemini's cachedContentTokenCount
- Add uint32Ptr helper for nil-safe uint32 pointer creation (returns nil when the value is 0, otherwise a pointer to the value)
This commit is contained in:
@@ -2,5 +2,5 @@
|
|||||||
"files": {},
|
"files": {},
|
||||||
"turnCycles": 0,
|
"turnCycles": 0,
|
||||||
"maxCycles": 3,
|
"maxCycles": 3,
|
||||||
"lastUpdated": "2026-04-27T01:09:48.183Z"
|
"lastUpdated": "2026-04-27T01:12:44.352Z"
|
||||||
}
|
}
|
||||||
@@ -2,14 +2,14 @@ package providers
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"time"
|
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/go-resty/resty/v2"
|
||||||
"gophergate/internal/config"
|
"gophergate/internal/config"
|
||||||
"gophergate/internal/models"
|
"gophergate/internal/models"
|
||||||
"github.com/go-resty/resty/v2"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type GeminiProvider struct {
|
type GeminiProvider struct {
|
||||||
@@ -54,10 +54,10 @@ type GeminiContent struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type GeminiPart struct {
|
type GeminiPart struct {
|
||||||
Text string `json:"text,omitempty"`
|
Text string `json:"text,omitempty"`
|
||||||
InlineData *GeminiInlineData `json:"inlineData,omitempty"`
|
InlineData *GeminiInlineData `json:"inlineData,omitempty"`
|
||||||
FunctionCall *GeminiFunctionCall `json:"functionCall,omitempty"`
|
FunctionCall *GeminiFunctionCall `json:"functionCall,omitempty"`
|
||||||
FunctionResponse *GeminiFunctionResponse `json:"functionResponse,omitempty"`
|
FunctionResponse *GeminiFunctionResponse `json:"functionResponse,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type GeminiInlineData struct {
|
type GeminiInlineData struct {
|
||||||
@@ -78,10 +78,10 @@ type GeminiFunctionResponse struct {
|
|||||||
func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.UnifiedRequest) (*models.ChatCompletionResponse, error) {
|
func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.UnifiedRequest) (*models.ChatCompletionResponse, error) {
|
||||||
// Gemini mapping
|
// Gemini mapping
|
||||||
var contents []GeminiContent
|
var contents []GeminiContent
|
||||||
|
|
||||||
for i := 0; i < len(req.Messages); i++ {
|
for i := 0; i < len(req.Messages); i++ {
|
||||||
msg := req.Messages[i]
|
msg := req.Messages[i]
|
||||||
|
|
||||||
if msg.Role == "assistant" && len(msg.ToolCalls) > 0 {
|
if msg.Role == "assistant" && len(msg.ToolCalls) > 0 {
|
||||||
// 1. Add the assistant (model) message with tool calls
|
// 1. Add the assistant (model) message with tool calls
|
||||||
parts := []GeminiPart{}
|
parts := []GeminiPart{}
|
||||||
@@ -114,7 +114,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
|
|||||||
foundAny := false
|
foundAny := false
|
||||||
for j < len(req.Messages) && req.Messages[j].Role == "tool" {
|
for j < len(req.Messages) && req.Messages[j].Role == "tool" {
|
||||||
m := req.Messages[j]
|
m := req.Messages[j]
|
||||||
|
|
||||||
// Try to match by ID or just take them in order if IDs are missing/mismatched
|
// Try to match by ID or just take them in order if IDs are missing/mismatched
|
||||||
// Gemini is strict: you must respond to EVERY call in the previous message.
|
// Gemini is strict: you must respond to EVERY call in the previous message.
|
||||||
text := ""
|
text := ""
|
||||||
@@ -125,7 +125,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
|
|||||||
if m.Name != nil {
|
if m.Name != nil {
|
||||||
name = *m.Name
|
name = *m.Name
|
||||||
}
|
}
|
||||||
|
|
||||||
var responseObj interface{}
|
var responseObj interface{}
|
||||||
if err := json.Unmarshal([]byte(text), &responseObj); err != nil {
|
if err := json.Unmarshal([]byte(text), &responseObj); err != nil {
|
||||||
responseObj = map[string]interface{}{"result": text}
|
responseObj = map[string]interface{}{"result": text}
|
||||||
@@ -147,7 +147,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
|
|||||||
i = j - 1 // Advance outer loop past the tool messages we consumed
|
i = j - 1 // Advance outer loop past the tool messages we consumed
|
||||||
} else {
|
} else {
|
||||||
// If no tool results found but assistant made calls, Gemini WILL error.
|
// If no tool results found but assistant made calls, Gemini WILL error.
|
||||||
// We should probably skip the calls or provide dummy results,
|
// We should probably skip the calls or provide dummy results,
|
||||||
// but usually this means the conversation is incomplete.
|
// but usually this means the conversation is incomplete.
|
||||||
// For now, don't add a "function" message if none found.
|
// For now, don't add a "function" message if none found.
|
||||||
}
|
}
|
||||||
@@ -165,7 +165,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
|
|||||||
// Skip or map to user? Skipping is safer for API stability.
|
// Skip or map to user? Skipping is safer for API stability.
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
var parts []GeminiPart
|
var parts []GeminiPart
|
||||||
for _, cp := range msg.Content {
|
for _, cp := range msg.Content {
|
||||||
if cp.Type == "text" && cp.Text != "" {
|
if cp.Type == "text" && cp.Text != "" {
|
||||||
@@ -265,9 +265,10 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
|
|||||||
FinishReason string `json:"finishReason"`
|
FinishReason string `json:"finishReason"`
|
||||||
} `json:"candidates"`
|
} `json:"candidates"`
|
||||||
UsageMetadata struct {
|
UsageMetadata struct {
|
||||||
PromptTokenCount uint32 `json:"promptTokenCount"`
|
PromptTokenCount uint32 `json:"promptTokenCount"`
|
||||||
CandidatesTokenCount uint32 `json:"candidatesTokenCount"`
|
CandidatesTokenCount uint32 `json:"candidatesTokenCount"`
|
||||||
TotalTokenCount uint32 `json:"totalTokenCount"`
|
TotalTokenCount uint32 `json:"totalTokenCount"`
|
||||||
|
CachedContentTokenCount uint32 `json:"cachedContentTokenCount"`
|
||||||
} `json:"usageMetadata"`
|
} `json:"usageMetadata"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -307,7 +308,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
|
|||||||
openAIResp := &models.ChatCompletionResponse{
|
openAIResp := &models.ChatCompletionResponse{
|
||||||
ID: "gemini-" + req.Model,
|
ID: "gemini-" + req.Model,
|
||||||
Object: "chat.completion",
|
Object: "chat.completion",
|
||||||
Created: 0,
|
Created: 0,
|
||||||
Model: req.Model,
|
Model: req.Model,
|
||||||
Choices: []models.ChatChoice{
|
Choices: []models.ChatChoice{
|
||||||
{
|
{
|
||||||
@@ -324,6 +325,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
|
|||||||
PromptTokens: geminiResp.UsageMetadata.PromptTokenCount,
|
PromptTokens: geminiResp.UsageMetadata.PromptTokenCount,
|
||||||
CompletionTokens: geminiResp.UsageMetadata.CandidatesTokenCount,
|
CompletionTokens: geminiResp.UsageMetadata.CandidatesTokenCount,
|
||||||
TotalTokens: geminiResp.UsageMetadata.TotalTokenCount,
|
TotalTokens: geminiResp.UsageMetadata.TotalTokenCount,
|
||||||
|
CacheReadTokens: uint32Ptr(geminiResp.UsageMetadata.CachedContentTokenCount),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -335,7 +337,7 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
|
|||||||
var contents []GeminiContent
|
var contents []GeminiContent
|
||||||
for i := 0; i < len(req.Messages); i++ {
|
for i := 0; i < len(req.Messages); i++ {
|
||||||
msg := req.Messages[i]
|
msg := req.Messages[i]
|
||||||
|
|
||||||
if msg.Role == "assistant" && len(msg.ToolCalls) > 0 {
|
if msg.Role == "assistant" && len(msg.ToolCalls) > 0 {
|
||||||
parts := []GeminiPart{}
|
parts := []GeminiPart{}
|
||||||
for _, cp := range msg.Content {
|
for _, cp := range msg.Content {
|
||||||
@@ -366,7 +368,7 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
|
|||||||
if m.Name != nil {
|
if m.Name != nil {
|
||||||
name = *m.Name
|
name = *m.Name
|
||||||
}
|
}
|
||||||
|
|
||||||
var responseObj interface{}
|
var responseObj interface{}
|
||||||
if err := json.Unmarshal([]byte(text), &responseObj); err != nil {
|
if err := json.Unmarshal([]byte(text), &responseObj); err != nil {
|
||||||
responseObj = map[string]interface{}{"result": text}
|
responseObj = map[string]interface{}{"result": text}
|
||||||
@@ -398,7 +400,7 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
|
|||||||
} else if msg.Role == "tool" {
|
} else if msg.Role == "tool" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
var parts []GeminiPart
|
var parts []GeminiPart
|
||||||
for _, cp := range msg.Content {
|
for _, cp := range msg.Content {
|
||||||
if cp.Type == "text" && cp.Text != "" {
|
if cp.Type == "text" && cp.Text != "" {
|
||||||
@@ -483,7 +485,7 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
|
|||||||
}
|
}
|
||||||
|
|
||||||
ch := make(chan *models.ChatCompletionStreamResponse)
|
ch := make(chan *models.ChatCompletionStreamResponse)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
defer close(ch)
|
defer close(ch)
|
||||||
err := StreamGemini(resp.RawBody(), ch, req.Model)
|
err := StreamGemini(resp.RawBody(), ch, req.Model)
|
||||||
@@ -494,3 +496,10 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
|
|||||||
|
|
||||||
return ch, nil
|
return ch, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func uint32Ptr(v uint32) *uint32 {
|
||||||
|
if v > 0 {
|
||||||
|
return &v
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -134,10 +134,10 @@ func BuildOpenAIBody(request *models.UnifiedRequest, messagesJSON []interface{},
|
|||||||
}
|
}
|
||||||
|
|
||||||
type openAIUsage struct {
|
type openAIUsage struct {
|
||||||
PromptTokens uint32 `json:"prompt_tokens"`
|
PromptTokens uint32 `json:"prompt_tokens"`
|
||||||
CompletionTokens uint32 `json:"completion_tokens"`
|
CompletionTokens uint32 `json:"completion_tokens"`
|
||||||
TotalTokens uint32 `json:"total_tokens"`
|
TotalTokens uint32 `json:"total_tokens"`
|
||||||
PromptTokensDetails *struct {
|
PromptTokensDetails *struct {
|
||||||
CachedTokens uint32 `json:"cached_tokens"`
|
CachedTokens uint32 `json:"cached_tokens"`
|
||||||
} `json:"prompt_tokens_details"`
|
} `json:"prompt_tokens_details"`
|
||||||
CompletionTokensDetails *struct {
|
CompletionTokensDetails *struct {
|
||||||
@@ -165,7 +165,7 @@ func ParseOpenAIResponse(respJSON map[string]interface{}, model string) (*models
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
var resp models.ChatCompletionResponse
|
var resp models.ChatCompletionResponse
|
||||||
if err := json.Unmarshal(data, &resp); err != nil {
|
if err := json.Unmarshal(data, &resp); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -180,7 +180,7 @@ func ParseOpenAIResponse(respJSON map[string]interface{}, model string) (*models
|
|||||||
resp.Usage = oUsage.ToUnified()
|
resp.Usage = oUsage.ToUnified()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return &resp, nil
|
return &resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -236,9 +236,9 @@ func StreamOpenAI(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamRespo
|
|||||||
|
|
||||||
func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamResponse, model string) error {
|
func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamResponse, model string) error {
|
||||||
defer ctx.Close()
|
defer ctx.Close()
|
||||||
|
|
||||||
dec := json.NewDecoder(ctx)
|
dec := json.NewDecoder(ctx)
|
||||||
|
|
||||||
t, err := dec.Token()
|
t, err := dec.Token()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -261,11 +261,11 @@ func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamRespo
|
|||||||
TotalTokenCount uint32 `json:"totalTokenCount"`
|
TotalTokenCount uint32 `json:"totalTokenCount"`
|
||||||
} `json:"usageMetadata"`
|
} `json:"usageMetadata"`
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := dec.Decode(&geminiChunk); err != nil {
|
if err := dec.Decode(&geminiChunk); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(geminiChunk.Candidates) > 0 || geminiChunk.UsageMetadata.TotalTokenCount > 0 {
|
if len(geminiChunk.Candidates) > 0 || geminiChunk.UsageMetadata.TotalTokenCount > 0 {
|
||||||
content := ""
|
content := ""
|
||||||
var reasoning *string
|
var reasoning *string
|
||||||
@@ -282,7 +282,7 @@ func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamRespo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var finishReason *string
|
var finishReason *string
|
||||||
if len(geminiChunk.Candidates) > 0 {
|
if len(geminiChunk.Candidates) > 0 {
|
||||||
fr := strings.ToLower(geminiChunk.Candidates[0].FinishReason)
|
fr := strings.ToLower(geminiChunk.Candidates[0].FinishReason)
|
||||||
@@ -308,11 +308,12 @@ func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamRespo
|
|||||||
PromptTokens: geminiChunk.UsageMetadata.PromptTokenCount,
|
PromptTokens: geminiChunk.UsageMetadata.PromptTokenCount,
|
||||||
CompletionTokens: geminiChunk.UsageMetadata.CandidatesTokenCount,
|
CompletionTokens: geminiChunk.UsageMetadata.CandidatesTokenCount,
|
||||||
TotalTokens: geminiChunk.UsageMetadata.TotalTokenCount,
|
TotalTokens: geminiChunk.UsageMetadata.TotalTokenCount,
|
||||||
|
CacheReadTokens: uint32Ptr(geminiChunk.UsageMetadata.CachedContentTokenCount),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user