feat: capture Gemini cached content tokens in cost tracking
- Add CachedContentTokenCount to UsageMetadata parsing for both streaming (helpers.go) and non-streaming (gemini.go) requests
- Populate CacheReadTokens from Gemini's cachedContentTokenCount
- Add uint32Ptr helper for nil-safe uint32 pointer creation
This commit is contained in:
@@ -2,14 +2,14 @@ package providers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-resty/resty/v2"
|
||||
"gophergate/internal/config"
|
||||
"gophergate/internal/models"
|
||||
"github.com/go-resty/resty/v2"
|
||||
)
|
||||
|
||||
type GeminiProvider struct {
|
||||
@@ -54,10 +54,10 @@ type GeminiContent struct {
|
||||
}
|
||||
|
||||
type GeminiPart struct {
|
||||
Text string `json:"text,omitempty"`
|
||||
InlineData *GeminiInlineData `json:"inlineData,omitempty"`
|
||||
FunctionCall *GeminiFunctionCall `json:"functionCall,omitempty"`
|
||||
FunctionResponse *GeminiFunctionResponse `json:"functionResponse,omitempty"`
|
||||
Text string `json:"text,omitempty"`
|
||||
InlineData *GeminiInlineData `json:"inlineData,omitempty"`
|
||||
FunctionCall *GeminiFunctionCall `json:"functionCall,omitempty"`
|
||||
FunctionResponse *GeminiFunctionResponse `json:"functionResponse,omitempty"`
|
||||
}
|
||||
|
||||
type GeminiInlineData struct {
|
||||
@@ -78,10 +78,10 @@ type GeminiFunctionResponse struct {
|
||||
func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.UnifiedRequest) (*models.ChatCompletionResponse, error) {
|
||||
// Gemini mapping
|
||||
var contents []GeminiContent
|
||||
|
||||
|
||||
for i := 0; i < len(req.Messages); i++ {
|
||||
msg := req.Messages[i]
|
||||
|
||||
|
||||
if msg.Role == "assistant" && len(msg.ToolCalls) > 0 {
|
||||
// 1. Add the assistant (model) message with tool calls
|
||||
parts := []GeminiPart{}
|
||||
@@ -114,7 +114,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
|
||||
foundAny := false
|
||||
for j < len(req.Messages) && req.Messages[j].Role == "tool" {
|
||||
m := req.Messages[j]
|
||||
|
||||
|
||||
// Try to match by ID or just take them in order if IDs are missing/mismatched
|
||||
// Gemini is strict: you must respond to EVERY call in the previous message.
|
||||
text := ""
|
||||
@@ -125,7 +125,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
|
||||
if m.Name != nil {
|
||||
name = *m.Name
|
||||
}
|
||||
|
||||
|
||||
var responseObj interface{}
|
||||
if err := json.Unmarshal([]byte(text), &responseObj); err != nil {
|
||||
responseObj = map[string]interface{}{"result": text}
|
||||
@@ -147,7 +147,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
|
||||
i = j - 1 // Advance outer loop past the tool messages we consumed
|
||||
} else {
|
||||
// If no tool results found but assistant made calls, Gemini WILL error.
|
||||
// We should probably skip the calls or provide dummy results,
|
||||
// We should probably skip the calls or provide dummy results,
|
||||
// but usually this means the conversation is incomplete.
|
||||
// For now, don't add a "function" message if none found.
|
||||
}
|
||||
@@ -165,7 +165,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
|
||||
// Skip or map to user? Skipping is safer for API stability.
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
var parts []GeminiPart
|
||||
for _, cp := range msg.Content {
|
||||
if cp.Type == "text" && cp.Text != "" {
|
||||
@@ -265,9 +265,10 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
|
||||
FinishReason string `json:"finishReason"`
|
||||
} `json:"candidates"`
|
||||
UsageMetadata struct {
|
||||
PromptTokenCount uint32 `json:"promptTokenCount"`
|
||||
CandidatesTokenCount uint32 `json:"candidatesTokenCount"`
|
||||
TotalTokenCount uint32 `json:"totalTokenCount"`
|
||||
PromptTokenCount uint32 `json:"promptTokenCount"`
|
||||
CandidatesTokenCount uint32 `json:"candidatesTokenCount"`
|
||||
TotalTokenCount uint32 `json:"totalTokenCount"`
|
||||
CachedContentTokenCount uint32 `json:"cachedContentTokenCount"`
|
||||
} `json:"usageMetadata"`
|
||||
}
|
||||
|
||||
@@ -307,7 +308,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
|
||||
openAIResp := &models.ChatCompletionResponse{
|
||||
ID: "gemini-" + req.Model,
|
||||
Object: "chat.completion",
|
||||
Created: 0,
|
||||
Created: 0,
|
||||
Model: req.Model,
|
||||
Choices: []models.ChatChoice{
|
||||
{
|
||||
@@ -324,6 +325,7 @@ func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.Unified
|
||||
PromptTokens: geminiResp.UsageMetadata.PromptTokenCount,
|
||||
CompletionTokens: geminiResp.UsageMetadata.CandidatesTokenCount,
|
||||
TotalTokens: geminiResp.UsageMetadata.TotalTokenCount,
|
||||
CacheReadTokens: uint32Ptr(geminiResp.UsageMetadata.CachedContentTokenCount),
|
||||
},
|
||||
}
|
||||
|
||||
@@ -335,7 +337,7 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
|
||||
var contents []GeminiContent
|
||||
for i := 0; i < len(req.Messages); i++ {
|
||||
msg := req.Messages[i]
|
||||
|
||||
|
||||
if msg.Role == "assistant" && len(msg.ToolCalls) > 0 {
|
||||
parts := []GeminiPart{}
|
||||
for _, cp := range msg.Content {
|
||||
@@ -366,7 +368,7 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
|
||||
if m.Name != nil {
|
||||
name = *m.Name
|
||||
}
|
||||
|
||||
|
||||
var responseObj interface{}
|
||||
if err := json.Unmarshal([]byte(text), &responseObj); err != nil {
|
||||
responseObj = map[string]interface{}{"result": text}
|
||||
@@ -398,7 +400,7 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
|
||||
} else if msg.Role == "tool" {
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
var parts []GeminiPart
|
||||
for _, cp := range msg.Content {
|
||||
if cp.Type == "text" && cp.Text != "" {
|
||||
@@ -483,7 +485,7 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
|
||||
}
|
||||
|
||||
ch := make(chan *models.ChatCompletionStreamResponse)
|
||||
|
||||
|
||||
go func() {
|
||||
defer close(ch)
|
||||
err := StreamGemini(resp.RawBody(), ch, req.Model)
|
||||
@@ -494,3 +496,10 @@ func (p *GeminiProvider) ChatCompletionStream(ctx context.Context, req *models.U
|
||||
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
// uint32Ptr returns a pointer to a copy of v when v is non-zero, and nil
// when v is zero, so a zero count is reported as "absent" rather than as a
// pointer to 0. Each call returns a distinct pointer.
func uint32Ptr(v uint32) *uint32 {
	if v == 0 {
		return nil
	}
	return &v
}
|
||||
|
||||
Reference in New Issue
Block a user