feat: capture Gemini cached content tokens in cost tracking
- Add CachedContentTokenCount to UsageMetadata parsing for both streaming (helpers.go) and non-streaming (gemini.go) requests - CacheReadTokens now populated from Gemini cachedContentTokenCount - Add uint32Ptr helper for nil-safe uint32 pointer creation
This commit is contained in:
@@ -134,10 +134,10 @@ func BuildOpenAIBody(request *models.UnifiedRequest, messagesJSON []interface{},
|
||||
}
|
||||
|
||||
type openAIUsage struct {
|
||||
PromptTokens uint32 `json:"prompt_tokens"`
|
||||
CompletionTokens uint32 `json:"completion_tokens"`
|
||||
TotalTokens uint32 `json:"total_tokens"`
|
||||
PromptTokensDetails *struct {
|
||||
PromptTokens uint32 `json:"prompt_tokens"`
|
||||
CompletionTokens uint32 `json:"completion_tokens"`
|
||||
TotalTokens uint32 `json:"total_tokens"`
|
||||
PromptTokensDetails *struct {
|
||||
CachedTokens uint32 `json:"cached_tokens"`
|
||||
} `json:"prompt_tokens_details"`
|
||||
CompletionTokensDetails *struct {
|
||||
@@ -165,7 +165,7 @@ func ParseOpenAIResponse(respJSON map[string]interface{}, model string) (*models
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
||||
var resp models.ChatCompletionResponse
|
||||
if err := json.Unmarshal(data, &resp); err != nil {
|
||||
return nil, err
|
||||
@@ -180,7 +180,7 @@ func ParseOpenAIResponse(respJSON map[string]interface{}, model string) (*models
|
||||
resp.Usage = oUsage.ToUnified()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return &resp, nil
|
||||
}
|
||||
|
||||
@@ -236,9 +236,9 @@ func StreamOpenAI(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamRespo
|
||||
|
||||
func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamResponse, model string) error {
|
||||
defer ctx.Close()
|
||||
|
||||
|
||||
dec := json.NewDecoder(ctx)
|
||||
|
||||
|
||||
t, err := dec.Token()
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -261,11 +261,11 @@ func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamRespo
|
||||
TotalTokenCount uint32 `json:"totalTokenCount"`
|
||||
} `json:"usageMetadata"`
|
||||
}
|
||||
|
||||
|
||||
if err := dec.Decode(&geminiChunk); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
||||
if len(geminiChunk.Candidates) > 0 || geminiChunk.UsageMetadata.TotalTokenCount > 0 {
|
||||
content := ""
|
||||
var reasoning *string
|
||||
@@ -282,7 +282,7 @@ func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamRespo
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
var finishReason *string
|
||||
if len(geminiChunk.Candidates) > 0 {
|
||||
fr := strings.ToLower(geminiChunk.Candidates[0].FinishReason)
|
||||
@@ -308,11 +308,12 @@ func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamRespo
|
||||
PromptTokens: geminiChunk.UsageMetadata.PromptTokenCount,
|
||||
CompletionTokens: geminiChunk.UsageMetadata.CandidatesTokenCount,
|
||||
TotalTokens: geminiChunk.UsageMetadata.TotalTokenCount,
|
||||
CacheReadTokens: uint32Ptr(geminiChunk.UsageMetadata.CachedContentTokenCount),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user