feat: capture Gemini cached content tokens in cost tracking

- Add CachedContentTokenCount to UsageMetadata parsing for both streaming (helpers.go) and non-streaming (gemini.go) requests
- Populate CacheReadTokens from Gemini's cachedContentTokenCount
- Add uint32Ptr helper for nil-safe uint32 pointer creation
2026-04-26 21:14:53 -04:00
parent 1c3b1c6fe9
commit 14e26a4323
3 changed files with 43 additions and 33 deletions
@@ -134,10 +134,10 @@ func BuildOpenAIBody(request *models.UnifiedRequest, messagesJSON []interface{},
 }

 type openAIUsage struct {
-	PromptTokens            uint32 `json:"prompt_tokens"`
-	CompletionTokens        uint32 `json:"completion_tokens"`
-	TotalTokens             uint32 `json:"total_tokens"`
-	PromptTokensDetails     *struct {
+	PromptTokens        uint32 `json:"prompt_tokens"`
+	CompletionTokens    uint32 `json:"completion_tokens"`
+	TotalTokens         uint32 `json:"total_tokens"`
+	PromptTokensDetails *struct {
 		CachedTokens uint32 `json:"cached_tokens"`
 	} `json:"prompt_tokens_details"`
 	CompletionTokensDetails *struct {
@@ -165,7 +165,7 @@ func ParseOpenAIResponse(respJSON map[string]interface{}, model string) (*models
 	if err != nil {
 		return nil, err
 	}
-	
+
 	var resp models.ChatCompletionResponse
 	if err := json.Unmarshal(data, &resp); err != nil {
 		return nil, err
@@ -180,7 +180,7 @@ func ParseOpenAIResponse(respJSON map[string]interface{}, model string) (*models
 			resp.Usage = oUsage.ToUnified()
 		}
 	}
-	
+
 	return &resp, nil
 }

@@ -236,9 +236,9 @@ func StreamOpenAI(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamRespo

 func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamResponse, model string) error {
 	defer ctx.Close()
-	
+
 	dec := json.NewDecoder(ctx)
-	
+
 	t, err := dec.Token()
 	if err != nil {
 		return err
@@ -261,11 +261,11 @@ func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamRespo
 					TotalTokenCount      uint32 `json:"totalTokenCount"`
 				} `json:"usageMetadata"`
 			}
-			
+
 			if err := dec.Decode(&geminiChunk); err != nil {
 				return err
 			}
-			
+
 			if len(geminiChunk.Candidates) > 0 || geminiChunk.UsageMetadata.TotalTokenCount > 0 {
 				content := ""
 				var reasoning *string
@@ -282,7 +282,7 @@ func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamRespo
 						}
 					}
 				}
-				
+
 				var finishReason *string
 				if len(geminiChunk.Candidates) > 0 {
 					fr := strings.ToLower(geminiChunk.Candidates[0].FinishReason)
@@ -308,11 +308,12 @@ func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamRespo
 						PromptTokens:     geminiChunk.UsageMetadata.PromptTokenCount,
 						CompletionTokens: geminiChunk.UsageMetadata.CandidatesTokenCount,
 						TotalTokens:      geminiChunk.UsageMetadata.TotalTokenCount,
+						CacheReadTokens:  uint32Ptr(geminiChunk.UsageMetadata.CachedContentTokenCount),
 					},
 				}
 			}
 		}
 	}
-	
+
 	return nil
 }