feat: enhance usage and cost tracking accuracy
Some checks failed
CI / Lint (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Build (push) Has been cancelled

Improved extraction of reasoning and cached tokens from OpenAI and DeepSeek responses (including streams). Ensured accurate cost calculation using registry metadata.
This commit is contained in:
2026-03-19 11:56:26 -04:00
parent 66a1643bca
commit 0f3c5b6eb4
2 changed files with 139 additions and 17 deletions

View File

@@ -122,6 +122,33 @@ func BuildOpenAIBody(request *models.UnifiedRequest, messagesJSON []interface{},
return body
}
type openAIUsage struct {
PromptTokens uint32 `json:"prompt_tokens"`
CompletionTokens uint32 `json:"completion_tokens"`
TotalTokens uint32 `json:"total_tokens"`
PromptTokensDetails *struct {
CachedTokens uint32 `json:"cached_tokens"`
} `json:"prompt_tokens_details"`
CompletionTokensDetails *struct {
ReasoningTokens uint32 `json:"reasoning_tokens"`
} `json:"completion_tokens_details"`
}
func (u *openAIUsage) ToUnified() *models.Usage {
usage := &models.Usage{
PromptTokens: u.PromptTokens,
CompletionTokens: u.CompletionTokens,
TotalTokens: u.TotalTokens,
}
if u.PromptTokensDetails != nil && u.PromptTokensDetails.CachedTokens > 0 {
usage.CacheReadTokens = &u.PromptTokensDetails.CachedTokens
}
if u.CompletionTokensDetails != nil && u.CompletionTokensDetails.ReasoningTokens > 0 {
usage.ReasoningTokens = &u.CompletionTokensDetails.ReasoningTokens
}
return usage
}
func ParseOpenAIResponse(respJSON map[string]interface{}, model string) (*models.ChatCompletionResponse, error) {
data, err := json.Marshal(respJSON)
if err != nil {
@@ -132,6 +159,16 @@ func ParseOpenAIResponse(respJSON map[string]interface{}, model string) (*models
if err := json.Unmarshal(data, &resp); err != nil {
return nil, err
}
// Manually fix usage because ChatCompletionResponse uses the unified Usage struct
// but the provider might have returned more details.
if usageData, ok := respJSON["usage"]; ok {
var oUsage openAIUsage
usageBytes, _ := json.Marshal(usageData)
if err := json.Unmarshal(usageBytes, &oUsage); err == nil {
resp.Usage = oUsage.ToUnified()
}
}
return &resp, nil
}
@@ -156,6 +193,14 @@ func ParseOpenAIStreamChunk(line string) (*models.ChatCompletionStreamResponse,
return nil, false, fmt.Errorf("failed to unmarshal stream chunk: %w", err)
}
// Handle specialized usage in stream chunks
var rawChunk struct {
Usage *openAIUsage `json:"usage"`
}
if err := json.Unmarshal([]byte(data), &rawChunk); err == nil && rawChunk.Usage != nil {
chunk.Usage = rawChunk.Usage.ToUnified()
}
return &chunk, false, nil
}
@@ -210,24 +255,27 @@ func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamRespo
return err
}
if len(geminiChunk.Candidates) > 0 {
if len(geminiChunk.Candidates) > 0 || geminiChunk.UsageMetadata.TotalTokenCount > 0 {
content := ""
var reasoning *string
for _, p := range geminiChunk.Candidates[0].Content.Parts {
if p.Text != "" {
content += p.Text
}
if p.Thought != "" {
if reasoning == nil {
reasoning = new(string)
if len(geminiChunk.Candidates) > 0 {
for _, p := range geminiChunk.Candidates[0].Content.Parts {
if p.Text != "" {
content += p.Text
}
if p.Thought != "" {
if reasoning == nil {
reasoning = new(string)
}
*reasoning += p.Thought
}
*reasoning += p.Thought
}
}
finishReason := strings.ToLower(geminiChunk.Candidates[0].FinishReason)
if finishReason == "stop" {
finishReason = "stop"
var finishReason *string
if len(geminiChunk.Candidates) > 0 {
fr := strings.ToLower(geminiChunk.Candidates[0].FinishReason)
finishReason = &fr
}
ch <- &models.ChatCompletionStreamResponse{
@@ -242,7 +290,7 @@ func StreamGemini(ctx io.ReadCloser, ch chan<- *models.ChatCompletionStreamRespo
Content: &content,
ReasoningContent: reasoning,
},
FinishReason: &finishReason,
FinishReason: finishReason,
},
},
Usage: &models.Usage{