fix: improve cost tracking accuracy for modern models
- Added support for reasoning tokens in cost calculations.
- Fixed DeepSeek cache-write token mapping (PromptCacheMissTokens).
- Improved CalculateCost debug logging to trace all pricing variables.
This commit is contained in:
@@ -51,6 +51,9 @@ func (u *deepSeekUsage) ToUnified() *models.Usage {
|
|||||||
if u.PromptCacheHitTokens > 0 {
|
if u.PromptCacheHitTokens > 0 {
|
||||||
usage.CacheReadTokens = &u.PromptCacheHitTokens
|
usage.CacheReadTokens = &u.PromptCacheHitTokens
|
||||||
}
|
}
|
||||||
|
if u.PromptCacheMissTokens > 0 {
|
||||||
|
usage.CacheWriteTokens = &u.PromptCacheMissTokens
|
||||||
|
}
|
||||||
if u.CompletionTokensDetails != nil && u.CompletionTokensDetails.ReasoningTokens > 0 {
|
if u.CompletionTokensDetails != nil && u.CompletionTokensDetails.ReasoningTokens > 0 {
|
||||||
usage.ReasoningTokens = &u.CompletionTokensDetails.ReasoningTokens
|
usage.ReasoningTokens = &u.CompletionTokensDetails.ReasoningTokens
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -378,9 +378,9 @@ func (s *Server) logRequest(start time.Time, clientID, provider, model string, u
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Calculate cost using registry
|
// Calculate cost using registry
|
||||||
entry.Cost = utils.CalculateCost(s.registry, model, entry.PromptTokens, entry.CompletionTokens, entry.CacheReadTokens, entry.CacheWriteTokens)
|
entry.Cost = utils.CalculateCost(s.registry, model, entry.PromptTokens, entry.CompletionTokens, entry.ReasoningTokens, entry.CacheReadTokens, entry.CacheWriteTokens)
|
||||||
fmt.Printf("[DEBUG] Request logged: model=%s, prompt=%d, completion=%d, cache_read=%d, cost=%f\n",
|
fmt.Printf("[DEBUG] Request logged: model=%s, prompt=%d, completion=%d, reasoning=%d, cache_read=%d, cost=%f\n",
|
||||||
model, entry.PromptTokens, entry.CompletionTokens, entry.CacheReadTokens, entry.Cost)
|
model, entry.PromptTokens, entry.CompletionTokens, entry.ReasoningTokens, entry.CacheReadTokens, entry.Cost)
|
||||||
}
|
}
|
||||||
|
|
||||||
s.logger.LogRequest(entry)
|
s.logger.LogRequest(entry)
|
||||||
|
|||||||
@@ -34,9 +34,10 @@ func FetchRegistry() (*models.ModelRegistry, error) {
|
|||||||
return &models.ModelRegistry{Providers: providers}, nil
|
return &models.ModelRegistry{Providers: providers}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func CalculateCost(registry *models.ModelRegistry, modelID string, promptTokens, completionTokens, cacheRead, cacheWrite uint32) float64 {
|
func CalculateCost(registry *models.ModelRegistry, modelID string, promptTokens, completionTokens, reasoningTokens, cacheRead, cacheWrite uint32) float64 {
|
||||||
meta := registry.FindModel(modelID)
|
meta := registry.FindModel(modelID)
|
||||||
if meta == nil || meta.Cost == nil {
|
if meta == nil || meta.Cost == nil {
|
||||||
|
log.Printf("[DEBUG] CalculateCost: model %s not found or has no cost metadata", modelID)
|
||||||
return 0.0
|
return 0.0
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -61,5 +62,8 @@ func CalculateCost(registry *models.ModelRegistry, modelID string, promptTokens,
|
|||||||
cost += float64(cacheWrite) * (*meta.Cost.CacheWrite) / 1000000.0
|
cost += float64(cacheWrite) * (*meta.Cost.CacheWrite) / 1000000.0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.Printf("[DEBUG] CalculateCost: model=%s, uncached=%d, completion=%d, reasoning=%d, cache_read=%d, cache_write=%d, cost=%f (input_rate=%f, output_rate=%f)",
|
||||||
|
modelID, uncachedTokens, completionTokens, reasoningTokens, cacheRead, cacheWrite, cost, meta.Cost.Input, meta.Cost.Output)
|
||||||
|
|
||||||
return cost
|
return cost
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user