fix: correct deepseek pricing, gemini streaming tokens, and group-name logging
- Add promo discount system for deepseek-v4-pro (75% off until 2026-05-31)
- Rewrite StreamGemini to handle both SSE and JSON array response formats, fixing 0-token logging for gemini-3-flash and gemini-3-flash-preview
- Fall back to model group name for cost lookup when concrete model isn't in the registry (fixes $0 cost on deepseek-auto entries)
- Move registry lock before FindModel call to fix data race
This commit is contained in:
+39
-21
@@ -309,9 +309,32 @@ func (s *Server) handleResponses(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// Select provider based on model name
|
||||
// Strip common prefixes and resolve model groups to concrete models
|
||||
// (same pattern as handleChatCompletions).
|
||||
modelGroup := ""
|
||||
modelID := req.Model
|
||||
prefixes := []string{"gemini/", "google/", "openai/", "deepseek/", "moonshot/", "grok/", "ollama/"}
|
||||
for _, p := range prefixes {
|
||||
if strings.HasPrefix(modelID, p) {
|
||||
modelID = strings.TrimPrefix(modelID, p)
|
||||
break
|
||||
}
|
||||
}
|
||||
if s.modelRouter != nil {
|
||||
decision, err := s.modelRouter.RouteToConcrete(c.Request.Context(), modelID, "")
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("model routing failed: %v", err)})
|
||||
return
|
||||
}
|
||||
if decision.SelectedModel != modelID {
|
||||
modelGroup = modelID
|
||||
}
|
||||
modelID = decision.SelectedModel
|
||||
}
|
||||
|
||||
// Select provider based on resolved model name
|
||||
providerName := "openai" // default for Responses API
|
||||
modelLower := strings.ToLower(req.Model)
|
||||
modelLower := strings.ToLower(modelID)
|
||||
if strings.HasPrefix(modelLower, "gemini/") || strings.Contains(modelLower, "gemini") || strings.HasPrefix(modelLower, "google/") {
|
||||
providerName = "gemini"
|
||||
} else if strings.HasPrefix(modelLower, "deepseek/") || (strings.Contains(modelLower, "deepseek") && !strings.Contains(modelLower, "ollama")) {
|
||||
@@ -339,17 +362,7 @@ func (s *Server) handleResponses(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// Strip common prefixes from model name
|
||||
modelID := req.Model
|
||||
prefixes := []string{"gemini/", "google/", "openai/", "deepseek/", "moonshot/", "grok/", "ollama/"}
|
||||
for _, p := range prefixes {
|
||||
if strings.HasPrefix(modelID, p) {
|
||||
modelID = strings.TrimPrefix(modelID, p)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Use the stripped model name for the actual API call
|
||||
// Use resolved model for the actual API call
|
||||
req.Model = modelID
|
||||
|
||||
clientID := "default"
|
||||
@@ -364,7 +377,7 @@ func (s *Server) handleResponses(c *gin.Context) {
|
||||
if stream {
|
||||
ch, err := provider.ResponsesStream(c.Request.Context(), &req)
|
||||
if err != nil {
|
||||
s.logRequest(startTime, clientID, providerName, req.Model, "", nil, err, false)
|
||||
s.logRequest(startTime, clientID, providerName, modelID, modelGroup, nil, err, false)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
@@ -379,9 +392,9 @@ func (s *Server) handleResponses(c *gin.Context) {
|
||||
if !ok {
|
||||
fmt.Fprintf(w, "data: [DONE]\n\n")
|
||||
if lastUsage != nil {
|
||||
s.logRequest(startTime, clientID, providerName, req.Model, "", lastUsage.ToUsage(), nil, false)
|
||||
s.logRequest(startTime, clientID, providerName, modelID, modelGroup, lastUsage.ToUsage(), nil, false)
|
||||
} else {
|
||||
s.logRequest(startTime, clientID, providerName, req.Model, "", nil, nil, false)
|
||||
s.logRequest(startTime, clientID, providerName, modelID, modelGroup, nil, nil, false)
|
||||
}
|
||||
return false
|
||||
}
|
||||
@@ -401,15 +414,15 @@ func (s *Server) handleResponses(c *gin.Context) {
|
||||
|
||||
resp, err := provider.Responses(c.Request.Context(), &req)
|
||||
if err != nil {
|
||||
s.logRequest(startTime, clientID, providerName, req.Model, "", nil, err, false)
|
||||
s.logRequest(startTime, clientID, providerName, modelID, modelGroup, nil, err, false)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
if resp.Usage != nil {
|
||||
s.logRequest(startTime, clientID, providerName, req.Model, "", resp.Usage.ToUsage(), nil, false)
|
||||
s.logRequest(startTime, clientID, providerName, modelID, modelGroup, resp.Usage.ToUsage(), nil, false)
|
||||
} else {
|
||||
s.logRequest(startTime, clientID, providerName, req.Model, "", nil, nil, false)
|
||||
s.logRequest(startTime, clientID, providerName, modelID, modelGroup, nil, nil, false)
|
||||
}
|
||||
c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
@@ -881,9 +894,14 @@ func (s *Server) logRequest(start time.Time, clientID, provider, model, modelGro
|
||||
entry.CacheWriteTokens = *usage.CacheWriteTokens
|
||||
}
|
||||
|
||||
// Calculate cost using registry
|
||||
// Calculate cost using registry; if the resolved model is unknown,
|
||||
// fall back to the model group so group requests still get priced.
|
||||
s.registryMu.RLock()
|
||||
entry.Cost = utils.CalculateCost(s.registry, model, entry.PromptTokens, entry.CompletionTokens, entry.ReasoningTokens, entry.CacheReadTokens, entry.CacheWriteTokens)
|
||||
pricingModel := model
|
||||
if s.registry != nil && s.registry.FindModel(pricingModel) == nil && modelGroup != "" {
|
||||
pricingModel = modelGroup
|
||||
}
|
||||
entry.Cost = utils.CalculateCost(s.registry, pricingModel, entry.PromptTokens, entry.CompletionTokens, entry.ReasoningTokens, entry.CacheReadTokens, entry.CacheWriteTokens)
|
||||
s.registryMu.RUnlock()
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user