Files
GopherGate/internal/utils/registry.go
T
hobokenchicken 1f574d8134
CI / Lint (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Build (push) Has been cancelled
feat: Phase 2 - reliability & observability
- Circuit breaker: proper thresholds (3 failures, 30s timeout)
- HTTP timeouts: 30s on all providers (was no timeout)
- Structured logging: slog replaces fmt.Printf throughout
- Stream errors: propagated as SSE error events to client
- Registry fetch: retry with backoff (3 attempts)
- Registry reads in dashboard protected by RWMutex
2026-04-26 14:48:56 -04:00

77 lines
1.9 KiB
Go

package utils
import (
"encoding/json"
"fmt"
"log"
"time"
"github.com/go-resty/resty/v2"
"gophergate/internal/models"
)
// ModelsDevURL is the endpoint that FetchRegistry requests; the response body
// is decoded as a JSON object mapping string keys to models.ProviderInfo.
const ModelsDevURL = "https://models.dev/api.json"
// FetchRegistry downloads the provider map from ModelsDevURL and wraps it in
// a models.ModelRegistry.
//
// The request is made with a resty client configured with a 10-second
// timeout. Up to three attempts are made; the function sleeps 2 seconds
// before the second attempt and 4 seconds before the third. An attempt is
// considered failed when the request returns an error, when the response's
// IsSuccess reports false, or when the body cannot be unmarshalled as JSON.
//
// On success the registry is returned with a nil error. If every attempt
// fails, the result is nil and the error wraps the failure recorded for the
// last attempt.
func FetchRegistry() (*models.ModelRegistry, error) {
	httpClient := resty.New().SetTimeout(10 * time.Second)

	var failure error
	for try := 1; try <= 3; try++ {
		if try > 1 {
			// Exponential pause between attempts: 2s, then 4s.
			time.Sleep(time.Duration(1<<(try-1)) * time.Second)
		}

		resp, reqErr := httpClient.R().Get(ModelsDevURL)
		switch {
		case reqErr != nil:
			failure = fmt.Errorf("attempt %d: %w", try, reqErr)
			continue
		case !resp.IsSuccess():
			failure = fmt.Errorf("attempt %d: HTTP %d", try, resp.StatusCode())
			continue
		}

		var providers map[string]models.ProviderInfo
		decodeErr := json.Unmarshal(resp.Body(), &providers)
		if decodeErr != nil {
			failure = fmt.Errorf("attempt %d: unmarshal: %w", try, decodeErr)
			continue
		}

		log.Println("Successfully loaded model registry")
		registry := &models.ModelRegistry{Providers: providers}
		return registry, nil
	}

	return nil, fmt.Errorf("failed to fetch registry after 3 attempts: %w", failure)
}
// CalculateCost returns the cost of a request against modelID, using the
// per-million-token rates found on the model's Cost entry in registry.
//
// The result is 0 when registry.FindModel returns nil for modelID or when the
// model has no Cost information.
//
// promptTokens is treated as the total prompt size: cacheRead is subtracted
// from it (clamped at zero) and only the remainder is charged at the Input
// rate. completionTokens are charged at the Output rate. cacheRead and
// cacheWrite are charged at the CacheRead and CacheWrite rates respectively,
// but only when the corresponding rate is set. reasoningTokens is accepted
// but does not take part in the calculation.
func CalculateCost(registry *models.ModelRegistry, modelID string, promptTokens, completionTokens, reasoningTokens, cacheRead, cacheWrite uint32) float64 {
	// Rates are expressed per one million tokens.
	const perMillion = 1000000.0

	meta := registry.FindModel(modelID)
	if meta == nil || meta.Cost == nil {
		return 0.0
	}
	pricing := meta.Cost

	// Prompt tokens not served from cache; stays at zero when cacheRead is at
	// least promptTokens, which also avoids unsigned underflow.
	var billableInput uint32
	if promptTokens > cacheRead {
		billableInput = promptTokens - cacheRead
	}

	total := float64(billableInput) * pricing.Input / perMillion
	total += float64(completionTokens) * pricing.Output / perMillion

	if rate := pricing.CacheRead; rate != nil {
		total += float64(cacheRead) * (*rate) / perMillion
	}
	if rate := pricing.CacheWrite; rate != nil {
		total += float64(cacheWrite) * (*rate) / perMillion
	}

	return total
}