// GopherGate/internal/utils/registry.go

package utils

import (
	"encoding/json"
	"fmt"
	"log"
	"time"

	"github.com/go-resty/resty/v2"
	"gophergate/internal/models"
)

// ModelsDevURL is the endpoint FetchRegistry requests the model registry JSON from.
const ModelsDevURL = "https://models.dev/api.json"

// FetchRegistry downloads the provider/model registry from ModelsDevURL and
// decodes it into a models.ModelRegistry.
//
// The request is tried up to three times with a 10-second client timeout.
// Before the second attempt it sleeps 2 seconds and before the third 4 seconds.
// An attempt counts as failed when the request returns an error, when the
// response is not reported as successful by resp.IsSuccess, or when the body
// cannot be unmarshalled as JSON. If every attempt fails, the error of the
// last attempt is wrapped and returned together with a nil registry.
func FetchRegistry() (*models.ModelRegistry, error) {
	httpClient := resty.New().SetTimeout(10 * time.Second)

	var lastErr error
	for try := 1; try <= 3; try++ {
		// No delay before the first try; afterwards wait 2s, then 4s.
		if try > 1 {
			time.Sleep(time.Duration(1<<(try-1)) * time.Second)
		}

		resp, err := httpClient.R().Get(ModelsDevURL)
		switch {
		case err != nil:
			lastErr = fmt.Errorf("attempt %d: %w", try, err)
			continue
		case !resp.IsSuccess():
			lastErr = fmt.Errorf("attempt %d: HTTP %d", try, resp.StatusCode())
			continue
		}

		// The payload is decoded as a JSON object mapping string keys to ProviderInfo.
		var providers map[string]models.ProviderInfo
		if decodeErr := json.Unmarshal(resp.Body(), &providers); decodeErr != nil {
			lastErr = fmt.Errorf("attempt %d: unmarshal: %w", try, decodeErr)
			continue
		}

		log.Println("Successfully loaded model registry")
		registry := &models.ModelRegistry{Providers: providers}
		return registry, nil
	}

	return nil, fmt.Errorf("failed to fetch registry after 3 attempts: %w", lastErr)
}

// CalculateCost returns the cost of a request against modelID, using the
// prices found via registry.FindModel. Prices are applied per one million
// tokens.
//
// It returns 0 when the model is not found or has no cost information.
// promptTokens is treated as the total prompt size, so cacheRead is deducted
// from it (clamped at zero) before the input price is applied. Cache-read and
// cache-write tokens are only charged when the model defines the
// corresponding price. reasoningTokens is accepted but does not take part in
// the calculation.
func CalculateCost(registry *models.ModelRegistry, modelID string, promptTokens, completionTokens, reasoningTokens, cacheRead, cacheWrite uint32) float64 {
	meta := registry.FindModel(modelID)
	if meta == nil || meta.Cost == nil {
		return 0.0
	}
	pricing := meta.Cost

	// perMillion prices a token count at a per-million-token rate.
	perMillion := func(tokens uint32, rate float64) float64 {
		return float64(tokens) * rate / 1000000.0
	}

	// Portion of the prompt that was not served from cache; stays zero when
	// cacheRead meets or exceeds promptTokens to avoid unsigned underflow.
	var billableInput uint32
	if promptTokens > cacheRead {
		billableInput = promptTokens - cacheRead
	}

	total := perMillion(billableInput, pricing.Input) + perMillion(completionTokens, pricing.Output)

	if rate := pricing.CacheRead; rate != nil {
		total += perMillion(cacheRead, *rate)
	}
	if rate := pricing.CacheWrite; rate != nil {
		total += perMillion(cacheWrite, *rate)
	}

	return total
}