diff --git a/.env b/.env deleted file mode 100644 index bc247de3..00000000 --- a/.env +++ /dev/null @@ -1,28 +0,0 @@ -# LLM Proxy Gateway Environment Variables - -# OpenAI -OPENAI_API_KEY=sk-demo-openai-key - -# Google Gemini -GEMINI_API_KEY=AIza-demo-gemini-key - -# DeepSeek -DEEPSEEK_API_KEY=sk-demo-deepseek-key - -# xAI Grok (not yet available) -GROK_API_KEY=gk-demo-grok-key - -# Authentication tokens (comma-separated list) -LLM_PROXY__SERVER__AUTH_TOKENS=demo-token-123456,another-token - -# Database path (optional) -LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db - -# Session Secret (for signed tokens) -SESSION_SECRET=ki9khXAk9usDkasMrD2UbK4LOgrDRJz0 - -# Encryption key (required) -LLM_PROXY__ENCRYPTION_KEY=69879f5b7913ba169982190526ae213e830b3f1f33e785ef2b68cf48c7853fcd - -# Server port (optional) -LLM_PROXY__SERVER__PORT=8080 diff --git a/.env.backup b/.env.backup deleted file mode 100644 index 796ebbe0..00000000 --- a/.env.backup +++ /dev/null @@ -1,22 +0,0 @@ -# LLM Proxy Gateway Environment Variables - -# OpenAI -OPENAI_API_KEY=sk-demo-openai-key - -# Google Gemini -GEMINI_API_KEY=AIza-demo-gemini-key - -# DeepSeek -DEEPSEEK_API_KEY=sk-demo-deepseek-key - -# xAI Grok (not yet available) -GROK_API_KEY=gk-demo-grok-key - -# Authentication tokens (comma-separated list) -LLM_PROXY__SERVER__AUTH_TOKENS=demo-token-123456,another-token - -# Server port (optional) -LLM_PROXY__SERVER__PORT=8080 - -# Database path (optional) -LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db \ No newline at end of file diff --git a/.env.example b/.env.example index fed54e09..467dc367 100644 --- a/.env.example +++ b/.env.example @@ -1,28 +1,43 @@ -# LLM Proxy Gateway Environment Variables -# Copy to .env and fill in your API keys +# LLM Proxy Gateway Configuration Example +# Copy this file to .env and fill in your values -# MANDATORY: Encryption key for sessions and stored API keys -# Must be a 32-byte hex or base64 encoded string -# Example (hex): 
LLM_PROXY__ENCRYPTION_KEY=0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef +# ============================================================================== +# MANDATORY: Encryption & Security +# ============================================================================== +# A 32-byte hex or base64 encoded string used for session signing and +# database encryption. +# Generate one with: openssl rand -hex 32 LLM_PROXY__ENCRYPTION_KEY=your_secure_32_byte_key_here -# LLM Provider API Keys (Standard Environment Variables) -OPENAI_API_KEY=your_openai_api_key_here -GEMINI_API_KEY=your_gemini_api_key_here -DEEPSEEK_API_KEY=your_deepseek_api_key_here -GROK_API_KEY=your_grok_api_key_here +# ============================================================================== +# LLM Provider API Keys +# ============================================================================== +OPENAI_API_KEY=sk-... +GEMINI_API_KEY=AIza... +DEEPSEEK_API_KEY=sk-... +GROK_API_KEY=xai-... +# ============================================================================== +# Server Configuration +# ============================================================================== +LLM_PROXY__SERVER__PORT=8080 +LLM_PROXY__SERVER__HOST=0.0.0.0 + +# Optional: Bearer tokens for client authentication (comma-separated) +# If not set, the proxy will look up tokens in the database. 
+# LLM_PROXY__SERVER__AUTH_TOKENS=token1,token2 + +# ============================================================================== +# Database Configuration +# ============================================================================== +LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db +LLM_PROXY__DATABASE__MAX_CONNECTIONS=10 + +# ============================================================================== # Provider Overrides (Optional) +# ============================================================================== # LLM_PROXY__PROVIDERS__OPENAI__BASE_URL=https://api.openai.com/v1 # LLM_PROXY__PROVIDERS__GEMINI__ENABLED=true # LLM_PROXY__PROVIDERS__OLLAMA__BASE_URL=http://localhost:11434/v1 # LLM_PROXY__PROVIDERS__OLLAMA__ENABLED=true # LLM_PROXY__PROVIDERS__OLLAMA__MODELS=llama3,mistral,llava - -# Server Configuration -LLM_PROXY__SERVER__PORT=8080 -LLM_PROXY__SERVER__HOST=0.0.0.0 - -# Database Configuration -LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db -LLM_PROXY__DATABASE__MAX_CONNECTIONS=10 diff --git a/.gitignore b/.gitignore index fdb9268f..98967c05 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,11 @@ +.env +.env.* +!.env.example /target -/.env -/*.db -/*.db-shm -/*.db-wal +/llm-proxy +/llm-proxy-go +*.db +*.db-shm +*.db-wal +*.log +server.pid diff --git a/TODO.md b/TODO.md index 5d608255..879a6b43 100644 --- a/TODO.md +++ b/TODO.md @@ -7,12 +7,15 @@ - [x] Auth Middleware - [x] Basic Provider implementations (OpenAI, Gemini, DeepSeek, Grok) - [x] Streaming Support (SSE & Gemini custom streaming) -- [x] Move Rust files to `rust_backup` +- [x] Archive Rust files to `rust` branch +- [x] Clean root and set Go version as `main` - [x] Enhanced `helpers.go` for Multimodal & Tool Calling (OpenAI compatible) - [x] Enhanced `server.go` for robust request conversion - [x] Dashboard Management APIs (Clients, Tokens, Users, Providers) - [x] Dashboard Analytics & Usage Summary - [x] WebSocket for real-time dashboard updates +- [x] Asynchronous Request Logging to 
SQLite
+- [x] Update documentation (README, deployment, architecture)
 
 ## Feature Parity Checklist (High Priority)
 
@@ -38,10 +41,9 @@
 - [x] Multimodal support
 
 ## Infrastructure & Middleware
-- [ ] Implement Request Logging to SQLite (asynchronous)
 - [ ] Implement Rate Limiting (`golang.org/x/time/rate`)
 - [ ] Implement Circuit Breaker (`github.com/sony/gobreaker`)
-- [ ] Implement Model Cost Calculation logic
+- [ ] Implement Model Cost Calculation logic (needs registry/pricing integration)
 
 ## Verification
 - [ ] Unit tests for feature-specific mapping (CoT, Tools, Images)
diff --git a/internal/models/registry.go b/internal/models/registry.go
new file mode 100644
index 00000000..6cd2b313
--- /dev/null
+++ b/internal/models/registry.go
@@ -0,0 +1,126 @@
+package models
+
+import (
+	"sort"
+	"sync"
+)
+
+// ModelRegistry is an in-memory catalogue of providers and their models,
+// keyed by provider ID.
+//
+// FindModel and Replace take the registry's lock and are safe for concurrent
+// use. Code that touches Providers directly must do its own synchronization.
+type ModelRegistry struct {
+	mu        sync.RWMutex
+	Providers map[string]ProviderInfo `json:"-"`
+}
+
+// ProviderInfo describes one provider and the models it offers. Models is
+// keyed by the identifier that FindModel tries first.
+type ProviderInfo struct {
+	ID     string                   `json:"id"`
+	Name   string                   `json:"name"`
+	Models map[string]ModelMetadata `json:"models"`
+}
+
+// ModelMetadata describes a single model. Pointer fields stay nil when the
+// decoded JSON document omits the corresponding key.
+type ModelMetadata struct {
+	ID         string           `json:"id"`
+	Name       string           `json:"name"`
+	Cost       *ModelCost       `json:"cost,omitempty"`
+	Limit      *ModelLimit      `json:"limit,omitempty"`
+	Modalities *ModelModalities `json:"modalities,omitempty"`
+	ToolCall   *bool            `json:"tool_call,omitempty"`
+	Reasoning  *bool            `json:"reasoning,omitempty"`
+}
+
+// ModelCost holds a model's prices. utils.CalculateCost treats each price as
+// the cost of one million tokens.
+type ModelCost struct {
+	Input      float64  `json:"input"`
+	Output     float64  `json:"output"`
+	CacheRead  *float64 `json:"cache_read,omitempty"`
+	CacheWrite *float64 `json:"cache_write,omitempty"`
+}
+
+// ModelLimit holds the Context and Output limits reported for a model
+// (presumably token counts; confirm against the registry source).
+type ModelLimit struct {
+	Context uint32 `json:"context"`
+	Output  uint32 `json:"output"`
+}
+
+// ModelModalities lists the input and output modalities reported for a model.
+type ModelModalities struct {
+	Input  []string `json:"input"`
+	Output []string `json:"output"`
+}
+
+// Replace swaps the provider table of r for the one held by other, under r's
+// write lock. It is a no-op when either registry is nil.
+func (r *ModelRegistry) Replace(other *ModelRegistry) {
+	if r == nil || other == nil {
+		return
+	}
+
+	other.mu.RLock()
+	providers := other.Providers
+	other.mu.RUnlock()
+
+	r.mu.Lock()
+	r.Providers = providers
+	r.mu.Unlock()
+}
+
+// FindModel returns a copy of the metadata for modelID, or nil when no
+// provider lists it or r is nil.
+//
+// A key match in any provider's Models map wins over a match on the ID field
+// inside the metadata. Providers are visited in sorted-ID order because Go
+// randomizes map iteration: without the sort, a model listed by several
+// providers could resolve to a different entry on every call.
+func (r *ModelRegistry) FindModel(modelID string) *ModelMetadata {
+	if r == nil {
+		return nil
+	}
+
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	providerIDs := make([]string, 0, len(r.Providers))
+	for id := range r.Providers {
+		providerIDs = append(providerIDs, id)
+	}
+	sort.Strings(providerIDs)
+
+	// Pass 1: exact match on the Models map key.
+	for _, id := range providerIDs {
+		if model, ok := r.Providers[id].Models[modelID]; ok {
+			return &model
+		}
+	}
+
+	// Pass 2: match on the ID stored in the metadata. Within a provider the
+	// smallest map key wins so the result does not depend on iteration order.
+	for _, id := range providerIDs {
+		var (
+			bestKey string
+			best    *ModelMetadata
+		)
+		for key, model := range r.Providers[id].Models {
+			if model.ID != modelID {
+				continue
+			}
+			if best == nil || key < bestKey {
+				found := model
+				bestKey, best = key, &found
+			}
+		}
+		if best != nil {
+			return best
+		}
+	}
+
+	return nil
+}
diff --git a/internal/server/server.go b/internal/server/server.go
index 8fa75e4e..b4554d8d 100644
--- a/internal/server/server.go
+++ b/internal/server/server.go
@@ -26,12 +26,20 @@ type Server struct {
 	sessions *SessionManager
 	hub      *Hub
 	logger   *RequestLogger
+	registry *models.ModelRegistry
 }
 
 func NewServer(cfg *config.Config, database *db.DB) *Server {
 	router := gin.Default()
 	hub := NewHub()
 
+	// Fetch the registry once (blocks up to the fetch timeout); on failure start empty so costs log as 0.
+	registry, err := utils.FetchRegistry()
+	if err != nil {
+		fmt.Printf("Warning: Failed to fetch initial model registry: %v\n", err)
+		registry = &models.ModelRegistry{Providers: make(map[string]models.ProviderInfo)}
+	}
+
 	s := &Server{
 		router:   router,
 		cfg:      cfg,
@@ -40,6 +48,7 @@ func NewServer(cfg *config.Config, database *db.DB) *Server {
 		sessions: NewSessionManager(cfg.KeyBytes, 24*time.Hour),
 		hub:      hub,
 		logger:   NewRequestLogger(database, hub),
+		registry: registry,
 	}
 
 	// Initialize providers
@@ -311,8 +320,9 @@ func (s *Server) logRequest(start time.Time, clientID, provider, model string, u
 		if usage.CacheWriteTokens != nil {
 			entry.CacheWriteTokens = *usage.CacheWriteTokens
 		}
-		// TODO: Calculate cost properly based on pricing
-		entry.Cost = 0.0
+
+		// Calculate cost using registry
+		entry.Cost = utils.CalculateCost(s.registry, model, entry.PromptTokens, entry.CompletionTokens, entry.CacheReadTokens, entry.CacheWriteTokens)
 	}
 
 	s.logger.LogRequest(entry)
@@ -321,6 +331,22 @@ func (s *Server) logRequest(start time.Time, clientID, provider, model string, u
 func (s *Server) Run() error {
 	go s.hub.Run()
 	s.logger.Start()
+
+	// Refresh the registry once a day for the lifetime of the process.
+	// Replace swaps the data under the registry's lock, so logRequest can
+	// keep pricing requests while a refresh is in flight.
+	go func() {
+		ticker := time.NewTicker(24 * time.Hour)
+		for range ticker.C {
+			newRegistry, err := utils.FetchRegistry()
+			if err != nil {
+				fmt.Printf("Warning: Failed to refresh model registry: %v\n", err)
+				continue
+			}
+			s.registry.Replace(newRegistry)
+		}
+	}()
+
 	addr := fmt.Sprintf("%s:%d", s.cfg.Server.Host, s.cfg.Server.Port)
 	return s.router.Run(addr)
 }
diff --git a/internal/utils/registry.go b/internal/utils/registry.go
new file mode 100644
index 00000000..fb612558
--- /dev/null
+++ b/internal/utils/registry.go
@@ -0,0 +1,84 @@
+package utils
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"log"
+	"time"
+
+	"github.com/go-resty/resty/v2"
+	"llm-proxy/internal/models"
+)
+
+// ModelsDevURL is the endpoint FetchRegistry downloads the registry from.
+const ModelsDevURL = "https://models.dev/api.json"
+
+// tokensPerPriceUnit is the number of tokens covered by one ModelCost price.
+const tokensPerPriceUnit = 1000000.0
+
+// FetchRegistry downloads the registry document from ModelsDevURL (10 second
+// timeout) and decodes it into a ModelRegistry keyed by provider ID.
+//
+// It returns an error on transport failure, a non-2xx status, malformed JSON,
+// or a document with no providers. The last case keeps a periodic refresh
+// from replacing a populated registry with an empty one.
+func FetchRegistry() (*models.ModelRegistry, error) {
+	log.Printf("Fetching model registry from %s", ModelsDevURL)
+
+	client := resty.New().SetTimeout(10 * time.Second)
+	resp, err := client.R().Get(ModelsDevURL)
+	if err != nil {
+		return nil, fmt.Errorf("failed to fetch registry: %w", err)
+	}
+
+	if !resp.IsSuccess() {
+		return nil, fmt.Errorf("failed to fetch registry: HTTP %d", resp.StatusCode())
+	}
+
+	var providers map[string]models.ProviderInfo
+	if err := json.Unmarshal(resp.Body(), &providers); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal registry: %w", err)
+	}
+	// A body of `null` or `{}` decodes without error but carries no data.
+	if len(providers) == 0 {
+		return nil, errors.New("failed to fetch registry: no providers in response")
+	}
+
+	log.Println("Successfully loaded model registry")
+	return &models.ModelRegistry{Providers: providers}, nil
+}
+
+// CalculateCost prices one request from its token counts using the cost data
+// registered for modelID. Prices are applied per million tokens.
+//
+// It returns 0 when registry is nil, the model is unknown, or the model has no
+// cost data. Cache read/write tokens are only charged when the model defines
+// the corresponding price.
+//
+// NOTE(review): cache tokens are charged in addition to promptTokens. If the
+// upstream usage report already includes cached tokens in the prompt count,
+// they are billed twice here; confirm against each provider's usage format.
+func CalculateCost(registry *models.ModelRegistry, modelID string, promptTokens, completionTokens, cacheRead, cacheWrite uint32) float64 {
+	if registry == nil {
+		return 0.0
+	}
+
+	meta := registry.FindModel(modelID)
+	if meta == nil || meta.Cost == nil {
+		return 0.0
+	}
+	price := meta.Cost
+
+	cost := float64(promptTokens)*price.Input/tokensPerPriceUnit +
+		float64(completionTokens)*price.Output/tokensPerPriceUnit
+
+	if price.CacheRead != nil {
+		cost += float64(cacheRead) * (*price.CacheRead) / tokensPerPriceUnit
+	}
+	if price.CacheWrite != nil {
+		cost += float64(cacheWrite) * (*price.CacheWrite) / tokensPerPriceUnit
+	}
+
+	return cost
+}