Compare commits
15 Commits
6b10d4249c
...
90874a6721
| Author | SHA1 | Date | |
|---|---|---|---|
| 90874a6721 | |||
| 57aa0aa70e | |||
| 4de457cc5e | |||
| 66e8b114b9 | |||
| 1cac45502a | |||
| 79dc8fe409 | |||
| 24a898c9a7 | |||
| 7c2a317c01 | |||
| cb619f9286 | |||
| 441270317c | |||
| 2e4318d84b | |||
| d0be16d8e3 | |||
| 83e0ad0240 | |||
| 275ce34d05 | |||
| cb5b921550 |
28
.env
28
.env
@@ -1,28 +0,0 @@
|
||||
# LLM Proxy Gateway Environment Variables
|
||||
|
||||
# OpenAI
|
||||
OPENAI_API_KEY=sk-demo-openai-key
|
||||
|
||||
# Google Gemini
|
||||
GEMINI_API_KEY=AIza-demo-gemini-key
|
||||
|
||||
# DeepSeek
|
||||
DEEPSEEK_API_KEY=sk-demo-deepseek-key
|
||||
|
||||
# xAI Grok (not yet available)
|
||||
GROK_API_KEY=gk-demo-grok-key
|
||||
|
||||
# Authentication tokens (comma-separated list)
|
||||
LLM_PROXY__SERVER__AUTH_TOKENS=demo-token-123456,another-token
|
||||
|
||||
# Database path (optional)
|
||||
LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db
|
||||
|
||||
# Session Secret (for signed tokens)
|
||||
SESSION_SECRET=ki9khXAk9usDkasMrD2UbK4LOgrDRJz0
|
||||
|
||||
# Encryption key (required)
|
||||
LLM_PROXY__ENCRYPTION_KEY=69879f5b7913ba169982190526ae213e830b3f1f33e785ef2b68cf48c7853fcd
|
||||
|
||||
# Server port (optional)
|
||||
LLM_PROXY__SERVER__PORT=8080
|
||||
22
.env.backup
22
.env.backup
@@ -1,22 +0,0 @@
|
||||
# LLM Proxy Gateway Environment Variables
|
||||
|
||||
# OpenAI
|
||||
OPENAI_API_KEY=sk-demo-openai-key
|
||||
|
||||
# Google Gemini
|
||||
GEMINI_API_KEY=AIza-demo-gemini-key
|
||||
|
||||
# DeepSeek
|
||||
DEEPSEEK_API_KEY=sk-demo-deepseek-key
|
||||
|
||||
# xAI Grok (not yet available)
|
||||
GROK_API_KEY=gk-demo-grok-key
|
||||
|
||||
# Authentication tokens (comma-separated list)
|
||||
LLM_PROXY__SERVER__AUTH_TOKENS=demo-token-123456,another-token
|
||||
|
||||
# Server port (optional)
|
||||
LLM_PROXY__SERVER__PORT=8080
|
||||
|
||||
# Database path (optional)
|
||||
LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db
|
||||
51
.env.example
51
.env.example
@@ -1,28 +1,43 @@
|
||||
# LLM Proxy Gateway Environment Variables
|
||||
# Copy to .env and fill in your API keys
|
||||
# LLM Proxy Gateway Configuration Example
|
||||
# Copy this file to .env and fill in your values
|
||||
|
||||
# MANDATORY: Encryption key for sessions and stored API keys
|
||||
# Must be a 32-byte hex or base64 encoded string
|
||||
# Example (hex): LLM_PROXY__ENCRYPTION_KEY=0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef
|
||||
# ==============================================================================
|
||||
# MANDATORY: Encryption & Security
|
||||
# ==============================================================================
|
||||
# A 32-byte key, encoded as hex (64 characters) or base64, used for session signing and
|
||||
# database encryption.
|
||||
# Generate one with: openssl rand -hex 32
|
||||
LLM_PROXY__ENCRYPTION_KEY=your_secure_32_byte_key_here
|
||||
|
||||
# LLM Provider API Keys (Standard Environment Variables)
|
||||
OPENAI_API_KEY=your_openai_api_key_here
|
||||
GEMINI_API_KEY=your_gemini_api_key_here
|
||||
DEEPSEEK_API_KEY=your_deepseek_api_key_here
|
||||
GROK_API_KEY=your_grok_api_key_here
|
||||
# ==============================================================================
|
||||
# LLM Provider API Keys
|
||||
# ==============================================================================
|
||||
OPENAI_API_KEY=sk-...
|
||||
GEMINI_API_KEY=AIza...
|
||||
DEEPSEEK_API_KEY=sk-...
|
||||
GROK_API_KEY=xai-...
|
||||
|
||||
# ==============================================================================
|
||||
# Server Configuration
|
||||
# ==============================================================================
|
||||
LLM_PROXY__SERVER__PORT=8080
|
||||
LLM_PROXY__SERVER__HOST=0.0.0.0
|
||||
|
||||
# Optional: Bearer tokens for client authentication (comma-separated)
|
||||
# If not set, the proxy will look up tokens in the database.
|
||||
# LLM_PROXY__SERVER__AUTH_TOKENS=token1,token2
|
||||
|
||||
# ==============================================================================
|
||||
# Database Configuration
|
||||
# ==============================================================================
|
||||
LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db
|
||||
LLM_PROXY__DATABASE__MAX_CONNECTIONS=10
|
||||
|
||||
# ==============================================================================
|
||||
# Provider Overrides (Optional)
|
||||
# ==============================================================================
|
||||
# LLM_PROXY__PROVIDERS__OPENAI__BASE_URL=https://api.openai.com/v1
|
||||
# LLM_PROXY__PROVIDERS__GEMINI__ENABLED=true
|
||||
# LLM_PROXY__PROVIDERS__OLLAMA__BASE_URL=http://localhost:11434/v1
|
||||
# LLM_PROXY__PROVIDERS__OLLAMA__ENABLED=true
|
||||
# LLM_PROXY__PROVIDERS__OLLAMA__MODELS=llama3,mistral,llava
|
||||
|
||||
# Server Configuration
|
||||
LLM_PROXY__SERVER__PORT=8080
|
||||
LLM_PROXY__SERVER__HOST=0.0.0.0
|
||||
|
||||
# Database Configuration
|
||||
LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db
|
||||
LLM_PROXY__DATABASE__MAX_CONNECTIONS=10
|
||||
|
||||
14
.gitignore
vendored
14
.gitignore
vendored
@@ -1,5 +1,11 @@
|
||||
.env
|
||||
.env.*
|
||||
!.env.example
|
||||
/target
|
||||
/.env
|
||||
/*.db
|
||||
/*.db-shm
|
||||
/*.db-wal
|
||||
/llm-proxy
|
||||
/llm-proxy-go
|
||||
*.db
|
||||
*.db-shm
|
||||
*.db-wal
|
||||
*.log
|
||||
server.pid
|
||||
|
||||
8
TODO.md
8
TODO.md
@@ -7,12 +7,15 @@
|
||||
- [x] Auth Middleware
|
||||
- [x] Basic Provider implementations (OpenAI, Gemini, DeepSeek, Grok)
|
||||
- [x] Streaming Support (SSE & Gemini custom streaming)
|
||||
- [x] Move Rust files to `rust_backup`
|
||||
- [x] Archive Rust files to `rust` branch
|
||||
- [x] Clean root and set Go version as `main`
|
||||
- [x] Enhanced `helpers.go` for Multimodal & Tool Calling (OpenAI compatible)
|
||||
- [x] Enhanced `server.go` for robust request conversion
|
||||
- [x] Dashboard Management APIs (Clients, Tokens, Users, Providers)
|
||||
- [x] Dashboard Analytics & Usage Summary
|
||||
- [x] WebSocket for real-time dashboard updates
|
||||
- [x] Asynchronous Request Logging to SQLite
|
||||
- [x] Update documentation (README, deployment, architecture)
|
||||
|
||||
## Feature Parity Checklist (High Priority)
|
||||
|
||||
@@ -38,10 +41,9 @@
|
||||
- [x] Multimodal support
|
||||
|
||||
## Infrastructure & Middleware
|
||||
- [ ] Implement Request Logging to SQLite (asynchronous)
|
||||
- [ ] Implement Rate Limiting (`golang.org/x/time/rate`)
|
||||
- [ ] Implement Circuit Breaker (`github.com/sony/gobreaker`)
|
||||
- [ ] Implement Model Cost Calculation logic
|
||||
- [ ] Implement Model Cost Calculation logic (needs registry/pricing integration)
|
||||
|
||||
## Verification
|
||||
- [ ] Unit tests for feature-specific mapping (CoT, Tools, Images)
|
||||
|
||||
58
internal/models/registry.go
Normal file
58
internal/models/registry.go
Normal file
@@ -0,0 +1,58 @@
|
||||
package models
|
||||
|
||||
// ModelRegistry holds the known providers, keyed by provider identifier.
//
// Providers is tagged `json:"-"`, so it is skipped when a ModelRegistry value
// itself is marshalled or unmarshalled; callers populate the map directly.
type ModelRegistry struct {
	Providers map[string]ProviderInfo `json:"-"`
}

// ProviderInfo describes a single provider and the models it offers.
// Models is keyed by a model identifier string.
type ProviderInfo struct {
	ID     string                   `json:"id"`
	Name   string                   `json:"name"`
	Models map[string]ModelMetadata `json:"models"`
}

// ModelMetadata describes one model. The pointer fields are optional and are
// omitted from JSON output when nil.
type ModelMetadata struct {
	ID         string           `json:"id"`
	Name       string           `json:"name"`
	Cost       *ModelCost       `json:"cost,omitempty"`
	Limit      *ModelLimit      `json:"limit,omitempty"`
	Modalities *ModelModalities `json:"modalities,omitempty"`
	ToolCall   *bool            `json:"tool_call,omitempty"`
	Reasoning  *bool            `json:"reasoning,omitempty"`
}

// ModelCost carries the pricing of a model. CacheRead and CacheWrite are
// optional. CalculateCost in internal/utils treats every price as the cost of
// one million tokens.
type ModelCost struct {
	Input      float64  `json:"input"`
	Output     float64  `json:"output"`
	CacheRead  *float64 `json:"cache_read,omitempty"`
	CacheWrite *float64 `json:"cache_write,omitempty"`
}

// ModelLimit carries the context and output limits of a model
// (presumably token counts; confirm against the registry data source).
type ModelLimit struct {
	Context uint32 `json:"context"`
	Output  uint32 `json:"output"`
}

// ModelModalities lists the input and output modalities a model supports.
type ModelModalities struct {
	Input  []string `json:"input"`
	Output []string `json:"output"`
}

// FindModel looks up the metadata for modelID across all providers.
//
// A match on a provider's Models map key takes precedence over a match on
// ModelMetadata.ID. When several providers contain a match, the provider with
// the lexicographically smallest key in Providers wins (ties inside one
// provider are broken by the smallest model key), so the result does not
// depend on Go's randomized map iteration order.
//
// It returns a pointer to a copy of the metadata, or nil when the receiver is
// nil or no model matches.
func (r *ModelRegistry) FindModel(modelID string) *ModelMetadata {
	if r == nil {
		return nil
	}

	var (
		found        bool
		bestProvider string
		bestModelKey string
		best         ModelMetadata
	)

	// First try an exact match on the models map key.
	for providerKey, provider := range r.Providers {
		model, ok := provider.Models[modelID]
		if !ok {
			continue
		}
		if !found || providerKey < bestProvider {
			found, bestProvider, best = true, providerKey, model
		}
	}
	if found {
		return &best
	}

	// Fall back to searching by the ID stored in the metadata.
	for providerKey, provider := range r.Providers {
		for modelKey, model := range provider.Models {
			if model.ID != modelID {
				continue
			}
			if !found || providerKey < bestProvider ||
				(providerKey == bestProvider && modelKey < bestModelKey) {
				found, bestProvider, bestModelKey, best = true, providerKey, modelKey, model
			}
		}
	}
	if !found {
		return nil
	}
	return &best
}
|
||||
@@ -26,12 +26,20 @@ type Server struct {
|
||||
sessions *SessionManager
|
||||
hub *Hub
|
||||
logger *RequestLogger
|
||||
registry *models.ModelRegistry
|
||||
}
|
||||
|
||||
func NewServer(cfg *config.Config, database *db.DB) *Server {
|
||||
router := gin.Default()
|
||||
hub := NewHub()
|
||||
|
||||
// Fetch registry (non-blocking for startup if it fails, but we'll try once)
|
||||
registry, err := utils.FetchRegistry()
|
||||
if err != nil {
|
||||
fmt.Printf("Warning: Failed to fetch initial model registry: %v\n", err)
|
||||
registry = &models.ModelRegistry{Providers: make(map[string]models.ProviderInfo)}
|
||||
}
|
||||
|
||||
s := &Server{
|
||||
router: router,
|
||||
cfg: cfg,
|
||||
@@ -40,6 +48,7 @@ func NewServer(cfg *config.Config, database *db.DB) *Server {
|
||||
sessions: NewSessionManager(cfg.KeyBytes, 24*time.Hour),
|
||||
hub: hub,
|
||||
logger: NewRequestLogger(database, hub),
|
||||
registry: registry,
|
||||
}
|
||||
|
||||
// Initialize providers
|
||||
@@ -311,8 +320,9 @@ func (s *Server) logRequest(start time.Time, clientID, provider, model string, u
|
||||
if usage.CacheWriteTokens != nil {
|
||||
entry.CacheWriteTokens = *usage.CacheWriteTokens
|
||||
}
|
||||
// TODO: Calculate cost properly based on pricing
|
||||
entry.Cost = 0.0
|
||||
|
||||
// Calculate cost using registry
|
||||
entry.Cost = utils.CalculateCost(s.registry, model, entry.PromptTokens, entry.CompletionTokens, entry.CacheReadTokens, entry.CacheWriteTokens)
|
||||
}
|
||||
|
||||
s.logger.LogRequest(entry)
|
||||
@@ -321,6 +331,18 @@ func (s *Server) logRequest(start time.Time, clientID, provider, model string, u
|
||||
func (s *Server) Run() error {
|
||||
go s.hub.Run()
|
||||
s.logger.Start()
|
||||
|
||||
// Start registry refresher
|
||||
go func() {
|
||||
ticker := time.NewTicker(24 * time.Hour)
|
||||
for range ticker.C {
|
||||
newRegistry, err := utils.FetchRegistry()
|
||||
if err == nil {
|
||||
s.registry = newRegistry
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
addr := fmt.Sprintf("%s:%d", s.cfg.Server.Host, s.cfg.Server.Port)
|
||||
return s.router.Run(addr)
|
||||
}
|
||||
|
||||
54
internal/utils/registry.go
Normal file
54
internal/utils/registry.go
Normal file
@@ -0,0 +1,54 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"llm-proxy/internal/models"
|
||||
"github.com/go-resty/resty/v2"
|
||||
)
|
||||
|
||||
// ModelsDevURL is the URL FetchRegistry downloads the model registry JSON from.
const ModelsDevURL = "https://models.dev/api.json"
|
||||
|
||||
func FetchRegistry() (*models.ModelRegistry, error) {
|
||||
log.Printf("Fetching model registry from %s", ModelsDevURL)
|
||||
|
||||
client := resty.New().SetTimeout(10 * time.Second)
|
||||
resp, err := client.R().Get(ModelsDevURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch registry: %w", err)
|
||||
}
|
||||
|
||||
if !resp.IsSuccess() {
|
||||
return nil, fmt.Errorf("failed to fetch registry: HTTP %d", resp.StatusCode())
|
||||
}
|
||||
|
||||
var providers map[string]models.ProviderInfo
|
||||
if err := json.Unmarshal(resp.Body(), &providers); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal registry: %w", err)
|
||||
}
|
||||
|
||||
log.Println("Successfully loaded model registry")
|
||||
return &models.ModelRegistry{Providers: providers}, nil
|
||||
}
|
||||
|
||||
func CalculateCost(registry *models.ModelRegistry, modelID string, promptTokens, completionTokens, cacheRead, cacheWrite uint32) float64 {
|
||||
meta := registry.FindModel(modelID)
|
||||
if meta == nil || meta.Cost == nil {
|
||||
return 0.0
|
||||
}
|
||||
|
||||
cost := (float64(promptTokens) * meta.Cost.Input / 1000000.0) +
|
||||
(float64(completionTokens) * meta.Cost.Output / 1000000.0)
|
||||
|
||||
if meta.Cost.CacheRead != nil {
|
||||
cost += float64(cacheRead) * (*meta.Cost.CacheRead) / 1000000.0
|
||||
}
|
||||
if meta.Cost.CacheWrite != nil {
|
||||
cost += float64(cacheWrite) * (*meta.Cost.CacheWrite) / 1000000.0
|
||||
}
|
||||
|
||||
return cost
|
||||
}
|
||||
Reference in New Issue
Block a user