Compare commits
15 Commits
6b10d4249c
...
90874a6721
| Author | SHA1 | Date | |
|---|---|---|---|
| 90874a6721 | |||
| 57aa0aa70e | |||
| 4de457cc5e | |||
| 66e8b114b9 | |||
| 1cac45502a | |||
| 79dc8fe409 | |||
| 24a898c9a7 | |||
| 7c2a317c01 | |||
| cb619f9286 | |||
| 441270317c | |||
| 2e4318d84b | |||
| d0be16d8e3 | |||
| 83e0ad0240 | |||
| 275ce34d05 | |||
| cb5b921550 |
28
.env
28
.env
@@ -1,28 +0,0 @@
|
|||||||
# LLM Proxy Gateway Environment Variables
|
|
||||||
|
|
||||||
# OpenAI
|
|
||||||
OPENAI_API_KEY=sk-demo-openai-key
|
|
||||||
|
|
||||||
# Google Gemini
|
|
||||||
GEMINI_API_KEY=AIza-demo-gemini-key
|
|
||||||
|
|
||||||
# DeepSeek
|
|
||||||
DEEPSEEK_API_KEY=sk-demo-deepseek-key
|
|
||||||
|
|
||||||
# xAI Grok (not yet available)
|
|
||||||
GROK_API_KEY=gk-demo-grok-key
|
|
||||||
|
|
||||||
# Authentication tokens (comma-separated list)
|
|
||||||
LLM_PROXY__SERVER__AUTH_TOKENS=demo-token-123456,another-token
|
|
||||||
|
|
||||||
# Database path (optional)
|
|
||||||
LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db
|
|
||||||
|
|
||||||
# Session Secret (for signed tokens)
|
|
||||||
SESSION_SECRET=ki9khXAk9usDkasMrD2UbK4LOgrDRJz0
|
|
||||||
|
|
||||||
# Encryption key (required)
|
|
||||||
LLM_PROXY__ENCRYPTION_KEY=69879f5b7913ba169982190526ae213e830b3f1f33e785ef2b68cf48c7853fcd
|
|
||||||
|
|
||||||
# Server port (optional)
|
|
||||||
LLM_PROXY__SERVER__PORT=8080
|
|
||||||
22
.env.backup
22
.env.backup
@@ -1,22 +0,0 @@
|
|||||||
# LLM Proxy Gateway Environment Variables
|
|
||||||
|
|
||||||
# OpenAI
|
|
||||||
OPENAI_API_KEY=sk-demo-openai-key
|
|
||||||
|
|
||||||
# Google Gemini
|
|
||||||
GEMINI_API_KEY=AIza-demo-gemini-key
|
|
||||||
|
|
||||||
# DeepSeek
|
|
||||||
DEEPSEEK_API_KEY=sk-demo-deepseek-key
|
|
||||||
|
|
||||||
# xAI Grok (not yet available)
|
|
||||||
GROK_API_KEY=gk-demo-grok-key
|
|
||||||
|
|
||||||
# Authentication tokens (comma-separated list)
|
|
||||||
LLM_PROXY__SERVER__AUTH_TOKENS=demo-token-123456,another-token
|
|
||||||
|
|
||||||
# Server port (optional)
|
|
||||||
LLM_PROXY__SERVER__PORT=8080
|
|
||||||
|
|
||||||
# Database path (optional)
|
|
||||||
LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db
|
|
||||||
51
.env.example
51
.env.example
@@ -1,28 +1,43 @@
|
|||||||
# LLM Proxy Gateway Environment Variables
|
# LLM Proxy Gateway Configuration Example
|
||||||
# Copy to .env and fill in your API keys
|
# Copy this file to .env and fill in your values
|
||||||
|
|
||||||
# MANDATORY: Encryption key for sessions and stored API keys
|
# ==============================================================================
|
||||||
# Must be a 32-byte hex or base64 encoded string
|
# MANDATORY: Encryption & Security
|
||||||
# Example (hex): LLM_PROXY__ENCRYPTION_KEY=0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef
|
# ==============================================================================
|
||||||
|
# A 32-byte hex or base64 encoded string used for session signing and
|
||||||
|
# database encryption.
|
||||||
|
# Generate one with: openssl rand -hex 32
|
||||||
LLM_PROXY__ENCRYPTION_KEY=your_secure_32_byte_key_here
|
LLM_PROXY__ENCRYPTION_KEY=your_secure_32_byte_key_here
|
||||||
|
|
||||||
# LLM Provider API Keys (Standard Environment Variables)
|
# ==============================================================================
|
||||||
OPENAI_API_KEY=your_openai_api_key_here
|
# LLM Provider API Keys
|
||||||
GEMINI_API_KEY=your_gemini_api_key_here
|
# ==============================================================================
|
||||||
DEEPSEEK_API_KEY=your_deepseek_api_key_here
|
OPENAI_API_KEY=sk-...
|
||||||
GROK_API_KEY=your_grok_api_key_here
|
GEMINI_API_KEY=AIza...
|
||||||
|
DEEPSEEK_API_KEY=sk-...
|
||||||
|
GROK_API_KEY=xai-...
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# Server Configuration
|
||||||
|
# ==============================================================================
|
||||||
|
LLM_PROXY__SERVER__PORT=8080
|
||||||
|
LLM_PROXY__SERVER__HOST=0.0.0.0
|
||||||
|
|
||||||
|
# Optional: Bearer tokens for client authentication (comma-separated)
|
||||||
|
# If not set, the proxy will look up tokens in the database.
|
||||||
|
# LLM_PROXY__SERVER__AUTH_TOKENS=token1,token2
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# Database Configuration
|
||||||
|
# ==============================================================================
|
||||||
|
LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db
|
||||||
|
LLM_PROXY__DATABASE__MAX_CONNECTIONS=10
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
# Provider Overrides (Optional)
|
# Provider Overrides (Optional)
|
||||||
|
# ==============================================================================
|
||||||
# LLM_PROXY__PROVIDERS__OPENAI__BASE_URL=https://api.openai.com/v1
|
# LLM_PROXY__PROVIDERS__OPENAI__BASE_URL=https://api.openai.com/v1
|
||||||
# LLM_PROXY__PROVIDERS__GEMINI__ENABLED=true
|
# LLM_PROXY__PROVIDERS__GEMINI__ENABLED=true
|
||||||
# LLM_PROXY__PROVIDERS__OLLAMA__BASE_URL=http://localhost:11434/v1
|
# LLM_PROXY__PROVIDERS__OLLAMA__BASE_URL=http://localhost:11434/v1
|
||||||
# LLM_PROXY__PROVIDERS__OLLAMA__ENABLED=true
|
# LLM_PROXY__PROVIDERS__OLLAMA__ENABLED=true
|
||||||
# LLM_PROXY__PROVIDERS__OLLAMA__MODELS=llama3,mistral,llava
|
# LLM_PROXY__PROVIDERS__OLLAMA__MODELS=llama3,mistral,llava
|
||||||
|
|
||||||
# Server Configuration
|
|
||||||
LLM_PROXY__SERVER__PORT=8080
|
|
||||||
LLM_PROXY__SERVER__HOST=0.0.0.0
|
|
||||||
|
|
||||||
# Database Configuration
|
|
||||||
LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db
|
|
||||||
LLM_PROXY__DATABASE__MAX_CONNECTIONS=10
|
|
||||||
|
|||||||
14
.gitignore
vendored
14
.gitignore
vendored
@@ -1,5 +1,11 @@
|
|||||||
|
.env
|
||||||
|
.env.*
|
||||||
|
!.env.example
|
||||||
/target
|
/target
|
||||||
/.env
|
/llm-proxy
|
||||||
/*.db
|
/llm-proxy-go
|
||||||
/*.db-shm
|
*.db
|
||||||
/*.db-wal
|
*.db-shm
|
||||||
|
*.db-wal
|
||||||
|
*.log
|
||||||
|
server.pid
|
||||||
|
|||||||
8
TODO.md
8
TODO.md
@@ -7,12 +7,15 @@
|
|||||||
- [x] Auth Middleware
|
- [x] Auth Middleware
|
||||||
- [x] Basic Provider implementations (OpenAI, Gemini, DeepSeek, Grok)
|
- [x] Basic Provider implementations (OpenAI, Gemini, DeepSeek, Grok)
|
||||||
- [x] Streaming Support (SSE & Gemini custom streaming)
|
- [x] Streaming Support (SSE & Gemini custom streaming)
|
||||||
- [x] Move Rust files to `rust_backup`
|
- [x] Archive Rust files to `rust` branch
|
||||||
|
- [x] Clean root and set Go version as `main`
|
||||||
- [x] Enhanced `helpers.go` for Multimodal & Tool Calling (OpenAI compatible)
|
- [x] Enhanced `helpers.go` for Multimodal & Tool Calling (OpenAI compatible)
|
||||||
- [x] Enhanced `server.go` for robust request conversion
|
- [x] Enhanced `server.go` for robust request conversion
|
||||||
- [x] Dashboard Management APIs (Clients, Tokens, Users, Providers)
|
- [x] Dashboard Management APIs (Clients, Tokens, Users, Providers)
|
||||||
- [x] Dashboard Analytics & Usage Summary
|
- [x] Dashboard Analytics & Usage Summary
|
||||||
- [x] WebSocket for real-time dashboard updates
|
- [x] WebSocket for real-time dashboard updates
|
||||||
|
- [x] Asynchronous Request Logging to SQLite
|
||||||
|
- [x] Update documentation (README, deployment, architecture)
|
||||||
|
|
||||||
## Feature Parity Checklist (High Priority)
|
## Feature Parity Checklist (High Priority)
|
||||||
|
|
||||||
@@ -38,10 +41,9 @@
|
|||||||
- [x] Multimodal support
|
- [x] Multimodal support
|
||||||
|
|
||||||
## Infrastructure & Middleware
|
## Infrastructure & Middleware
|
||||||
- [ ] Implement Request Logging to SQLite (asynchronous)
|
|
||||||
- [ ] Implement Rate Limiting (`golang.org/x/time/rate`)
|
- [ ] Implement Rate Limiting (`golang.org/x/time/rate`)
|
||||||
- [ ] Implement Circuit Breaker (`github.com/sony/gobreaker`)
|
- [ ] Implement Circuit Breaker (`github.com/sony/gobreaker`)
|
||||||
- [ ] Implement Model Cost Calculation logic
|
- [ ] Implement Model Cost Calculation logic (needs registry/pricing integration)
|
||||||
|
|
||||||
## Verification
|
## Verification
|
||||||
- [ ] Unit tests for feature-specific mapping (CoT, Tools, Images)
|
- [ ] Unit tests for feature-specific mapping (CoT, Tools, Images)
|
||||||
|
|||||||
58
internal/models/registry.go
Normal file
58
internal/models/registry.go
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
package models
|
||||||
|
|
||||||
|
// ModelRegistry holds the model catalogue, grouped by provider.
type ModelRegistry struct {
	// Providers maps a provider key to that provider's catalogue entry.
	// The `json:"-"` tag keeps it out of JSON encoding of the registry itself.
	Providers map[string]ProviderInfo `json:"-"`
}

// ProviderInfo describes one provider and the models listed under it.
type ProviderInfo struct {
	ID     string                   `json:"id"`
	Name   string                   `json:"name"`
	Models map[string]ModelMetadata `json:"models"`
}

// ModelMetadata describes a single model. Pointer fields are optional and are
// nil when the corresponding JSON key is absent.
type ModelMetadata struct {
	ID         string           `json:"id"`
	Name       string           `json:"name"`
	Cost       *ModelCost       `json:"cost,omitempty"`
	Limit      *ModelLimit      `json:"limit,omitempty"`
	Modalities *ModelModalities `json:"modalities,omitempty"`
	ToolCall   *bool            `json:"tool_call,omitempty"`
	Reasoning  *bool            `json:"reasoning,omitempty"`
}

// ModelCost holds the pricing rates of a model. The cache rates are optional.
// NOTE(review): the unit is not stated here; presumably price per one million
// tokens, verify against the code that consumes these rates.
type ModelCost struct {
	Input      float64  `json:"input"`
	Output     float64  `json:"output"`
	CacheRead  *float64 `json:"cache_read,omitempty"`
	CacheWrite *float64 `json:"cache_write,omitempty"`
}

// ModelLimit holds the size limits of a model.
// NOTE(review): presumably token counts for the context window and the
// maximum output; confirm against the registry source.
type ModelLimit struct {
	Context uint32 `json:"context"`
	Output  uint32 `json:"output"`
}

// ModelModalities lists the input and output modalities of a model.
type ModelModalities struct {
	Input  []string `json:"input"`
	Output []string `json:"output"`
}

// FindModel returns a copy of the metadata for modelID, or nil when no
// provider lists it or when the receiver is nil.
//
// Lookup runs in two passes: first modelID is tried as a key of each
// provider's Models map, then the ID field of every model is compared. An
// exact key match always beats an ID-field match. When several providers
// match in the same pass, the one with the lexicographically smallest
// provider key wins, so repeated calls return the same entry despite Go's
// randomized map iteration order.
func (r *ModelRegistry) FindModel(modelID string) *ModelMetadata {
	if r == nil {
		return nil
	}

	var (
		found    *ModelMetadata
		foundKey string
	)

	// Pass 1: modelID used directly as the key of a provider's Models map.
	for providerKey, provider := range r.Providers {
		if found != nil && providerKey >= foundKey {
			continue
		}
		if model, ok := provider.Models[modelID]; ok {
			found, foundKey = &model, providerKey
		}
	}
	if found != nil {
		return found
	}

	// Pass 2: compare against the ID stored in each model's metadata.
	for providerKey, provider := range r.Providers {
		if found != nil && providerKey >= foundKey {
			continue
		}

		// Within one provider prefer the smallest map key, again so that the
		// result does not depend on iteration order.
		var (
			match    *ModelMetadata
			matchKey string
		)
		for modelKey, model := range provider.Models {
			if model.ID != modelID {
				continue
			}
			if match == nil || modelKey < matchKey {
				candidate := model
				match, matchKey = &candidate, modelKey
			}
		}
		if match != nil {
			found, foundKey = match, providerKey
		}
	}

	return found
}
|
||||||
@@ -26,12 +26,20 @@ type Server struct {
|
|||||||
sessions *SessionManager
|
sessions *SessionManager
|
||||||
hub *Hub
|
hub *Hub
|
||||||
logger *RequestLogger
|
logger *RequestLogger
|
||||||
|
registry *models.ModelRegistry
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewServer(cfg *config.Config, database *db.DB) *Server {
|
func NewServer(cfg *config.Config, database *db.DB) *Server {
|
||||||
router := gin.Default()
|
router := gin.Default()
|
||||||
hub := NewHub()
|
hub := NewHub()
|
||||||
|
|
||||||
|
// Fetch registry (non-blocking for startup if it fails, but we'll try once)
|
||||||
|
registry, err := utils.FetchRegistry()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("Warning: Failed to fetch initial model registry: %v\n", err)
|
||||||
|
registry = &models.ModelRegistry{Providers: make(map[string]models.ProviderInfo)}
|
||||||
|
}
|
||||||
|
|
||||||
s := &Server{
|
s := &Server{
|
||||||
router: router,
|
router: router,
|
||||||
cfg: cfg,
|
cfg: cfg,
|
||||||
@@ -40,6 +48,7 @@ func NewServer(cfg *config.Config, database *db.DB) *Server {
|
|||||||
sessions: NewSessionManager(cfg.KeyBytes, 24*time.Hour),
|
sessions: NewSessionManager(cfg.KeyBytes, 24*time.Hour),
|
||||||
hub: hub,
|
hub: hub,
|
||||||
logger: NewRequestLogger(database, hub),
|
logger: NewRequestLogger(database, hub),
|
||||||
|
registry: registry,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize providers
|
// Initialize providers
|
||||||
@@ -311,8 +320,9 @@ func (s *Server) logRequest(start time.Time, clientID, provider, model string, u
|
|||||||
if usage.CacheWriteTokens != nil {
|
if usage.CacheWriteTokens != nil {
|
||||||
entry.CacheWriteTokens = *usage.CacheWriteTokens
|
entry.CacheWriteTokens = *usage.CacheWriteTokens
|
||||||
}
|
}
|
||||||
// TODO: Calculate cost properly based on pricing
|
|
||||||
entry.Cost = 0.0
|
// Calculate cost using registry
|
||||||
|
entry.Cost = utils.CalculateCost(s.registry, model, entry.PromptTokens, entry.CompletionTokens, entry.CacheReadTokens, entry.CacheWriteTokens)
|
||||||
}
|
}
|
||||||
|
|
||||||
s.logger.LogRequest(entry)
|
s.logger.LogRequest(entry)
|
||||||
@@ -321,6 +331,18 @@ func (s *Server) logRequest(start time.Time, clientID, provider, model string, u
|
|||||||
func (s *Server) Run() error {
|
func (s *Server) Run() error {
|
||||||
go s.hub.Run()
|
go s.hub.Run()
|
||||||
s.logger.Start()
|
s.logger.Start()
|
||||||
|
|
||||||
|
// Start registry refresher
|
||||||
|
go func() {
|
||||||
|
ticker := time.NewTicker(24 * time.Hour)
|
||||||
|
for range ticker.C {
|
||||||
|
newRegistry, err := utils.FetchRegistry()
|
||||||
|
if err == nil {
|
||||||
|
s.registry = newRegistry
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
addr := fmt.Sprintf("%s:%d", s.cfg.Server.Host, s.cfg.Server.Port)
|
addr := fmt.Sprintf("%s:%d", s.cfg.Server.Host, s.cfg.Server.Port)
|
||||||
return s.router.Run(addr)
|
return s.router.Run(addr)
|
||||||
}
|
}
|
||||||
|
|||||||
54
internal/utils/registry.go
Normal file
54
internal/utils/registry.go
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
package utils
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"llm-proxy/internal/models"
|
||||||
|
"github.com/go-resty/resty/v2"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ModelsDevURL is the URL FetchRegistry requests the model registry JSON from.
const ModelsDevURL = "https://models.dev/api.json"
|
||||||
|
|
||||||
|
func FetchRegistry() (*models.ModelRegistry, error) {
|
||||||
|
log.Printf("Fetching model registry from %s", ModelsDevURL)
|
||||||
|
|
||||||
|
client := resty.New().SetTimeout(10 * time.Second)
|
||||||
|
resp, err := client.R().Get(ModelsDevURL)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to fetch registry: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !resp.IsSuccess() {
|
||||||
|
return nil, fmt.Errorf("failed to fetch registry: HTTP %d", resp.StatusCode())
|
||||||
|
}
|
||||||
|
|
||||||
|
var providers map[string]models.ProviderInfo
|
||||||
|
if err := json.Unmarshal(resp.Body(), &providers); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to unmarshal registry: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Println("Successfully loaded model registry")
|
||||||
|
return &models.ModelRegistry{Providers: providers}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func CalculateCost(registry *models.ModelRegistry, modelID string, promptTokens, completionTokens, cacheRead, cacheWrite uint32) float64 {
|
||||||
|
meta := registry.FindModel(modelID)
|
||||||
|
if meta == nil || meta.Cost == nil {
|
||||||
|
return 0.0
|
||||||
|
}
|
||||||
|
|
||||||
|
cost := (float64(promptTokens) * meta.Cost.Input / 1000000.0) +
|
||||||
|
(float64(completionTokens) * meta.Cost.Output / 1000000.0)
|
||||||
|
|
||||||
|
if meta.Cost.CacheRead != nil {
|
||||||
|
cost += float64(cacheRead) * (*meta.Cost.CacheRead) / 1000000.0
|
||||||
|
}
|
||||||
|
if meta.Cost.CacheWrite != nil {
|
||||||
|
cost += float64(cacheWrite) * (*meta.Cost.CacheWrite) / 1000000.0
|
||||||
|
}
|
||||||
|
|
||||||
|
return cost
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user