merge
This commit is contained in:
10
.env
10
.env
@@ -15,12 +15,14 @@ GROK_API_KEY=gk-demo-grok-key
|
|||||||
# Authentication tokens (comma-separated list)
|
# Authentication tokens (comma-separated list)
|
||||||
LLM_PROXY__SERVER__AUTH_TOKENS=demo-token-123456,another-token
|
LLM_PROXY__SERVER__AUTH_TOKENS=demo-token-123456,another-token
|
||||||
|
|
||||||
# Server port (optional)
|
|
||||||
LLM_PROXY__SERVER__PORT=8080
|
|
||||||
|
|
||||||
# Database path (optional)
|
# Database path (optional)
|
||||||
LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db
|
LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db
|
||||||
|
|
||||||
|
# Session Secret (for signed tokens)
|
||||||
SESSION_SECRET=ki9khXAk9usDkasMrD2UbK4LOgrDRJz0
|
SESSION_SECRET=ki9khXAk9usDkasMrD2UbK4LOgrDRJz0
|
||||||
|
|
||||||
LLM_PROXY__ENCRYPTION_KEY=eac0239bfc402c7eb888366dd76c314288a8693efd5b7457819aeaf1fe429ac2
|
# Encryption key (required)
|
||||||
|
LLM_PROXY__ENCRYPTION_KEY=69879f5b7913ba169982190526ae213e830b3f1f33e785ef2b68cf48c7853fcd
|
||||||
|
|
||||||
|
# Server port (optional)
|
||||||
|
LLM_PROXY__SERVER__PORT=8081
|
||||||
|
|||||||
22
.env.backup
Normal file
22
.env.backup
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# LLM Proxy Gateway Environment Variables
|
||||||
|
|
||||||
|
# OpenAI
|
||||||
|
OPENAI_API_KEY=sk-demo-openai-key
|
||||||
|
|
||||||
|
# Google Gemini
|
||||||
|
GEMINI_API_KEY=AIza-demo-gemini-key
|
||||||
|
|
||||||
|
# DeepSeek
|
||||||
|
DEEPSEEK_API_KEY=sk-demo-deepseek-key
|
||||||
|
|
||||||
|
# xAI Grok (not yet available)
|
||||||
|
GROK_API_KEY=gk-demo-grok-key
|
||||||
|
|
||||||
|
# Authentication tokens (comma-separated list)
|
||||||
|
LLM_PROXY__SERVER__AUTH_TOKENS=demo-token-123456,another-token
|
||||||
|
|
||||||
|
# Server port (optional)
|
||||||
|
LLM_PROXY__SERVER__PORT=8080
|
||||||
|
|
||||||
|
# Database path (optional)
|
||||||
|
LLM_PROXY__DATABASE__PATH=./data/llm_proxy.db
|
||||||
45
PLAN.md
45
PLAN.md
@@ -56,7 +56,44 @@ This document outlines the roadmap for standardizing frontend security, cleaning
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Technical Standards
|
# Phase 6: Cache Cost & Provider Audit (ACTIVE)
|
||||||
- **Rust:** No `unwrap()` in production code; use proper error handling (`Result`).
|
**Primary Agents:** `frontend-developer`, `backend-developer`, `database-optimizer`, `lab-assistant`
|
||||||
- **Frontend:** Always use `window.api` wrappers for sensitive operations.
|
|
||||||
- **Security:** Secrets must never be logged or hardcoded.
|
## 6.1 Dashboard UI Updates (@frontend-developer)
|
||||||
|
- [ ] **Update Models Page Modal:** Add input fields for `Cache Read Cost` and `Cache Write Cost` in `static/js/pages/models.js`.
|
||||||
|
- [ ] **API Integration:** Ensure `window.api.put` includes these new cost fields in the request body.
|
||||||
|
- [ ] **Verify Costs Page:** Confirm `static/js/pages/costs.js` displays these rates correctly in the pricing table.
|
||||||
|
|
||||||
|
## 6.2 Provider Audit & Stream Fixes (@backend-developer)
|
||||||
|
- [ ] **Standard DeepSeek Fix:** Modify `src/providers/deepseek.rs` to stop stripping `stream_options` for `deepseek-chat`.
|
||||||
|
- [ ] **Grok Audit:** Verify whether Grok correctly returns usage when streaming; the Grok provider uses `build_openai_body` and does not appear to strip `stream_options`.
|
||||||
|
- [ ] **Gemini Audit:** Confirm Gemini returns `usage_metadata` reliably in the final chunk.
|
||||||
|
- [ ] **Anthropic Audit:** Check whether Anthropic streaming requires `include_usage` or a similar flag in order to report usage.
|
||||||
|
|
||||||
|
## 6.3 Database & Migration Validation (@database-optimizer)
|
||||||
|
- [ ] **Test Migrations:** Run the server to confirm that the `ALTER TABLE` logic in `src/database/mod.rs` adds the new columns correctly.
|
||||||
|
- [ ] **Schema Verification:** Verify `model_configs` has `cache_read_cost_per_m` and `cache_write_cost_per_m` columns.
|
||||||
|
|
||||||
|
## 6.4 Token Estimation Refinement (@lab-assistant)
|
||||||
|
- [ ] **Analyze Heuristic:** Review the `chars / 4` token-estimation heuristic in `src/utils/tokens.rs`.
|
||||||
|
- [ ] **Background Precise Recount:** Propose a mechanism that recomputes a precise token count in the background (using Tiktoken) after the response is finalized.
|
||||||
|
|
||||||
|
## Critical Path
|
||||||
|
Migration Validation → UI Fields → Provider Stream Usage Reporting.
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
gantt
|
||||||
|
title Phase 6 Timeline
|
||||||
|
dateFormat YYYY-MM-DD
|
||||||
|
section Frontend
|
||||||
|
Models Page UI :2026-03-06, 1d
|
||||||
|
Costs Table Update:after Models Page UI, 1d
|
||||||
|
section Backend
|
||||||
|
DeepSeek Fix :2026-03-06, 1d
|
||||||
|
Provider Audit (Grok/Gemini):after DeepSeek Fix, 2d
|
||||||
|
section Database
|
||||||
|
Migration Test :2026-03-06, 1d
|
||||||
|
section Optimization
|
||||||
|
Token Heuristic Review :2026-03-06, 1d
|
||||||
|
```
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
11
server.log
Normal file
11
server.log
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
[2m2026-03-06T20:07:36.737914Z[0m [32m INFO[0m Starting LLM Proxy Gateway v0.1.0
|
||||||
|
[2m2026-03-06T20:07:36.738903Z[0m [32m INFO[0m Configuration loaded from Some("/home/newkirk/Documents/projects/web_projects/llm-proxy/config.toml")
|
||||||
|
[2m2026-03-06T20:07:36.738945Z[0m [32m INFO[0m Encryption initialized
|
||||||
|
[2m2026-03-06T20:07:36.739124Z[0m [32m INFO[0m Connecting to database at ./data/llm_proxy.db
|
||||||
|
[2m2026-03-06T20:07:36.753254Z[0m [32m INFO[0m Database migrations completed
|
||||||
|
[2m2026-03-06T20:07:36.753294Z[0m [32m INFO[0m Database initialized at "./data/llm_proxy.db"
|
||||||
|
[2m2026-03-06T20:07:36.755187Z[0m [32m INFO[0m Fetching model registry from https://models.dev/api.json
|
||||||
|
[2m2026-03-06T20:07:37.000853Z[0m [32m INFO[0m Successfully loaded model registry
|
||||||
|
[2m2026-03-06T20:07:37.001382Z[0m [32m INFO[0m Model config cache initialized
|
||||||
|
[2m2026-03-06T20:07:37.001702Z[0m [33m WARN[0m SESSION_SECRET environment variable not set. Using a randomly generated secret. This will invalidate all sessions on restart. Set SESSION_SECRET to a fixed hex or base64 encoded 32-byte value.
|
||||||
|
[2m2026-03-06T20:07:37.002898Z[0m [32m INFO[0m Server listening on http://0.0.0.0:8082
|
||||||
1
server.pid
Normal file
1
server.pid
Normal file
@@ -0,0 +1 @@
|
|||||||
|
945904
|
||||||
@@ -109,6 +109,8 @@ pub async fn run_migrations(pool: &DbPool) -> Result<()> {
|
|||||||
enabled BOOLEAN DEFAULT TRUE,
|
enabled BOOLEAN DEFAULT TRUE,
|
||||||
prompt_cost_per_m REAL,
|
prompt_cost_per_m REAL,
|
||||||
completion_cost_per_m REAL,
|
completion_cost_per_m REAL,
|
||||||
|
cache_read_cost_per_m REAL,
|
||||||
|
cache_write_cost_per_m REAL,
|
||||||
mapping TEXT,
|
mapping TEXT,
|
||||||
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||||
FOREIGN KEY (provider_id) REFERENCES provider_configs(id) ON DELETE CASCADE
|
FOREIGN KEY (provider_id) REFERENCES provider_configs(id) ON DELETE CASCADE
|
||||||
@@ -180,6 +182,14 @@ pub async fn run_migrations(pool: &DbPool) -> Result<()> {
|
|||||||
.execute(pool)
|
.execute(pool)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
|
// Add manual cache cost columns to model_configs if they don't exist
|
||||||
|
let _ = sqlx::query("ALTER TABLE model_configs ADD COLUMN cache_read_cost_per_m REAL")
|
||||||
|
.execute(pool)
|
||||||
|
.await;
|
||||||
|
let _ = sqlx::query("ALTER TABLE model_configs ADD COLUMN cache_write_cost_per_m REAL")
|
||||||
|
.execute(pool)
|
||||||
|
.await;
|
||||||
|
|
||||||
// Insert default admin user if none exists (default password: admin)
|
// Insert default admin user if none exists (default password: admin)
|
||||||
let user_count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM users").fetch_one(pool).await?;
|
let user_count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM users").fetch_one(pool).await?;
|
||||||
|
|
||||||
|
|||||||
@@ -152,8 +152,11 @@ impl super::Provider for DeepSeekProvider {
|
|||||||
// Sanitize and fix for deepseek-reasoner (R1)
|
// Sanitize and fix for deepseek-reasoner (R1)
|
||||||
if request.model == "deepseek-reasoner" {
|
if request.model == "deepseek-reasoner" {
|
||||||
if let Some(obj) = body.as_object_mut() {
|
if let Some(obj) = body.as_object_mut() {
|
||||||
obj.remove("stream_options");
|
// Keep stream_options if present (DeepSeek supports include_usage)
|
||||||
|
|
||||||
|
// Remove unsupported parameters
|
||||||
obj.remove("temperature");
|
obj.remove("temperature");
|
||||||
|
|
||||||
obj.remove("top_p");
|
obj.remove("top_p");
|
||||||
obj.remove("presence_penalty");
|
obj.remove("presence_penalty");
|
||||||
obj.remove("frequency_penalty");
|
obj.remove("frequency_penalty");
|
||||||
@@ -177,11 +180,6 @@ impl super::Provider for DeepSeekProvider {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
// For standard deepseek-chat, keep it clean
|
|
||||||
if let Some(obj) = body.as_object_mut() {
|
|
||||||
obj.remove("stream_options");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let url = format!("{}/chat/completions", self.config.base_url);
|
let url = format!("{}/chat/completions", self.config.base_url);
|
||||||
|
|||||||
@@ -317,8 +317,7 @@ impl super::Provider for OpenAIProvider {
|
|||||||
|
|
||||||
// Standard OpenAI cleanup
|
// Standard OpenAI cleanup
|
||||||
if let Some(obj) = body.as_object_mut() {
|
if let Some(obj) = body.as_object_mut() {
|
||||||
obj.remove("stream_options");
|
// stream_options.include_usage is supported by OpenAI for token usage in streaming
|
||||||
|
|
||||||
// Transition: Newer OpenAI models (o1, o3, gpt-5) require max_completion_tokens
|
// Transition: Newer OpenAI models (o1, o3, gpt-5) require max_completion_tokens
|
||||||
if request.model.starts_with("o1-") || request.model.starts_with("o3-") || request.model.contains("gpt-5") {
|
if request.model.starts_with("o1-") || request.model.starts_with("o3-") || request.model.contains("gpt-5") {
|
||||||
if let Some(max_tokens) = obj.remove("max_tokens") {
|
if let Some(max_tokens) = obj.remove("max_tokens") {
|
||||||
|
|||||||
@@ -137,8 +137,23 @@ async fn get_model_cost(
|
|||||||
// Check in-memory cache for cost overrides (no SQLite hit)
|
// Check in-memory cache for cost overrides (no SQLite hit)
|
||||||
if let Some(cached) = state.model_config_cache.get(model).await {
|
if let Some(cached) = state.model_config_cache.get(model).await {
|
||||||
if let (Some(p), Some(c)) = (cached.prompt_cost_per_m, cached.completion_cost_per_m) {
|
if let (Some(p), Some(c)) = (cached.prompt_cost_per_m, cached.completion_cost_per_m) {
|
||||||
// Manual overrides don't have cache-specific rates, so use simple formula
|
// Manual overrides logic: if cache rates are provided, use cache-aware formula.
|
||||||
return (prompt_tokens as f64 * p / 1_000_000.0) + (completion_tokens as f64 * c / 1_000_000.0);
|
// Formula: (non_cached_prompt * input_rate) + (cache_read * read_rate) + (cache_write * write_rate) + (completion * output_rate)
|
||||||
|
let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
|
||||||
|
let mut total = (non_cached_prompt as f64 * p / 1_000_000.0) + (completion_tokens as f64 * c / 1_000_000.0);
|
||||||
|
|
||||||
|
if let Some(cr) = cached.cache_read_cost_per_m {
|
||||||
|
total += cache_read_tokens as f64 * cr / 1_000_000.0;
|
||||||
|
} else {
|
||||||
|
// No manual cache_read rate — charge cached tokens at full input rate (backwards compatibility)
|
||||||
|
total += cache_read_tokens as f64 * p / 1_000_000.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(cw) = cached.cache_write_cost_per_m {
|
||||||
|
total += cache_write_tokens as f64 * cw / 1_000_000.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return total;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -15,6 +15,8 @@ pub struct CachedModelConfig {
|
|||||||
pub mapping: Option<String>,
|
pub mapping: Option<String>,
|
||||||
pub prompt_cost_per_m: Option<f64>,
|
pub prompt_cost_per_m: Option<f64>,
|
||||||
pub completion_cost_per_m: Option<f64>,
|
pub completion_cost_per_m: Option<f64>,
|
||||||
|
pub cache_read_cost_per_m: Option<f64>,
|
||||||
|
pub cache_write_cost_per_m: Option<f64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// In-memory cache for model_configs table.
|
/// In-memory cache for model_configs table.
|
||||||
@@ -35,15 +37,15 @@ impl ModelConfigCache {
|
|||||||
|
|
||||||
/// Load all model configs from the database into cache
|
/// Load all model configs from the database into cache
|
||||||
pub async fn refresh(&self) {
|
pub async fn refresh(&self) {
|
||||||
match sqlx::query_as::<_, (String, bool, Option<String>, Option<f64>, Option<f64>)>(
|
match sqlx::query_as::<_, (String, bool, Option<String>, Option<f64>, Option<f64>, Option<f64>, Option<f64>)>(
|
||||||
"SELECT id, enabled, mapping, prompt_cost_per_m, completion_cost_per_m FROM model_configs",
|
"SELECT id, enabled, mapping, prompt_cost_per_m, completion_cost_per_m, cache_read_cost_per_m, cache_write_cost_per_m FROM model_configs",
|
||||||
)
|
)
|
||||||
.fetch_all(&self.db_pool)
|
.fetch_all(&self.db_pool)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
Ok(rows) => {
|
Ok(rows) => {
|
||||||
let mut map = HashMap::with_capacity(rows.len());
|
let mut map = HashMap::with_capacity(rows.len());
|
||||||
for (id, enabled, mapping, prompt_cost, completion_cost) in rows {
|
for (id, enabled, mapping, prompt_cost, completion_cost, cache_read_cost, cache_write_cost) in rows {
|
||||||
map.insert(
|
map.insert(
|
||||||
id,
|
id,
|
||||||
CachedModelConfig {
|
CachedModelConfig {
|
||||||
@@ -51,6 +53,8 @@ impl ModelConfigCache {
|
|||||||
mapping,
|
mapping,
|
||||||
prompt_cost_per_m: prompt_cost,
|
prompt_cost_per_m: prompt_cost,
|
||||||
completion_cost_per_m: completion_cost,
|
completion_cost_per_m: completion_cost,
|
||||||
|
cache_read_cost_per_m: cache_read_cost,
|
||||||
|
cache_write_cost_per_m: cache_write_cost,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -118,8 +118,22 @@ where
|
|||||||
// Check in-memory cache for cost overrides (no SQLite hit)
|
// Check in-memory cache for cost overrides (no SQLite hit)
|
||||||
let cost = if let Some(cached) = config_cache.get(&model).await {
|
let cost = if let Some(cached) = config_cache.get(&model).await {
|
||||||
if let (Some(p), Some(c)) = (cached.prompt_cost_per_m, cached.completion_cost_per_m) {
|
if let (Some(p), Some(c)) = (cached.prompt_cost_per_m, cached.completion_cost_per_m) {
|
||||||
// Cost override doesn't have cache-aware pricing, use simple formula
|
// Manual overrides logic: if cache rates are provided, use cache-aware formula.
|
||||||
(prompt_tokens as f64 * p / 1_000_000.0) + (completion_tokens as f64 * c / 1_000_000.0)
|
let non_cached_prompt = prompt_tokens.saturating_sub(cache_read_tokens);
|
||||||
|
let mut total = (non_cached_prompt as f64 * p / 1_000_000.0) + (completion_tokens as f64 * c / 1_000_000.0);
|
||||||
|
|
||||||
|
if let Some(cr) = cached.cache_read_cost_per_m {
|
||||||
|
total += cache_read_tokens as f64 * cr / 1_000_000.0;
|
||||||
|
} else {
|
||||||
|
// Charge cached tokens at full input rate if no specific rate provided
|
||||||
|
total += cache_read_tokens as f64 * p / 1_000_000.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(cw) = cached.cache_write_cost_per_m {
|
||||||
|
total += cache_write_tokens as f64 * cw / 1_000_000.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
total
|
||||||
} else {
|
} else {
|
||||||
provider.calculate_cost(
|
provider.calculate_cost(
|
||||||
&model,
|
&model,
|
||||||
|
|||||||
@@ -30,13 +30,13 @@ class ModelsPage {
|
|||||||
tableBody.innerHTML = '<tr><td colspan="7" class="text-center">No models found in registry</td></tr>';
|
tableBody.innerHTML = '<tr><td colspan="7" class="text-center">No models found in registry</td></tr>';
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort by provider then name
|
// Sort by provider then name
|
||||||
this.models.sort((a, b) => {
|
this.models.sort((a, b) => {
|
||||||
if (a.provider !== b.provider) return a.provider.localeCompare(b.provider);
|
if (a.provider !== b.provider) return a.provider.localeCompare(b.provider);
|
||||||
return a.name.localeCompare(b.name);
|
return a.name.localeCompare(b.name);
|
||||||
});
|
});
|
||||||
|
|
||||||
tableBody.innerHTML = this.models.map(model => {
|
tableBody.innerHTML = this.models.map(model => {
|
||||||
const statusClass = model.enabled ? 'success' : 'secondary';
|
const statusClass = model.enabled ? 'success' : 'secondary';
|
||||||
const statusIcon = model.enabled ? 'check-circle' : 'ban';
|
const statusIcon = model.enabled ? 'check-circle' : 'ban';
|
||||||
@@ -99,6 +99,14 @@ class ModelsPage {
|
|||||||
<input type="number" id="model-completion-cost" value="${model.completion_cost}" step="0.01">
|
<input type="number" id="model-completion-cost" value="${model.completion_cost}" step="0.01">
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="form-control">
|
||||||
|
<label for="model-cache-read-cost">Cache Read Cost (per 1M tokens)</label>
|
||||||
|
<input type="number" id="model-cache-read-cost" value="${model.cache_read_cost || 0}" step="0.01">
|
||||||
|
</div>
|
||||||
|
<div class="form-control">
|
||||||
|
<label for="model-cache-write-cost">Cache Write Cost (per 1M tokens)</label>
|
||||||
|
<input type="number" id="model-cache-write-cost" value="${model.cache_write_cost || 0}" step="0.01">
|
||||||
|
</div>
|
||||||
<div class="form-control">
|
<div class="form-control">
|
||||||
<label for="model-mapping">Internal Mapping (Optional)</label>
|
<label for="model-mapping">Internal Mapping (Optional)</label>
|
||||||
<input type="text" id="model-mapping" value="${model.mapping || ''}" placeholder="e.g. gpt-4o-2024-05-13">
|
<input type="text" id="model-mapping" value="${model.mapping || ''}" placeholder="e.g. gpt-4o-2024-05-13">
|
||||||
@@ -118,6 +126,8 @@ class ModelsPage {
|
|||||||
const enabled = modal.querySelector('#model-enabled').checked;
|
const enabled = modal.querySelector('#model-enabled').checked;
|
||||||
const promptCost = parseFloat(modal.querySelector('#model-prompt-cost').value);
|
const promptCost = parseFloat(modal.querySelector('#model-prompt-cost').value);
|
||||||
const completionCost = parseFloat(modal.querySelector('#model-completion-cost').value);
|
const completionCost = parseFloat(modal.querySelector('#model-completion-cost').value);
|
||||||
|
const cacheReadCost = parseFloat(modal.querySelector('#model-cache-read-cost').value);
|
||||||
|
const cacheWriteCost = parseFloat(modal.querySelector('#model-cache-write-cost').value);
|
||||||
const mapping = modal.querySelector('#model-mapping').value;
|
const mapping = modal.querySelector('#model-mapping').value;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -125,6 +135,8 @@ class ModelsPage {
|
|||||||
enabled,
|
enabled,
|
||||||
prompt_cost: promptCost,
|
prompt_cost: promptCost,
|
||||||
completion_cost: completionCost,
|
completion_cost: completionCost,
|
||||||
|
cache_read_cost: isNaN(cacheReadCost) ? null : cacheReadCost,
|
||||||
|
cache_write_cost: isNaN(cacheWriteCost) ? null : cacheWriteCost,
|
||||||
mapping: mapping || null
|
mapping: mapping || null
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user