diff --git a/.hermes/plans/auto-model-routing.md b/.hermes/plans/auto-model-routing.md new file mode 100644 index 00000000..13b75049 --- /dev/null +++ b/.hermes/plans/auto-model-routing.md @@ -0,0 +1,919 @@ +# Automatic Model Routing — Implementation Plan + +> **For Hermes:** Use subagent-driven-development skill to implement this plan task-by-task. + +**Goal:** Add a model-group router that lets clients send `model: "deepseek-auto"` and have gophergate pick the best concrete model based on heuristic rules or an optional classifier LLM. + +**Architecture:** A new `internal/router/` package with heuristic and classifier strategies, backed by a `model_groups` DB table. The router injects into `handleChatCompletions` after provider resolution but before the provider call — zero changes to the Provider interface. Admin CRUD endpoints and a dashboard tab for management. + +**Tech Stack:** Go 1.22+, Gin, sqlx (SQLite), resty, existing OpenAI provider for classifier calls. + +--- + +## Task 1: Add `model_groups` DB migration and struct + +**Objective:** Create the `model_groups` table and Go struct. 
+ +**Files:** +- Modify: `internal/db/db.go` + +**Step 1: Add CREATE TABLE to migrations** + +In `RunMigrations()`, add to the `queries` slice (after `client_tokens`): + +```go +`CREATE TABLE IF NOT EXISTS model_groups ( + id TEXT PRIMARY KEY, + strategy TEXT NOT NULL DEFAULT 'heuristic', + selector_model TEXT, + targets TEXT NOT NULL DEFAULT '[]', + complexity_threshold INTEGER, + heuristic_rules TEXT, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP +)`, +``` + +**Step 2: Add the Go struct** + +After the `ClientToken` struct (around line 264), add: + +```go +type ModelGroup struct { + ID string `db:"id" json:"id"` + Strategy string `db:"strategy" json:"strategy"` + SelectorModel *string `db:"selector_model" json:"selector_model"` + Targets string `db:"targets" json:"targets"` // JSON array + ComplexityThreshold *int `db:"complexity_threshold" json:"complexity_threshold"` + HeuristicRules *string `db:"heuristic_rules" json:"heuristic_rules"` + CreatedAt time.Time `db:"created_at" json:"created_at"` + UpdatedAt time.Time `db:"updated_at" json:"updated_at"` +} +``` + +**Step 3: Seed default groups** + +After the "Default client" block in `RunMigrations()`, add: + +```go +// Seed default model groups +defaultGroups := []struct { + id, strategy, targets string +}{ + {"deepseek-auto", "heuristic", `["deepseek-chat","deepseek-reasoner"]`}, + {"openai-auto", "heuristic", `["gpt-4o-mini","gpt-4o"]`}, + {"gemini-auto", "heuristic", `["gemini-2.0-flash","gemini-2.5-pro"]`}, +} +for _, g := range defaultGroups { + db.Exec(`INSERT OR IGNORE INTO model_groups (id, strategy, targets) VALUES (?, ?, ?)`, + g.id, g.strategy, g.targets) +} +``` + +**Step 4: Build and verify** + +```bash +cd ~/Documents/projects/web_projects/gophergate && go build ./... 
+``` + +**Step 5: Commit** + +```bash +git add internal/db/db.go +git commit -m "feat: add model_groups table and default seed data" +``` + +--- + +## Task 2: Create router package — interface and heuristic router + +**Objective:** Create `internal/router/` with the `Router` type and heuristic implementation. + +**Files:** +- Create: `internal/router/router.go` +- Create: `internal/router/heuristic.go` + +**Step 1: Create `internal/router/router.go`** + +```go +package router + +import ( + "context" + "encoding/json" + + "gophergate/internal/db" +) + +// Decision holds the result of a routing decision. +type Decision struct { + SelectedModel string `json:"selected_model"` + Strategy string `json:"strategy"` // "heuristic" or "classifier" + Reason string `json:"reason"` +} + +// ClassifierFunc is the callback for classifier-based routing. +// Takes a selector model, system prompt, and user message. +// Returns a complexity rating string (e.g. "3"). +type ClassifierFunc func(ctx context.Context, selectorModel, systemPrompt, userMessage string) (string, error) + +// Router resolves model groups to concrete models. +type Router struct { + groups map[string]db.ModelGroup + classify ClassifierFunc +} + +// New creates a Router. classify may be nil if no classifier groups exist. +func New(groups []db.ModelGroup, classify ClassifierFunc) *Router { + r := &Router{ + groups: make(map[string]db.ModelGroup), + classify: classify, + } + for _, g := range groups { + r.groups[g.ID] = g + } + return r +} + +// IsGroup returns true if the model name is a group ID. +func (r *Router) IsGroup(modelID string) bool { + _, ok := r.groups[modelID] + return ok +} + +// Route resolves a group to a concrete model. +// userMessage is the user message text, already extracted from the request by the caller. 
+func (r *Router) Route(ctx context.Context, groupID string, userMessage string) (*Decision, error) { + group, ok := r.groups[groupID] + if !ok { + return nil, fmt.Errorf("unknown model group: %s", groupID) + } + + var targets []string + if err := json.Unmarshal([]byte(group.Targets), &targets); err != nil || len(targets) == 0 { + return nil, fmt.Errorf("invalid or empty targets for group %s", groupID) + } + + switch group.Strategy { + case "heuristic": + return routeHeuristic(group, targets, userMessage) + case "classifier": + if r.classify == nil { + // Fall back to heuristic if no classifier is available + return routeHeuristic(group, targets, userMessage) + } + return routeClassifier(ctx, r.classify, group, targets, userMessage) + default: + return nil, fmt.Errorf("unknown strategy: %s", group.Strategy) + } +} + +// Reload replaces the group definitions without recreating the router. +func (r *Router) Reload(groups []db.ModelGroup) { + r.groups = make(map[string]db.ModelGroup) + for _, g := range groups { + r.groups[g.ID] = g + } +} +``` + +**Step 2: Create `internal/router/heuristic.go`** + +```go +package router + +import ( + "context" + "encoding/json" + "strings" + + "gophergate/internal/db" +) + +// HeuristicRule defines a pattern-based routing rule. 
+type HeuristicRule struct { + Pattern string `json:"pattern"` // substring to match in user message + TargetIdx int `json:"target"` // index into targets array (0-based) + CaseSensitive bool `json:"case_sensitive,omitempty"` +} + +func routeHeuristic(group db.ModelGroup, targets []string, userMessage string) (*Decision, error) { + // Default to first target (cheapest/fastest) + selected := targets[0] + reason := "default (first target)" + + // If heuristic_rules is set, use them + if group.HeuristicRules != nil && *group.HeuristicRules != "" { + var rules []HeuristicRule + if err := json.Unmarshal([]byte(*group.HeuristicRules), &rules); err == nil { + searchMsg := userMessage + for _, rule := range rules { + pattern := rule.Pattern + msg := searchMsg + if !rule.CaseSensitive { + pattern = strings.ToLower(pattern) + msg = strings.ToLower(msg) + } + if strings.Contains(msg, pattern) { + if rule.TargetIdx >= 0 && rule.TargetIdx < len(targets) { + selected = targets[rule.TargetIdx] + reason = "matched heuristic rule: " + rule.Pattern + break + } + } + } + } + } + + // Built-in fallback heuristics (apply even without custom rules) + if reason == "default (first target)" && len(targets) > 1 { + msgLower := strings.ToLower(userMessage) + // Complex task indicators → last target (usually the smarter model) + complexIndicators := []string{ + "step by step", "explain in detail", "reason through", + "think carefully", "analyze", "debug", "write code", + "implement", "refactor", "architecture", + } + for _, indicator := range complexIndicators { + if strings.Contains(msgLower, indicator) { + selected = targets[len(targets)-1] + reason = "complex task indicator: " + indicator + break + } + } + } + + return &Decision{ + SelectedModel: selected, + Strategy: "heuristic", + Reason: reason, + }, nil +} + +// routeHeuristic exists as a package-level func for direct use. 
+var _ = routeHeuristic // suppress unused warning when classifier is the only caller +``` + +**Note:** `routeHeuristic` is already called by `Router.Route()`, so the `var _ = routeHeuristic` blank-identifier line at the end of the listing above is unnecessary — omit it when creating the file. + +**Step 3: Build** + +```bash +cd ~/Documents/projects/web_projects/gophergate && go build ./... +``` + +Fix any compilation errors. As written, `router.go` calls `fmt.Errorf` without importing `fmt`, and `heuristic.go` imports `context` without using it. + +**Step 4: Commit** + +```bash +git add internal/router/ +git commit -m "feat: add router package with heuristic strategy" +``` + +--- + +## Task 3: Add classifier router + +**Objective:** Implement the classifier strategy that uses a cheap LLM to rate task complexity. + +**Files:** +- Create: `internal/router/classifier.go` + +**Step 1: Create `internal/router/classifier.go`** + +```go +package router + +import ( + "context" + "fmt" + "strconv" + "strings" + + "gophergate/internal/db" +) + +const classifierSystemPrompt = `You are a task complexity classifier. Rate the following user message on a scale of 1 to %d, where: +1 = trivial/simple (basic facts, greetings, simple math) +%d = highly complex (multi-step reasoning, code generation, architecture design) + +Reply with ONLY the number. 
No explanation.` + +func routeClassifier(ctx context.Context, classify ClassifierFunc, group db.ModelGroup, targets []string, userMessage string) (*Decision, error) { + maxRating := len(targets) + if maxRating < 2 { + maxRating = 2 + } + + prompt := fmt.Sprintf(classifierSystemPrompt, maxRating, maxRating) + ratingStr, err := classify(ctx, getSelectorModel(group, targets), prompt, userMessage) + if err != nil { + // Classifier failed — fall back to heuristic + return routeHeuristic(group, targets, userMessage) + } + + rating, err := strconv.Atoi(strings.TrimSpace(ratingStr)) + if err != nil || rating < 1 { + rating = 1 + } + if rating > maxRating { + rating = maxRating + } + + idx := rating - 1 // 0-based index into targets + return &Decision{ + SelectedModel: targets[idx], + Strategy: "classifier", + Reason: fmt.Sprintf("complexity rating: %d/%d", rating, maxRating), + }, nil +} + +func getSelectorModel(group db.ModelGroup, targets []string) string { + if group.SelectorModel != nil && *group.SelectorModel != "" { + return *group.SelectorModel + } + // Default: use the first (cheapest) target model as the selector + return targets[0] +} +``` + +**Step 2: Build** + +```bash +cd ~/Documents/projects/web_projects/gophergate && go build ./... +``` + +**Step 3: Commit** + +```bash +git add internal/router/classifier.go +git commit -m "feat: add classifier routing strategy with LLM complexity rating" +``` + +--- + +## Task 4: Wire router into the server + +**Objective:** Add the Router to the Server struct, initialize it, and inject it into `handleChatCompletions`. 
+ +**Files:** +- Modify: `internal/server/server.go` + +**Step 1: Add router field to Server struct** + +In the `Server` struct (around line 23), add after the `registryMu` field: + +```go +router *router.Router +``` + +**Step 2: Add import** + +Add to the imports block: + +```go +"gophergate/internal/router" +``` + +**Step 3: Initialize router in NewServer** + +After `s.setupRoutes()` (line 66), add: + +```go +// Initialize model group router +s.refreshRouter() +``` + +**Step 4: Add refreshRouter method** + +Add a new method on Server: + +```go +func (s *Server) refreshRouter() { + var groups []db.ModelGroup + if err := s.database.Select(&groups, "SELECT * FROM model_groups"); err != nil { + fmt.Printf("Warning: Failed to load model groups: %v\n", err) + groups = nil + } + + // Build classifier function using the OpenAI provider + var classifyFn router.ClassifierFunc + if openaiProvider, ok := s.providers["openai"]; ok { + classifyFn = func(ctx context.Context, selectorModel, systemPrompt, userMessage string) (string, error) { + req := &models.UnifiedRequest{ + Model: selectorModel, + Messages: []models.UnifiedMessage{ + {Role: "system", Content: []models.ContentPart{{Type: "text", Text: systemPrompt}}}, + {Role: "user", Content: []models.ContentPart{{Type: "text", Text: userMessage}}}, + }, + MaxTokens: uint32Ptr(5), + Stream: false, + } + resp, err := openaiProvider.ChatCompletion(ctx, req) + if err != nil { + return "", err + } + if len(resp.Choices) == 0 { + return "", fmt.Errorf("no choices in classifier response") + } + return resp.Choices[0].Message.Content, nil + } + } + + if s.router == nil { + s.router = router.New(groups, classifyFn) + } else { + s.router.Reload(groups) + } +} +``` + +**Step 5: Add uint32Ptr helper (if not already in the codebase)** + +At the bottom of server.go, add: + +```go +func uint32Ptr(v uint32) *uint32 { return &v } +``` + +**Step 6: Inject router into handleChatCompletions** + +In `handleChatCompletions`, after the model prefix 
stripping block (after line 475) and before building the UnifiedRequest (line 478), add: + +```go +// Check if model is a group and route to a concrete model +if s.router != nil && s.router.IsGroup(modelID) { + userMessage := extractUserMessage(req.Messages) + decision, err := s.router.Route(c.Request.Context(), modelID, userMessage) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("model routing failed: %v", err)}) + return + } + modelID = decision.SelectedModel + log.Printf("[ROUTER] %s → %s (%s: %s)", req.Model, modelID, decision.Strategy, decision.Reason) +} +``` + +**Step 7: Add extractUserMessage helper** + +```go +func extractUserMessage(messages []models.ChatCompletionMessage) string { + for i := len(messages) - 1; i >= 0; i-- { + if messages[i].Role == "user" { + if s, ok := messages[i].Content.(string); ok { + return s + } + // It might be a content array — grab text from first part + if parts, ok := messages[i].Content.([]interface{}); ok && len(parts) > 0 { + if part, ok := parts[0].(map[string]interface{}); ok { + if text, ok := part["text"].(string); ok { + return text + } + } + } + return "" + } + } + return "" +} +``` + +**Step 8: Add router refresh to RefreshProviders** + +At the end of `RefreshProviders()` (before `return nil` at line 171), add: + +```go +s.refreshRouter() +``` + +**Step 9: Build** + +```bash +cd ~/Documents/projects/web_projects/gophergate && go build ./... +``` + +Compilation errors are likely here, because `extractUserMessage` assumes the shape of the `ChatCompletionMessage` type. The handler uses `models.ChatCompletionRequest`, which has `Messages []ChatCompletionMessage`; check how that type declares `Content`. If `Content` is a plain string field, simplify the helper to return it directly instead of using type assertions. Fix as needed. 
+ +**Step 10: Commit** + +```bash +git add internal/server/server.go +git commit -m "feat: wire model group router into chat completions handler" +``` + +--- + +## Task 5: Add admin API endpoints for model groups + +**Objective:** CRUD endpoints at `/api/model-groups` for dashboard management. + +**Files:** +- Create: `internal/server/model_groups_admin.go` + +**Step 1: Create `internal/server/model_groups_admin.go`** + +```go +package server + +import ( + "net/http" + + "gophergate/internal/db" + + "github.com/gin-gonic/gin" +) + +func (s *Server) handleGetModelGroups(c *gin.Context) { + var groups []db.ModelGroup + if err := s.database.Select(&groups, "SELECT * FROM model_groups ORDER BY id"); err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + if groups == nil { + groups = []db.ModelGroup{} + } + c.JSON(http.StatusOK, groups) +} + +func (s *Server) handleCreateModelGroup(c *gin.Context) { + var group db.ModelGroup + if err := c.ShouldBindJSON(&group); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + _, err := s.database.Exec(` + INSERT INTO model_groups (id, strategy, selector_model, targets, complexity_threshold, heuristic_rules) + VALUES (?, ?, ?, ?, ?, ?)`, + group.ID, group.Strategy, group.SelectorModel, group.Targets, + group.ComplexityThreshold, group.HeuristicRules) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + + s.refreshRouter() + c.JSON(http.StatusCreated, group) +} + +func (s *Server) handleUpdateModelGroup(c *gin.Context) { + id := c.Param("id") + var group db.ModelGroup + if err := c.ShouldBindJSON(&group); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + _, err := s.database.Exec(` + UPDATE model_groups SET strategy=?, selector_model=?, targets=?, complexity_threshold=?, heuristic_rules=?, updated_at=CURRENT_TIMESTAMP + WHERE id=?`, + group.Strategy, 
group.SelectorModel, group.Targets, + group.ComplexityThreshold, group.HeuristicRules, id) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + + s.refreshRouter() + c.JSON(http.StatusOK, group) +} + +func (s *Server) handleDeleteModelGroup(c *gin.Context) { + id := c.Param("id") + _, err := s.database.Exec("DELETE FROM model_groups WHERE id=?", id) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + + s.refreshRouter() + c.JSON(http.StatusOK, gin.H{"status": "deleted"}) +} +``` + +**Step 2: Register routes in setupRoutes()** + +In `setupRoutes()`, add under the admin group (after the models endpoints around line 229): + +```go +admin.GET("/model-groups", s.handleGetModelGroups) +admin.POST("/model-groups", s.handleCreateModelGroup) +admin.PUT("/model-groups/:id", s.handleUpdateModelGroup) +admin.DELETE("/model-groups/:id", s.handleDeleteModelGroup) +``` + +**Step 3: Build** + +```bash +cd ~/Documents/projects/web_projects/gophergate && go build ./... +``` + +**Step 4: Commit** + +```bash +git add internal/server/model_groups_admin.go internal/server/server.go +git commit -m "feat: add model groups CRUD admin API endpoints" +``` + +--- + +## Task 6: Add dashboard UI — sidebar entry and page module + +**Objective:** Add a "Model Groups" tab to the dashboard sidebar and a page module for CRUD management. + +**Files:** +- Modify: `static/index.html` +- Create: `static/js/pages/model_groups.js` + +**Step 1: Add sidebar menu item in index.html** + +In the MANAGEMENT section (after line 91, before ``), add: + +```html +
Define auto-routing groups that pick the best model for each request.
+ +| Group ID | Strategy | Targets | Actions | '; + html += '
|---|---|---|---|
${this.esc(g.id)} |
+ ${this.esc(g.strategy)} | +${this.esc(g.targets)} |
+ + + + | +