debug: add max_tokens trace logging to chat completions handler

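Logs what max_tokens the client sends, whether gophergate injects one from the registry, and the final value forwarded to the provider. Helps trace output truncation issues. Also adds a standalone scratch program that prints the Go types the SQLite driver returns when scanning last_used_at directly and through MAX().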
2026-04-30 10:04:50 -04:00
parent 79571c6bdc
commit 4aa17b4fd2
2 changed files with 73 additions and 16 deletions
@@ -0,0 +1,45 @@
+package main
+
+import (
+	"fmt"
+	"github.com/jmoiron/sqlx"
+	_ "modernc.org/sqlite"
+)
+
+// MyNullTime is a scan target used to inspect driver output: its Scan method
+// records the value it receives together with the name of that value's Go type.
+type MyNullTime struct {
+	Time  interface{} // value most recently passed to Scan
+	Type  string      // Go type name of that value, formatted with %T
+}
+
+// Scan implements the database/sql Scanner interface. It stores the value
+// handed over by the driver in n.Time and the name of its dynamic Go type in
+// n.Type, and never reports an error.
+//
+// A []byte value is copied before it is stored: under the Scanner contract
+// the driver owns that memory and it is only valid until the next Scan call,
+// so retaining it directly could later print overwritten data.
+func (n *MyNullTime) Scan(value interface{}) error {
+	if raw, ok := value.([]byte); ok {
+		value = append([]byte(nil), raw...)
+	}
+	n.Time = value
+	n.Type = fmt.Sprintf("%T", value)
+	return nil
+}
+
+// main opens a fixed SQLite backup of the proxy database and prints the Go
+// type and value obtained when scanning client_tokens.last_used_at in three
+// ways: a direct column select, a MAX() aggregate, and both columns at once
+// scanned into empty interfaces.
+//
+// Query and scan failures are printed instead of being discarded, so an empty
+// "GoType=" line can no longer be mistaken for a real result. A failure in one
+// of the first two probes does not stop the remaining probes from running.
+func main() {
+	// NOTE(review): machine-specific absolute path; adjust before running elsewhere.
+	const dbPath = "/home/newkirk/Documents/projects/web_projects/gophergate/data/backups/llm_proxy.db.20260303T205057Z"
+	const clientID = "sk-opencode"
+
+	db, err := sqlx.Connect("sqlite", dbPath)
+	if err != nil {
+		fmt.Println("connect err:", err)
+		return
+	}
+	defer db.Close()
+
+	// Test 1: Direct column scan type
+	var d MyNullTime
+	if err := db.Get(&d, "SELECT last_used_at FROM client_tokens WHERE client_id = ? LIMIT 1", clientID); err != nil {
+		fmt.Println("direct SELECT err:", err)
+	} else {
+		fmt.Printf("direct SELECT: GoType=%s value=%v\n", d.Type, d.Time)
+	}
+
+	// Test 2: MAX aggregate scan type
+	var m MyNullTime
+	if err := db.Get(&m, "SELECT MAX(last_used_at) FROM client_tokens WHERE client_id = ?", clientID); err != nil {
+		fmt.Println("MAX SELECT err:", err)
+	} else {
+		fmt.Printf("MAX SELECT:    GoType=%s value=%v\n", m.Type, m.Time)
+	}
+
+	// Test 3: peek at the raw driver types
+	var a, b interface{}
+	row := db.QueryRow("SELECT last_used_at, MAX(last_used_at) FROM client_tokens WHERE client_id = ? LIMIT 1", clientID)
+	if err := row.Scan(&a, &b); err != nil {
+		fmt.Println("raw Scan err:", err)
+		return
+	}
+	fmt.Printf("\nRaw Scan:\n")
+	fmt.Printf("  last_used_at:     type=%T val=%v\n", a, a)
+	fmt.Printf("  MAX(last_used_at): type=%T val=%v\n", b, b)
+}
@@ -4,6 +4,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
+	"log"
 	"net/http"
 	"strings"
 	"sync"
@@ -373,24 +374,35 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
 		TopP:             req.TopP,
 		TopK:             req.TopK,
 		N:                req.N,
-		MaxTokens:        req.MaxTokens,
-		PresencePenalty:  req.PresencePenalty,
-		FrequencyPenalty: req.FrequencyPenalty,
-		Stream:           req.Stream != nil && *req.Stream,
-		Tools:            req.Tools,
-		ToolChoice:       req.ToolChoice,
-	}
+	MaxTokens:        req.MaxTokens,
+	PresencePenalty:  req.PresencePenalty,
+	FrequencyPenalty: req.FrequencyPenalty,
+	Stream:           req.Stream != nil && *req.Stream,
+	Tools:            req.Tools,
+	ToolChoice:       req.ToolChoice,
+}

-	// Inject max_tokens from model registry when client doesn't specify one.
-	// Prevents providers from applying a low default output cap.
-	if unifiedReq.MaxTokens == nil {
-		s.registryMu.RLock()
-		meta := s.registry.FindModel(modelID)
-		s.registryMu.RUnlock()
-		if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 {
-			unifiedReq.MaxTokens = &meta.Limit.Output
-		}
+// Inject max_tokens from model registry when client doesn't specify one.
+// Prevents providers from applying a low default output cap.
+// DEBUG: Trace max_tokens through the proxy
+clientMaxTokens := "nil"
+if unifiedReq.MaxTokens != nil {
+	clientMaxTokens = fmt.Sprintf("%d", *unifiedReq.MaxTokens)
+}
+log.Printf("[DEBUG] %s: client max_tokens=%s", modelID, clientMaxTokens)
+if unifiedReq.MaxTokens == nil {
+	s.registryMu.RLock()
+	meta := s.registry.FindModel(modelID)
+	s.registryMu.RUnlock()
+	if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 {
+		unifiedReq.MaxTokens = &meta.Limit.Output
+		log.Printf("[DEBUG] %s: injected registry max_tokens=%d", modelID, meta.Limit.Output)
+	} else {
+		log.Printf("[DEBUG] %s: no registry limit found, leaving max_tokens nil (provider default)", modelID)
 	}
+} else {
+	log.Printf("[DEBUG] %s: using client's max_tokens=%d", modelID, *unifiedReq.MaxTokens)
+}

 	// Handle Stop sequences
 	if req.Stop != nil {