debug: add max_tokens trace logging to chat completions handler
Logs the max_tokens value the client sends, whether gophergate injects one from the model registry, and the final value forwarded to the provider. This helps trace output-truncation issues.
This commit is contained in:
@@ -0,0 +1,45 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"github.com/jmoiron/sqlx"
|
||||||
|
_ "modernc.org/sqlite"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MyNullTime is a diagnostic sql.Scanner. Instead of converting a column into
// a time value, it records whatever raw value the driver hands over together
// with that value's Go type, so the driver's behavior can be inspected.
type MyNullTime struct {
	// Time holds the value passed to Scan (nil when Scan received nil).
	Time interface{}
	// Type is the Go type of that value, formatted with %T.
	Type string
}

// Scan implements sql.Scanner. It stores value and its %T type name on n and
// always returns nil.
//
// database/sql documents that reference types such as []byte passed to Scan
// are owned by the driver and only valid until the next Scan call, so a []byte
// is copied before being retained; every other value is stored unchanged.
func (n *MyNullTime) Scan(value interface{}) error {
	if raw, ok := value.([]byte); ok && raw != nil {
		// Detach from the driver's buffer so later row scans cannot
		// overwrite what was captured here.
		owned := make([]byte, len(raw))
		copy(owned, raw)
		value = owned
	}
	n.Time = value
	n.Type = fmt.Sprintf("%T", value)
	return nil
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
db, err := sqlx.Connect("sqlite", "/home/newkirk/Documents/projects/web_projects/gophergate/data/backups/llm_proxy.db.20260303T205057Z")
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("connect err:", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer db.Close()
|
||||||
|
|
||||||
|
// Test 1: Direct column scan type
|
||||||
|
var d MyNullTime
|
||||||
|
db.Get(&d, "SELECT last_used_at FROM client_tokens WHERE client_id = ? LIMIT 1", "sk-opencode")
|
||||||
|
fmt.Printf("direct SELECT: GoType=%s value=%v\n", d.Type, d.Time)
|
||||||
|
|
||||||
|
// Test 2: MAX aggregate scan type
|
||||||
|
var m MyNullTime
|
||||||
|
db.Get(&m, "SELECT MAX(last_used_at) FROM client_tokens WHERE client_id = ?", "sk-opencode")
|
||||||
|
fmt.Printf("MAX SELECT: GoType=%s value=%v\n", m.Type, m.Time)
|
||||||
|
|
||||||
|
// Test 3: peek at the raw driver types
|
||||||
|
row := db.QueryRow("SELECT last_used_at, MAX(last_used_at) FROM client_tokens WHERE client_id = ? LIMIT 1", "sk-opencode")
|
||||||
|
var a, b interface{}
|
||||||
|
row.Scan(&a, &b)
|
||||||
|
fmt.Printf("\nRaw Scan:\n")
|
||||||
|
fmt.Printf(" last_used_at: type=%T val=%v\n", a, a)
|
||||||
|
fmt.Printf(" MAX(last_used_at): type=%T val=%v\n", b, b)
|
||||||
|
}
|
||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
@@ -383,13 +384,24 @@ func (s *Server) handleChatCompletions(c *gin.Context) {
|
|||||||
|
|
||||||
// Inject max_tokens from model registry when client doesn't specify one.
|
// Inject max_tokens from model registry when client doesn't specify one.
|
||||||
// Prevents providers from applying a low default output cap.
|
// Prevents providers from applying a low default output cap.
|
||||||
|
// DEBUG: Trace max_tokens through the proxy
|
||||||
|
clientMaxTokens := "nil"
|
||||||
|
if unifiedReq.MaxTokens != nil {
|
||||||
|
clientMaxTokens = fmt.Sprintf("%d", *unifiedReq.MaxTokens)
|
||||||
|
}
|
||||||
|
log.Printf("[DEBUG] %s: client max_tokens=%s", modelID, clientMaxTokens)
|
||||||
if unifiedReq.MaxTokens == nil {
|
if unifiedReq.MaxTokens == nil {
|
||||||
s.registryMu.RLock()
|
s.registryMu.RLock()
|
||||||
meta := s.registry.FindModel(modelID)
|
meta := s.registry.FindModel(modelID)
|
||||||
s.registryMu.RUnlock()
|
s.registryMu.RUnlock()
|
||||||
if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 {
|
if meta != nil && meta.Limit != nil && meta.Limit.Output > 0 {
|
||||||
unifiedReq.MaxTokens = &meta.Limit.Output
|
unifiedReq.MaxTokens = &meta.Limit.Output
|
||||||
|
log.Printf("[DEBUG] %s: injected registry max_tokens=%d", modelID, meta.Limit.Output)
|
||||||
|
} else {
|
||||||
|
log.Printf("[DEBUG] %s: no registry limit found, leaving max_tokens nil (provider default)", modelID)
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
log.Printf("[DEBUG] %s: using client's max_tokens=%d", modelID, *unifiedReq.MaxTokens)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle Stop sequences
|
// Handle Stop sequences
|
||||||
|
|||||||
Reference in New Issue
Block a user