feat: Phase 2 - reliability & observability
CI / Lint (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Build (push) Has been cancelled

- Circuit breaker: proper thresholds (3 failures, 30s timeout)
- HTTP timeouts: 30s on all providers (previously no timeout)
- Structured logging: slog replaces fmt.Printf throughout
- Stream errors: propagated as SSE error events to client
- Registry fetch: retry with backoff (3 attempts)
- Registry reads: dashboard handlers now guard access with an RWMutex read lock
This commit is contained in:
2026-04-26 14:48:41 -04:00
parent 8a8d8d1477
commit 1f574d8134
15 changed files with 126 additions and 608 deletions
+6
View File
@@ -14,6 +14,7 @@ import (
"gophergate/internal/db"
"gophergate/internal/models"
"gophergate/internal/utils"
"log/slog"
"github.com/shirou/gopsutil/v3/cpu"
"github.com/shirou/gopsutil/v3/disk"
@@ -879,6 +880,7 @@ func (s *Server) handleGetProviders(c *gin.Context) {
// Get models for this provider from registry
var models []string
s.registryMu.RLock()
if s.registry != nil {
registryID := id
if id == "gemini" {
@@ -897,6 +899,7 @@ func (s *Server) handleGetProviders(c *gin.Context) {
}
}
}
s.registryMu.RUnlock()
// If it's ollama, also include models from config
if id == "ollama" {
@@ -1060,6 +1063,7 @@ func (s *Server) handleGetModels(c *gin.Context) {
}
var result []gin.H
s.registryMu.RLock()
if s.registry != nil {
for pID, pInfo := range s.registry.Providers {
proxyProvider, allowed := allowedRegistryProviders[pID]
@@ -1210,6 +1214,7 @@ func (s *Server) handleUpdateModel(c *gin.Context) {
// Find provider for this model
providerID := "unknown"
s.registryMu.RLock()
if s.registry != nil {
for pID, pInfo := range s.registry.Providers {
if _, ok := pInfo.Models[id]; ok {
@@ -1388,6 +1393,7 @@ func (s *Server) handleSystemMetrics(c *gin.Context) {
func (s *Server) handleGetSettings(c *gin.Context) {
providerCount := 0
modelCount := 0
s.registryMu.RLock()
if s.registry != nil {
providerCount = len(s.registry.Providers)
for _, p := range s.registry.Providers {