feat: add OpenAI Responses API support (POST /v1/responses)
Add full Responses API endpoint alongside existing Chat Completions, with identical logging/tracking/cost pipeline. New: - internal/models/responses.go — request/response/stream types + ToUsage() bridge - internal/providers/openai_responses.go — OpenAI Responses/ResponsesStream Modified: - provider.go — Responses()+ResponsesStream() added to Provider interface - helpers.go — BuildOpenAIResponsesBody, parsers, SSE stream reader - circuit_breaker.go — CB wraps Responses, passthrough for stream - server.go — POST /v1/responses route + handleResponses handler - all non-OpenAI providers — stub methods with clear error messages Logging: ResponsesUsage.ToUsage() bridges to models.Usage, feeding same logRequest() -> DB insert -> dashboard WS -> client stats -> cost calc pipeline. No schema or logger changes needed.
This commit is contained in:
+112
-4
@@ -189,7 +189,7 @@ func (s *Server) setupRoutes() {
|
||||
v1.POST("/chat/completions", s.handleChatCompletions)
|
||||
v1.POST("/images/generations", s.handleImageGenerations)
|
||||
v1.GET("/models", s.handleListModels)
|
||||
v1.GET("/responses", s.handleListResponses)
|
||||
v1.POST("/responses", s.handleResponses)
|
||||
}
|
||||
|
||||
// Dashboard API Group
|
||||
@@ -246,9 +246,117 @@ func (s *Server) setupRoutes() {
|
||||
})
|
||||
}
|
||||
|
||||
func (s *Server) handleListResponses(c *gin.Context) {
|
||||
// This is a placeholder for the /v1/responses endpoint
|
||||
c.JSON(http.StatusOK, gin.H{"data": []interface{}{}})
|
||||
func (s *Server) handleResponses(c *gin.Context) {
|
||||
startTime := time.Now()
|
||||
var req models.ResponsesRequest
|
||||
if err := c.ShouldBindJSON(&req); err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
// Select provider based on model name
|
||||
providerName := "openai" // default for Responses API
|
||||
modelLower := strings.ToLower(req.Model)
|
||||
if strings.HasPrefix(modelLower, "gemini/") || strings.Contains(modelLower, "gemini") || strings.HasPrefix(modelLower, "google/") {
|
||||
providerName = "gemini"
|
||||
} else if strings.HasPrefix(modelLower, "deepseek/") || (strings.Contains(modelLower, "deepseek") && !strings.Contains(modelLower, "ollama")) {
|
||||
providerName = "deepseek"
|
||||
} else if strings.HasPrefix(modelLower, "moonshot/") || strings.Contains(modelLower, "kimi") || strings.Contains(modelLower, "moonshot") {
|
||||
providerName = "moonshot"
|
||||
} else if strings.HasPrefix(modelLower, "grok/") || strings.Contains(modelLower, "grok") {
|
||||
providerName = "grok"
|
||||
} else if strings.HasPrefix(modelLower, "ollama/") ||
|
||||
strings.Contains(modelLower, "glm-") ||
|
||||
strings.Contains(modelLower, "qwen") ||
|
||||
strings.Contains(modelLower, "gemma") ||
|
||||
strings.Contains(modelLower, "llama") ||
|
||||
strings.Contains(modelLower, "mistral") ||
|
||||
strings.Contains(modelLower, "phi") ||
|
||||
strings.Contains(modelLower, "yi") ||
|
||||
strings.Contains(modelLower, "codellama") ||
|
||||
strings.Contains(modelLower, "command-r") {
|
||||
providerName = "ollama"
|
||||
}
|
||||
|
||||
provider, ok := s.providers[providerName]
|
||||
if !ok {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Provider %s not enabled or supported", providerName)})
|
||||
return
|
||||
}
|
||||
|
||||
// Strip common prefixes from model name
|
||||
modelID := req.Model
|
||||
prefixes := []string{"gemini/", "google/", "openai/", "deepseek/", "moonshot/", "grok/", "ollama/"}
|
||||
for _, p := range prefixes {
|
||||
if strings.HasPrefix(modelID, p) {
|
||||
modelID = strings.TrimPrefix(modelID, p)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Use the stripped model name for the actual API call
|
||||
req.Model = modelID
|
||||
|
||||
clientID := "default"
|
||||
if auth, ok := c.Get("auth"); ok {
|
||||
if authInfo, ok := auth.(models.AuthInfo); ok {
|
||||
clientID = authInfo.ClientID
|
||||
}
|
||||
}
|
||||
|
||||
stream := req.Stream != nil && *req.Stream
|
||||
|
||||
if stream {
|
||||
ch, err := provider.ResponsesStream(c.Request.Context(), &req)
|
||||
if err != nil {
|
||||
s.logRequest(startTime, clientID, providerName, req.Model, nil, err, false)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
c.Header("Content-Type", "text/event-stream")
|
||||
c.Header("Cache-Control", "no-cache")
|
||||
c.Header("Connection", "keep-alive")
|
||||
|
||||
var lastUsage *models.ResponsesUsage
|
||||
c.Stream(func(w io.Writer) bool {
|
||||
chunk, ok := <-ch
|
||||
if !ok {
|
||||
fmt.Fprintf(w, "data: [DONE]\n\n")
|
||||
if lastUsage != nil {
|
||||
s.logRequest(startTime, clientID, providerName, req.Model, lastUsage.ToUsage(), nil, false)
|
||||
} else {
|
||||
s.logRequest(startTime, clientID, providerName, req.Model, nil, nil, false)
|
||||
}
|
||||
return false
|
||||
}
|
||||
// Capture usage from the response payload in streaming chunks
|
||||
if chunk.Response != nil && chunk.Response.Usage != nil {
|
||||
lastUsage = chunk.Response.Usage
|
||||
}
|
||||
data, err := json.Marshal(chunk)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
fmt.Fprintf(w, "data: %s\n\n", data)
|
||||
return true
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
resp, err := provider.Responses(c.Request.Context(), &req)
|
||||
if err != nil {
|
||||
s.logRequest(startTime, clientID, providerName, req.Model, nil, err, false)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
if resp.Usage != nil {
|
||||
s.logRequest(startTime, clientID, providerName, req.Model, resp.Usage.ToUsage(), nil, false)
|
||||
} else {
|
||||
s.logRequest(startTime, clientID, providerName, req.Model, nil, nil, false)
|
||||
}
|
||||
c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
func (s *Server) handleListModels(c *gin.Context) {
|
||||
|
||||
Reference in New Issue
Block a user