feat: add OpenAI Responses API support (POST /v1/responses)
CI / Lint (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Build (push) Has been cancelled

Add a full Responses API endpoint alongside the existing Chat Completions
endpoint, with an identical logging/tracking/cost pipeline.

New:
- internal/models/responses.go — request/response/stream types + ToUsage() bridge
- internal/providers/openai_responses.go — OpenAI Responses/ResponsesStream

Modified:
- provider.go — Responses()+ResponsesStream() added to Provider interface
- helpers.go — BuildOpenAIResponsesBody, parsers, SSE stream reader
- circuit_breaker.go — CB wraps Responses, passthrough for stream
- server.go — POST /v1/responses route + handleResponses handler
- all non-OpenAI providers — stub methods with clear error messages

Logging: ResponsesUsage.ToUsage() bridges to models.Usage, feeding the same
logRequest() -> DB insert -> dashboard WS -> client stats -> cost calc
pipeline. No schema or logger changes are needed.
This commit is contained in:
2026-05-02 16:38:17 -04:00
parent eb67287b56
commit e5ef39f327
11 changed files with 507 additions and 4 deletions
+141
View File
@@ -0,0 +1,141 @@
package models
import "encoding/json"
// Responses API request types
// ResponsesRequest maps to POST /v1/responses body (OpenAI Responses API format).
// The `input` field can be a string or an array of message objects, so it is
// carried as raw JSON and is not decoded by this type; `tools` and
// `tool_choice` are likewise carried as raw JSON.
// Pointer-typed fields let an absent value (nil) be told apart from an
// explicit zero value, and are omitted from the encoded JSON when nil.
type ResponsesRequest struct {
Model string `json:"model"`
// Input has no omitempty, so the key is always present in encoded JSON.
Input json.RawMessage `json:"input"` // string or []ResponseInputMessage
Instructions string `json:"instructions,omitempty"`
Temperature *float64 `json:"temperature,omitempty"`
MaxOutputTokens *uint32 `json:"max_output_tokens,omitempty"`
TopP *float64 `json:"top_p,omitempty"`
// Stream is a *bool so that "not specified" (nil) differs from an explicit false.
Stream *bool `json:"stream,omitempty"`
Tools json.RawMessage `json:"tools,omitempty"`
ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
// Store is a *bool so that "not specified" (nil) differs from an explicit false.
Store *bool `json:"store,omitempty"`
}
// ResponseInputMessage represents a single message in the input array.
// Content is carried as raw JSON because it may be either a plain string or
// an array of content parts; this type does not decode it further.
type ResponseInputMessage struct {
Role string `json:"role"`
Content json.RawMessage `json:"content"` // string or []ContentPart
}
// Responses API response types
// ResponsesResponse maps to OpenAI /v1/responses response.
// Usage is an optional pointer: it is nil when the response carries no usage
// block and is omitted from the encoded JSON when nil, so check it for nil
// before reading its fields.
type ResponsesResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Model string `json:"model"`
// Output has no omitempty, so a nil slice encodes as JSON null.
Output []ResponsesOutputItem `json:"output"`
Usage *ResponsesUsage `json:"usage,omitempty"`
}
// ResponsesOutputItem represents an item in the output array.
// For messages: type="message", role, content[].
// For function calls: type="function_call", id, name, arguments, status.
// Both shapes share this one struct; every field except Type is tagged
// omitempty, so fields that are left at their zero value are dropped from the
// encoded JSON.
type ResponsesOutputItem struct {
Type string `json:"type"`
Role string `json:"role,omitempty"`
Content []ResponsesOutputContent `json:"content,omitempty"`
ID string `json:"id,omitempty"`
Name string `json:"name,omitempty"`
// Arguments is held as a string and is not parsed by this type.
Arguments string `json:"arguments,omitempty"`
Status string `json:"status,omitempty"`
}
// ResponsesOutputContent represents content parts within an output message.
// Annotations are kept as a list of raw JSON values and are not interpreted
// by this type.
type ResponsesOutputContent struct {
Type string `json:"type"`
Text string `json:"text,omitempty"`
Annotations []json.RawMessage `json:"annotations,omitempty"`
}
// ResponsesUsage maps to the usage block in Responses API.
// The two details blocks are optional pointers: nil when absent, and omitted
// from the encoded JSON when nil. ToUsage converts this type to the unified
// Usage model.
type ResponsesUsage struct {
InputTokens uint32 `json:"input_tokens"`
OutputTokens uint32 `json:"output_tokens"`
TotalTokens uint32 `json:"total_tokens"`
InputTokensDetails *ResponsesInputTokensDetails `json:"input_tokens_details,omitempty"`
OutputTokensDetails *ResponsesOutputTokensDetails `json:"output_tokens_details,omitempty"`
}
// ResponsesInputTokensDetails maps input token details
// (the `input_tokens_details` object of ResponsesUsage).
type ResponsesInputTokensDetails struct {
// CachedTokens is what ToUsage exposes as Usage.CacheReadTokens when non-zero.
CachedTokens uint32 `json:"cached_tokens"`
}
// ResponsesOutputTokensDetails maps output token details
// (the `output_tokens_details` object of ResponsesUsage).
type ResponsesOutputTokensDetails struct {
// ReasoningTokens is what ToUsage exposes as Usage.ReasoningTokens when non-zero.
ReasoningTokens uint32 `json:"reasoning_tokens"`
}
// ToUsage converts ResponsesUsage to the unified Usage model.
//
// InputTokens, OutputTokens and TotalTokens are copied to PromptTokens,
// CompletionTokens and TotalTokens respectively. CacheReadTokens and
// ReasoningTokens are set only when the matching details block is present and
// its count is non-zero; otherwise they stay nil. When set, those pointers
// refer to the fields inside the receiver's details structs, not to copies.
//
// A nil receiver returns nil instead of panicking: the usage block is optional
// (ResponsesResponse.Usage may be nil), so resp.Usage.ToUsage() must be safe
// to call without a prior nil check.
func (u *ResponsesUsage) ToUsage() *Usage {
	if u == nil {
		return nil
	}
	usage := &Usage{
		PromptTokens:     u.InputTokens,
		CompletionTokens: u.OutputTokens,
		TotalTokens:      u.TotalTokens,
	}
	if u.InputTokensDetails != nil && u.InputTokensDetails.CachedTokens > 0 {
		usage.CacheReadTokens = &u.InputTokensDetails.CachedTokens
	}
	if u.OutputTokensDetails != nil && u.OutputTokensDetails.ReasoningTokens > 0 {
		usage.ReasoningTokens = &u.OutputTokensDetails.ReasoningTokens
	}
	return usage
}
// ResponsesStreamChunk represents an SSE chunk from the Responses streaming endpoint.
// Type identifies the event; Response, Item and Delta are optional pointers
// that stay nil when the corresponding key is absent from the chunk.
//
// NOTE(review): in the OpenAI Responses streaming API the text delta events
// (e.g. "response.output_text.delta") appear to carry `delta` as a plain JSON
// string, not an object. If so, decoding such an event into
// *ResponsesStreamDelta would fail with an unmarshal type error — confirm
// against the SSE stream reader and the upstream API reference.
type ResponsesStreamChunk struct {
Type string `json:"type"`
Response *ResponsesStreamPayload `json:"response,omitempty"`
Item *ResponsesStreamPayloadItem `json:"item,omitempty"`
Delta *ResponsesStreamDelta `json:"delta,omitempty"`
}
// ResponsesStreamPayload represents the "response" field in some SSE chunks.
// Only the object, id, model and usage keys are captured; any other keys in
// the payload are ignored when decoding. Usage is nil when the payload has no
// usage block.
type ResponsesStreamPayload struct {
Object string `json:"object"`
ID string `json:"id"`
Model string `json:"model"`
Usage *ResponsesUsage `json:"usage,omitempty"`
}
// ResponsesStreamPayloadItem represents the "item" field in SSE chunks.
// It has the same fields as ResponsesOutputItem except that it has no
// Arguments field; every field other than Type is omitted from the encoded
// JSON when it holds its zero value.
type ResponsesStreamPayloadItem struct {
Type string `json:"type"`
Role string `json:"role,omitempty"`
Content []ResponsesOutputContent `json:"content,omitempty"`
ID string `json:"id,omitempty"`
Name string `json:"name,omitempty"`
Status string `json:"status,omitempty"`
}
// ResponsesStreamDelta represents a content delta in streaming.
// It is decoded from a JSON object with content_index, type and text keys.
//
// NOTE(review): the OpenAI Responses streaming events appear to send `delta`
// as a bare string with content_index as a sibling key on the event itself,
// not nested in an object — confirm this object shape matches what the
// upstream actually emits.
type ResponsesStreamDelta struct {
ContentIndex int `json:"content_index"`
Type string `json:"type"`
Text string `json:"text,omitempty"`
}
// UnifiedResponsesRequest is the internal unified format for Responses API.
// It carries no JSON tags. Compared with ResponsesRequest, the raw `input` is
// split into Input (text) and InputMessages (structured), Stream and Store
// are plain bools, not *bool, and ClientID is added.
// How Input is normalized and who populates ClientID is not visible in this
// file — see the request handler.
type UnifiedResponsesRequest struct {
ClientID string
Model string
Input string // normalized input text
InputMessages []ResponseInputMessage // structured input messages (if provided as array)
Instructions string
Temperature *float64
MaxOutputTokens *uint32
TopP *float64
// Stream is a plain bool here, so "unset" and "false" are no longer distinguishable.
Stream bool
// Tools and ToolChoice are passed through as raw JSON.
Tools json.RawMessage
ToolChoice json.RawMessage
// Store is a plain bool here, so "unset" and "false" are no longer distinguishable.
Store bool
}