feat: add image generation for OpenAI DALL-E and Gemini Imagen
New `/v1/images/generations` endpoint proxies DALL-E 2/3 (OpenAI)
and Imagen 3 (Gemini). Same auth/logging as chat completions.
- Add ImageGenerationRequest/Response models
- Extend Provider interface with ImageGeneration()
- OpenAI: forward to /v1/images/generations
- Gemini: call /v1beta/models/{model}:predict, map OpenAI params
- Circuit breaker wraps image gen like chat completions
- Model routing: dall-e* -> openai, imagen*/gemini* -> gemini
- Unsupported providers (deepseek/moonshot/grok/ollama) return error
- Fix pre-existing CachedContentTokenCount bug in StreamGemini
This commit is contained in:
@@ -75,6 +75,116 @@ type GeminiFunctionResponse struct {
|
||||
Response json.RawMessage `json:"response"`
|
||||
}
|
||||
|
||||
func (p *GeminiProvider) ImageGeneration(ctx context.Context, req *models.ImageGenerationRequest) (*models.ImageGenerationResponse, error) {
|
||||
// Gemini Imagen API: POST https://generativelanguage.googleapis.com/v1beta/models/{model}:predict
|
||||
// Map OpenAI-style params to Gemini Imagen params
|
||||
|
||||
n := uint32(1)
|
||||
if req.N != nil && *req.N > 0 {
|
||||
n = *req.N
|
||||
}
|
||||
|
||||
aspectRatio := "1:1"
|
||||
if req.Size != nil {
|
||||
aspectRatio = sizeToGeminiAspectRatio(*req.Size)
|
||||
}
|
||||
|
||||
// Build Imagen request
|
||||
imagenReq := map[string]interface{}{
|
||||
"instances": []map[string]interface{}{
|
||||
{"prompt": req.Prompt},
|
||||
},
|
||||
"parameters": map[string]interface{}{
|
||||
"sampleCount": n,
|
||||
"aspectRatio": aspectRatio,
|
||||
},
|
||||
}
|
||||
|
||||
// Model defaults to imagen-3.0-generate-001 if empty
|
||||
model := req.Model
|
||||
if model == "" {
|
||||
model = "imagen-3.0-generate-001"
|
||||
}
|
||||
|
||||
// Use v1beta for Imagen
|
||||
baseURL := p.config.BaseURL
|
||||
if !strings.Contains(baseURL, "v1beta") {
|
||||
baseURL = strings.Replace(baseURL, "/v1", "/v1beta", 1)
|
||||
}
|
||||
|
||||
url := fmt.Sprintf("%s/models/%s:predict?key=%s", baseURL, model, p.apiKey)
|
||||
|
||||
resp, err := p.client.R().
|
||||
SetContext(ctx).
|
||||
SetHeader("Content-Type", "application/json").
|
||||
SetBody(imagenReq).
|
||||
Post(url)
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("gemini imagen request failed: %w", err)
|
||||
}
|
||||
|
||||
if !resp.IsSuccess() {
|
||||
return nil, fmt.Errorf("Gemini Imagen API error (%d): %s", resp.StatusCode(), resp.String())
|
||||
}
|
||||
|
||||
// Parse Imagen response
|
||||
var imagenResp struct {
|
||||
Predictions []struct {
|
||||
MimeType string `json:"mimeType"`
|
||||
BytesBase64Encoded string `json:"bytesBase64Encoded"`
|
||||
} `json:"predictions"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(resp.Body(), &imagenResp); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse Imagen response: %w", err)
|
||||
}
|
||||
|
||||
respFormat := "url"
|
||||
if req.ResponseFormat != nil && *req.ResponseFormat == "b64_json" {
|
||||
respFormat = "b64_json"
|
||||
}
|
||||
|
||||
var data []models.ImageData
|
||||
for _, pred := range imagenResp.Predictions {
|
||||
imgData := models.ImageData{}
|
||||
if respFormat == "b64_json" {
|
||||
imgData.B64JSON = pred.BytesBase64Encoded
|
||||
} else {
|
||||
// Build a data URI since Gemini returns base64, not a URL
|
||||
mime := pred.MimeType
|
||||
if mime == "" {
|
||||
mime = "image/png"
|
||||
}
|
||||
imgData.URL = fmt.Sprintf("data:%s;base64,%s", mime, pred.BytesBase64Encoded)
|
||||
}
|
||||
data = append(data, imgData)
|
||||
}
|
||||
|
||||
result := &models.ImageGenerationResponse{
|
||||
Created: time.Now().Unix(),
|
||||
Data: data,
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// sizeToGeminiAspectRatio converts an OpenAI-style size string of the form
// "WIDTHxHEIGHT" (e.g. "1024x1792") into the closest aspect ratio in the
// candidate list below: "1:1", "3:4", "4:3", "9:16" or "16:9".
//
// The DALL-E sizes map as "256x256", "512x512", "1024x1024" -> "1:1",
// "1024x1792" -> "9:16" and "1792x1024" -> "16:9". Other well-formed sizes
// snap to the nearest candidate instead of silently becoming square.
// Anything that cannot be parsed as two positive integers separated by a
// lowercase "x" yields "1:1".
func sizeToGeminiAspectRatio(size string) string {
	const fallback = "1:1"

	var width, height int
	if _, err := fmt.Sscanf(size, "%dx%d", &width, &height); err != nil || width <= 0 || height <= 0 {
		return fallback
	}

	// Candidate ratios, expressed as width:height.
	supported := []struct {
		name string
		w, h float64
	}{
		{"1:1", 1, 1},
		{"3:4", 3, 4},
		{"4:3", 4, 3},
		{"9:16", 9, 16},
		{"16:9", 16, 9},
	}

	target := float64(width) / float64(height)
	best := fallback
	bestDiff := -1.0 // negative means "no candidate seen yet"
	for _, ratio := range supported {
		diff := target - ratio.w/ratio.h
		if diff < 0 {
			diff = -diff
		}
		if bestDiff < 0 || diff < bestDiff {
			best, bestDiff = ratio.name, diff
		}
	}
	return best
}
|
||||
|
||||
func (p *GeminiProvider) ChatCompletion(ctx context.Context, req *models.UnifiedRequest) (*models.ChatCompletionResponse, error) {
|
||||
// Gemini mapping
|
||||
var contents []GeminiContent
|
||||
|
||||
Reference in New Issue
Block a user