fix(gemini): improve Gemini 3 stability and diagnostics
- Switch Gemini 3 models to v1beta for both streaming and non-streaming requests (better reasoning support).
- Increase the max_output_tokens cap from 8192 to 65536 for reasoning-heavy models.
- Elevate API URL and stream-chunk tracing from DEBUG to INFO level for easier production debugging.
This commit is contained in:
@@ -563,9 +563,9 @@ impl GeminiProvider {
|
|||||||
/// Determine the appropriate base URL for the model.
|
/// Determine the appropriate base URL for the model.
|
||||||
/// "preview" models often require the v1beta endpoint, but newer promoted ones may be on v1.
|
/// "preview" models often require the v1beta endpoint, but newer promoted ones may be on v1.
|
||||||
fn get_base_url(&self, model: &str) -> String {
|
fn get_base_url(&self, model: &str) -> String {
|
||||||
// Only use v1beta for older preview models or specific "thinking" experimental models.
|
// Gemini 3 and other reasoning/preview models often perform better or
|
||||||
// Gemini 3.0+ models are typically released on v1 even in preview.
|
// strictly require v1beta for advanced features like thought_signatures.
|
||||||
if (model.contains("preview") && !model.contains("gemini-3")) || model.contains("thinking") {
|
if model.contains("preview") || model.contains("thinking") || model.contains("gemini-3") {
|
||||||
self.config.base_url.replace("/v1", "/v1beta")
|
self.config.base_url.replace("/v1", "/v1beta")
|
||||||
} else {
|
} else {
|
||||||
self.config.base_url.clone()
|
self.config.base_url.clone()
|
||||||
@@ -648,7 +648,7 @@ impl super::Provider for GeminiProvider {
|
|||||||
temperature: request.temperature,
|
temperature: request.temperature,
|
||||||
top_p: request.top_p,
|
top_p: request.top_p,
|
||||||
top_k: request.top_k,
|
top_k: request.top_k,
|
||||||
max_output_tokens: request.max_tokens.map(|t| t.min(8192)),
|
max_output_tokens: request.max_tokens.map(|t| t.min(65536)),
|
||||||
stop_sequences,
|
stop_sequences,
|
||||||
candidate_count: request.n,
|
candidate_count: request.n,
|
||||||
});
|
});
|
||||||
@@ -663,7 +663,7 @@ impl super::Provider for GeminiProvider {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let url = format!("{}/models/{}:generateContent", base_url, model);
|
let url = format!("{}/models/{}:generateContent", base_url, model);
|
||||||
tracing::debug!("Calling Gemini API: {}", url);
|
tracing::info!("Calling Gemini API: {}", url);
|
||||||
|
|
||||||
let response = self
|
let response = self
|
||||||
.client
|
.client
|
||||||
@@ -800,7 +800,7 @@ impl super::Provider for GeminiProvider {
|
|||||||
temperature: request.temperature,
|
temperature: request.temperature,
|
||||||
top_p: request.top_p,
|
top_p: request.top_p,
|
||||||
top_k: request.top_k,
|
top_k: request.top_k,
|
||||||
max_output_tokens: request.max_tokens.map(|t| t.min(8192)),
|
max_output_tokens: request.max_tokens.map(|t| t.min(65536)),
|
||||||
stop_sequences,
|
stop_sequences,
|
||||||
candidate_count: request.n,
|
candidate_count: request.n,
|
||||||
});
|
});
|
||||||
@@ -818,7 +818,7 @@ impl super::Provider for GeminiProvider {
|
|||||||
"{}/models/{}:streamGenerateContent?alt=sse",
|
"{}/models/{}:streamGenerateContent?alt=sse",
|
||||||
base_url, model,
|
base_url, model,
|
||||||
);
|
);
|
||||||
tracing::debug!("Calling Gemini Stream API: {}", url);
|
tracing::info!("Calling Gemini Stream API: {}", url);
|
||||||
|
|
||||||
// Capture a clone of the request to probe for errors (Gemini 400s are common)
|
// Capture a clone of the request to probe for errors (Gemini 400s are common)
|
||||||
let probe_request = gemini_request.clone();
|
let probe_request = gemini_request.clone();
|
||||||
@@ -844,6 +844,10 @@ impl super::Provider for GeminiProvider {
|
|||||||
let gemini_response: GeminiStreamResponse = serde_json::from_str(&msg.data)
|
let gemini_response: GeminiStreamResponse = serde_json::from_str(&msg.data)
|
||||||
.map_err(|e| AppError::ProviderError(format!("Failed to parse stream chunk: {}", e)))?;
|
.map_err(|e| AppError::ProviderError(format!("Failed to parse stream chunk: {}", e)))?;
|
||||||
|
|
||||||
|
tracing::info!("Received Gemini stream chunk (candidates: {}, has_usage: {})",
|
||||||
|
gemini_response.candidates.len(),
|
||||||
|
gemini_response.usage_metadata.is_some()
|
||||||
|
);
|
||||||
// (rest of processing remains identical)
|
// (rest of processing remains identical)
|
||||||
|
|
||||||
// Extract usage from usageMetadata if present (reported on every/last chunk)
|
// Extract usage from usageMetadata if present (reported on every/last chunk)
|
||||||
|
|||||||
Reference in New Issue
Block a user