From 3d43948dbe4814d3934a921ea8c4539da5879f4e Mon Sep 17 00:00:00 2001
From: hobokenchicken
Date: Thu, 5 Mar 2026 16:27:22 +0000
Subject: [PATCH] fix(gemini): improve Gemini 3 stability and diagnostics

- Switch Gemini 3 models to the v1beta endpoint for both streaming and
  non-streaming calls; these models often need v1beta for advanced
  reasoning features such as thought_signatures.
- Raise the max_output_tokens cap from 8192 to 65536. The clamp still
  applies to every request; the extra headroom is aimed at
  reasoning-heavy models.
- Promote API URL and stream-chunk tracing from DEBUG to INFO for
  easier production debugging.
---
 src/providers/gemini.rs | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/providers/gemini.rs b/src/providers/gemini.rs
index c9d3b4ab..393963cf 100644
--- a/src/providers/gemini.rs
+++ b/src/providers/gemini.rs
@@ -563,9 +563,9 @@ impl GeminiProvider {
     /// Determine the appropriate base URL for the model.
     /// "preview" models often require the v1beta endpoint, but newer promoted ones may be on v1.
     fn get_base_url(&self, model: &str) -> String {
-        // Only use v1beta for older preview models or specific "thinking" experimental models.
-        // Gemini 3.0+ models are typically released on v1 even in preview.
-        if (model.contains("preview") && !model.contains("gemini-3")) || model.contains("thinking") {
+        // Gemini 3 and other reasoning/preview models often perform better or
+        // strictly require v1beta for advanced features like thought_signatures.
+        if model.contains("preview") || model.contains("thinking") || model.contains("gemini-3") {
             self.config.base_url.replace("/v1", "/v1beta")
         } else {
             self.config.base_url.clone()
@@ -648,7 +648,7 @@ impl super::Provider for GeminiProvider {
             temperature: request.temperature,
             top_p: request.top_p,
             top_k: request.top_k,
-            max_output_tokens: request.max_tokens.map(|t| t.min(8192)),
+            max_output_tokens: request.max_tokens.map(|t| t.min(65536)),
             stop_sequences,
             candidate_count: request.n,
         });
@@ -663,7 +663,7 @@ impl super::Provider for GeminiProvider {
         };
 
         let url = format!("{}/models/{}:generateContent", base_url, model);
-        tracing::debug!("Calling Gemini API: {}", url);
+        tracing::info!("Calling Gemini API: {}", url);
 
         let response = self
             .client
@@ -800,7 +800,7 @@ impl super::Provider for GeminiProvider {
             temperature: request.temperature,
             top_p: request.top_p,
             top_k: request.top_k,
-            max_output_tokens: request.max_tokens.map(|t| t.min(8192)),
+            max_output_tokens: request.max_tokens.map(|t| t.min(65536)),
             stop_sequences,
             candidate_count: request.n,
         });
@@ -818,7 +818,7 @@ impl super::Provider for GeminiProvider {
             "{}/models/{}:streamGenerateContent?alt=sse",
             base_url, model,
         );
-        tracing::debug!("Calling Gemini Stream API: {}", url);
+        tracing::info!("Calling Gemini Stream API: {}", url);
 
         // Capture a clone of the request to probe for errors (Gemini 400s are common)
         let probe_request = gemini_request.clone();
@@ -844,6 +844,10 @@ impl super::Provider for GeminiProvider {
                 let gemini_response: GeminiStreamResponse = serde_json::from_str(&msg.data)
                     .map_err(|e| AppError::ProviderError(format!("Failed to parse stream chunk: {}", e)))?;
+                tracing::info!("Received Gemini stream chunk (candidates: {}, has_usage: {})",
+                    gemini_response.candidates.len(),
+                    gemini_response.usage_metadata.is_some()
+                );
 
                 // (rest of processing remains identical)
                 // Extract usage from usageMetadata if present (reported on every/last chunk)
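
Sketch of how the new get_base_url routing could be pinned down in a
test. The with_base_url constructor here is hypothetical; substitute
however GeminiProvider is actually built in this crate's tests. It
assumes a configured base_url of
"https://generativelanguage.googleapis.com/v1".

    #[cfg(test)]
    mod base_url_routing_tests {
        use super::*;

        // Hypothetical test helper; adapt to the crate's real constructor.
        fn provider() -> GeminiProvider {
            GeminiProvider::with_base_url("https://generativelanguage.googleapis.com/v1")
        }

        #[test]
        fn reasoning_and_preview_models_use_v1beta() {
            let p = provider();
            // All three predicates in the new condition route to v1beta.
            assert!(p.get_base_url("gemini-3-pro").ends_with("/v1beta"));
            assert!(p.get_base_url("gemini-2.5-pro-preview").ends_with("/v1beta"));
            assert!(p.get_base_url("gemini-2.0-flash-thinking-exp").ends_with("/v1beta"));
        }

        #[test]
        fn stable_models_keep_configured_endpoint() {
            let p = provider();
            // No predicate matches, so the configured URL is returned unchanged.
            assert!(p.get_base_url("gemini-2.0-flash").ends_with("/v1"));
        }
    }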
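
The max_output_tokens change is a pure clamp; nothing else about the
request changes. A standalone, std-only sketch of the semantics (the
function name is illustrative, not from the patch):

    fn clamp_max_tokens(requested: Option<u32>) -> Option<u32> {
        // Mirrors request.max_tokens.map(|t| t.min(65536)) from the patch.
        requested.map(|t| t.min(65536))
    }

    fn main() {
        assert_eq!(clamp_max_tokens(Some(100_000)), Some(65_536)); // clamped at the new cap
        assert_eq!(clamp_max_tokens(Some(4_096)), Some(4_096));    // small requests pass through
        assert_eq!(clamp_max_tokens(None), None);                  // unset stays unset
    }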
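
The new per-chunk log line assumes GeminiStreamResponse exposes
candidates as a Vec and usage_metadata as an Option. A minimal serde
sketch of that shape (StreamChunk and the Value payloads are
stand-ins, not the crate's real types; field names follow the
camelCase wire format):

    use serde::Deserialize;

    #[derive(Deserialize)]
    #[serde(rename_all = "camelCase")]
    struct StreamChunk {
        #[serde(default)]
        candidates: Vec<serde_json::Value>,
        usage_metadata: Option<serde_json::Value>,
    }

    fn main() {
        // Mid-stream chunk: content but no usage metadata yet.
        let mid: StreamChunk =
            serde_json::from_str(r#"{"candidates":[{"content":{}}]}"#).unwrap();
        assert_eq!(mid.candidates.len(), 1);
        assert!(mid.usage_metadata.is_none());

        // Final chunk: usageMetadata is present, so has_usage logs true.
        let last: StreamChunk = serde_json::from_str(
            r#"{"candidates":[],"usageMetadata":{"totalTokenCount":42}}"#,
        )
        .unwrap();
        assert!(last.usage_metadata.is_some());
    }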