From cb5b92155025c5f5c056540c2d4e04286e556332 Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Wed, 18 Mar 2026 13:14:51 +0000 Subject: [PATCH 01/14] feat(openai): implement tool support for gpt-5.4 via Responses API - Implement polymorphic 'input' structure for /responses endpoint - Map 'tool' role to 'function_call_output' items - Handle assistant 'tool_calls' as separate 'function_call' items - Add synchronous and streaming parsers for function_call items - Fix 400 Bad Request 'Invalid value: tool' error --- src/providers/openai.rs | 214 +++++++++++++++++++++++++++++++++------- 1 file changed, 177 insertions(+), 37 deletions(-) diff --git a/src/providers/openai.rs b/src/providers/openai.rs index 7bc5d86c..5665e6be 100644 --- a/src/providers/openai.rs +++ b/src/providers/openai.rs @@ -112,10 +112,57 @@ impl super::Provider for OpenAIProvider { let messages_json = helpers::messages_to_openai_json(&request.messages).await?; let mut input_parts = Vec::new(); for m in &messages_json { - let mut role = m["role"].as_str().unwrap_or("user").to_string(); - // Newer models (gpt-5, o1) prefer "developer" over "system" - if role == "system" { - role = "developer".to_string(); + let role = m["role"].as_str().unwrap_or("user"); + + if role == "tool" { + input_parts.push(serde_json::json!({ + "type": "function_call_output", + "call_id": m.get("tool_call_id").and_then(|v| v.as_str()).unwrap_or(""), + "output": m.get("content").and_then(|v| v.as_str()).unwrap_or("") + })); + continue; + } + + if role == "assistant" && m.get("tool_calls").is_some() { + // Push message part if it exists + let content_val = m.get("content").cloned().unwrap_or(serde_json::json!("")); + if !content_val.is_null() && (content_val.is_array() && !content_val.as_array().unwrap().is_empty() || content_val.is_string() && !content_val.as_str().unwrap().is_empty()) { + let mut content = content_val.clone(); + if let Some(text) = content.as_str() { + content = serde_json::json!([{ "type": "output_text", 
"text": text }]); + } else if let Some(arr) = content.as_array_mut() { + for part in arr { + if let Some(obj) = part.as_object_mut() { + if obj.get("type").and_then(|v| v.as_str()) == Some("text") { + obj.insert("type".to_string(), serde_json::json!("output_text")); + } + } + } + } + input_parts.push(serde_json::json!({ + "type": "message", + "role": "assistant", + "content": content + })); + } + + // Push tool calls as separate items + if let Some(tcs) = m.get("tool_calls").and_then(|v| v.as_array()) { + for tc in tcs { + input_parts.push(serde_json::json!({ + "type": "function_call", + "call_id": tc["id"], + "name": tc["function"]["name"], + "arguments": tc["function"]["arguments"] + })); + } + } + continue; + } + + let mut mapped_role = role.to_string(); + if mapped_role == "system" { + mapped_role = "developer".to_string(); } let mut content = m.get("content").cloned().unwrap_or(serde_json::json!([])); @@ -127,12 +174,11 @@ impl super::Provider for OpenAIProvider { if let Some(t) = part_obj.get("type").and_then(|v| v.as_str()) { match t { "text" => { - let new_type = if role == "assistant" { "output_text" } else { "input_text" }; + let new_type = if mapped_role == "assistant" { "output_text" } else { "input_text" }; part_obj.insert("type".to_string(), serde_json::json!(new_type)); } "image_url" => { - // Assistant typically doesn't have image_url in history this way, but for safety: - let new_type = if role == "assistant" { "output_image" } else { "input_image" }; + let new_type = if mapped_role == "assistant" { "output_image" } else { "input_image" }; part_obj.insert("type".to_string(), serde_json::json!(new_type)); if let Some(img_url) = part_obj.remove("image_url") { part_obj.insert("image".to_string(), img_url); @@ -144,12 +190,13 @@ impl super::Provider for OpenAIProvider { } } } else if let Some(text) = content.as_str() { - let new_type = if role == "assistant" { "output_text" } else { "input_text" }; + let new_type = if mapped_role == "assistant" { 
"output_text" } else { "input_text" }; content = serde_json::json!([{ "type": new_type, "text": text }]); } input_parts.push(serde_json::json!({ - "role": role, + "type": "message", + "role": mapped_role, "content": content })); } @@ -200,18 +247,43 @@ impl super::Provider for OpenAIProvider { // Normalize Responses API output into ProviderResponse let mut content_text = String::new(); + let mut tool_calls = Vec::new(); if let Some(output) = resp_json.get("output").and_then(|o| o.as_array()) { for out in output { - if let Some(contents) = out.get("content").and_then(|c| c.as_array()) { - for item in contents { - if let Some(text) = item.get("text").and_then(|t| t.as_str()) { - if !content_text.is_empty() { content_text.push_str("\n"); } - content_text.push_str(text); - } else if let Some(parts) = item.get("parts").and_then(|p| p.as_array()) { - for p in parts { - if let Some(t) = p.as_str() { + let item_type = out.get("type").and_then(|v| v.as_str()).unwrap_or(""); + match item_type { + "message" => { + if let Some(contents) = out.get("content").and_then(|c| c.as_array()) { + for item in contents { + if let Some(text) = item.get("text").and_then(|t| t.as_str()) { if !content_text.is_empty() { content_text.push_str("\n"); } - content_text.push_str(t); + content_text.push_str(text); + } + } + } + } + "function_call" => { + let id = out.get("call_id") + .or_else(|| out.get("item_id")) + .or_else(|| out.get("id")) + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let name = out.get("name").and_then(|v| v.as_str()).unwrap_or("").to_string(); + let arguments = out.get("arguments").and_then(|v| v.as_str()).unwrap_or("").to_string(); + tool_calls.push(crate::models::ToolCall { + id, + call_type: "function".to_string(), + function: crate::models::FunctionCall { name, arguments }, + }); + } + _ => { + // Fallback for older/nested structure + if let Some(contents) = out.get("content").and_then(|c| c.as_array()) { + for item in contents { + if let Some(text) = 
item.get("text").and_then(|t| t.as_str()) { + if !content_text.is_empty() { content_text.push_str("\n"); } + content_text.push_str(text); } } } @@ -244,7 +316,7 @@ impl super::Provider for OpenAIProvider { Ok(ProviderResponse { content: content_text, reasoning_content: None, - tool_calls: None, + tool_calls: if tool_calls.is_empty() { None } else { Some(tool_calls) }, prompt_tokens, completion_tokens, reasoning_tokens: 0, @@ -379,10 +451,57 @@ impl super::Provider for OpenAIProvider { let messages_json = helpers::messages_to_openai_json(&request.messages).await?; let mut input_parts = Vec::new(); for m in &messages_json { - let mut role = m["role"].as_str().unwrap_or("user").to_string(); - // Newer models (gpt-5, o1) prefer "developer" over "system" - if role == "system" { - role = "developer".to_string(); + let role = m["role"].as_str().unwrap_or("user"); + + if role == "tool" { + input_parts.push(serde_json::json!({ + "type": "function_call_output", + "call_id": m.get("tool_call_id").and_then(|v| v.as_str()).unwrap_or(""), + "output": m.get("content").and_then(|v| v.as_str()).unwrap_or("") + })); + continue; + } + + if role == "assistant" && m.get("tool_calls").is_some() { + // Push message part if it exists + let content_val = m.get("content").cloned().unwrap_or(serde_json::json!("")); + if !content_val.is_null() && (content_val.is_array() && !content_val.as_array().unwrap().is_empty() || content_val.is_string() && !content_val.as_str().unwrap().is_empty()) { + let mut content = content_val.clone(); + if let Some(text) = content.as_str() { + content = serde_json::json!([{ "type": "output_text", "text": text }]); + } else if let Some(arr) = content.as_array_mut() { + for part in arr { + if let Some(obj) = part.as_object_mut() { + if obj.get("type").and_then(|v| v.as_str()) == Some("text") { + obj.insert("type".to_string(), serde_json::json!("output_text")); + } + } + } + } + input_parts.push(serde_json::json!({ + "type": "message", + "role": "assistant", + 
"content": content + })); + } + + // Push tool calls as separate items + if let Some(tcs) = m.get("tool_calls").and_then(|v| v.as_array()) { + for tc in tcs { + input_parts.push(serde_json::json!({ + "type": "function_call", + "call_id": tc["id"], + "name": tc["function"]["name"], + "arguments": tc["function"]["arguments"] + })); + } + } + continue; + } + + let mut mapped_role = role.to_string(); + if mapped_role == "system" { + mapped_role = "developer".to_string(); } let mut content = m.get("content").cloned().unwrap_or(serde_json::json!([])); @@ -394,12 +513,11 @@ impl super::Provider for OpenAIProvider { if let Some(t) = part_obj.get("type").and_then(|v| v.as_str()) { match t { "text" => { - let new_type = if role == "assistant" { "output_text" } else { "input_text" }; + let new_type = if mapped_role == "assistant" { "output_text" } else { "input_text" }; part_obj.insert("type".to_string(), serde_json::json!(new_type)); } "image_url" => { - // Assistant typically doesn't have image_url in history this way, but for safety: - let new_type = if role == "assistant" { "output_image" } else { "input_image" }; + let new_type = if mapped_role == "assistant" { "output_image" } else { "input_image" }; part_obj.insert("type".to_string(), serde_json::json!(new_type)); if let Some(img_url) = part_obj.remove("image_url") { part_obj.insert("image".to_string(), img_url); @@ -411,12 +529,13 @@ impl super::Provider for OpenAIProvider { } } } else if let Some(text) = content.as_str() { - let new_type = if role == "assistant" { "output_text" } else { "input_text" }; + let new_type = if mapped_role == "assistant" { "output_text" } else { "input_text" }; content = serde_json::json!([{ "type": new_type, "text": text }]); } input_parts.push(serde_json::json!({ - "role": role, + "type": "message", + "role": mapped_role, "content": content })); } @@ -475,6 +594,7 @@ impl super::Provider for OpenAIProvider { // Responses API specific parsing for streaming let mut content = String::new(); 
let mut finish_reason = None; + let mut tool_calls = None; let event_type = chunk.get("type").and_then(|v| v.as_str()).unwrap_or(""); @@ -484,15 +604,35 @@ impl super::Provider for OpenAIProvider { content.push_str(delta); } } - "response.output_text.done" => { - if let Some(text) = chunk.get("text").and_then(|v| v.as_str()) { - // Some implementations send the full text at the end - // We usually prefer deltas, but if we haven't seen them, this is the fallback. - // However, if we're already yielding deltas, we might not want this. - // For now, let's just use it as a signal that we're done. - finish_reason = Some("stop".to_string()); + "response.item.delta" => { + if let Some(delta) = chunk.get("delta") { + let t = delta.get("type").and_then(|v| v.as_str()).unwrap_or(""); + if t == "function_call" { + let call_id = delta.get("call_id") + .or_else(|| chunk.get("item_id")) + .and_then(|v| v.as_str()); + let name = delta.get("name").and_then(|v| v.as_str()); + let arguments = delta.get("arguments").and_then(|v| v.as_str()); + + tool_calls = Some(vec![crate::models::ToolCallDelta { + index: chunk.get("output_index").and_then(|v| v.as_u64()).unwrap_or(0) as u32, + id: call_id.map(|s| s.to_string()), + call_type: Some("function".to_string()), + function: Some(crate::models::FunctionCallDelta { + name: name.map(|s| s.to_string()), + arguments: arguments.map(|s| s.to_string()), + }), + }]); + } else if t == "message" { + if let Some(text) = delta.get("text").and_then(|v| v.as_str()) { + content.push_str(text); + } + } } } + "response.output_text.done" | "response.item.done" => { + finish_reason = Some("stop".to_string()); + } "response.done" => { finish_reason = Some("stop".to_string()); } @@ -514,12 +654,12 @@ impl super::Provider for OpenAIProvider { } } - if !content.is_empty() || finish_reason.is_some() { + if !content.is_empty() || finish_reason.is_some() || tool_calls.is_some() { yield ProviderStreamChunk { content, reasoning_content: None, finish_reason, - 
tool_calls: None, + tool_calls, model: model.clone(), usage: None, }; From 275ce34d0590d3bf2cb1f599b0d44ce6aabdf695 Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Wed, 18 Mar 2026 13:51:36 +0000 Subject: [PATCH 02/14] fix(openai): fix missing tools and instructions in Responses API - Add 'tools' and 'tool_choice' parameters to streaming Responses API - Include 'name' field in message items for Responses API input - Use string content for text-only messages to improve instruction following - Fix subagents not triggering and files not being created --- src/providers/openai.rs | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/src/providers/openai.rs b/src/providers/openai.rs index 5665e6be..e64d6ad7 100644 --- a/src/providers/openai.rs +++ b/src/providers/openai.rs @@ -190,15 +190,20 @@ impl super::Provider for OpenAIProvider { } } } else if let Some(text) = content.as_str() { - let new_type = if mapped_role == "assistant" { "output_text" } else { "input_text" }; - content = serde_json::json!([{ "type": new_type, "text": text }]); + // If it's just a string, send it as a string instead of an array of objects + // as it's safer for standard conversational messages. 
+ content = serde_json::json!(text); } - input_parts.push(serde_json::json!({ + let mut msg_item = serde_json::json!({ "type": "message", "role": mapped_role, "content": content - })); + }); + if let Some(name) = m.get("name") { + msg_item["name"] = name.clone(); + } + input_parts.push(msg_item); } let mut body = serde_json::json!({ @@ -223,6 +228,9 @@ impl super::Provider for OpenAIProvider { if let Some(tools) = &request.tools { body["tools"] = serde_json::json!(tools); } + if let Some(tool_choice) = &request.tool_choice { + body["tool_choice"] = serde_json::json!(tool_choice); + } let resp = self .client @@ -529,15 +537,20 @@ impl super::Provider for OpenAIProvider { } } } else if let Some(text) = content.as_str() { - let new_type = if mapped_role == "assistant" { "output_text" } else { "input_text" }; - content = serde_json::json!([{ "type": new_type, "text": text }]); + // If it's just a string, send it as a string instead of an array of objects + // as it's safer for standard conversational messages. 
+ content = serde_json::json!(text); } - input_parts.push(serde_json::json!({ + let mut msg_item = serde_json::json!({ "type": "message", "role": mapped_role, "content": content - })); + }); + if let Some(name) = m.get("name") { + msg_item["name"] = name.clone(); + } + input_parts.push(msg_item); } let mut body = serde_json::json!({ @@ -560,6 +573,13 @@ impl super::Provider for OpenAIProvider { } } + if let Some(tools) = &request.tools { + body["tools"] = serde_json::json!(tools); + } + if let Some(tool_choice) = &request.tool_choice { + body["tool_choice"] = serde_json::json!(tool_choice); + } + let url = format!("{}/responses", self.config.base_url); let api_key = self.api_key.clone(); let model = request.model.clone(); From 83e0ad02405ac89d09e3252bc7180361e6fb87ee Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Wed, 18 Mar 2026 14:00:49 +0000 Subject: [PATCH 03/14] fix(openai): flatten tools and tool_choice for Responses API - Map nested 'function' object to top-level fields - Support string and object-based 'tool_choice' formats - Fix 400 Bad Request 'Missing required parameter: tools[0].name' --- src/providers/openai.rs | 54 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/src/providers/openai.rs b/src/providers/openai.rs index e64d6ad7..ed654295 100644 --- a/src/providers/openai.rs +++ b/src/providers/openai.rs @@ -226,10 +226,33 @@ impl super::Provider for OpenAIProvider { } if let Some(tools) = &request.tools { - body["tools"] = serde_json::json!(tools); + let flattened: Vec = tools.iter().map(|t| { + let mut obj = serde_json::json!({ + "type": t.tool_type, + "name": t.function.name, + }); + if let Some(desc) = &t.function.description { + obj["description"] = serde_json::json!(desc); + } + if let Some(params) = &t.function.parameters { + obj["parameters"] = params.clone(); + } + obj + }).collect(); + body["tools"] = serde_json::json!(flattened); } if let Some(tool_choice) = &request.tool_choice { - 
body["tool_choice"] = serde_json::json!(tool_choice); + match tool_choice { + crate::models::ToolChoice::Mode(mode) => { + body["tool_choice"] = serde_json::json!(mode); + } + crate::models::ToolChoice::Specific(specific) => { + body["tool_choice"] = serde_json::json!({ + "type": specific.choice_type, + "name": specific.function.name, + }); + } + } } let resp = self @@ -574,10 +597,33 @@ impl super::Provider for OpenAIProvider { } if let Some(tools) = &request.tools { - body["tools"] = serde_json::json!(tools); + let flattened: Vec = tools.iter().map(|t| { + let mut obj = serde_json::json!({ + "type": t.tool_type, + "name": t.function.name, + }); + if let Some(desc) = &t.function.description { + obj["description"] = serde_json::json!(desc); + } + if let Some(params) = &t.function.parameters { + obj["parameters"] = params.clone(); + } + obj + }).collect(); + body["tools"] = serde_json::json!(flattened); } if let Some(tool_choice) = &request.tool_choice { - body["tool_choice"] = serde_json::json!(tool_choice); + match tool_choice { + crate::models::ToolChoice::Mode(mode) => { + body["tool_choice"] = serde_json::json!(mode); + } + crate::models::ToolChoice::Specific(specific) => { + body["tool_choice"] = serde_json::json!({ + "type": specific.choice_type, + "name": specific.function.name, + }); + } + } } let url = format!("{}/responses", self.config.base_url); From d0be16d8e3b7a87ff9ae23b38441acbfc6ce940f Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Wed, 18 Mar 2026 14:28:38 +0000 Subject: [PATCH 04/14] fix(openai): parse embedded 'tool_uses' JSON for gpt-5.4 parallel calls - Add static parse_tool_uses_json helper to extract embedded tool calls - Update synchronous and streaming Responses API parsers to detect tool_uses blocks - Strip tool_uses JSON from content to prevent raw JSON leakage to client - Resolve lifetime issues by avoiding &self capture in streaming closure --- src/providers/openai.rs | 90 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 
90 insertions(+) diff --git a/src/providers/openai.rs b/src/providers/openai.rs index ed654295..9791f989 100644 --- a/src/providers/openai.rs +++ b/src/providers/openai.rs @@ -36,6 +36,57 @@ impl OpenAIProvider { pricing: app_config.pricing.openai.clone(), }) } + + /// GPT-5.4 models sometimes emit parallel tool calls as a JSON block starting with + /// '{"tool_uses":' inside a text message instead of discrete function_call items. + /// This method attempts to extract and parse such tool calls. + pub fn parse_tool_uses_json(text: &str) -> Vec { + let mut calls = Vec::new(); + if let Some(start) = text.find("{\"tool_uses\":") { + // Find the end of the JSON block by matching braces + let sub = &text[start..]; + let mut brace_count = 0; + let mut end_idx = 0; + let mut found = false; + + for (i, c) in sub.char_indices() { + if c == '{' { brace_count += 1; } + else if c == '}' { + brace_count -= 1; + if brace_count == 0 { + end_idx = i + 1; + found = true; + break; + } + } + } + + if found { + let json_str = &sub[..end_idx]; + if let Ok(val) = serde_json::from_str::(json_str) { + if let Some(uses) = val.get("tool_uses").and_then(|u| u.as_array()) { + for (idx, u) in uses.iter().enumerate() { + let name = u.get("recipient_name") + .and_then(|v| v.as_str()) + .unwrap_or("unknown") + // Strip "functions." 
prefix if present + .replace("functions.", ""); + let arguments = u.get("parameters") + .map(|v| v.to_string()) + .unwrap_or_else(|| "{}".to_string()); + + calls.push(crate::models::ToolCall { + id: format!("call_tu_{}_{}", uuid::Uuid::new_v4().to_string()[..8].to_string(), idx), + call_type: "function".to_string(), + function: crate::models::FunctionCall { name, arguments }, + }); + } + } + } + } + } + calls + } } #[async_trait] @@ -344,6 +395,16 @@ impl super::Provider for OpenAIProvider { let completion_tokens = resp_json.get("usage").and_then(|u| u.get("completion_tokens")).and_then(|v| v.as_u64()).unwrap_or(0) as u32; let total_tokens = resp_json.get("usage").and_then(|u| u.get("total_tokens")).and_then(|v| v.as_u64()).unwrap_or(0) as u32; + // GPT-5.4 parallel tool calls might be embedded in content_text as a JSON block + let embedded_calls = Self::parse_tool_uses_json(&content_text); + if !embedded_calls.is_empty() { + // Strip the JSON part from content_text to keep it clean + if let Some(start) = content_text.find("{\"tool_uses\":") { + content_text = content_text[..start].to_string(); + } + tool_calls.extend(embedded_calls); + } + Ok(ProviderResponse { content: content_text, reasoning_content: None, @@ -720,6 +781,35 @@ impl super::Provider for OpenAIProvider { } } + // GPT-5.4 parallel tool calls might be embedded in content as a JSON block + let embedded_calls = Self::parse_tool_uses_json(&content); + + if !embedded_calls.is_empty() { + // Strip the JSON part from content to keep it clean + if let Some(start) = content.find("{\"tool_uses\":") { + content = content[..start].to_string(); + } + + // Convert ToolCall to ToolCallDelta for streaming + let deltas: Vec = embedded_calls.into_iter().enumerate().map(|(idx, tc)| { + crate::models::ToolCallDelta { + index: idx as u32, + id: Some(tc.id), + call_type: Some("function".to_string()), + function: Some(crate::models::FunctionCallDelta { + name: Some(tc.function.name), + arguments: 
Some(tc.function.arguments), + }), + } + }).collect(); + + if let Some(ref mut existing) = tool_calls { + existing.extend(deltas); + } else { + tool_calls = Some(deltas); + } + } + if !content.is_empty() || finish_reason.is_some() || tool_calls.is_some() { yield ProviderStreamChunk { content, From 2e4318d84b2e5ed574a767241f25f6568f1e78fa Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Wed, 18 Mar 2026 15:04:15 +0000 Subject: [PATCH 05/14] fix(openai): improve gpt-5.4 parallel tool call intercepting - Implement cross-delta content buffering in streaming Responses API - Wait for full 'tool_uses' JSON block before yielding to client - Handle 'to=multi_tool_use.parallel' preamble by buffering - Fix stream error probe to not request a new stream - Remove raw JSON leakage from streaming content --- src/providers/openai.rs | 138 ++++++++++++++++++++++++++-------------- 1 file changed, 89 insertions(+), 49 deletions(-) diff --git a/src/providers/openai.rs b/src/providers/openai.rs index 9791f989..19c8c81c 100644 --- a/src/providers/openai.rs +++ b/src/providers/openai.rs @@ -704,6 +704,8 @@ impl super::Provider for OpenAIProvider { let stream = async_stream::try_stream! 
{ let mut es = es; + let mut content_buffer = String::new(); + while let Some(event) = es.next().await { match event { Ok(reqwest_eventsource::Event::Message(msg)) => { @@ -719,7 +721,6 @@ impl super::Provider for OpenAIProvider { yield p_chunk?; } else { // Responses API specific parsing for streaming - let mut content = String::new(); let mut finish_reason = None; let mut tool_calls = None; @@ -728,7 +729,7 @@ impl super::Provider for OpenAIProvider { match event_type { "response.output_text.delta" => { if let Some(delta) = chunk.get("delta").and_then(|v| v.as_str()) { - content.push_str(delta); + content_buffer.push_str(delta); } } "response.item.delta" => { @@ -752,67 +753,101 @@ impl super::Provider for OpenAIProvider { }]); } else if t == "message" { if let Some(text) = delta.get("text").and_then(|v| v.as_str()) { - content.push_str(text); + content_buffer.push_str(text); } } } } - "response.output_text.done" | "response.item.done" => { + "response.output_text.done" | "response.item.done" | "response.done" => { finish_reason = Some("stop".to_string()); } - "response.done" => { - finish_reason = Some("stop".to_string()); - } - _ => { - // Fallback to older nested structure if present - if let Some(output) = chunk.get("output").and_then(|o| o.as_array()) { - for out in output { - if let Some(contents) = out.get("content").and_then(|c| c.as_array()) { - for item in contents { - if let Some(text) = item.get("text").and_then(|t| t.as_str()) { - content.push_str(text); - } else if let Some(delta) = item.get("delta").and_then(|d| d.get("text")).and_then(|t| t.as_str()) { - content.push_str(delta); + _ => {} + } + + // Process content_buffer to extract embedded tool calls or yield text + if !content_buffer.is_empty() { + // If we see the start of a tool call block, we wait for the full block + if content_buffer.contains("{\"tool_uses\":") { + let embedded_calls = Self::parse_tool_uses_json(&content_buffer); + if !embedded_calls.is_empty() { + if let Some(start) = 
content_buffer.find("{\"tool_uses\":") { + // Yield text before the JSON block + let preamble = content_buffer[..start].to_string(); + if !preamble.is_empty() { + yield ProviderStreamChunk { + content: preamble, + reasoning_content: None, + finish_reason: None, + tool_calls: None, + model: model.clone(), + usage: None, + }; + } + + // Yield the tool calls + let deltas: Vec = embedded_calls.into_iter().enumerate().map(|(idx, tc)| { + crate::models::ToolCallDelta { + index: idx as u32, + id: Some(tc.id), + call_type: Some("function".to_string()), + function: Some(crate::models::FunctionCallDelta { + name: Some(tc.function.name), + arguments: Some(tc.function.arguments), + }), + } + }).collect(); + + yield ProviderStreamChunk { + content: String::new(), + reasoning_content: None, + finish_reason: None, + tool_calls: Some(deltas), + model: model.clone(), + usage: None, + }; + + // Remove the processed part from buffer + // We need to find the end index correctly + let sub = &content_buffer[start..]; + let mut brace_count = 0; + let mut end_idx = 0; + for (i, c) in sub.char_indices() { + if c == '{' { brace_count += 1; } + else if c == '}' { + brace_count -= 1; + if brace_count == 0 { + end_idx = start + i + 1; + break; } } } + if end_idx > 0 { + content_buffer = content_buffer[end_idx..].to_string(); + } else { + content_buffer.clear(); + } } } - } - } - - // GPT-5.4 parallel tool calls might be embedded in content as a JSON block - let embedded_calls = Self::parse_tool_uses_json(&content); - - if !embedded_calls.is_empty() { - // Strip the JSON part from content to keep it clean - if let Some(start) = content.find("{\"tool_uses\":") { - content = content[..start].to_string(); - } - - // Convert ToolCall to ToolCallDelta for streaming - let deltas: Vec = embedded_calls.into_iter().enumerate().map(|(idx, tc)| { - crate::models::ToolCallDelta { - index: idx as u32, - id: Some(tc.id), - call_type: Some("function".to_string()), - function: 
Some(crate::models::FunctionCallDelta { - name: Some(tc.function.name), - arguments: Some(tc.function.arguments), - }), - } - }).collect(); - - if let Some(ref mut existing) = tool_calls { - existing.extend(deltas); + // If we have "{"tool_uses":" but no full block yet, we just wait (don't yield) + } else if content_buffer.contains("to=multi_tool_use.parallel") { + // Wait for the JSON block that usually follows } else { - tool_calls = Some(deltas); + // Standard text, yield and clear buffer + let content = std::mem::take(&mut content_buffer); + yield ProviderStreamChunk { + content, + reasoning_content: None, + finish_reason: None, + tool_calls: None, + model: model.clone(), + usage: None, + }; } } - if !content.is_empty() || finish_reason.is_some() || tool_calls.is_some() { + if finish_reason.is_some() || tool_calls.is_some() { yield ProviderStreamChunk { - content, + content: String::new(), reasoning_content: None, finish_reason, tool_calls, @@ -825,11 +860,16 @@ impl super::Provider for OpenAIProvider { Ok(_) => continue, Err(e) => { // Attempt to probe for the actual error body + let mut probe_body_no_stream = probe_body.clone(); + if let Some(obj) = probe_body_no_stream.as_object_mut() { + obj.remove("stream"); + } + let probe_resp = probe_client .post(&url) .header("Authorization", format!("Bearer {}", api_key)) - .header("Accept", "application/json") // Ask for JSON during probe - .json(&probe_body) + .header("Accept", "application/json") + .json(&probe_body_no_stream) .send() .await; From 441270317c5fafbdc3b7a9a3457a916bc332dbe4 Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Wed, 18 Mar 2026 15:07:17 +0000 Subject: [PATCH 06/14] fix(openai): strip internal metadata from gpt-5.4 responses - Add strip_internal_metadata helper to remove prefixes like 'to=multi_tool_use.parallel' - Clean up Thai text preambles reported in the journal - Apply metadata stripping to both synchronous and streaming response paths - Improve visual quality of proxied model 
responses --- src/providers/openai.rs | 49 ++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/src/providers/openai.rs b/src/providers/openai.rs index 19c8c81c..88c841a5 100644 --- a/src/providers/openai.rs +++ b/src/providers/openai.rs @@ -43,6 +43,7 @@ impl OpenAIProvider { pub fn parse_tool_uses_json(text: &str) -> Vec { let mut calls = Vec::new(); if let Some(start) = text.find("{\"tool_uses\":") { + // ... (rest of method unchanged) // Find the end of the JSON block by matching braces let sub = &text[start..]; let mut brace_count = 0; @@ -87,6 +88,27 @@ impl OpenAIProvider { } calls } + + /// Strips internal metadata prefixes like 'to=multi_tool_use.parallel' from model responses. + pub fn strip_internal_metadata(text: &str) -> String { + let mut result = text.to_string(); + + // Patterns to strip + let patterns = [ + "to=multi_tool_use.parallel", + "to=functions.multi_tool_use", + "ส่งเงินบาทไทยjson", // User reported Thai text preamble + ]; + + for p in patterns { + if let Some(start) = result.find(p) { + // Remove the pattern and any whitespace around it + result.replace_range(start..start + p.len(), ""); + } + } + + result.trim().to_string() + } } #[async_trait] @@ -404,6 +426,8 @@ impl super::Provider for OpenAIProvider { } tool_calls.extend(embedded_calls); } + + content_text = Self::strip_internal_metadata(&content_text); Ok(ProviderResponse { content: content_text, @@ -773,9 +797,10 @@ impl super::Provider for OpenAIProvider { if let Some(start) = content_buffer.find("{\"tool_uses\":") { // Yield text before the JSON block let preamble = content_buffer[..start].to_string(); - if !preamble.is_empty() { + let stripped_preamble = Self::strip_internal_metadata(&preamble); + if !stripped_preamble.is_empty() { yield ProviderStreamChunk { - content: preamble, + content: stripped_preamble, reasoning_content: None, finish_reason: None, tool_calls: None, @@ -785,6 +810,7 @@ impl super::Provider for 
OpenAIProvider { } // Yield the tool calls + // ... (rest of tool call yielding unchanged) let deltas: Vec = embedded_calls.into_iter().enumerate().map(|(idx, tc)| { crate::models::ToolCallDelta { index: idx as u32, @@ -834,14 +860,17 @@ impl super::Provider for OpenAIProvider { } else { // Standard text, yield and clear buffer let content = std::mem::take(&mut content_buffer); - yield ProviderStreamChunk { - content, - reasoning_content: None, - finish_reason: None, - tool_calls: None, - model: model.clone(), - usage: None, - }; + let stripped_content = Self::strip_internal_metadata(&content); + if !stripped_content.is_empty() { + yield ProviderStreamChunk { + content: stripped_content, + reasoning_content: None, + finish_reason: None, + tool_calls: None, + model: model.clone(), + usage: None, + }; + } } } From cb619f92866128fa5b1820c4792b28cd7645d990 Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Wed, 18 Mar 2026 15:17:56 +0000 Subject: [PATCH 07/14] fix(openai): improve Responses API stream robustness and diagnostics - Implement final buffer flush in streaming path to prevent data loss - Increase probe response body logging to 500 characters - Ensure internal metadata is stripped even on final flush - Fix potential hang when stream ends without explicit [DONE] event --- src/providers/openai.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/providers/openai.rs b/src/providers/openai.rs index 88c841a5..9f7708ec 100644 --- a/src/providers/openai.rs +++ b/src/providers/openai.rs @@ -907,8 +907,9 @@ impl super::Provider for OpenAIProvider { let status = r.status(); let body = r.text().await.unwrap_or_default(); if status.is_success() { - tracing::warn!("Responses stream ended prematurely but probe returned 200 OK. 
Body: {}", body); - Err(AppError::ProviderError(format!("Responses stream ended (server sent 200 OK with body: {})", body)))?; + let preview = if body.len() > 500 { format!("{}...", &body[..500]) } else { body.clone() }; + tracing::warn!("Responses stream ended prematurely but probe returned 200 OK. Body: {}", preview); + Err(AppError::ProviderError(format!("Responses stream ended (server sent 200 OK with body: {})", preview)))?; } else { tracing::error!("OpenAI Responses Stream Error Probe ({}): {}", status, body); Err(AppError::ProviderError(format!("OpenAI Responses API error ({}): {}", status, body)))?; @@ -922,6 +923,21 @@ impl super::Provider for OpenAIProvider { } } } + + // Final flush of content_buffer if not empty + if !content_buffer.is_empty() { + let stripped = Self::strip_internal_metadata(&content_buffer); + if !stripped.is_empty() { + yield ProviderStreamChunk { + content: stripped, + reasoning_content: None, + finish_reason: None, + tool_calls: None, + model: model.clone(), + usage: None, + }; + } + } }; Ok(Box::pin(stream)) From 7c2a317c015a6e364e8237bb71ad7347ccfc7b56 Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Wed, 18 Mar 2026 15:32:08 +0000 Subject: [PATCH 08/14] fix(openai): add missing stream parameter for Responses API - The OpenAI Responses API actually requires the 'stream: true' parameter in the JSON body, contrary to some documentation summaries. - Omitting it caused the API to return a full application/json response instead of SSE text/event-stream, leading to stream failures and probe warnings in the proxy logs. 
--- src/providers/openai.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/providers/openai.rs b/src/providers/openai.rs index 9f7708ec..f554f424 100644 --- a/src/providers/openai.rs +++ b/src/providers/openai.rs @@ -664,7 +664,6 @@ impl super::Provider for OpenAIProvider { let mut body = serde_json::json!({ "model": request.model, "input": input_parts, - "stream": true, }); // Add standard parameters @@ -711,6 +710,7 @@ impl super::Provider for OpenAIProvider { } } + body["stream"] = serde_json::json!(true); let url = format!("{}/responses", self.config.base_url); let api_key = self.api_key.clone(); let model = request.model.clone(); @@ -889,16 +889,11 @@ impl super::Provider for OpenAIProvider { Ok(_) => continue, Err(e) => { // Attempt to probe for the actual error body - let mut probe_body_no_stream = probe_body.clone(); - if let Some(obj) = probe_body_no_stream.as_object_mut() { - obj.remove("stream"); - } - let probe_resp = probe_client .post(&url) .header("Authorization", format!("Bearer {}", api_key)) .header("Accept", "application/json") - .json(&probe_body_no_stream) + .json(&probe_body) .send() .await; From 24a898c9a7286f2d97fd6fff9811f3ce06e6762b Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Wed, 18 Mar 2026 15:39:18 +0000 Subject: [PATCH 09/14] fix(openai): gracefully handle stream endings - The Responses API ends streams without a final '[DONE]' message. - This causes reqwest_eventsource to return Error::StreamEnded. - Previously, this was treated as a premature termination, triggering an error probe. - We now explicitly match and break on Err(StreamEnded) for normal completion. 
--- src/providers/openai.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/providers/openai.rs b/src/providers/openai.rs index f554f424..d1cc139f 100644 --- a/src/providers/openai.rs +++ b/src/providers/openai.rs @@ -524,6 +524,7 @@ impl super::Provider for OpenAIProvider { } } Ok(_) => continue, + Err(reqwest_eventsource::Error::StreamEnded) => break, Err(e) => { // Attempt to probe for the actual error body let probe_resp = probe_client @@ -887,6 +888,7 @@ impl super::Provider for OpenAIProvider { } } Ok(_) => continue, + Err(reqwest_eventsource::Error::StreamEnded) => break, Err(e) => { // Attempt to probe for the actual error body let probe_resp = probe_client From 79dc8fe409aa1e02af669f4d4ec5397fc17f654c Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Wed, 18 Mar 2026 16:13:13 +0000 Subject: [PATCH 10/14] fix(openai): correctly parse Responses API tool call events - The Responses API does not use 'response.item.delta' for tool calls. - It uses 'response.output_item.added' to initialize the function call. - It uses 'response.function_call_arguments.delta' for the payload stream. - Updated the streaming parser to catch these events and correctly yield ToolCallDelta objects. - This restores proper streaming of tool calls back to the client. 
--- src/providers/openai.rs | 43 ++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/src/providers/openai.rs b/src/providers/openai.rs index d1cc139f..20b2ae7f 100644 --- a/src/providers/openai.rs +++ b/src/providers/openai.rs @@ -757,15 +757,11 @@ impl super::Provider for OpenAIProvider { content_buffer.push_str(delta); } } - "response.item.delta" => { - if let Some(delta) = chunk.get("delta") { - let t = delta.get("type").and_then(|v| v.as_str()).unwrap_or(""); - if t == "function_call" { - let call_id = delta.get("call_id") - .or_else(|| chunk.get("item_id")) - .and_then(|v| v.as_str()); - let name = delta.get("name").and_then(|v| v.as_str()); - let arguments = delta.get("arguments").and_then(|v| v.as_str()); + "response.output_item.added" => { + if let Some(item) = chunk.get("item") { + if item.get("type").and_then(|v| v.as_str()) == Some("function_call") { + let call_id = item.get("call_id").and_then(|v| v.as_str()); + let name = item.get("name").and_then(|v| v.as_str()); tool_calls = Some(vec![crate::models::ToolCallDelta { index: chunk.get("output_index").and_then(|v| v.as_u64()).unwrap_or(0) as u32, @@ -773,17 +769,26 @@ impl super::Provider for OpenAIProvider { call_type: Some("function".to_string()), function: Some(crate::models::FunctionCallDelta { name: name.map(|s| s.to_string()), - arguments: arguments.map(|s| s.to_string()), + arguments: Some("".to_string()), // Start with empty arguments }), }]); - } else if t == "message" { - if let Some(text) = delta.get("text").and_then(|v| v.as_str()) { - content_buffer.push_str(text); - } } } } - "response.output_text.done" | "response.item.done" | "response.done" => { + "response.function_call_arguments.delta" => { + if let Some(delta) = chunk.get("delta").and_then(|v| v.as_str()) { + tool_calls = Some(vec![crate::models::ToolCallDelta { + index: chunk.get("output_index").and_then(|v| v.as_u64()).unwrap_or(0) as u32, + id: None, + call_type: None, + 
function: Some(crate::models::FunctionCallDelta { + name: None, + arguments: Some(delta.to_string()), + }), + }]); + } + } + "response.output_text.done" | "response.item.done" | "response.completed" => { finish_reason = Some("stop".to_string()); } _ => {} @@ -890,12 +895,18 @@ impl super::Provider for OpenAIProvider { Ok(_) => continue, Err(reqwest_eventsource::Error::StreamEnded) => break, Err(e) => { + tracing::error!("Responses stream encountered an error: {}", e); // Attempt to probe for the actual error body + let mut probe_body_no_stream = probe_body.clone(); + if let Some(obj) = probe_body_no_stream.as_object_mut() { + obj.remove("stream"); + } + let probe_resp = probe_client .post(&url) .header("Authorization", format!("Bearer {}", api_key)) .header("Accept", "application/json") - .json(&probe_body) + .json(&probe_body_no_stream) .send() .await; From 1cac45502a4a972ae0032bb97d247a0978e45669 Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Wed, 18 Mar 2026 17:48:55 +0000 Subject: [PATCH 11/14] fix(openai): fix stream whitespace loss and finish_reason for gpt-5.4 - Remove overzealous .trim() in strip_internal_metadata which destroyed whitespace between text stream chunks, causing client hangs - Fix finish_reason logic to only yield once at the end of the stream - Correctly yield finish_reason: 'tool_calls' instead of 'stop' when tool calls are generated --- src/providers/openai.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/providers/openai.rs b/src/providers/openai.rs index 20b2ae7f..ff2de314 100644 --- a/src/providers/openai.rs +++ b/src/providers/openai.rs @@ -102,12 +102,12 @@ impl OpenAIProvider { for p in patterns { if let Some(start) = result.find(p) { - // Remove the pattern and any whitespace around it + // Remove the pattern result.replace_range(start..start + p.len(), ""); } } - result.trim().to_string() + result } } @@ -730,6 +730,7 @@ impl super::Provider for OpenAIProvider { let stream = 
async_stream::try_stream! { let mut es = es; let mut content_buffer = String::new(); + let mut has_tool_calls = false; while let Some(event) = es.next().await { match event { @@ -760,6 +761,7 @@ impl super::Provider for OpenAIProvider { "response.output_item.added" => { if let Some(item) = chunk.get("item") { if item.get("type").and_then(|v| v.as_str()) == Some("function_call") { + has_tool_calls = true; let call_id = item.get("call_id").and_then(|v| v.as_str()); let name = item.get("name").and_then(|v| v.as_str()); @@ -777,6 +779,7 @@ impl super::Provider for OpenAIProvider { } "response.function_call_arguments.delta" => { if let Some(delta) = chunk.get("delta").and_then(|v| v.as_str()) { + has_tool_calls = true; tool_calls = Some(vec![crate::models::ToolCallDelta { index: chunk.get("output_index").and_then(|v| v.as_u64()).unwrap_or(0) as u32, id: None, @@ -788,8 +791,8 @@ impl super::Provider for OpenAIProvider { }]); } } - "response.output_text.done" | "response.item.done" | "response.completed" => { - finish_reason = Some("stop".to_string()); + "response.completed" => { + finish_reason = Some(if has_tool_calls { "tool_calls".to_string() } else { "stop".to_string() }); } _ => {} } @@ -800,6 +803,7 @@ impl super::Provider for OpenAIProvider { if content_buffer.contains("{\"tool_uses\":") { let embedded_calls = Self::parse_tool_uses_json(&content_buffer); if !embedded_calls.is_empty() { + has_tool_calls = true; if let Some(start) = content_buffer.find("{\"tool_uses\":") { // Yield text before the JSON block let preamble = content_buffer[..start].to_string(); From 66e8b114b9716ac564afcbe8074e6119b3b1605e Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Wed, 18 Mar 2026 18:05:37 +0000 Subject: [PATCH 12/14] fix(openai): split embedded tool_calls into standard chunk format - Standard OpenAI clients expect tool_calls to be streamed as two parts: 1. Initialization chunk containing 'id', 'type', and 'name', with empty 'arguments'. 2. 
Payload chunk(s) containing 'arguments', with 'id', 'type', and 'name' omitted. - Previously, the proxy was yielding all fields in a single chunk when parsing the custom 'tool_uses' JSON from gpt-5.4, causing strict clients like opencode to fail silently when delegating parallel tasks. - The proxy now splits the extracted JSON into the correct two-chunk sequence, restoring subagent compatibility. --- src/providers/openai.rs | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/providers/openai.rs b/src/providers/openai.rs index ff2de314..ecb6300f 100644 --- a/src/providers/openai.rs +++ b/src/providers/openai.rs @@ -819,15 +819,37 @@ impl super::Provider for OpenAIProvider { }; } - // Yield the tool calls - // ... (rest of tool call yielding unchanged) - let deltas: Vec = embedded_calls.into_iter().enumerate().map(|(idx, tc)| { + // Yield the tool calls in two chunks to mimic standard streaming behavior + // Chunk 1: Initialization (id, name) + let init_deltas: Vec = embedded_calls.iter().enumerate().map(|(idx, tc)| { crate::models::ToolCallDelta { index: idx as u32, - id: Some(tc.id), + id: Some(tc.id.clone()), call_type: Some("function".to_string()), function: Some(crate::models::FunctionCallDelta { - name: Some(tc.function.name), + name: Some(tc.function.name.clone()), + arguments: Some("".to_string()), + }), + } + }).collect(); + + yield ProviderStreamChunk { + content: String::new(), + reasoning_content: None, + finish_reason: None, + tool_calls: Some(init_deltas), + model: model.clone(), + usage: None, + }; + + // Chunk 2: Payload (arguments) + let arg_deltas: Vec = embedded_calls.into_iter().enumerate().map(|(idx, tc)| { + crate::models::ToolCallDelta { + index: idx as u32, + id: None, + call_type: None, + function: Some(crate::models::FunctionCallDelta { + name: None, arguments: Some(tc.function.arguments), }), } @@ -837,7 +859,7 @@ impl super::Provider for OpenAIProvider { content: String::new(), 
reasoning_content: None, finish_reason: None, - tool_calls: Some(deltas), + tool_calls: Some(arg_deltas), model: model.clone(), usage: None, }; From 4de457cc5ebc86b073748c2f183d23e386582905 Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Wed, 18 Mar 2026 18:26:27 +0000 Subject: [PATCH 13/14] fix(openai): correctly map tool_call indexes in Responses API stream - The OpenAI Responses API uses 'output_index' to identify items in the response. - If a response starts with text (output_index 0) followed by a tool call (output_index 1), the standard Chat Completions streaming format requires the first tool call to have index 0. - Previously, the proxy was passing output_index (1) as the tool_call index, causing client-side SDKs to fail parsing the stream and silently drop the tool calls. - Implemented a local mapping within the stream to ensure tool_call indexes are always dense and start at 0. --- src/providers/openai.rs | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/providers/openai.rs b/src/providers/openai.rs index ecb6300f..240ea6b1 100644 --- a/src/providers/openai.rs +++ b/src/providers/openai.rs @@ -731,6 +731,8 @@ impl super::Provider for OpenAIProvider { let mut es = es; let mut content_buffer = String::new(); let mut has_tool_calls = false; + let mut tool_index_map = std::collections::HashMap::<u32, u32>::new(); + let mut next_tool_index = 0u32; while let Some(event) = es.next().await { match event { @@ -765,8 +767,15 @@ impl super::Provider for OpenAIProvider { let call_id = item.get("call_id").and_then(|v| v.as_str()); let name = item.get("name").and_then(|v| v.as_str()); + let out_idx = chunk.get("output_index").and_then(|v| v.as_u64()).unwrap_or(0) as u32; + let tc_idx = *tool_index_map.entry(out_idx).or_insert_with(|| { + let i = next_tool_index; + next_tool_index += 1; + i + }); + tool_calls = Some(vec![crate::models::ToolCallDelta { - index: chunk.get("output_index").and_then(|v| v.as_u64()).unwrap_or(0) as u32, +
index: tc_idx, id: call_id.map(|s| s.to_string()), call_type: Some("function".to_string()), function: Some(crate::models::FunctionCallDelta { @@ -780,8 +789,16 @@ impl super::Provider for OpenAIProvider { "response.function_call_arguments.delta" => { if let Some(delta) = chunk.get("delta").and_then(|v| v.as_str()) { has_tool_calls = true; + + let out_idx = chunk.get("output_index").and_then(|v| v.as_u64()).unwrap_or(0) as u32; + let tc_idx = *tool_index_map.entry(out_idx).or_insert_with(|| { + let i = next_tool_index; + next_tool_index += 1; + i + }); + tool_calls = Some(vec![crate::models::ToolCallDelta { - index: chunk.get("output_index").and_then(|v| v.as_u64()).unwrap_or(0) as u32, + index: tc_idx, id: None, call_type: None, function: Some(crate::models::FunctionCallDelta { From 57aa0aa70e8d6e44ae30b00ab926660bdeda3517 Mon Sep 17 00:00:00 2001 From: hobokenchicken Date: Wed, 18 Mar 2026 18:31:24 +0000 Subject: [PATCH 14/14] fix(openai): unify tool call indexing for both standard and embedded calls - Sequential next_tool_index is now used for both Responses API 'function_call' events and the proxy's 'tool_uses' JSON extraction. - This ensures tool_calls arrays in the stream always start at index 0 and are dense, even if standard and embedded calls were somehow mixed. - Fixed 'payload_idx' logic to correctly align argument chunks with their initialization chunks. 
--- src/providers/openai.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/providers/openai.rs b/src/providers/openai.rs index 240ea6b1..c7c796e8 100644 --- a/src/providers/openai.rs +++ b/src/providers/openai.rs @@ -838,9 +838,11 @@ impl super::Provider for OpenAIProvider { // Yield the tool calls in two chunks to mimic standard streaming behavior // Chunk 1: Initialization (id, name) - let init_deltas: Vec = embedded_calls.iter().enumerate().map(|(idx, tc)| { + let init_deltas: Vec = embedded_calls.iter().map(|tc| { + let tc_idx = next_tool_index; + next_tool_index += 1; crate::models::ToolCallDelta { - index: idx as u32, + index: tc_idx, id: Some(tc.id.clone()), call_type: Some("function".to_string()), function: Some(crate::models::FunctionCallDelta { @@ -860,9 +862,13 @@ impl super::Provider for OpenAIProvider { }; // Chunk 2: Payload (arguments) - let arg_deltas: Vec = embedded_calls.into_iter().enumerate().map(|(idx, tc)| { + // Reset temp index for payload chunk + let mut payload_idx = next_tool_index - embedded_calls.len() as u32; + let arg_deltas: Vec = embedded_calls.into_iter().map(|tc| { + let tc_idx = payload_idx; + payload_idx += 1; crate::models::ToolCallDelta { - index: idx as u32, + index: tc_idx, id: None, call_type: None, function: Some(crate::models::FunctionCallDelta {