From cb5b92155025c5f5c056540c2d4e04286e556332 Mon Sep 17 00:00:00 2001
From: hobokenchicken <dustin@dustin.coffee>
Date: Wed, 18 Mar 2026 13:14:51 +0000
Subject: [PATCH 01/14] feat(openai): implement tool support for gpt-5.4 via
 Responses API

- Implement polymorphic 'input' structure for /responses endpoint
- Map 'tool' role to 'function_call_output' items
- Handle assistant 'tool_calls' as separate 'function_call' items
- Add synchronous and streaming parsers for function_call items
- Fix 400 Bad Request 'Invalid value: tool' error
---
 src/providers/openai.rs | 214 +++++++++++++++++++++++++++++++++-------
 1 file changed, 177 insertions(+), 37 deletions(-)

diff --git a/src/providers/openai.rs b/src/providers/openai.rs
index 7bc5d86c..5665e6be 100644
--- a/src/providers/openai.rs
+++ b/src/providers/openai.rs
@@ -112,10 +112,57 @@ impl super::Provider for OpenAIProvider {
         let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
         let mut input_parts = Vec::new();
         for m in &messages_json {
-            let mut role = m["role"].as_str().unwrap_or("user").to_string();
-            // Newer models (gpt-5, o1) prefer "developer" over "system"
-            if role == "system" {
-                role = "developer".to_string();
+            let role = m["role"].as_str().unwrap_or("user");
+            
+            if role == "tool" {
+                input_parts.push(serde_json::json!({
+                    "type": "function_call_output",
+                    "call_id": m.get("tool_call_id").and_then(|v| v.as_str()).unwrap_or(""),
+                    "output": m.get("content").and_then(|v| v.as_str()).unwrap_or("")
+                }));
+                continue;
+            }
+
+            if role == "assistant" && m.get("tool_calls").is_some() {
+                 // Push message part if it exists
+                 let content_val = m.get("content").cloned().unwrap_or(serde_json::json!(""));
+                 if !content_val.is_null() && (content_val.is_array() && !content_val.as_array().unwrap().is_empty() || content_val.is_string() && !content_val.as_str().unwrap().is_empty()) {
+                     let mut content = content_val.clone();
+                     if let Some(text) = content.as_str() {
+                         content = serde_json::json!([{ "type": "output_text", "text": text }]);
+                     } else if let Some(arr) = content.as_array_mut() {
+                         for part in arr {
+                             if let Some(obj) = part.as_object_mut() {
+                                 if obj.get("type").and_then(|v| v.as_str()) == Some("text") {
+                                     obj.insert("type".to_string(), serde_json::json!("output_text"));
+                                 }
+                             }
+                         }
+                     }
+                     input_parts.push(serde_json::json!({
+                         "type": "message",
+                         "role": "assistant",
+                         "content": content
+                     }));
+                 }
+                 
+                 // Push tool calls as separate items
+                 if let Some(tcs) = m.get("tool_calls").and_then(|v| v.as_array()) {
+                     for tc in tcs {
+                         input_parts.push(serde_json::json!({
+                             "type": "function_call",
+                             "call_id": tc["id"],
+                             "name": tc["function"]["name"],
+                             "arguments": tc["function"]["arguments"]
+                         }));
+                     }
+                 }
+                 continue;
+            }
+
+            let mut mapped_role = role.to_string();
+            if mapped_role == "system" {
+                mapped_role = "developer".to_string();
             }
             
             let mut content = m.get("content").cloned().unwrap_or(serde_json::json!([]));
@@ -127,12 +174,11 @@ impl super::Provider for OpenAIProvider {
                         if let Some(t) = part_obj.get("type").and_then(|v| v.as_str()) {
                             match t {
                                 "text" => {
-                                    let new_type = if role == "assistant" { "output_text" } else { "input_text" };
+                                    let new_type = if mapped_role == "assistant" { "output_text" } else { "input_text" };
                                     part_obj.insert("type".to_string(), serde_json::json!(new_type));
                                 }
                                 "image_url" => {
-                                    // Assistant typically doesn't have image_url in history this way, but for safety:
-                                    let new_type = if role == "assistant" { "output_image" } else { "input_image" };
+                                    let new_type = if mapped_role == "assistant" { "output_image" } else { "input_image" };
                                     part_obj.insert("type".to_string(), serde_json::json!(new_type));
                                     if let Some(img_url) = part_obj.remove("image_url") {
                                         part_obj.insert("image".to_string(), img_url);
@@ -144,12 +190,13 @@ impl super::Provider for OpenAIProvider {
                     }
                 }
             } else if let Some(text) = content.as_str() {
-                let new_type = if role == "assistant" { "output_text" } else { "input_text" };
+                let new_type = if mapped_role == "assistant" { "output_text" } else { "input_text" };
                 content = serde_json::json!([{ "type": new_type, "text": text }]);
             }
 
             input_parts.push(serde_json::json!({
-                "role": role,
+                "type": "message",
+                "role": mapped_role,
                 "content": content
             }));
         }
@@ -200,18 +247,43 @@ impl super::Provider for OpenAIProvider {
 
         // Normalize Responses API output into ProviderResponse
         let mut content_text = String::new();
+        let mut tool_calls = Vec::new();
         if let Some(output) = resp_json.get("output").and_then(|o| o.as_array()) {
             for out in output {
-                if let Some(contents) = out.get("content").and_then(|c| c.as_array()) {
-                    for item in contents {
-                        if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
-                            if !content_text.is_empty() { content_text.push_str("\n"); }
-                            content_text.push_str(text);
-                        } else if let Some(parts) = item.get("parts").and_then(|p| p.as_array()) {
-                            for p in parts {
-                                if let Some(t) = p.as_str() {
+                let item_type = out.get("type").and_then(|v| v.as_str()).unwrap_or("");
+                match item_type {
+                    "message" => {
+                        if let Some(contents) = out.get("content").and_then(|c| c.as_array()) {
+                            for item in contents {
+                                if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
                                     if !content_text.is_empty() { content_text.push_str("\n"); }
-                                    content_text.push_str(t);
+                                    content_text.push_str(text);
+                                }
+                            }
+                        }
+                    }
+                    "function_call" => {
+                        let id = out.get("call_id")
+                            .or_else(|| out.get("item_id"))
+                            .or_else(|| out.get("id"))
+                            .and_then(|v| v.as_str())
+                            .unwrap_or("")
+                            .to_string();
+                        let name = out.get("name").and_then(|v| v.as_str()).unwrap_or("").to_string();
+                        let arguments = out.get("arguments").and_then(|v| v.as_str()).unwrap_or("").to_string();
+                        tool_calls.push(crate::models::ToolCall {
+                            id,
+                            call_type: "function".to_string(),
+                            function: crate::models::FunctionCall { name, arguments },
+                        });
+                    }
+                    _ => {
+                        // Fallback for older/nested structure
+                        if let Some(contents) = out.get("content").and_then(|c| c.as_array()) {
+                            for item in contents {
+                                if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
+                                    if !content_text.is_empty() { content_text.push_str("\n"); }
+                                    content_text.push_str(text);
                                 }
                             }
                         }
@@ -244,7 +316,7 @@ impl super::Provider for OpenAIProvider {
         Ok(ProviderResponse {
             content: content_text,
             reasoning_content: None,
-            tool_calls: None,
+            tool_calls: if tool_calls.is_empty() { None } else { Some(tool_calls) },
             prompt_tokens,
             completion_tokens,
             reasoning_tokens: 0,
@@ -379,10 +451,57 @@ impl super::Provider for OpenAIProvider {
         let messages_json = helpers::messages_to_openai_json(&request.messages).await?;
         let mut input_parts = Vec::new();
         for m in &messages_json {
-            let mut role = m["role"].as_str().unwrap_or("user").to_string();
-            // Newer models (gpt-5, o1) prefer "developer" over "system"
-            if role == "system" {
-                role = "developer".to_string();
+            let role = m["role"].as_str().unwrap_or("user");
+            
+            if role == "tool" {
+                input_parts.push(serde_json::json!({
+                    "type": "function_call_output",
+                    "call_id": m.get("tool_call_id").and_then(|v| v.as_str()).unwrap_or(""),
+                    "output": m.get("content").and_then(|v| v.as_str()).unwrap_or("")
+                }));
+                continue;
+            }
+
+            if role == "assistant" && m.get("tool_calls").is_some() {
+                 // Push message part if it exists
+                 let content_val = m.get("content").cloned().unwrap_or(serde_json::json!(""));
+                 if !content_val.is_null() && (content_val.is_array() && !content_val.as_array().unwrap().is_empty() || content_val.is_string() && !content_val.as_str().unwrap().is_empty()) {
+                     let mut content = content_val.clone();
+                     if let Some(text) = content.as_str() {
+                         content = serde_json::json!([{ "type": "output_text", "text": text }]);
+                     } else if let Some(arr) = content.as_array_mut() {
+                         for part in arr {
+                             if let Some(obj) = part.as_object_mut() {
+                                 if obj.get("type").and_then(|v| v.as_str()) == Some("text") {
+                                     obj.insert("type".to_string(), serde_json::json!("output_text"));
+                                 }
+                             }
+                         }
+                     }
+                     input_parts.push(serde_json::json!({
+                         "type": "message",
+                         "role": "assistant",
+                         "content": content
+                     }));
+                 }
+                 
+                 // Push tool calls as separate items
+                 if let Some(tcs) = m.get("tool_calls").and_then(|v| v.as_array()) {
+                     for tc in tcs {
+                         input_parts.push(serde_json::json!({
+                             "type": "function_call",
+                             "call_id": tc["id"],
+                             "name": tc["function"]["name"],
+                             "arguments": tc["function"]["arguments"]
+                         }));
+                     }
+                 }
+                 continue;
+            }
+
+            let mut mapped_role = role.to_string();
+            if mapped_role == "system" {
+                mapped_role = "developer".to_string();
             }
             
             let mut content = m.get("content").cloned().unwrap_or(serde_json::json!([]));
@@ -394,12 +513,11 @@ impl super::Provider for OpenAIProvider {
                         if let Some(t) = part_obj.get("type").and_then(|v| v.as_str()) {
                             match t {
                                 "text" => {
-                                    let new_type = if role == "assistant" { "output_text" } else { "input_text" };
+                                    let new_type = if mapped_role == "assistant" { "output_text" } else { "input_text" };
                                     part_obj.insert("type".to_string(), serde_json::json!(new_type));
                                 }
                                 "image_url" => {
-                                    // Assistant typically doesn't have image_url in history this way, but for safety:
-                                    let new_type = if role == "assistant" { "output_image" } else { "input_image" };
+                                    let new_type = if mapped_role == "assistant" { "output_image" } else { "input_image" };
                                     part_obj.insert("type".to_string(), serde_json::json!(new_type));
                                     if let Some(img_url) = part_obj.remove("image_url") {
                                         part_obj.insert("image".to_string(), img_url);
@@ -411,12 +529,13 @@ impl super::Provider for OpenAIProvider {
                     }
                 }
             } else if let Some(text) = content.as_str() {
-                let new_type = if role == "assistant" { "output_text" } else { "input_text" };
+                let new_type = if mapped_role == "assistant" { "output_text" } else { "input_text" };
                 content = serde_json::json!([{ "type": new_type, "text": text }]);
             }
 
             input_parts.push(serde_json::json!({
-                "role": role,
+                "type": "message",
+                "role": mapped_role,
                 "content": content
             }));
         }
@@ -475,6 +594,7 @@ impl super::Provider for OpenAIProvider {
                             // Responses API specific parsing for streaming
                             let mut content = String::new();
                             let mut finish_reason = None;
+                            let mut tool_calls = None;
                             
                             let event_type = chunk.get("type").and_then(|v| v.as_str()).unwrap_or("");
                             
@@ -484,15 +604,35 @@ impl super::Provider for OpenAIProvider {
                                         content.push_str(delta);
                                     }
                                 }
-                                "response.output_text.done" => {
-                                    if let Some(text) = chunk.get("text").and_then(|v| v.as_str()) {
-                                        // Some implementations send the full text at the end
-                                        // We usually prefer deltas, but if we haven't seen them, this is the fallback.
-                                        // However, if we're already yielding deltas, we might not want this.
-                                        // For now, let's just use it as a signal that we're done.
-                                        finish_reason = Some("stop".to_string());
+                                "response.item.delta" => {
+                                    if let Some(delta) = chunk.get("delta") {
+                                        let t = delta.get("type").and_then(|v| v.as_str()).unwrap_or("");
+                                        if t == "function_call" {
+                                            let call_id = delta.get("call_id")
+                                                .or_else(|| chunk.get("item_id"))
+                                                .and_then(|v| v.as_str());
+                                            let name = delta.get("name").and_then(|v| v.as_str());
+                                            let arguments = delta.get("arguments").and_then(|v| v.as_str());
+                                            
+                                            tool_calls = Some(vec![crate::models::ToolCallDelta {
+                                                index: chunk.get("output_index").and_then(|v| v.as_u64()).unwrap_or(0) as u32,
+                                                id: call_id.map(|s| s.to_string()),
+                                                call_type: Some("function".to_string()),
+                                                function: Some(crate::models::FunctionCallDelta {
+                                                    name: name.map(|s| s.to_string()),
+                                                    arguments: arguments.map(|s| s.to_string()),
+                                                }),
+                                            }]);
+                                        } else if t == "message" {
+                                            if let Some(text) = delta.get("text").and_then(|v| v.as_str()) {
+                                                content.push_str(text);
+                                            }
+                                        }
                                     }
                                 }
+                                "response.output_text.done" | "response.item.done" => {
+                                    finish_reason = Some("stop".to_string());
+                                }
                                 "response.done" => {
                                     finish_reason = Some("stop".to_string());
                                 }
@@ -514,12 +654,12 @@ impl super::Provider for OpenAIProvider {
                                 }
                             }
 
-                            if !content.is_empty() || finish_reason.is_some() {
+                            if !content.is_empty() || finish_reason.is_some() || tool_calls.is_some() {
                                 yield ProviderStreamChunk {
                                     content,
                                     reasoning_content: None,
                                     finish_reason,
-                                    tool_calls: None,
+                                    tool_calls,
                                     model: model.clone(),
                                     usage: None,
                                 };

From 275ce34d0590d3bf2cb1f599b0d44ce6aabdf695 Mon Sep 17 00:00:00 2001
From: hobokenchicken <dustin@dustin.coffee>
Date: Wed, 18 Mar 2026 13:51:36 +0000
Subject: [PATCH 02/14] fix(openai): fix missing tools and instructions in
 Responses API

- Add 'tools' and 'tool_choice' parameters to streaming Responses API
- Include 'name' field in message items for Responses API input
- Use string content for text-only messages to improve instruction following
- Fix subagents not triggering and files not being created
---
 src/providers/openai.rs | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/src/providers/openai.rs b/src/providers/openai.rs
index 5665e6be..e64d6ad7 100644
--- a/src/providers/openai.rs
+++ b/src/providers/openai.rs
@@ -190,15 +190,20 @@ impl super::Provider for OpenAIProvider {
                     }
                 }
             } else if let Some(text) = content.as_str() {
-                let new_type = if mapped_role == "assistant" { "output_text" } else { "input_text" };
-                content = serde_json::json!([{ "type": new_type, "text": text }]);
+                // If it's just a string, send it as a string instead of an array of objects
+                // as it's safer for standard conversational messages.
+                content = serde_json::json!(text);
             }
 
-            input_parts.push(serde_json::json!({
+            let mut msg_item = serde_json::json!({
                 "type": "message",
                 "role": mapped_role,
                 "content": content
-            }));
+            });
+            if let Some(name) = m.get("name") {
+                msg_item["name"] = name.clone();
+            }
+            input_parts.push(msg_item);
         }
 
         let mut body = serde_json::json!({
@@ -223,6 +228,9 @@ impl super::Provider for OpenAIProvider {
         if let Some(tools) = &request.tools {
             body["tools"] = serde_json::json!(tools);
         }
+        if let Some(tool_choice) = &request.tool_choice {
+            body["tool_choice"] = serde_json::json!(tool_choice);
+        }
 
         let resp = self
             .client
@@ -529,15 +537,20 @@ impl super::Provider for OpenAIProvider {
                     }
                 }
             } else if let Some(text) = content.as_str() {
-                let new_type = if mapped_role == "assistant" { "output_text" } else { "input_text" };
-                content = serde_json::json!([{ "type": new_type, "text": text }]);
+                // If it's just a string, send it as a string instead of an array of objects
+                // as it's safer for standard conversational messages.
+                content = serde_json::json!(text);
             }
 
-            input_parts.push(serde_json::json!({
+            let mut msg_item = serde_json::json!({
                 "type": "message",
                 "role": mapped_role,
                 "content": content
-            }));
+            });
+            if let Some(name) = m.get("name") {
+                msg_item["name"] = name.clone();
+            }
+            input_parts.push(msg_item);
         }
 
         let mut body = serde_json::json!({
@@ -560,6 +573,13 @@ impl super::Provider for OpenAIProvider {
             }
         }
 
+        if let Some(tools) = &request.tools {
+            body["tools"] = serde_json::json!(tools);
+        }
+        if let Some(tool_choice) = &request.tool_choice {
+            body["tool_choice"] = serde_json::json!(tool_choice);
+        }
+
         let url = format!("{}/responses", self.config.base_url);
         let api_key = self.api_key.clone();
         let model = request.model.clone();

From 83e0ad02405ac89d09e3252bc7180361e6fb87ee Mon Sep 17 00:00:00 2001
From: hobokenchicken <dustin@dustin.coffee>
Date: Wed, 18 Mar 2026 14:00:49 +0000
Subject: [PATCH 03/14] fix(openai): flatten tools and tool_choice for
 Responses API

- Map nested 'function' object to top-level fields
- Support string and object-based 'tool_choice' formats
- Fix 400 Bad Request 'Missing required parameter: tools[0].name'
---
 src/providers/openai.rs | 54 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 50 insertions(+), 4 deletions(-)

diff --git a/src/providers/openai.rs b/src/providers/openai.rs
index e64d6ad7..ed654295 100644
--- a/src/providers/openai.rs
+++ b/src/providers/openai.rs
@@ -226,10 +226,33 @@ impl super::Provider for OpenAIProvider {
         }
 
         if let Some(tools) = &request.tools {
-            body["tools"] = serde_json::json!(tools);
+            let flattened: Vec<serde_json::Value> = tools.iter().map(|t| {
+                let mut obj = serde_json::json!({
+                    "type": t.tool_type,
+                    "name": t.function.name,
+                });
+                if let Some(desc) = &t.function.description {
+                    obj["description"] = serde_json::json!(desc);
+                }
+                if let Some(params) = &t.function.parameters {
+                    obj["parameters"] = params.clone();
+                }
+                obj
+            }).collect();
+            body["tools"] = serde_json::json!(flattened);
         }
         if let Some(tool_choice) = &request.tool_choice {
-            body["tool_choice"] = serde_json::json!(tool_choice);
+            match tool_choice {
+                crate::models::ToolChoice::Mode(mode) => {
+                    body["tool_choice"] = serde_json::json!(mode);
+                }
+                crate::models::ToolChoice::Specific(specific) => {
+                    body["tool_choice"] = serde_json::json!({
+                        "type": specific.choice_type,
+                        "name": specific.function.name,
+                    });
+                }
+            }
         }
 
         let resp = self
@@ -574,10 +597,33 @@ impl super::Provider for OpenAIProvider {
         }
 
         if let Some(tools) = &request.tools {
-            body["tools"] = serde_json::json!(tools);
+            let flattened: Vec<serde_json::Value> = tools.iter().map(|t| {
+                let mut obj = serde_json::json!({
+                    "type": t.tool_type,
+                    "name": t.function.name,
+                });
+                if let Some(desc) = &t.function.description {
+                    obj["description"] = serde_json::json!(desc);
+                }
+                if let Some(params) = &t.function.parameters {
+                    obj["parameters"] = params.clone();
+                }
+                obj
+            }).collect();
+            body["tools"] = serde_json::json!(flattened);
         }
         if let Some(tool_choice) = &request.tool_choice {
-            body["tool_choice"] = serde_json::json!(tool_choice);
+            match tool_choice {
+                crate::models::ToolChoice::Mode(mode) => {
+                    body["tool_choice"] = serde_json::json!(mode);
+                }
+                crate::models::ToolChoice::Specific(specific) => {
+                    body["tool_choice"] = serde_json::json!({
+                        "type": specific.choice_type,
+                        "name": specific.function.name,
+                    });
+                }
+            }
         }
 
         let url = format!("{}/responses", self.config.base_url);

From d0be16d8e3b7a87ff9ae23b38441acbfc6ce940f Mon Sep 17 00:00:00 2001
From: hobokenchicken <dustin@dustin.coffee>
Date: Wed, 18 Mar 2026 14:28:38 +0000
Subject: [PATCH 04/14] fix(openai): parse embedded 'tool_uses' JSON for
 gpt-5.4 parallel calls

- Add static parse_tool_uses_json helper to extract embedded tool calls
- Update synchronous and streaming Responses API parsers to detect tool_uses blocks
- Strip tool_uses JSON from content to prevent raw JSON leakage to client
- Resolve lifetime issues by avoiding &self capture in streaming closure
---
 src/providers/openai.rs | 90 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/src/providers/openai.rs b/src/providers/openai.rs
index ed654295..9791f989 100644
--- a/src/providers/openai.rs
+++ b/src/providers/openai.rs
@@ -36,6 +36,57 @@ impl OpenAIProvider {
             pricing: app_config.pricing.openai.clone(),
         })
     }
+
+    /// GPT-5.4 models sometimes emit parallel tool calls as a JSON block starting with
+    /// `{"tool_uses":` inside a text message instead of discrete function_call items.
+    /// Extracts the calls from the first such block in `text`; returns an empty Vec when
+    /// the block is absent, unterminated, or cannot be parsed as JSON.
+    pub fn parse_tool_uses_json(text: &str) -> Vec<crate::models::ToolCall> {
+        let mut calls = Vec::new();
+        if let Some(start) = text.find("{\"tool_uses\":") {
+            // Find the end of the JSON block by matching braces. Braces inside string
+            // literals (e.g. code passed as an argument) must not change the depth.
+            let sub = &text[start..];
+            let mut depth = 0usize;
+            let mut in_string = false;
+            let mut escaped = false;
+            let mut end_idx = None;
+            for (i, c) in sub.char_indices() {
+                match c {
+                    _ if escaped => escaped = false,
+                    '\\' if in_string => escaped = true,
+                    '"' => in_string = !in_string,
+                    '{' if !in_string => depth += 1,
+                    '}' if !in_string => {
+                        // `sub` starts with '{', so depth is at least 1 here.
+                        depth -= 1;
+                        if depth == 0 {
+                            end_idx = Some(i + 1);
+                            break;
+                        }
+                    }
+                    _ => {}
+                }
+            }
+            let parsed = end_idx.and_then(|end| serde_json::from_str::<serde_json::Value>(&sub[..end]).ok());
+            let uses = parsed.as_ref().and_then(|v| v.get("tool_uses")).and_then(|u| u.as_array());
+            for (idx, u) in uses.into_iter().flatten().enumerate() {
+                let raw_name = u.get("recipient_name").and_then(|v| v.as_str()).unwrap_or("unknown");
+                // Strip only a leading "functions." namespace; keep the rest of the name verbatim.
+                let name = raw_name.strip_prefix("functions.").unwrap_or(raw_name).to_string();
+                let arguments = u.get("parameters")
+                    .map(|v| v.to_string())
+                    .unwrap_or_else(|| "{}".to_string());
+                let uid = uuid::Uuid::new_v4().to_string();
+                calls.push(crate::models::ToolCall {
+                    id: format!("call_tu_{}_{}", &uid[..8], idx),
+                    call_type: "function".to_string(),
+                    function: crate::models::FunctionCall { name, arguments },
+                });
+            }
+        }
+        calls
+    }
 }
 
 #[async_trait]
@@ -344,6 +395,16 @@ impl super::Provider for OpenAIProvider {
         let completion_tokens = resp_json.get("usage").and_then(|u| u.get("completion_tokens")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
         let total_tokens = resp_json.get("usage").and_then(|u| u.get("total_tokens")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
 
+        // GPT-5.4 parallel tool calls might be embedded in content_text as a JSON block
+        let embedded_calls = Self::parse_tool_uses_json(&content_text);
+        if !embedded_calls.is_empty() {
+            // Strip the JSON part from content_text to keep it clean
+            if let Some(start) = content_text.find("{\"tool_uses\":") {
+                content_text = content_text[..start].to_string();
+            }
+            tool_calls.extend(embedded_calls);
+        }
+
         Ok(ProviderResponse {
             content: content_text,
             reasoning_content: None,
@@ -720,6 +781,35 @@ impl super::Provider for OpenAIProvider {
                                 }
                             }
 
+                            // GPT-5.4 parallel tool calls might be embedded in content as a JSON block
+                            let embedded_calls = Self::parse_tool_uses_json(&content);
+
+                            if !embedded_calls.is_empty() {
+                                // Strip the JSON part from content to keep it clean
+                                if let Some(start) = content.find("{\"tool_uses\":") {
+                                    content = content[..start].to_string();
+                                }
+                                
+                                // Convert ToolCall to ToolCallDelta for streaming
+                                let deltas: Vec<crate::models::ToolCallDelta> = embedded_calls.into_iter().enumerate().map(|(idx, tc)| {
+                                    crate::models::ToolCallDelta {
+                                        index: idx as u32,
+                                        id: Some(tc.id),
+                                        call_type: Some("function".to_string()),
+                                        function: Some(crate::models::FunctionCallDelta {
+                                            name: Some(tc.function.name),
+                                            arguments: Some(tc.function.arguments),
+                                        }),
+                                    }
+                                }).collect();
+                                
+                                if let Some(ref mut existing) = tool_calls {
+                                    existing.extend(deltas);
+                                } else {
+                                    tool_calls = Some(deltas);
+                                }
+                            }
+
                             if !content.is_empty() || finish_reason.is_some() || tool_calls.is_some() {
                                 yield ProviderStreamChunk {
                                     content,

From 2e4318d84b2e5ed574a767241f25f6568f1e78fa Mon Sep 17 00:00:00 2001
From: hobokenchicken <dustin@dustin.coffee>
Date: Wed, 18 Mar 2026 15:04:15 +0000
Subject: [PATCH 05/14] fix(openai): improve gpt-5.4 parallel tool call
 interception

- Implement cross-delta content buffering in streaming Responses API
- Wait for full 'tool_uses' JSON block before yielding to client
- Handle 'to=multi_tool_use.parallel' preamble by buffering
- Fix stream error probe to not request a new stream
- Remove raw JSON leakage from streaming content
---
 src/providers/openai.rs | 138 ++++++++++++++++++++++++++--------------
 1 file changed, 89 insertions(+), 49 deletions(-)

diff --git a/src/providers/openai.rs b/src/providers/openai.rs
index 9791f989..19c8c81c 100644
--- a/src/providers/openai.rs
+++ b/src/providers/openai.rs
@@ -704,6 +704,8 @@ impl super::Provider for OpenAIProvider {
 
         let stream = async_stream::try_stream! {
             let mut es = es;
+            let mut content_buffer = String::new();
+            
             while let Some(event) = es.next().await {
                 match event {
                     Ok(reqwest_eventsource::Event::Message(msg)) => {
@@ -719,7 +721,6 @@ impl super::Provider for OpenAIProvider {
                             yield p_chunk?;
                         } else {
                             // Responses API specific parsing for streaming
-                            let mut content = String::new();
                             let mut finish_reason = None;
                             let mut tool_calls = None;
                             
@@ -728,7 +729,7 @@ impl super::Provider for OpenAIProvider {
                             match event_type {
                                 "response.output_text.delta" => {
                                     if let Some(delta) = chunk.get("delta").and_then(|v| v.as_str()) {
-                                        content.push_str(delta);
+                                        content_buffer.push_str(delta);
                                     }
                                 }
                                 "response.item.delta" => {
@@ -752,67 +753,101 @@ impl super::Provider for OpenAIProvider {
                                             }]);
                                         } else if t == "message" {
                                             if let Some(text) = delta.get("text").and_then(|v| v.as_str()) {
-                                                content.push_str(text);
+                                                content_buffer.push_str(text);
                                             }
                                         }
                                     }
                                 }
-                                "response.output_text.done" | "response.item.done" => {
+                                "response.output_text.done" | "response.item.done" | "response.done" => {
                                     finish_reason = Some("stop".to_string());
                                 }
-                                "response.done" => {
-                                    finish_reason = Some("stop".to_string());
-                                }
-                                _ => {
-                                    // Fallback to older nested structure if present
-                                    if let Some(output) = chunk.get("output").and_then(|o| o.as_array()) {
-                                        for out in output {
-                                            if let Some(contents) = out.get("content").and_then(|c| c.as_array()) {
-                                                for item in contents {
-                                                    if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
-                                                        content.push_str(text);
-                                                    } else if let Some(delta) = item.get("delta").and_then(|d| d.get("text")).and_then(|t| t.as_str()) {
-                                                        content.push_str(delta);
+                                _ => {}
+                            }
+
+                            // Process content_buffer to extract embedded tool calls or yield text
+                            if !content_buffer.is_empty() {
+                                // If we see the start of a tool call block, we wait for the full block
+                                if content_buffer.contains("{\"tool_uses\":") {
+                                    let embedded_calls = Self::parse_tool_uses_json(&content_buffer);
+                                    if !embedded_calls.is_empty() {
+                                        if let Some(start) = content_buffer.find("{\"tool_uses\":") {
+                                            // Yield text before the JSON block
+                                            let preamble = content_buffer[..start].to_string();
+                                            if !preamble.is_empty() {
+                                                yield ProviderStreamChunk {
+                                                    content: preamble,
+                                                    reasoning_content: None,
+                                                    finish_reason: None,
+                                                    tool_calls: None,
+                                                    model: model.clone(),
+                                                    usage: None,
+                                                };
+                                            }
+                                            
+                                            // Yield the tool calls
+                                            let deltas: Vec<crate::models::ToolCallDelta> = embedded_calls.into_iter().enumerate().map(|(idx, tc)| {
+                                                crate::models::ToolCallDelta {
+                                                    index: idx as u32,
+                                                    id: Some(tc.id),
+                                                    call_type: Some("function".to_string()),
+                                                    function: Some(crate::models::FunctionCallDelta {
+                                                        name: Some(tc.function.name),
+                                                        arguments: Some(tc.function.arguments),
+                                                    }),
+                                                }
+                                            }).collect();
+                                            
+                                            yield ProviderStreamChunk {
+                                                content: String::new(),
+                                                reasoning_content: None,
+                                                finish_reason: None,
+                                                tool_calls: Some(deltas),
+                                                model: model.clone(),
+                                                usage: None,
+                                            };
+                                            
+                                            // Remove the processed part from buffer
+                                            // We need to find the end index correctly
+                                            let sub = &content_buffer[start..];
+                                            let mut brace_count = 0;
+                                            let mut end_idx = 0;
+                                            for (i, c) in sub.char_indices() {
+                                                if c == '{' { brace_count += 1; }
+                                                else if c == '}' {
+                                                    brace_count -= 1;
+                                                    if brace_count == 0 {
+                                                        end_idx = start + i + 1;
+                                                        break;
                                                     }
                                                 }
                                             }
+                                            if end_idx > 0 {
+                                                content_buffer = content_buffer[end_idx..].to_string();
+                                            } else {
+                                                content_buffer.clear();
+                                            }
                                         }
                                     }
-                                }
-                            }
-
-                            // GPT-5.4 parallel tool calls might be embedded in content as a JSON block
-                            let embedded_calls = Self::parse_tool_uses_json(&content);
-
-                            if !embedded_calls.is_empty() {
-                                // Strip the JSON part from content to keep it clean
-                                if let Some(start) = content.find("{\"tool_uses\":") {
-                                    content = content[..start].to_string();
-                                }
-                                
-                                // Convert ToolCall to ToolCallDelta for streaming
-                                let deltas: Vec<crate::models::ToolCallDelta> = embedded_calls.into_iter().enumerate().map(|(idx, tc)| {
-                                    crate::models::ToolCallDelta {
-                                        index: idx as u32,
-                                        id: Some(tc.id),
-                                        call_type: Some("function".to_string()),
-                                        function: Some(crate::models::FunctionCallDelta {
-                                            name: Some(tc.function.name),
-                                            arguments: Some(tc.function.arguments),
-                                        }),
-                                    }
-                                }).collect();
-                                
-                                if let Some(ref mut existing) = tool_calls {
-                                    existing.extend(deltas);
+                                    // If we have "{"tool_uses":" but no full block yet, we just wait (don't yield)
+                                } else if content_buffer.contains("to=multi_tool_use.parallel") {
+                                    // Wait for the JSON block that usually follows
                                 } else {
-                                    tool_calls = Some(deltas);
+                                    // Standard text, yield and clear buffer
+                                    let content = std::mem::take(&mut content_buffer);
+                                    yield ProviderStreamChunk {
+                                        content,
+                                        reasoning_content: None,
+                                        finish_reason: None,
+                                        tool_calls: None,
+                                        model: model.clone(),
+                                        usage: None,
+                                    };
                                 }
                             }
 
-                            if !content.is_empty() || finish_reason.is_some() || tool_calls.is_some() {
+                            if finish_reason.is_some() || tool_calls.is_some() {
                                 yield ProviderStreamChunk {
-                                    content,
+                                    content: String::new(),
                                     reasoning_content: None,
                                     finish_reason,
                                     tool_calls,
@@ -825,11 +860,16 @@ impl super::Provider for OpenAIProvider {
                     Ok(_) => continue,
                     Err(e) => {
                          // Attempt to probe for the actual error body
+                        let mut probe_body_no_stream = probe_body.clone();
+                        if let Some(obj) = probe_body_no_stream.as_object_mut() {
+                            obj.remove("stream");
+                        }
+
                         let probe_resp = probe_client
                             .post(&url)
                             .header("Authorization", format!("Bearer {}", api_key))
-                            .header("Accept", "application/json") // Ask for JSON during probe
-                            .json(&probe_body)
+                            .header("Accept", "application/json")
+                            .json(&probe_body_no_stream)
                             .send()
                             .await;
                         

From 441270317c5fafbdc3b7a9a3457a916bc332dbe4 Mon Sep 17 00:00:00 2001
From: hobokenchicken <dustin@dustin.coffee>
Date: Wed, 18 Mar 2026 15:07:17 +0000
Subject: [PATCH 06/14] fix(openai): strip internal metadata from gpt-5.4
 responses

- Add strip_internal_metadata helper to remove prefixes like 'to=multi_tool_use.parallel'
- Clean up Thai text preambles reported in the journal
- Apply metadata stripping to both synchronous and streaming response paths
- Improve visual quality of proxied model responses
---
 src/providers/openai.rs | 49 ++++++++++++++++++++++++++++++++---------
 1 file changed, 39 insertions(+), 10 deletions(-)

diff --git a/src/providers/openai.rs b/src/providers/openai.rs
index 19c8c81c..88c841a5 100644
--- a/src/providers/openai.rs
+++ b/src/providers/openai.rs
@@ -43,6 +43,7 @@ impl OpenAIProvider {
     pub fn parse_tool_uses_json(text: &str) -> Vec<crate::models::ToolCall> {
         let mut calls = Vec::new();
         if let Some(start) = text.find("{\"tool_uses\":") {
+            // ... (rest of method unchanged)
             // Find the end of the JSON block by matching braces
             let sub = &text[start..];
             let mut brace_count = 0;
@@ -87,6 +88,27 @@ impl OpenAIProvider {
         }
         calls
     }
+
+    /// Strips internal metadata prefixes like 'to=multi_tool_use.parallel' from model responses.
+    pub fn strip_internal_metadata(text: &str) -> String {
+        let mut result = text.to_string();
+        
+        // Patterns to strip
+        let patterns = [
+            "to=multi_tool_use.parallel",
+            "to=functions.multi_tool_use",
+            "ส่งเงินบาทไทยjson", // User reported Thai text preamble
+        ];
+
+        for p in patterns {
+            if let Some(start) = result.find(p) {
+                // Remove the pattern and any whitespace around it
+                result.replace_range(start..start + p.len(), "");
+            }
+        }
+
+        result.trim().to_string()
+    }
 }
 
 #[async_trait]
@@ -404,6 +426,8 @@ impl super::Provider for OpenAIProvider {
             }
             tool_calls.extend(embedded_calls);
         }
+        
+        content_text = Self::strip_internal_metadata(&content_text);
 
         Ok(ProviderResponse {
             content: content_text,
@@ -773,9 +797,10 @@ impl super::Provider for OpenAIProvider {
                                         if let Some(start) = content_buffer.find("{\"tool_uses\":") {
                                             // Yield text before the JSON block
                                             let preamble = content_buffer[..start].to_string();
-                                            if !preamble.is_empty() {
+                                            let stripped_preamble = Self::strip_internal_metadata(&preamble);
+                                            if !stripped_preamble.is_empty() {
                                                 yield ProviderStreamChunk {
-                                                    content: preamble,
+                                                    content: stripped_preamble,
                                                     reasoning_content: None,
                                                     finish_reason: None,
                                                     tool_calls: None,
@@ -785,6 +810,7 @@ impl super::Provider for OpenAIProvider {
                                             }
                                             
                                             // Yield the tool calls
+                                            // ... (rest of tool call yielding unchanged)
                                             let deltas: Vec<crate::models::ToolCallDelta> = embedded_calls.into_iter().enumerate().map(|(idx, tc)| {
                                                 crate::models::ToolCallDelta {
                                                     index: idx as u32,
@@ -834,14 +860,17 @@ impl super::Provider for OpenAIProvider {
                                 } else {
                                     // Standard text, yield and clear buffer
                                     let content = std::mem::take(&mut content_buffer);
-                                    yield ProviderStreamChunk {
-                                        content,
-                                        reasoning_content: None,
-                                        finish_reason: None,
-                                        tool_calls: None,
-                                        model: model.clone(),
-                                        usage: None,
-                                    };
+                                    let stripped_content = Self::strip_internal_metadata(&content);
+                                    if !stripped_content.is_empty() {
+                                        yield ProviderStreamChunk {
+                                            content: stripped_content,
+                                            reasoning_content: None,
+                                            finish_reason: None,
+                                            tool_calls: None,
+                                            model: model.clone(),
+                                            usage: None,
+                                        };
+                                    }
                                 }
                             }
 

From cb619f92866128fa5b1820c4792b28cd7645d990 Mon Sep 17 00:00:00 2001
From: hobokenchicken <dustin@dustin.coffee>
Date: Wed, 18 Mar 2026 15:17:56 +0000
Subject: [PATCH 07/14] fix(openai): improve Responses API stream robustness
 and diagnostics

- Implement final buffer flush in streaming path to prevent data loss
- Increase probe response body logging to 500 characters
- Ensure internal metadata is stripped even on final flush
- Fix potential hang when stream ends without explicit [DONE] event
---
 src/providers/openai.rs | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/providers/openai.rs b/src/providers/openai.rs
index 88c841a5..9f7708ec 100644
--- a/src/providers/openai.rs
+++ b/src/providers/openai.rs
@@ -907,8 +907,9 @@ impl super::Provider for OpenAIProvider {
                                 let status = r.status();
                                 let body = r.text().await.unwrap_or_default();
                                 if status.is_success() {
-                                    tracing::warn!("Responses stream ended prematurely but probe returned 200 OK. Body: {}", body);
-                                    Err(AppError::ProviderError(format!("Responses stream ended (server sent 200 OK with body: {})", body)))?;
+                                    let preview = if body.len() > 500 { format!("{}...", &body[..500]) } else { body.clone() };
+                                    tracing::warn!("Responses stream ended prematurely but probe returned 200 OK. Body: {}", preview);
+                                    Err(AppError::ProviderError(format!("Responses stream ended (server sent 200 OK with body: {})", preview)))?;
                                 } else {
                                     tracing::error!("OpenAI Responses Stream Error Probe ({}): {}", status, body);
                                     Err(AppError::ProviderError(format!("OpenAI Responses API error ({}): {}", status, body)))?;
@@ -922,6 +923,21 @@ impl super::Provider for OpenAIProvider {
                     }
                 }
             }
+
+            // Final flush of content_buffer if not empty
+            if !content_buffer.is_empty() {
+                let stripped = Self::strip_internal_metadata(&content_buffer);
+                if !stripped.is_empty() {
+                    yield ProviderStreamChunk {
+                        content: stripped,
+                        reasoning_content: None,
+                        finish_reason: None,
+                        tool_calls: None,
+                        model: model.clone(),
+                        usage: None,
+                    };
+                }
+            }
         };
 
         Ok(Box::pin(stream))

From 7c2a317c015a6e364e8237bb71ad7347ccfc7b56 Mon Sep 17 00:00:00 2001
From: hobokenchicken <dustin@dustin.coffee>
Date: Wed, 18 Mar 2026 15:32:08 +0000
Subject: [PATCH 08/14] fix(openai): add missing stream parameter for Responses
 API

- The OpenAI Responses API requires the 'stream: true' parameter in the
  JSON body, contrary to some documentation summaries.
- Omitting it caused the API to return a full application/json response
  instead of an SSE text/event-stream, leading to stream failures and
  probe warnings in the proxy logs.
---
 src/providers/openai.rs | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/providers/openai.rs b/src/providers/openai.rs
index 9f7708ec..f554f424 100644
--- a/src/providers/openai.rs
+++ b/src/providers/openai.rs
@@ -664,7 +664,6 @@ impl super::Provider for OpenAIProvider {
         let mut body = serde_json::json!({
             "model": request.model,
             "input": input_parts,
-            "stream": true,
         });
 
         // Add standard parameters
@@ -711,6 +710,7 @@ impl super::Provider for OpenAIProvider {
             }
         }
 
+        body["stream"] = serde_json::json!(true);
         let url = format!("{}/responses", self.config.base_url);
         let api_key = self.api_key.clone();
         let model = request.model.clone();
@@ -889,16 +889,11 @@ impl super::Provider for OpenAIProvider {
                     Ok(_) => continue,
                     Err(e) => {
                          // Attempt to probe for the actual error body
-                        let mut probe_body_no_stream = probe_body.clone();
-                        if let Some(obj) = probe_body_no_stream.as_object_mut() {
-                            obj.remove("stream");
-                        }
-
                         let probe_resp = probe_client
                             .post(&url)
                             .header("Authorization", format!("Bearer {}", api_key))
                             .header("Accept", "application/json")
-                            .json(&probe_body_no_stream)
+                            .json(&probe_body)
                             .send()
                             .await;
                         

From 24a898c9a7286f2d97fd6fff9811f3ce06e6762b Mon Sep 17 00:00:00 2001
From: hobokenchicken <dustin@dustin.coffee>
Date: Wed, 18 Mar 2026 15:39:18 +0000
Subject: [PATCH 09/14] fix(openai): gracefully handle stream endings

- The Responses API ends streams without a final '[DONE]' message.
- This causes reqwest_eventsource to return Error::StreamEnded.
- Previously, this was treated as a premature termination, triggering an error probe.
- We now explicitly match and break on Err(StreamEnded) for normal completion.
---
 src/providers/openai.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/providers/openai.rs b/src/providers/openai.rs
index f554f424..d1cc139f 100644
--- a/src/providers/openai.rs
+++ b/src/providers/openai.rs
@@ -524,6 +524,7 @@ impl super::Provider for OpenAIProvider {
                         }
                     }
                     Ok(_) => continue,
+                    Err(reqwest_eventsource::Error::StreamEnded) => break,
                     Err(e) => {
                         // Attempt to probe for the actual error body
                         let probe_resp = probe_client
@@ -887,6 +888,7 @@ impl super::Provider for OpenAIProvider {
                         }
                     }
                     Ok(_) => continue,
+                    Err(reqwest_eventsource::Error::StreamEnded) => break,
                     Err(e) => {
                          // Attempt to probe for the actual error body
                         let probe_resp = probe_client

From 79dc8fe409aa1e02af669f4d4ec5397fc17f654c Mon Sep 17 00:00:00 2001
From: hobokenchicken <dustin@dustin.coffee>
Date: Wed, 18 Mar 2026 16:13:13 +0000
Subject: [PATCH 10/14] fix(openai): correctly parse Responses API tool call
 events

- The Responses API does not use 'response.item.delta' for tool calls.
- It uses 'response.output_item.added' to initialize the function call.
- It uses 'response.function_call_arguments.delta' for the payload stream.
- Updated the streaming parser to catch these events and correctly yield ToolCallDelta objects.
- This restores proper streaming of tool calls back to the client.
---
 src/providers/openai.rs | 43 ++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/src/providers/openai.rs b/src/providers/openai.rs
index d1cc139f..20b2ae7f 100644
--- a/src/providers/openai.rs
+++ b/src/providers/openai.rs
@@ -757,15 +757,11 @@ impl super::Provider for OpenAIProvider {
                                         content_buffer.push_str(delta);
                                     }
                                 }
-                                "response.item.delta" => {
-                                    if let Some(delta) = chunk.get("delta") {
-                                        let t = delta.get("type").and_then(|v| v.as_str()).unwrap_or("");
-                                        if t == "function_call" {
-                                            let call_id = delta.get("call_id")
-                                                .or_else(|| chunk.get("item_id"))
-                                                .and_then(|v| v.as_str());
-                                            let name = delta.get("name").and_then(|v| v.as_str());
-                                            let arguments = delta.get("arguments").and_then(|v| v.as_str());
+                                "response.output_item.added" => {
+                                    if let Some(item) = chunk.get("item") {
+                                        if item.get("type").and_then(|v| v.as_str()) == Some("function_call") {
+                                            let call_id = item.get("call_id").and_then(|v| v.as_str());
+                                            let name = item.get("name").and_then(|v| v.as_str());
                                             
                                             tool_calls = Some(vec![crate::models::ToolCallDelta {
                                                 index: chunk.get("output_index").and_then(|v| v.as_u64()).unwrap_or(0) as u32,
@@ -773,17 +769,26 @@ impl super::Provider for OpenAIProvider {
                                                 call_type: Some("function".to_string()),
                                                 function: Some(crate::models::FunctionCallDelta {
                                                     name: name.map(|s| s.to_string()),
-                                                    arguments: arguments.map(|s| s.to_string()),
+                                                    arguments: Some("".to_string()), // Start with empty arguments
                                                 }),
                                             }]);
-                                        } else if t == "message" {
-                                            if let Some(text) = delta.get("text").and_then(|v| v.as_str()) {
-                                                content_buffer.push_str(text);
-                                            }
                                         }
                                     }
                                 }
-                                "response.output_text.done" | "response.item.done" | "response.done" => {
+                                "response.function_call_arguments.delta" => {
+                                    if let Some(delta) = chunk.get("delta").and_then(|v| v.as_str()) {
+                                        tool_calls = Some(vec![crate::models::ToolCallDelta {
+                                            index: chunk.get("output_index").and_then(|v| v.as_u64()).unwrap_or(0) as u32,
+                                            id: None,
+                                            call_type: None,
+                                            function: Some(crate::models::FunctionCallDelta {
+                                                name: None,
+                                                arguments: Some(delta.to_string()),
+                                            }),
+                                        }]);
+                                    }
+                                }
+                                "response.output_text.done" | "response.item.done" | "response.completed" => {
                                     finish_reason = Some("stop".to_string());
                                 }
                                 _ => {}
@@ -890,12 +895,18 @@ impl super::Provider for OpenAIProvider {
                     Ok(_) => continue,
                     Err(reqwest_eventsource::Error::StreamEnded) => break,
                     Err(e) => {
+                        tracing::error!("Responses stream encountered an error: {}", e);
                          // Attempt to probe for the actual error body
+                        let mut probe_body_no_stream = probe_body.clone();
+                        if let Some(obj) = probe_body_no_stream.as_object_mut() {
+                            obj.remove("stream");
+                        }
+
                         let probe_resp = probe_client
                             .post(&url)
                             .header("Authorization", format!("Bearer {}", api_key))
                             .header("Accept", "application/json")
-                            .json(&probe_body)
+                            .json(&probe_body_no_stream)
                             .send()
                             .await;
                         

From 1cac45502a4a972ae0032bb97d247a0978e45669 Mon Sep 17 00:00:00 2001
From: hobokenchicken <dustin@dustin.coffee>
Date: Wed, 18 Mar 2026 17:48:55 +0000
Subject: [PATCH 11/14] fix(openai): fix stream whitespace loss and
 finish_reason for gpt-5.4

- Remove the overzealous .trim() in strip_internal_metadata, which destroyed whitespace between text stream chunks and caused client hangs
- Fix finish_reason logic to only yield once at the end of the stream
- Correctly yield finish_reason: 'tool_calls' instead of 'stop' when tool calls are generated
---
 src/providers/openai.rs | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/providers/openai.rs b/src/providers/openai.rs
index 20b2ae7f..ff2de314 100644
--- a/src/providers/openai.rs
+++ b/src/providers/openai.rs
@@ -102,12 +102,12 @@ impl OpenAIProvider {
 
         for p in patterns {
             if let Some(start) = result.find(p) {
-                // Remove the pattern and any whitespace around it
+                // Remove the pattern
                 result.replace_range(start..start + p.len(), "");
             }
         }
 
-        result.trim().to_string()
+        result
     }
 }
 
@@ -730,6 +730,7 @@ impl super::Provider for OpenAIProvider {
         let stream = async_stream::try_stream! {
             let mut es = es;
             let mut content_buffer = String::new();
+            let mut has_tool_calls = false;
             
             while let Some(event) = es.next().await {
                 match event {
@@ -760,6 +761,7 @@ impl super::Provider for OpenAIProvider {
                                 "response.output_item.added" => {
                                     if let Some(item) = chunk.get("item") {
                                         if item.get("type").and_then(|v| v.as_str()) == Some("function_call") {
+                                            has_tool_calls = true;
                                             let call_id = item.get("call_id").and_then(|v| v.as_str());
                                             let name = item.get("name").and_then(|v| v.as_str());
                                             
@@ -777,6 +779,7 @@ impl super::Provider for OpenAIProvider {
                                 }
                                 "response.function_call_arguments.delta" => {
                                     if let Some(delta) = chunk.get("delta").and_then(|v| v.as_str()) {
+                                        has_tool_calls = true;
                                         tool_calls = Some(vec![crate::models::ToolCallDelta {
                                             index: chunk.get("output_index").and_then(|v| v.as_u64()).unwrap_or(0) as u32,
                                             id: None,
@@ -788,8 +791,8 @@ impl super::Provider for OpenAIProvider {
                                         }]);
                                     }
                                 }
-                                "response.output_text.done" | "response.item.done" | "response.completed" => {
-                                    finish_reason = Some("stop".to_string());
+                                "response.completed" => {
+                                    finish_reason = Some(if has_tool_calls { "tool_calls".to_string() } else { "stop".to_string() });
                                 }
                                 _ => {}
                             }
@@ -800,6 +803,7 @@ impl super::Provider for OpenAIProvider {
                                 if content_buffer.contains("{\"tool_uses\":") {
                                     let embedded_calls = Self::parse_tool_uses_json(&content_buffer);
                                     if !embedded_calls.is_empty() {
+                                        has_tool_calls = true;
                                         if let Some(start) = content_buffer.find("{\"tool_uses\":") {
                                             // Yield text before the JSON block
                                             let preamble = content_buffer[..start].to_string();

From 66e8b114b9716ac564afcbe8074e6119b3b1605e Mon Sep 17 00:00:00 2001
From: hobokenchicken <dustin@dustin.coffee>
Date: Wed, 18 Mar 2026 18:05:37 +0000
Subject: [PATCH 12/14] fix(openai): split embedded tool_calls into standard
 chunk format

- Standard OpenAI clients expect tool_calls to be streamed as two parts:
  1. Initialization chunk containing 'id', 'type', and 'name', with empty 'arguments'.
  2. Payload chunk(s) containing 'arguments', with 'id', 'type', and 'name' omitted.
- Previously, the proxy was yielding all fields in a single chunk when parsing the custom 'tool_uses' JSON from gpt-5.4, causing strict clients like opencode to fail silently when delegating parallel tasks.
- The proxy now splits the extracted JSON into the correct two-chunk sequence, restoring subagent compatibility.
---
 src/providers/openai.rs | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/src/providers/openai.rs b/src/providers/openai.rs
index ff2de314..ecb6300f 100644
--- a/src/providers/openai.rs
+++ b/src/providers/openai.rs
@@ -819,15 +819,37 @@ impl super::Provider for OpenAIProvider {
                                                 };
                                             }
                                             
-                                            // Yield the tool calls
-                                            // ... (rest of tool call yielding unchanged)
-                                            let deltas: Vec<crate::models::ToolCallDelta> = embedded_calls.into_iter().enumerate().map(|(idx, tc)| {
+                                            // Yield the tool calls in two chunks to mimic standard streaming behavior
+                                            // Chunk 1: Initialization (id, name)
+                                            let init_deltas: Vec<crate::models::ToolCallDelta> = embedded_calls.iter().enumerate().map(|(idx, tc)| {
                                                 crate::models::ToolCallDelta {
                                                     index: idx as u32,
-                                                    id: Some(tc.id),
+                                                    id: Some(tc.id.clone()),
                                                     call_type: Some("function".to_string()),
                                                     function: Some(crate::models::FunctionCallDelta {
-                                                        name: Some(tc.function.name),
+                                                        name: Some(tc.function.name.clone()),
+                                                        arguments: Some("".to_string()),
+                                                    }),
+                                                }
+                                            }).collect();
+                                            
+                                            yield ProviderStreamChunk {
+                                                content: String::new(),
+                                                reasoning_content: None,
+                                                finish_reason: None,
+                                                tool_calls: Some(init_deltas),
+                                                model: model.clone(),
+                                                usage: None,
+                                            };
+
+                                            // Chunk 2: Payload (arguments)
+                                            let arg_deltas: Vec<crate::models::ToolCallDelta> = embedded_calls.into_iter().enumerate().map(|(idx, tc)| {
+                                                crate::models::ToolCallDelta {
+                                                    index: idx as u32,
+                                                    id: None,
+                                                    call_type: None,
+                                                    function: Some(crate::models::FunctionCallDelta {
+                                                        name: None,
                                                         arguments: Some(tc.function.arguments),
                                                     }),
                                                 }
@@ -837,7 +859,7 @@ impl super::Provider for OpenAIProvider {
                                                 content: String::new(),
                                                 reasoning_content: None,
                                                 finish_reason: None,
-                                                tool_calls: Some(deltas),
+                                                tool_calls: Some(arg_deltas),
                                                 model: model.clone(),
                                                 usage: None,
                                             };

From 4de457cc5ebc86b073748c2f183d23e386582905 Mon Sep 17 00:00:00 2001
From: hobokenchicken <dustin@dustin.coffee>
Date: Wed, 18 Mar 2026 18:26:27 +0000
Subject: [PATCH 13/14] fix(openai): correctly map tool_call indexes in
 Responses API stream

- The OpenAI Responses API uses 'output_index' to identify items in the response.
- If a response starts with text (output_index 0) followed by a tool call (output_index 1), the standard Chat Completions streaming format requires the first tool call to have index 0.
- Previously, the proxy was passing output_index (1) as the tool_call index, causing client-side SDKs to fail parsing the stream and silently drop the tool calls.
- Implemented a local mapping within the stream to ensure tool_call indexes are always dense and start at 0.
---
 src/providers/openai.rs | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/providers/openai.rs b/src/providers/openai.rs
index ecb6300f..240ea6b1 100644
--- a/src/providers/openai.rs
+++ b/src/providers/openai.rs
@@ -731,6 +731,8 @@ impl super::Provider for OpenAIProvider {
             let mut es = es;
             let mut content_buffer = String::new();
             let mut has_tool_calls = false;
+            let mut tool_index_map = std::collections::HashMap::<u32, u32>::new();
+            let mut next_tool_index = 0u32;
             
             while let Some(event) = es.next().await {
                 match event {
@@ -765,8 +767,15 @@ impl super::Provider for OpenAIProvider {
                                             let call_id = item.get("call_id").and_then(|v| v.as_str());
                                             let name = item.get("name").and_then(|v| v.as_str());
                                             
+                                            let out_idx = chunk.get("output_index").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
+                                            let tc_idx = *tool_index_map.entry(out_idx).or_insert_with(|| {
+                                                let i = next_tool_index;
+                                                next_tool_index += 1;
+                                                i
+                                            });
+
                                             tool_calls = Some(vec![crate::models::ToolCallDelta {
-                                                index: chunk.get("output_index").and_then(|v| v.as_u64()).unwrap_or(0) as u32,
+                                                index: tc_idx,
                                                 id: call_id.map(|s| s.to_string()),
                                                 call_type: Some("function".to_string()),
                                                 function: Some(crate::models::FunctionCallDelta {
@@ -780,8 +789,16 @@ impl super::Provider for OpenAIProvider {
                                 "response.function_call_arguments.delta" => {
                                     if let Some(delta) = chunk.get("delta").and_then(|v| v.as_str()) {
                                         has_tool_calls = true;
+                                        
+                                        let out_idx = chunk.get("output_index").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
+                                        let tc_idx = *tool_index_map.entry(out_idx).or_insert_with(|| {
+                                            let i = next_tool_index;
+                                            next_tool_index += 1;
+                                            i
+                                        });
+
                                         tool_calls = Some(vec![crate::models::ToolCallDelta {
-                                            index: chunk.get("output_index").and_then(|v| v.as_u64()).unwrap_or(0) as u32,
+                                            index: tc_idx,
                                             id: None,
                                             call_type: None,
                                             function: Some(crate::models::FunctionCallDelta {

From 57aa0aa70e8d6e44ae30b00ab926660bdeda3517 Mon Sep 17 00:00:00 2001
From: hobokenchicken <dustin@dustin.coffee>
Date: Wed, 18 Mar 2026 18:31:24 +0000
Subject: [PATCH 14/14] fix(openai): unify tool call indexing for both standard
 and embedded calls

- Sequential next_tool_index is now used for both Responses API 'function_call' events and the proxy's 'tool_uses' JSON extraction.
- This ensures tool_calls arrays in the stream always start at index 0 and are dense, even if standard and embedded calls were somehow mixed.
- Added 'payload_idx' so that argument chunks carry the same indexes as their initialization chunks.
---
 src/providers/openai.rs | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/providers/openai.rs b/src/providers/openai.rs
index 240ea6b1..c7c796e8 100644
--- a/src/providers/openai.rs
+++ b/src/providers/openai.rs
@@ -838,9 +838,11 @@ impl super::Provider for OpenAIProvider {
                                             
                                             // Yield the tool calls in two chunks to mimic standard streaming behavior
                                             // Chunk 1: Initialization (id, name)
-                                            let init_deltas: Vec<crate::models::ToolCallDelta> = embedded_calls.iter().enumerate().map(|(idx, tc)| {
+                                            let init_deltas: Vec<crate::models::ToolCallDelta> = embedded_calls.iter().map(|tc| {
+                                                let tc_idx = next_tool_index;
+                                                next_tool_index += 1;
                                                 crate::models::ToolCallDelta {
-                                                    index: idx as u32,
+                                                    index: tc_idx,
                                                     id: Some(tc.id.clone()),
                                                     call_type: Some("function".to_string()),
                                                     function: Some(crate::models::FunctionCallDelta {
@@ -860,9 +862,13 @@ impl super::Provider for OpenAIProvider {
                                             };
 
                                             // Chunk 2: Payload (arguments)
-                                            let arg_deltas: Vec<crate::models::ToolCallDelta> = embedded_calls.into_iter().enumerate().map(|(idx, tc)| {
+                                            // Rewind to the index assigned to the first embedded call in the init chunk
+                                            let mut payload_idx = next_tool_index - embedded_calls.len() as u32;
+                                            let arg_deltas: Vec<crate::models::ToolCallDelta> = embedded_calls.into_iter().map(|tc| {
+                                                let tc_idx = payload_idx;
+                                                payload_idx += 1;
                                                 crate::models::ToolCallDelta {
-                                                    index: idx as u32,
+                                                    index: tc_idx,
                                                     id: None,
                                                     call_type: None,
                                                     function: Some(crate::models::FunctionCallDelta {