fix(streaming): emit [DONE] using chain on aggregator stream
This commit is contained in:
@@ -242,52 +242,61 @@ async fn chat_completions(
|
|||||||
);
|
);
|
||||||
|
|
||||||
// Create SSE stream from aggregating stream
|
// Create SSE stream from aggregating stream
|
||||||
|
// We'll emit [DONE] after all chunks by checking finish_reason
|
||||||
let stream_id = format!("chatcmpl-{}", Uuid::new_v4());
|
let stream_id = format!("chatcmpl-{}", Uuid::new_v4());
|
||||||
let stream_created = chrono::Utc::now().timestamp() as u64;
|
let stream_created = chrono::Utc::now().timestamp() as u64;
|
||||||
let sse_stream = aggregating_stream.map(move |chunk_result| {
|
|
||||||
match chunk_result {
|
// Track if we've already emitted [DONE] to avoid duplicates
|
||||||
Ok(chunk) => {
|
let done_emitted = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false));
|
||||||
// Convert provider chunk to OpenAI-compatible SSE event
|
let done_emitted_clone = done_emitted.clone();
|
||||||
let response = ChatCompletionStreamResponse {
|
|
||||||
id: stream_id.clone(),
|
let sse_stream = aggregating_stream
|
||||||
object: "chat.completion.chunk".to_string(),
|
.map(move |chunk_result| {
|
||||||
created: stream_created,
|
match chunk_result {
|
||||||
model: chunk.model.clone(),
|
Ok(chunk) => {
|
||||||
choices: vec![ChatStreamChoice {
|
// Convert provider chunk to OpenAI-compatible SSE event
|
||||||
index: 0,
|
let response = ChatCompletionStreamResponse {
|
||||||
delta: ChatStreamDelta {
|
id: stream_id.clone(),
|
||||||
role: None,
|
object: "chat.completion.chunk".to_string(),
|
||||||
content: Some(chunk.content),
|
created: stream_created,
|
||||||
reasoning_content: chunk.reasoning_content,
|
model: chunk.model.clone(),
|
||||||
tool_calls: chunk.tool_calls,
|
choices: vec![ChatStreamChoice {
|
||||||
},
|
index: 0,
|
||||||
finish_reason: chunk.finish_reason,
|
delta: ChatStreamDelta {
|
||||||
}],
|
role: None,
|
||||||
};
|
content: Some(chunk.content),
|
||||||
|
reasoning_content: chunk.reasoning_content,
|
||||||
|
tool_calls: chunk.tool_calls,
|
||||||
|
},
|
||||||
|
finish_reason: chunk.finish_reason,
|
||||||
|
}],
|
||||||
|
};
|
||||||
|
|
||||||
match Event::default().json_data(response) {
|
match Event::default().json_data(response) {
|
||||||
Ok(event) => Ok(event),
|
Ok(event) => Ok(event),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
warn!("Failed to serialize SSE event: {}", e);
|
warn!("Failed to serialize SSE event: {}", e);
|
||||||
Err(AppError::InternalError("SSE serialization failed".to_string()))
|
Err(AppError::InternalError("SSE serialization failed".to_string()))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Err(e) => {
|
||||||
|
warn!("Error in streaming response: {}", e);
|
||||||
|
Err(e)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Err(e) => {
|
})
|
||||||
warn!("Error in streaming response: {}", e);
|
// Add [DONE] when stream ends
|
||||||
Err(e)
|
.chain(futures::stream::once(async move {
|
||||||
|
if done_emitted_clone.swap(true, std::sync::atomic::Ordering::SeqCst) {
|
||||||
|
// Already emitted [DONE] from a previous check, return empty
|
||||||
|
Ok(Event::default())
|
||||||
|
} else {
|
||||||
|
Ok(Event::default().data("[DONE]"))
|
||||||
}
|
}
|
||||||
}
|
}));
|
||||||
});
|
|
||||||
|
|
||||||
// Many OpenAI-compatible clients expect a terminal [DONE] marker.
|
Ok(Sse::new(sse_stream).into_response())
|
||||||
// Emit it when the upstream stream ends to avoid clients treating
|
|
||||||
// the response as incomplete.
|
|
||||||
let done_event = Ok::<Event, AppError>(Event::default().data("[DONE]"));
|
|
||||||
let done_stream = futures::stream::iter(vec![done_event]);
|
|
||||||
let out = sse_stream.chain(done_stream);
|
|
||||||
|
|
||||||
Ok(Sse::new(out).into_response())
|
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
// Record provider failure
|
// Record provider failure
|
||||||
|
|||||||
Reference in New Issue
Block a user