//! Message response processing stage: streaming and non-streaming response processing
//!
//! - For streaming: spawns a background task and returns an SSE response (early exit)
//! - For non-streaming: collects the backend response, converts it to an Anthropic `Message`,
//!   and stores it as `FinalResponse::Messages`.

7+ use std:: sync:: Arc ;
68
79use async_trait:: async_trait;
810use axum:: response:: Response ;
911use tracing:: error;
1012
11- use crate :: routers:: {
12- error,
13- grpc:: {
14- common:: stages:: PipelineStage ,
15- context:: { FinalResponse , RequestContext } ,
16- regular:: processor,
13+ use crate :: {
14+ core:: AttachedBody ,
15+ routers:: {
16+ error,
17+ grpc:: {
18+ common:: stages:: PipelineStage ,
19+ context:: { FinalResponse , RequestContext } ,
20+ regular:: { processor, streaming} ,
21+ } ,
1722 } ,
1823} ;
1924
20- /// Message response processing stage (non-streaming only)
25+ /// Message response processing stage
2126pub ( crate ) struct MessageResponseProcessingStage {
2227 processor : processor:: ResponseProcessor ,
28+ streaming_processor : Arc < streaming:: StreamingProcessor > ,
2329}
2430
2531impl MessageResponseProcessingStage {
26- pub fn new ( processor : processor:: ResponseProcessor ) -> Self {
27- Self { processor }
32+ pub fn new (
33+ processor : processor:: ResponseProcessor ,
34+ streaming_processor : Arc < streaming:: StreamingProcessor > ,
35+ ) -> Self {
36+ Self {
37+ processor,
38+ streaming_processor,
39+ }
2840 }
2941}
3042
3143#[ async_trait]
3244impl PipelineStage for MessageResponseProcessingStage {
3345 async fn execute ( & self , ctx : & mut RequestContext ) -> Result < Option < Response > , Response > {
46+ let is_streaming = ctx. is_streaming ( ) ;
47+
3448 // Extract execution result
3549 let execution_result = ctx. state . response . execution_result . take ( ) . ok_or_else ( || {
3650 error ! (
@@ -66,6 +80,28 @@ impl PipelineStage for MessageResponseProcessingStage {
6680 )
6781 } ) ?;
6882
83+ if is_streaming {
84+ // Streaming: use StreamingProcessor and return SSE response
85+ let response = self
86+ . streaming_processor
87+ . clone ( )
88+ . process_messages_streaming_response (
89+ execution_result,
90+ ctx. messages_request_arc ( ) ,
91+ dispatch,
92+ tokenizer,
93+ ) ;
94+
95+ // Attach load guards for RAII lifecycle
96+ let response = match ctx. state . load_guards . take ( ) {
97+ Some ( guards) => AttachedBody :: wrap_response ( response, guards) ,
98+ None => response,
99+ } ;
100+
101+ return Ok ( Some ( response) ) ;
102+ }
103+
104+ // Non-streaming: delegate to ResponseProcessor
69105 let messages_request = ctx. messages_request_arc ( ) ;
70106
71107 let stop_decoder = ctx. state . response . stop_decoder . as_mut ( ) . ok_or_else ( || {
@@ -79,7 +115,6 @@ impl PipelineStage for MessageResponseProcessingStage {
79115 )
80116 } ) ?;
81117
82- // Non-streaming: delegate to ResponseProcessor
83118 let response = self
84119 . processor
85120 . process_non_streaming_messages_response (
0 commit comments