@@ -321,7 +321,7 @@ impl ShuffleWriterExec {
321321 let mut stream = plan. execute ( input_partition, context) ?;
322322
323323 if memory_mode {
324- // Use in-memory shuffle storage
324+ // Use in-memory shuffle storage with configurable format
325325 Self :: execute_shuffle_write_memory (
326326 & job_id,
327327 stage_id,
@@ -330,6 +330,7 @@ impl ShuffleWriterExec {
330330 output_partitioning,
331331 write_metrics,
332332 now,
333+ shuffle_format,
333334 )
334335 . await
335336 } else {
@@ -499,6 +500,7 @@ impl ShuffleWriterExec {
499500 }
500501
501502 /// Executes shuffle write to in-memory storage.
503+ #[ allow( clippy:: too_many_arguments) ]
502504 async fn execute_shuffle_write_memory (
503505 job_id : & str ,
504506 stage_id : usize ,
@@ -509,6 +511,7 @@ impl ShuffleWriterExec {
509511 output_partitioning : Option < Partitioning > ,
510512 write_metrics : ShuffleWriteMetrics ,
511513 now : Instant ,
514+ shuffle_format : ShuffleFormat ,
512515 ) -> Result < Vec < ShuffleWritePartition > > {
513516 let shuffle_manager = global_shuffle_manager ( ) ;
514517 let schema = stream. schema ( ) ;
@@ -538,14 +541,15 @@ impl ShuffleWriterExec {
538541 input_partition,
539542 ) ;
540543
541- // Store in the global shuffle manager
542- let data = ShufflePartitionData :: new ( schema. clone ( ) , batches) ;
544+ // Store in the global shuffle manager using the configured format
545+ let data =
546+ Self :: create_partition_data ( schema. clone ( ) , batches, shuffle_format) ?;
543547 shuffle_manager. store_partition ( key. clone ( ) , data) ;
544548
545549 timer. done ( ) ;
546550
547551 info ! (
548- "Executed partition {} to memory in {} seconds. Batches: {}, Rows: {}, Bytes: {}" ,
552+ "Executed partition {} to memory ({shuffle_format}) in {} seconds. Batches: {}, Rows: {}, Bytes: {}" ,
549553 input_partition,
550554 now. elapsed( ) . as_secs( ) ,
551555 num_batches,
@@ -622,12 +626,16 @@ impl ShuffleWriterExec {
622626 for ( i, w) in mem_writers. into_iter ( ) . enumerate ( ) {
623627 if let Some ( w) = w {
624628 debug ! (
625- "Finished writing shuffle partition {} to memory. Batches: {}. Rows: {}. Bytes: {}." ,
629+ "Finished writing shuffle partition {} to memory ({shuffle_format}) . Batches: {}. Rows: {}. Bytes: {}." ,
626630 i, w. num_batches, w. num_rows, w. num_bytes
627631 ) ;
628632
629- // Store in the global shuffle manager
630- let data = ShufflePartitionData :: new ( schema. clone ( ) , w. batches ) ;
633+ // Store in the global shuffle manager using the configured format
634+ let data = Self :: create_partition_data (
635+ schema. clone ( ) ,
636+ w. batches ,
637+ shuffle_format,
638+ ) ?;
631639 shuffle_manager. store_partition ( w. key . clone ( ) , data) ;
632640
633641 part_locs. push ( ShuffleWritePartition {
@@ -647,6 +655,45 @@ impl ShuffleWriterExec {
647655 ) ) ,
648656 }
649657 }
658+
659+ /// Creates partition data in the specified format (Arrow or Vortex).
660+ fn create_partition_data (
661+ schema : SchemaRef ,
662+ batches : Vec < RecordBatch > ,
663+ format : ShuffleFormat ,
664+ ) -> Result < ShufflePartitionData > {
665+ match format {
666+ ShuffleFormat :: ArrowIpc => Ok ( ShufflePartitionData :: new ( schema, batches) ) ,
667+ #[ cfg( feature = "vortex" ) ]
668+ ShuffleFormat :: Vortex => {
669+ use vortex_array:: ArrayRef ;
670+ use vortex_array:: arrow:: FromArrowArray ;
671+
672+ let mut arrays = Vec :: with_capacity ( batches. len ( ) ) ;
673+ let mut total_rows = 0u64 ;
674+ let mut total_bytes = 0u64 ;
675+
676+ for batch in batches {
677+ total_rows += batch. num_rows ( ) as u64 ;
678+ // Convert Arrow RecordBatch to Vortex Array
679+ let vortex_array = ArrayRef :: from_arrow ( & batch, false ) ;
680+ total_bytes += vortex_array. nbytes ( ) ;
681+ arrays. push ( vortex_array) ;
682+ }
683+
684+ Ok ( ShufflePartitionData :: new_vortex (
685+ schema,
686+ arrays,
687+ total_rows,
688+ total_bytes,
689+ ) )
690+ }
691+ #[ cfg( not( feature = "vortex" ) ) ]
692+ ShuffleFormat :: Vortex => Err ( DataFusionError :: NotImplemented (
693+ "Vortex format requires the 'vortex' feature to be enabled" . to_string ( ) ,
694+ ) ) ,
695+ }
696+ }
650697}
651698
652699impl DisplayAs for ShuffleWriterExec {
0 commit comments