@@ -15,6 +15,8 @@ use cudarc::driver::result as cuda_driver;
1515use futures:: future:: BoxFuture ;
1616use vortex:: array:: ArrayRef ;
1717use vortex:: array:: Canonical ;
18+ use vortex:: array:: ExecutionCtx ;
19+ use vortex:: array:: IntoArray ;
1820use vortex:: array:: arrays:: DecimalArray ;
1921use vortex:: array:: arrays:: Dict ;
2022use vortex:: array:: arrays:: DictArray ;
@@ -35,6 +37,7 @@ use vortex::array::arrays::extension::ExtensionArrayExt;
3537use vortex:: array:: arrays:: fixed_size_list:: FixedSizeListArrayExt ;
3638use vortex:: array:: arrays:: fixed_size_list:: FixedSizeListDataParts ;
3739use vortex:: array:: arrays:: list:: ListDataParts ;
40+ use vortex:: array:: arrays:: listview:: ListViewArrayExt ;
3841use vortex:: array:: arrays:: listview:: list_from_list_view;
3942use vortex:: array:: arrays:: primitive:: PrimitiveDataParts ;
4043use vortex:: array:: arrays:: struct_:: StructDataParts ;
@@ -63,10 +66,12 @@ use crate::CudaExecutionCtx;
6366use crate :: arrow:: ARROW_DEVICE_CUDA ;
6467use crate :: arrow:: ArrowArray ;
6568use crate :: arrow:: ArrowDeviceArray ;
69+ use crate :: arrow:: ArrowDeviceArrayWithSchema ;
6670use crate :: arrow:: ExportDeviceArray ;
6771use crate :: arrow:: PrivateData ;
6872use crate :: arrow:: SyncEvent ;
6973use crate :: arrow:: arrow_device_export_dictionary_codes_dtype;
74+ use crate :: arrow:: arrow_schema_for_array;
7075use crate :: arrow:: cuda_decimal_value_type;
7176use crate :: arrow:: list_view:: export_device_list_view;
7277use crate :: cub:: exclusive_sum_i32;
@@ -95,6 +100,92 @@ impl ExportDeviceArray for CanonicalDeviceArrayExport {
95100 reserved : Default :: default ( ) ,
96101 } )
97102 }
103+
104+ async fn export_device_array_with_schema (
105+ & self ,
106+ array : ArrayRef ,
107+ ctx : & mut CudaExecutionCtx ,
108+ ) -> VortexResult < ArrowDeviceArrayWithSchema > {
109+ let array = rebuild_array_for_export_schema ( array, ctx. execution_ctx ( ) ) ?;
110+ let schema = arrow_schema_for_array ( & array, ctx) ?;
111+ let array = self . export_device_array ( array, ctx) . await ?;
112+ Ok ( ArrowDeviceArrayWithSchema { schema, array } )
113+ }
114+ }
115+
116+ /// Rebuild arrays whose exported layout differs from their original layout.
117+ fn rebuild_array_for_export_schema (
118+ array : ArrayRef ,
119+ ctx : & mut ExecutionCtx ,
120+ ) -> VortexResult < ArrayRef > {
121+ let array = match array. try_downcast :: < Dict > ( ) {
122+ Ok ( dict) => {
123+ let parts = dict. into_parts ( ) ;
124+ let values = rebuild_array_for_export_schema ( parts. values , ctx) ?;
125+ return Ok ( DictArray :: try_new ( parts. codes , values) ?. into_array ( ) ) ;
126+ }
127+ Err ( array) => array,
128+ } ;
129+ let array = match array. try_downcast :: < Struct > ( ) {
130+ Ok ( struct_array) => {
131+ let len = struct_array. len ( ) ;
132+ let StructDataParts {
133+ struct_fields,
134+ fields,
135+ validity,
136+ } = struct_array. into_data_parts ( ) ;
137+ let fields = fields
138+ . iter ( )
139+ . map ( |field| rebuild_array_for_export_schema ( field. clone ( ) , ctx) )
140+ . collect :: < VortexResult < Vec < _ > > > ( ) ?;
141+ return Ok (
142+ StructArray :: try_new ( struct_fields. names ( ) . clone ( ) , fields, len, validity) ?
143+ . into_array ( ) ,
144+ ) ;
145+ }
146+ Err ( array) => array,
147+ } ;
148+ let array = match array. try_downcast :: < List > ( ) {
149+ Ok ( list) => {
150+ let ListDataParts {
151+ elements,
152+ offsets,
153+ validity,
154+ ..
155+ } = list. into_data_parts ( ) ;
156+ let elements = rebuild_array_for_export_schema ( elements, ctx) ?;
157+ return Ok ( ListArray :: try_new ( elements, offsets, validity) ?. into_array ( ) ) ;
158+ }
159+ Err ( array) => array,
160+ } ;
161+ let array = match array. try_downcast :: < FixedSizeList > ( ) {
162+ Ok ( fixed_size_list) => {
163+ let len = fixed_size_list. len ( ) ;
164+ let list_size = fixed_size_list. list_size ( ) ;
165+ let FixedSizeListDataParts {
166+ elements, validity, ..
167+ } = fixed_size_list. into_data_parts ( ) ;
168+ let elements = rebuild_array_for_export_schema ( elements, ctx) ?;
169+ return Ok (
170+ FixedSizeListArray :: try_new ( elements, list_size, validity, len) ?. into_array ( ) ,
171+ ) ;
172+ }
173+ Err ( array) => array,
174+ } ;
175+ let array = match array. try_downcast :: < ListView > ( ) {
176+ Ok ( listview)
177+ if listview. as_ref ( ) . is_host ( ) && listview. elements ( ) . as_opt :: < Dict > ( ) . is_some ( ) =>
178+ {
179+ return rebuild_array_for_export_schema (
180+ list_from_list_view ( listview, ctx) ?. into_array ( ) ,
181+ ctx,
182+ ) ;
183+ }
184+ Ok ( listview) => return Ok ( listview. into_array ( ) ) ,
185+ Err ( array) => array,
186+ } ;
187+
188+ Ok ( array)
98189}
99190
100191/// Export arrays whose Arrow layout depends on their concrete children before CUDA
@@ -2139,7 +2230,7 @@ mod tests {
21392230 }
21402231
21412232 #[ crate :: test]
2142- async fn test_export_host_non_contiguous_dictionary_list_view_preserves_dictionary_child ( )
2233+ async fn test_export_host_non_contiguous_dictionary_list_view_schema_matches_rebuilt_child ( )
21432234 -> VortexResult < ( ) > {
21442235 let mut ctx = CudaSession :: create_execution_ctx ( & VortexSession :: empty ( ) )
21452236 . vortex_expect ( "failed to create execution context" ) ;
@@ -2165,7 +2256,13 @@ mod tests {
21652256 "" ,
21662257 Field :: new(
21672258 Field :: LIST_FIELD_DEFAULT_NAME ,
2168- DataType :: Dictionary ( Box :: new( DataType :: Int16 ) , Box :: new( DataType :: Int32 ) ) ,
2259+ DataType :: Dictionary (
2260+ Box :: new( DataType :: Int64 ) ,
2261+ Box :: new( DataType :: Dictionary (
2262+ Box :: new( DataType :: Int16 ) ,
2263+ Box :: new( DataType :: Int32 ) ,
2264+ ) ) ,
2265+ ) ,
21692266 true ,
21702267 ) ,
21712268 false ,
@@ -2180,6 +2277,57 @@ mod tests {
21802277 assert ! ( !dict_child. dictionary. is_null( ) ) ;
21812278 assert_eq ! ( dict_child. length, 5 ) ;
21822279 assert_eq ! ( dict_child. n_buffers, 2 ) ;
2280+ let nested_dict = unsafe { & * dict_child. dictionary } ;
2281+ assert ! ( !nested_dict. dictionary. is_null( ) ) ;
2282+
2283+ unsafe { release_exported_array ( & raw mut exported. array . array ) } ;
2284+ Ok ( ( ) )
2285+ }
2286+
2287+ // Regression test: with an average list size >= 128 the host list-view rebuild picks its
2288+ // list-by-list strategy, which may canonicalize Dict elements. The schema must describe the
2289+ // rebuilt child layout.
2290+ #[ crate :: test]
2291+ async fn test_export_host_large_lists_dictionary_list_view_schema_matches_rebuilt_child ( )
2292+ -> VortexResult < ( ) > {
2293+ let mut ctx = CudaSession :: create_execution_ctx ( & VortexSession :: empty ( ) )
2294+ . vortex_expect ( "failed to create execution context" ) ;
2295+
2296+ let elements = DictArray :: try_new (
2297+ PrimitiveArray :: from_option_iter (
2298+ ( 0 ..256u32 ) . map ( |i| ( i % 5 != 0 ) . then_some ( ( i % 3 ) as u8 ) ) ,
2299+ )
2300+ . into_array ( ) ,
2301+ PrimitiveArray :: from_iter ( [ 10i32 , 20 , 30 ] ) . into_array ( ) ,
2302+ ) ?
2303+ . into_array ( ) ;
2304+ let array = ListViewArray :: new (
2305+ elements,
2306+ PrimitiveArray :: from_iter ( [ 128i32 , 0 ] ) . into_array ( ) ,
2307+ PrimitiveArray :: from_iter ( [ 128i32 , 128 ] ) . into_array ( ) ,
2308+ Validity :: NonNullable ,
2309+ )
2310+ . into_array ( ) ;
2311+ let mut exported = array. export_device_array_with_schema ( & mut ctx) . await ?;
2312+
2313+ let field = Field :: try_from ( & exported. schema ) ?;
2314+ assert_eq ! (
2315+ field,
2316+ Field :: new_list(
2317+ "" ,
2318+ Field :: new( Field :: LIST_FIELD_DEFAULT_NAME , DataType :: Int32 , true ) ,
2319+ false ,
2320+ )
2321+ ) ;
2322+ assert_eq ! (
2323+ private_data_buffer_i32_values( & exported. array. array, 1 ) ?,
2324+ [ 0 , 128 , 256 ]
2325+ ) ;
2326+ let list_children = unsafe { std:: slice:: from_raw_parts ( exported. array . array . children , 1 ) } ;
2327+ let child = unsafe { & * list_children[ 0 ] } ;
2328+ assert ! ( child. dictionary. is_null( ) ) ;
2329+ assert_eq ! ( child. length, 256 ) ;
2330+ assert_eq ! ( child. n_buffers, 2 ) ;
21832331
21842332 unsafe { release_exported_array ( & raw mut exported. array . array ) } ;
21852333 Ok ( ( ) )
0 commit comments