@@ -16,6 +16,8 @@ use futures::future::BoxFuture;
1616use futures:: future:: join;
1717use vortex:: array:: ArrayRef ;
1818use vortex:: array:: Canonical ;
19+ use vortex:: array:: ExecutionCtx ;
20+ use vortex:: array:: IntoArray ;
1921use vortex:: array:: arrays:: DecimalArray ;
2022use vortex:: array:: arrays:: Dict ;
2123use vortex:: array:: arrays:: DictArray ;
@@ -36,6 +38,7 @@ use vortex::array::arrays::extension::ExtensionArrayExt;
3638use vortex:: array:: arrays:: fixed_size_list:: FixedSizeListArrayExt ;
3739use vortex:: array:: arrays:: fixed_size_list:: FixedSizeListDataParts ;
3840use vortex:: array:: arrays:: list:: ListDataParts ;
41+ use vortex:: array:: arrays:: listview:: ListViewArrayExt ;
3942use vortex:: array:: arrays:: listview:: list_from_list_view;
4043use vortex:: array:: arrays:: primitive:: PrimitiveDataParts ;
4144use vortex:: array:: arrays:: struct_:: StructDataParts ;
@@ -64,10 +67,12 @@ use crate::CudaExecutionCtx;
6467use crate :: arrow:: ARROW_DEVICE_CUDA ;
6568use crate :: arrow:: ArrowArray ;
6669use crate :: arrow:: ArrowDeviceArray ;
70+ use crate :: arrow:: ArrowDeviceArrayWithSchema ;
6771use crate :: arrow:: ExportDeviceArray ;
6872use crate :: arrow:: PrivateData ;
6973use crate :: arrow:: SyncEvent ;
7074use crate :: arrow:: arrow_device_export_dictionary_codes_dtype;
75+ use crate :: arrow:: arrow_schema_for_array;
7176use crate :: arrow:: cuda_decimal_value_type;
7277use crate :: arrow:: list_view:: export_device_list_view;
7378use crate :: cub:: exclusive_sum_i32;
@@ -96,6 +101,92 @@ impl ExportDeviceArray for CanonicalDeviceArrayExport {
96101 reserved : Default :: default ( ) ,
97102 } )
98103 }
104+
105+ async fn export_device_array_with_schema (
106+ & self ,
107+ array : ArrayRef ,
108+ ctx : & mut CudaExecutionCtx ,
109+ ) -> VortexResult < ArrowDeviceArrayWithSchema > {
110+ let array = rebuild_array_for_export_schema ( array, ctx. execution_ctx ( ) ) ?;
111+ let schema = arrow_schema_for_array ( & array, ctx) ?;
112+ let array = self . export_device_array ( array, ctx) . await ?;
113+ Ok ( ArrowDeviceArrayWithSchema { schema, array } )
114+ }
115+ }
116+
117+ /// Rebuild arrays whose exported layout differs from their original layout.
118+ fn rebuild_array_for_export_schema (
119+ array : ArrayRef ,
120+ ctx : & mut ExecutionCtx ,
121+ ) -> VortexResult < ArrayRef > {
122+ let array = match array. try_downcast :: < Dict > ( ) {
123+ Ok ( dict) => {
124+ let parts = dict. into_parts ( ) ;
125+ let values = rebuild_array_for_export_schema ( parts. values , ctx) ?;
126+ return Ok ( DictArray :: try_new ( parts. codes , values) ?. into_array ( ) ) ;
127+ }
128+ Err ( array) => array,
129+ } ;
130+ let array = match array. try_downcast :: < Struct > ( ) {
131+ Ok ( struct_array) => {
132+ let len = struct_array. len ( ) ;
133+ let StructDataParts {
134+ struct_fields,
135+ fields,
136+ validity,
137+ } = struct_array. into_data_parts ( ) ;
138+ let fields = fields
139+ . iter ( )
140+ . map ( |field| rebuild_array_for_export_schema ( field. clone ( ) , ctx) )
141+ . collect :: < VortexResult < Vec < _ > > > ( ) ?;
142+ return Ok (
143+ StructArray :: try_new ( struct_fields. names ( ) . clone ( ) , fields, len, validity) ?
144+ . into_array ( ) ,
145+ ) ;
146+ }
147+ Err ( array) => array,
148+ } ;
149+ let array = match array. try_downcast :: < List > ( ) {
150+ Ok ( list) => {
151+ let ListDataParts {
152+ elements,
153+ offsets,
154+ validity,
155+ ..
156+ } = list. into_data_parts ( ) ;
157+ let elements = rebuild_array_for_export_schema ( elements, ctx) ?;
158+ return Ok ( ListArray :: try_new ( elements, offsets, validity) ?. into_array ( ) ) ;
159+ }
160+ Err ( array) => array,
161+ } ;
162+ let array = match array. try_downcast :: < FixedSizeList > ( ) {
163+ Ok ( fixed_size_list) => {
164+ let len = fixed_size_list. len ( ) ;
165+ let list_size = fixed_size_list. list_size ( ) ;
166+ let FixedSizeListDataParts {
167+ elements, validity, ..
168+ } = fixed_size_list. into_data_parts ( ) ;
169+ let elements = rebuild_array_for_export_schema ( elements, ctx) ?;
170+ return Ok (
171+ FixedSizeListArray :: try_new ( elements, list_size, validity, len) ?. into_array ( ) ,
172+ ) ;
173+ }
174+ Err ( array) => array,
175+ } ;
176+ let array = match array. try_downcast :: < ListView > ( ) {
177+ Ok ( listview)
178+ if listview. as_ref ( ) . is_host ( ) && listview. elements ( ) . as_opt :: < Dict > ( ) . is_some ( ) =>
179+ {
180+ return rebuild_array_for_export_schema (
181+ list_from_list_view ( listview, ctx) ?. into_array ( ) ,
182+ ctx,
183+ ) ;
184+ }
185+ Ok ( listview) => return Ok ( listview. into_array ( ) ) ,
186+ Err ( array) => array,
187+ } ;
188+
189+ Ok ( array)
99190}
100191
101192/// Export arrays whose Arrow layout depends on their concrete children before CUDA
@@ -2136,7 +2227,7 @@ mod tests {
21362227 }
21372228
21382229 #[ crate :: test]
2139- async fn test_export_host_non_contiguous_dictionary_list_view_preserves_dictionary_child ( )
2230+ async fn test_export_host_non_contiguous_dictionary_list_view_schema_matches_rebuilt_child ( )
21402231 -> VortexResult < ( ) > {
21412232 let mut ctx = CudaSession :: create_execution_ctx ( & VortexSession :: empty ( ) )
21422233 . vortex_expect ( "failed to create execution context" ) ;
@@ -2162,7 +2253,13 @@ mod tests {
21622253 "" ,
21632254 Field :: new(
21642255 Field :: LIST_FIELD_DEFAULT_NAME ,
2165- DataType :: Dictionary ( Box :: new( DataType :: Int16 ) , Box :: new( DataType :: Int32 ) ) ,
2256+ DataType :: Dictionary (
2257+ Box :: new( DataType :: Int64 ) ,
2258+ Box :: new( DataType :: Dictionary (
2259+ Box :: new( DataType :: Int16 ) ,
2260+ Box :: new( DataType :: Int32 ) ,
2261+ ) ) ,
2262+ ) ,
21662263 true ,
21672264 ) ,
21682265 false ,
@@ -2177,6 +2274,57 @@ mod tests {
21772274 assert ! ( !dict_child. dictionary. is_null( ) ) ;
21782275 assert_eq ! ( dict_child. length, 5 ) ;
21792276 assert_eq ! ( dict_child. n_buffers, 2 ) ;
2277+ let nested_dict = unsafe { & * dict_child. dictionary } ;
2278+ assert ! ( !nested_dict. dictionary. is_null( ) ) ;
2279+
2280+ unsafe { release_exported_array ( & raw mut exported. array . array ) } ;
2281+ Ok ( ( ) )
2282+ }
2283+
2284+ // Regression test: with an average list size >= 128 the host list-view rebuild picks its
2285+ // list-by-list strategy, which may canonicalize Dict elements. The schema must describe the
2286+ // rebuilt child layout.
2287+ #[ crate :: test]
2288+ async fn test_export_host_large_lists_dictionary_list_view_schema_matches_rebuilt_child ( )
2289+ -> VortexResult < ( ) > {
2290+ let mut ctx = CudaSession :: create_execution_ctx ( & VortexSession :: empty ( ) )
2291+ . vortex_expect ( "failed to create execution context" ) ;
2292+
2293+ let elements = DictArray :: try_new (
2294+ PrimitiveArray :: from_option_iter (
2295+ ( 0 ..256u32 ) . map ( |i| ( i % 5 != 0 ) . then_some ( ( i % 3 ) as u8 ) ) ,
2296+ )
2297+ . into_array ( ) ,
2298+ PrimitiveArray :: from_iter ( [ 10i32 , 20 , 30 ] ) . into_array ( ) ,
2299+ ) ?
2300+ . into_array ( ) ;
2301+ let array = ListViewArray :: new (
2302+ elements,
2303+ PrimitiveArray :: from_iter ( [ 128i32 , 0 ] ) . into_array ( ) ,
2304+ PrimitiveArray :: from_iter ( [ 128i32 , 128 ] ) . into_array ( ) ,
2305+ Validity :: NonNullable ,
2306+ )
2307+ . into_array ( ) ;
2308+ let mut exported = array. export_device_array_with_schema ( & mut ctx) . await ?;
2309+
2310+ let field = Field :: try_from ( & exported. schema ) ?;
2311+ assert_eq ! (
2312+ field,
2313+ Field :: new_list(
2314+ "" ,
2315+ Field :: new( Field :: LIST_FIELD_DEFAULT_NAME , DataType :: Int32 , true ) ,
2316+ false ,
2317+ )
2318+ ) ;
2319+ assert_eq ! (
2320+ private_data_buffer_i32_values( & exported. array. array, 1 ) ?,
2321+ [ 0 , 128 , 256 ]
2322+ ) ;
2323+ let list_children = unsafe { std:: slice:: from_raw_parts ( exported. array . array . children , 1 ) } ;
2324+ let child = unsafe { & * list_children[ 0 ] } ;
2325+ assert ! ( child. dictionary. is_null( ) ) ;
2326+ assert_eq ! ( child. length, 256 ) ;
2327+ assert_eq ! ( child. n_buffers, 2 ) ;
21802328
21812329 unsafe { release_exported_array ( & raw mut exported. array . array ) } ;
21822330 Ok ( ( ) )
0 commit comments