@@ -21,6 +21,14 @@ pub enum Error {
2121 source : datafusion:: arrow:: error:: ArrowError ,
2222 } ,
2323
24+ UnableToCastColumn {
25+ source : datafusion:: arrow:: error:: ArrowError ,
26+ column_index : usize ,
27+ column_name : String ,
28+ from_type : DataType ,
29+ to_type : DataType ,
30+ } ,
31+
2432 UnexpectedNumberOfColumns {
2533 expected : usize ,
2634 found : usize ,
@@ -35,6 +43,18 @@ impl std::fmt::Display for Error {
3543 Error :: UnableToConvertRecordBatch { source } => {
3644 write ! ( f, "Unable to convert record batch: {source}" )
3745 }
46+ Error :: UnableToCastColumn {
47+ source,
48+ column_index,
49+ column_name,
50+ from_type,
51+ to_type,
52+ } => {
53+ write ! (
54+ f,
55+ "Unable to cast column {column_index} '{column_name}' from {from_type} to {to_type}: {source}"
56+ )
57+ }
3858 Error :: UnexpectedNumberOfColumns { expected, found } => {
3959 write ! (
4060 f,
@@ -70,73 +90,81 @@ pub fn try_cast_to(record_batch: RecordBatch, expected_schema: SchemaRef) -> Res
7090 . enumerate ( )
7191 . map ( |( i, expected_field) | {
7292 let record_batch_col = record_batch. column ( i) ;
93+ let from_type = record_batch_col. data_type ( ) . clone ( ) ;
94+ let to_type = expected_field. data_type ( ) . clone ( ) ;
95+ let make_err = |e| Error :: UnableToCastColumn {
96+ source : e,
97+ column_index : i,
98+ column_name : expected_field. name ( ) . clone ( ) ,
99+ from_type : from_type. clone ( ) ,
100+ to_type : to_type. clone ( ) ,
101+ } ;
73102
74103 match ( record_batch_col. data_type ( ) , expected_field. data_type ( ) ) {
75104 ( DataType :: Utf8 , DataType :: List ( item_type) ) => {
76105 cast_string_to_list :: < i32 > ( & Arc :: clone ( record_batch_col) , item_type)
77- . map_err ( |e| Error :: UnableToConvertRecordBatch { source : e } )
106+ . map_err ( make_err )
78107 }
79108 ( DataType :: Utf8 , DataType :: LargeList ( item_type) ) => {
80109 cast_string_to_large_list :: < i32 > ( & Arc :: clone ( record_batch_col) , item_type)
81- . map_err ( |e| Error :: UnableToConvertRecordBatch { source : e } )
110+ . map_err ( make_err )
82111 }
83112 ( DataType :: Utf8 , DataType :: FixedSizeList ( item_type, value_length) ) => {
84113 cast_string_to_fixed_size_list :: < i32 > (
85114 & Arc :: clone ( record_batch_col) ,
86115 item_type,
87116 * value_length,
88117 )
89- . map_err ( |e| Error :: UnableToConvertRecordBatch { source : e } )
118+ . map_err ( make_err )
90119 }
91120 ( DataType :: Utf8 , DataType :: Struct ( _) ) => cast_string_to_struct :: < i32 > (
92121 & Arc :: clone ( record_batch_col) ,
93122 expected_field. clone ( ) ,
94123 )
95- . map_err ( |e| Error :: UnableToConvertRecordBatch { source : e } ) ,
124+ . map_err ( make_err ) ,
96125 ( DataType :: LargeUtf8 , DataType :: List ( item_type) ) => {
97126 cast_string_to_list :: < i64 > ( & Arc :: clone ( record_batch_col) , item_type)
98- . map_err ( |e| Error :: UnableToConvertRecordBatch { source : e } )
127+ . map_err ( make_err )
99128 }
100129 ( DataType :: LargeUtf8 , DataType :: LargeList ( item_type) ) => {
101130 cast_string_to_large_list :: < i64 > ( & Arc :: clone ( record_batch_col) , item_type)
102- . map_err ( |e| Error :: UnableToConvertRecordBatch { source : e } )
131+ . map_err ( make_err )
103132 }
104133 ( DataType :: LargeUtf8 , DataType :: FixedSizeList ( item_type, value_length) ) => {
105134 cast_string_to_fixed_size_list :: < i64 > (
106135 & Arc :: clone ( record_batch_col) ,
107136 item_type,
108137 * value_length,
109138 )
110- . map_err ( |e| Error :: UnableToConvertRecordBatch { source : e } )
139+ . map_err ( make_err )
111140 }
112141 ( DataType :: LargeUtf8 , DataType :: Struct ( _) ) => cast_string_to_struct :: < i64 > (
113142 & Arc :: clone ( record_batch_col) ,
114143 expected_field. clone ( ) ,
115144 )
116- . map_err ( |e| Error :: UnableToConvertRecordBatch { source : e } ) ,
145+ . map_err ( make_err ) ,
117146 (
118147 DataType :: Interval ( IntervalUnit :: MonthDayNano ) ,
119148 DataType :: Interval ( IntervalUnit :: YearMonth ) ,
120149 ) => cast_interval_monthdaynano_to_yearmonth ( & Arc :: clone ( record_batch_col) )
121- . map_err ( |e| Error :: UnableToConvertRecordBatch { source : e } ) ,
150+ . map_err ( make_err ) ,
122151 (
123152 DataType :: Interval ( IntervalUnit :: MonthDayNano ) ,
124153 DataType :: Interval ( IntervalUnit :: DayTime ) ,
125154 ) => cast_interval_monthdaynano_to_daytime ( & Arc :: clone ( record_batch_col) )
126- . map_err ( |e| Error :: UnableToConvertRecordBatch { source : e } ) ,
155+ . map_err ( make_err ) ,
127156 _ => cast ( & Arc :: clone ( record_batch_col) , expected_field. data_type ( ) )
128- . map_err ( |e| Error :: UnableToConvertRecordBatch { source : e } ) ,
157+ . map_err ( make_err ) ,
129158 }
130159 } )
131160 . collect :: < Result < Vec < Arc < dyn Array > > > > ( )
132- . map_err ( |e | {
161+ . inspect_err ( |_ | {
133162 tracing:: debug!(
134163 actual_schema = ?actual_schema,
135164 expected_schema = ?expected_schema,
136165 "Cast error in try_cast_to"
137166 ) ;
138167 tracing:: trace!( record_batch = ?record_batch, "Record batch contents" ) ;
139- e
140168 } ) ?;
141169
142170 RecordBatch :: try_new ( expected_schema. clone ( ) , cols) . map_err ( |e| {
0 commit comments