@@ -17,9 +17,10 @@ limitations under the License.
1717//! Pretty printing utilities for Arrow `RecordBatch`es with data type display.
1818
1919use arrow:: array:: RecordBatch ;
20- use arrow:: datatypes:: { DataType , SchemaRef , TimeUnit } ;
20+ use arrow:: datatypes:: { DataType , Field , Schema , SchemaRef , TimeUnit } ;
2121use arrow:: error:: ArrowError ;
2222use arrow:: util:: pretty:: pretty_format_batches;
23+ use std:: sync:: Arc ;
2324
2425/// Formats Arrow `RecordBatch`es with data types displayed below column names.
2526///
@@ -32,12 +33,50 @@ pub fn format_batches_with_types(batches: &[RecordBatch]) -> Result<String, Arro
3233 }
3334
3435 let schema = batches[ 0 ] . schema ( ) ;
35- let formatted = pretty_format_batches ( batches) ?;
36+ let type_strings: Vec < String > = schema
37+ . fields ( )
38+ . iter ( )
39+ . map ( |f| format_data_type ( f. data_type ( ) ) )
40+ . collect ( ) ;
41+
42+ // Pad column names so Arrow allocates enough width to fit the type strings.
43+ let padded_batches = with_padded_names_for_types ( batches, & type_strings) ?;
44+ let formatted = pretty_format_batches ( & padded_batches) ?;
3645 let output = formatted. to_string ( ) ;
3746
3847 Ok ( insert_type_row ( & output, & schema) )
3948}
4049
50+ /// Returns copies of `batches` whose schema has each column name padded with
51+ /// trailing spaces so that `pretty_format_batches` will allocate a column width
52+ /// at least as wide as the corresponding type string. The original schema
53+ /// (with unpadded names) is used for the displayed header row.
54+ fn with_padded_names_for_types (
55+ batches : & [ RecordBatch ] ,
56+ type_strings : & [ String ] ,
57+ ) -> Result < Vec < RecordBatch > , ArrowError > {
58+ let schema = batches[ 0 ] . schema ( ) ;
59+ let fields: Vec < Field > = schema
60+ . fields ( )
61+ . iter ( )
62+ . zip ( type_strings. iter ( ) )
63+ . map ( |( field, type_str) | {
64+ let name = field. name ( ) ;
65+ let padded_name = if name. len ( ) < type_str. len ( ) {
66+ format ! ( "{name:<width$}" , width = type_str. len( ) )
67+ } else {
68+ name. clone ( )
69+ } ;
70+ Field :: new ( padded_name, field. data_type ( ) . clone ( ) , field. is_nullable ( ) )
71+ } )
72+ . collect ( ) ;
73+ let padded_schema = Arc :: new ( Schema :: new ( fields) ) ;
74+ batches
75+ . iter ( )
76+ . map ( |batch| RecordBatch :: try_new ( Arc :: clone ( & padded_schema) , batch. columns ( ) . to_vec ( ) ) )
77+ . collect ( )
78+ }
79+
4180/// Insert a type row after the header row in the formatted table,
4281/// and center the column names.
4382fn insert_type_row ( formatted : & str , schema : & SchemaRef ) -> String {
@@ -312,4 +351,34 @@ mod tests {
312351 let widths = parse_column_widths ( separator) ;
313352 assert_eq ! ( widths, vec![ 4 , 6 , 2 ] ) ;
314353 }
354+
#[test]
fn test_type_wider_than_column_name() {
    // Regression check: the column "id" (2 chars) carries the type label
    // "varchar" (7 chars), so the type row is wider than the name and must
    // still fit inside its cell.
    let fields = vec![
        Field::new("id", DataType::Utf8, true),
        Field::new("name", DataType::Utf8, true),
    ];
    let schema = Arc::new(Schema::new(fields));
    let ids = arrow::array::StringArray::from(vec![Some("1")]);
    let names = arrow::array::StringArray::from(vec![Some("Alice")]);
    let batch = RecordBatch::try_new(Arc::clone(&schema), vec![Arc::new(ids), Arc::new(names)])
        .expect("creating test batch");

    let rendered = format_batches_with_types(&[batch]).expect("formatting should succeed");

    // A well-formed table has straight borders, i.e. every rendered line is
    // exactly as long as the first one.
    let rows: Vec<&str> = rendered.lines().collect();
    let expected_width = rows[0].len();
    for (idx, row) in rows.iter().enumerate() {
        assert_eq!(
            row.len(),
            expected_width,
            "line {i} has different width than line 0:\n {formatted}",
            i = idx,
            formatted = rendered
        );
    }
}
315384}
0 commit comments