Skip to content

Commit c4946a6

Browse files
authored
Improve cli table layout (spiceai#10725)
* Improve cli table layout * Fix snapshots
1 parent 9acb874 commit c4946a6

5 files changed

Lines changed: 1891 additions & 1822 deletions

crates/repl/src/pretty.rs

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@ limitations under the License.
1717
//! Pretty printing utilities for Arrow `RecordBatch`es with data type display.
1818
1919
use arrow::array::RecordBatch;
20-
use arrow::datatypes::{DataType, SchemaRef, TimeUnit};
20+
use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit};
2121
use arrow::error::ArrowError;
2222
use arrow::util::pretty::pretty_format_batches;
23+
use std::sync::Arc;
2324

2425
/// Formats Arrow `RecordBatch`es with data types displayed below column names.
2526
///
@@ -32,12 +33,50 @@ pub fn format_batches_with_types(batches: &[RecordBatch]) -> Result<String, Arro
3233
}
3334

3435
let schema = batches[0].schema();
35-
let formatted = pretty_format_batches(batches)?;
36+
let type_strings: Vec<String> = schema
37+
.fields()
38+
.iter()
39+
.map(|f| format_data_type(f.data_type()))
40+
.collect();
41+
42+
// Pad column names so Arrow allocates enough width to fit the type strings.
43+
let padded_batches = with_padded_names_for_types(batches, &type_strings)?;
44+
let formatted = pretty_format_batches(&padded_batches)?;
3645
let output = formatted.to_string();
3746

3847
Ok(insert_type_row(&output, &schema))
3948
}
4049

50+
/// Returns copies of `batches` whose schema has each column name padded with
51+
/// trailing spaces so that `pretty_format_batches` will allocate a column width
52+
/// at least as wide as the corresponding type string. The original schema
53+
/// (with unpadded names) is used for the displayed header row.
54+
fn with_padded_names_for_types(
55+
batches: &[RecordBatch],
56+
type_strings: &[String],
57+
) -> Result<Vec<RecordBatch>, ArrowError> {
58+
let schema = batches[0].schema();
59+
let fields: Vec<Field> = schema
60+
.fields()
61+
.iter()
62+
.zip(type_strings.iter())
63+
.map(|(field, type_str)| {
64+
let name = field.name();
65+
let padded_name = if name.len() < type_str.len() {
66+
format!("{name:<width$}", width = type_str.len())
67+
} else {
68+
name.clone()
69+
};
70+
Field::new(padded_name, field.data_type().clone(), field.is_nullable())
71+
})
72+
.collect();
73+
let padded_schema = Arc::new(Schema::new(fields));
74+
batches
75+
.iter()
76+
.map(|batch| RecordBatch::try_new(Arc::clone(&padded_schema), batch.columns().to_vec()))
77+
.collect()
78+
}
79+
4180
/// Insert a type row after the header row in the formatted table,
4281
/// and center the column names.
4382
fn insert_type_row(formatted: &str, schema: &SchemaRef) -> String {
@@ -312,4 +351,34 @@ mod tests {
312351
let widths = parse_column_widths(separator);
313352
assert_eq!(widths, vec![4, 6, 2]);
314353
}
354+
355+
#[test]
fn test_type_wider_than_column_name() {
    // Column "id" is 2 characters wide while its type label "varchar" is 7,
    // so the rendered type row must still fit inside the cell.
    let fields = vec![
        Field::new("id", DataType::Utf8, true),
        Field::new("name", DataType::Utf8, true),
    ];
    let schema = Arc::new(Schema::new(fields));

    let id_column: arrow::array::ArrayRef =
        Arc::new(arrow::array::StringArray::from(vec![Some("1")]));
    let name_column: arrow::array::ArrayRef =
        Arc::new(arrow::array::StringArray::from(vec![Some("Alice")]));

    let batch = RecordBatch::try_new(Arc::clone(&schema), vec![id_column, name_column])
        .expect("creating test batch");

    let formatted = format_batches_with_types(&[batch]).expect("formatting should succeed");

    // Straight vertical borders imply that all rendered lines share one length.
    let rows: Vec<&str> = formatted.lines().collect();
    let expected_len = rows[0].len();
    for (i, row) in rows.iter().enumerate() {
        let actual_len = row.len();
        assert_eq!(
            actual_len, expected_len,
            "line {i} has different width than line 0:\n{formatted}"
        );
    }
}
315384
}

0 commit comments

Comments
 (0)