Skip to content

Commit 5e789e8

Browse files
committed
Add support in from_pyarrow for tables that have the correct set of columns but in a different order
1 parent 81dbfc9 commit 5e789e8

File tree

2 files changed

+23
-0
lines changed

2 files changed

+23
-0
lines changed

quivr/tables.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,18 @@ def from_pyarrow(
196196
# Only metadata differs; replace without touching data buffers
197197
table = table.replace_schema_metadata(desired_schema.metadata)
198198
# else: already perfectly matches; keep as-is
199+
elif set(table.schema.names) == set(schema.names):
200+
# Columns are the same but ordered differently; reorder by name first
201+
reordered = table.select(list(schema.names))
202+
if reordered.schema.equals(schema, check_metadata=False):
203+
# Only metadata may differ after reorder
204+
if not reordered.schema.equals(desired_schema, check_metadata=True):
205+
table = reordered.replace_schema_metadata(desired_schema.metadata)
206+
else:
207+
table = reordered
208+
else:
209+
# Types/nullability differ; cast after reorder to aligned names
210+
table = reordered.cast(desired_schema)
199211
else:
200212
# Fallback: perform Arrow cast to coerce to the desired schema (may raise if names missing)
201213
table = table.cast(desired_schema)

test/test_tables.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2051,3 +2051,14 @@ def test_bench_from_pyarrow_needs_cast(benchmark, N):
20512051
table = pa.table({"x": x, "y": y})
20522052

20532053
benchmark(Pair.from_pyarrow, table)
2054+
2055+
2056+
@pytest.mark.parametrize("N", [100_000, 1_000_000, 10_000_000])
2057+
@pytest.mark.benchmark(group="from-pyarrow")
2058+
def test_bench_from_pyarrow_reorder_only(benchmark, N):
2059+
x = pa.array(np.random.randint(low=-10000, high=1000, size=N), type=pa.int64())
2060+
y = pa.array(np.random.randint(low=-10000, high=1000, size=N), type=pa.int64())
2061+
# Build with matching types but columns in opposite order
2062+
table = pa.table({"y": y, "x": x})
2063+
2064+
benchmark(Pair.from_pyarrow, table)

0 commit comments

Comments
 (0)