Skip to content

Commit 9363d69

Browse files
feat: add arrow map alias support (#31)
Signed-off-by: Luke Kim <80174+lukekim@users.noreply.github.com> Co-authored-by: Luke Kim <80174+lukekim@users.noreply.github.com>
1 parent b5da5b6 commit 9363d69

5 files changed

Lines changed: 140 additions & 4 deletions

File tree

vortex-array/src/arrays/extension/compute/cast.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,8 @@ fn convert_temporal_value(value: i64, multiply: i64, divide: i64) -> VortexResul
175175
vortex_bail!(ComputeError: "Date value {value} overflows target timestamp range");
176176
}
177177

178-
Ok(scaled as i64)
178+
i64::try_from(scaled)
179+
.map_err(|_| vortex_error::vortex_err!(ComputeError: "Date value {value} overflows target timestamp range"))
179180
}
180181

181182
register_kernel!(CastKernelAdapter(ExtensionVTable).lift());
@@ -291,7 +292,10 @@ mod tests {
291292
let storage = output.storage().to_primitive();
292293
assert_eq!(storage.scalar_at(0).as_primitive().as_::<i64>(), Some(0));
293294
assert!(storage.scalar_at(1).is_null());
294-
assert_eq!(storage.scalar_at(2).as_primitive().as_::<i64>(), Some(172_800));
295+
assert_eq!(
296+
storage.scalar_at(2).as_primitive().as_::<i64>(),
297+
Some(172_800)
298+
);
295299
}
296300

297301
#[test]

vortex-array/src/arrow/convert.rs

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use arrow_array::GenericByteArray;
1313
use arrow_array::GenericByteViewArray;
1414
use arrow_array::GenericListArray;
1515
use arrow_array::GenericListViewArray;
16+
use arrow_array::MapArray as ArrowMapArray;
1617
use arrow_array::NullArray as ArrowNullArray;
1718
use arrow_array::OffsetSizeTrait;
1819
use arrow_array::PrimitiveArray as ArrowPrimitiveArray;
@@ -447,6 +448,20 @@ impl FromArrowArray<&ArrowFixedSizeListArray> for ArrayRef {
447448
}
448449
}
449450

451+
impl FromArrowArray<&ArrowMapArray> for ArrayRef {
452+
fn from_arrow(value: &ArrowMapArray, nullable: bool) -> Self {
453+
// Arrow Map is logically List<Struct<key, value>> with i32 offsets.
454+
// We convert it to a ListArray of structs.
455+
let entries = Self::from_arrow(value.entries() as &dyn ArrowArray, false);
456+
let offsets = value.offsets().clone().into_array();
457+
let nulls = nulls(value.nulls(), nullable);
458+
459+
ListArray::try_new(entries, offsets, nulls)
460+
.vortex_expect("Failed to convert Arrow MapArray to Vortex ListArray")
461+
.into_array()
462+
}
463+
}
464+
450465
impl FromArrowArray<&ArrowNullArray> for ArrayRef {
451466
fn from_arrow(value: &ArrowNullArray, nullable: bool) -> Self {
452467
assert!(nullable);
@@ -508,6 +523,7 @@ impl FromArrowArray<&dyn ArrowArray> for ArrayRef {
508523
DataType::ListView(_) => Self::from_arrow(array.as_list_view::<i32>(), nullable),
509524
DataType::LargeListView(_) => Self::from_arrow(array.as_list_view::<i64>(), nullable),
510525
DataType::FixedSizeList(..) => Self::from_arrow(array.as_fixed_size_list(), nullable),
526+
DataType::Map(..) => Self::from_arrow(array.as_map(), nullable),
511527
DataType::Null => Self::from_arrow(as_null_array(array), nullable),
512528
DataType::Timestamp(u, _) => match u {
513529
ArrowTimeUnit::Second => {
@@ -675,6 +691,7 @@ mod tests {
675691
use crate::arrays::VarBinVTable;
676692
use crate::arrays::VarBinViewVTable;
677693
use crate::arrow::FromArrowArray as _;
694+
use crate::arrow::executor::ArrowArrayExecutor as _;
678695

679696
// Test primitive array conversions
680697
#[test]
@@ -1757,4 +1774,54 @@ mod tests {
17571774

17581775
ArrayRef::from_arrow(null_struct_array_with_non_nullable_field.as_ref(), true);
17591776
}
1777+
1778+
#[test]
fn test_map_array_conversion() {
    use arrow_array::MapArray;
    use arrow_array::builder::MapBuilder;
    use arrow_array::builder::StringBuilder;

    // Rows of a map<string, int32> column: {"a": 1, "b": 2}, null, {"c": 3}.
    let first_row: &[(&str, i32)] = &[("a", 1), ("b", 2)];
    let third_row: &[(&str, i32)] = &[("c", 3)];
    let rows: [Option<&[(&str, i32)]>; 3] = [Some(first_row), None, Some(third_row)];

    let mut map_builder = MapBuilder::new(None, StringBuilder::new(), Int32Builder::new());
    for row in rows {
        // A null row contributes no key/value pairs, only an invalid slot.
        if let Some(pairs) = row {
            for &(key, val) in pairs {
                map_builder.keys().append_value(key);
                map_builder.values().append_value(val);
            }
        }
        map_builder.append(row.is_some()).unwrap();
    }

    let arrow_map = map_builder.finish();
    assert_eq!(arrow_map.len(), 3);

    // Arrow MapArray -> Vortex ListArray.
    let vortex_array = ArrayRef::from_arrow(&arrow_map, true);
    assert_eq!(vortex_array.len(), 3);

    // The Vortex side should be a list whose elements are two-field structs.
    let as_list = vortex_array.as_::<ListVTable>();
    assert_eq!(as_list.elements().len(), 3); // 3 total key-value pairs
    let as_struct = as_list.elements().as_::<StructVTable>();
    assert_eq!(as_struct.names().len(), 2); // key and value fields

    // Round-trip back to Arrow, requesting the original Map data type.
    let target_type = arrow_map.data_type().clone();
    let round_tripped = vortex_array
        .execute_arrow(&target_type, &crate::LEGACY_SESSION)
        .unwrap();
    let map_back = round_tripped
        .as_any()
        .downcast_ref::<MapArray>()
        .expect("Should be a MapArray");

    assert_eq!(map_back.len(), 3);
    assert_eq!(map_back.entries().len(), 3);
    assert!(map_back.is_null(1)); // Second entry was null
}
17601827
}
vortex-array/src/arrow/executor/map.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use std::sync::Arc;
5+
6+
use arrow_array::ArrayRef as ArrowArrayRef;
7+
use arrow_array::MapArray as ArrowMapArray;
8+
use arrow_array::StructArray as ArrowStructArray;
9+
use arrow_schema::FieldRef;
10+
use vortex_error::VortexResult;
11+
use vortex_error::vortex_bail;
12+
use vortex_session::VortexSession;
13+
14+
use crate::ArrayRef;
15+
use crate::arrow::executor::list::to_arrow_list;
16+
17+
/// Convert a Vortex List<Struct<key, value>> array into an Arrow MapArray.
18+
pub(super) fn to_arrow_map(
19+
array: ArrayRef,
20+
entries_field: &FieldRef,
21+
ordered: bool,
22+
session: &VortexSession,
23+
) -> VortexResult<ArrowArrayRef> {
24+
// First, convert to Arrow ListArray<i32> since Map uses i32 offsets.
25+
let list_array = to_arrow_list::<i32>(array, entries_field, session)?;
26+
27+
// Downcast to GenericListArray<i32> to extract its components.
28+
let Some(list_array) = list_array
29+
.as_any()
30+
.downcast_ref::<arrow_array::GenericListArray<i32>>()
31+
else {
32+
vortex_bail!("to_arrow_list returned a non-ListArray when building a MapArray");
33+
};
34+
35+
// Extract components from the ListArray.
36+
let (_list_field, offsets, entries, nulls) = list_array.clone().into_parts();
37+
38+
// The entries should be a StructArray. Downcast it.
39+
let Some(entries_struct) = entries.as_any().downcast_ref::<ArrowStructArray>() else {
40+
vortex_bail!("Map entries must be a StructArray");
41+
};
42+
let entries_struct = entries_struct.clone();
43+
44+
// Build the MapArray from the components.
45+
let map_array = ArrowMapArray::try_new(
46+
entries_field.clone(),
47+
offsets,
48+
entries_struct,
49+
nulls,
50+
ordered,
51+
)?;
52+
53+
Ok(Arc::new(map_array))
54+
}

vortex-array/src/arrow/executor/mod.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ mod dictionary;
99
mod fixed_size_list;
1010
mod list;
1111
mod list_view;
12+
mod map;
1213
mod null;
1314
mod primitive;
1415
mod run_end;
@@ -28,7 +29,6 @@ use vortex_error::vortex_bail;
2829
use vortex_error::vortex_ensure;
2930
use vortex_session::VortexSession;
3031

31-
use crate::Array;
3232
use crate::ArrayRef;
3333
use crate::arrow::executor::bool::to_arrow_bool;
3434
use crate::arrow::executor::byte::to_arrow_byte_array;
@@ -38,6 +38,7 @@ use crate::arrow::executor::dictionary::to_arrow_dictionary;
3838
use crate::arrow::executor::fixed_size_list::to_arrow_fixed_list;
3939
use crate::arrow::executor::list::to_arrow_list;
4040
use crate::arrow::executor::list_view::to_arrow_list_view;
41+
use crate::arrow::executor::map::to_arrow_map;
4142
use crate::arrow::executor::null::to_arrow_null;
4243
use crate::arrow::executor::primitive::to_arrow_primitive;
4344
use crate::arrow::executor::run_end::to_arrow_run_end;
@@ -138,8 +139,10 @@ impl ArrowArrayExecutor for ArrayRef {
138139
DataType::RunEndEncoded(ends_type, values_type) => {
139140
to_arrow_run_end(self, ends_type.data_type(), values_type, session)
140141
}
142+
DataType::Map(entries_field, ordered) => {
143+
to_arrow_map(self, entries_field, *ordered, session)
144+
}
141145
DataType::FixedSizeBinary(_)
142-
| DataType::Map(..)
143146
| DataType::Duration(_)
144147
| DataType::Interval(_)
145148
| DataType::Union(..) => {

vortex-dtype/src/arrow.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,14 @@ impl FromArrowType<(&DataType, Nullability)> for DType {
148148
| DataType::LargeListView(e) => {
149149
DType::List(Arc::new(Self::from_arrow(e.as_ref())), nullability)
150150
}
151+
DataType::Map(entries_field, _ordered) => {
152+
// Map is logically List<Struct<key, value>>.
153+
// The entries_field contains a Struct type with the key and value fields.
154+
DType::List(
155+
Arc::new(Self::from_arrow(entries_field.as_ref())),
156+
nullability,
157+
)
158+
}
151159
DataType::FixedSizeList(e, size) => DType::FixedSizeList(
152160
Arc::new(Self::from_arrow(e.as_ref())),
153161
*size as u32,

0 commit comments

Comments
 (0)