Skip to content

Commit 00773c5

Browse files
author
Rafał Hibner
committed
Fix list of optionals
1 parent c155e99 commit 00773c5

File tree

3 files changed: 159 additions and 53 deletions

cpp/src/arrow/stl.h

Lines changed: 11 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -48,33 +48,12 @@ namespace stl {
4848

4949
namespace internal {
5050

51-
template <typename T, typename = void>
52-
struct is_optional_like : public std::false_type {};
53-
54-
template <typename T, typename = void>
55-
struct is_dereferencable : public std::false_type {};
56-
57-
template <typename T>
58-
struct is_dereferencable<T, arrow::internal::void_t<decltype(*std::declval<T>())>>
59-
: public std::true_type {};
60-
61-
template <typename T>
62-
struct is_optional_like<
63-
T, typename std::enable_if<
64-
std::is_constructible<bool, T>::value && is_dereferencable<T>::value &&
65-
!std::is_array<typename std::remove_reference<T>::type>::value>::type>
66-
: public std::true_type {};
67-
6851
template <size_t N, typename Tuple>
6952
using BareTupleElement =
7053
typename std::decay<typename std::tuple_element<N, Tuple>::type>::type;
7154

7255
} // namespace internal
7356

74-
template <typename T, typename R = void>
75-
using enable_if_optional_like =
76-
typename std::enable_if<internal::is_optional_like<T>::value, R>::type;
77-
7857
/// Traits meta class to map standard C/C++ types to equivalent Arrow types.
7958
template <typename T, typename Enable = void>
8059
struct ConversionTraits {};
@@ -232,6 +211,13 @@ struct ConversionTraits<Optional, enable_if_optional_like<Optional>>
232211
return builder.AppendNull();
233212
}
234213
}
214+
static Optional GetEntry(const typename TypeTraits<ArrowType>::ArrayType& array,
215+
size_t j) {
216+
if (array.IsValid(j)) {
217+
return ConversionTraits<OptionalInnerType>::GetEntry(array, j);
218+
}
219+
return {};
220+
}
235221
};
236222

237223
/// Build an arrow::Schema based upon the types defined in a std::tuple-like structure.
@@ -252,7 +238,8 @@ struct SchemaFromTuple {
252238
std::vector<std::shared_ptr<Field>> ret =
253239
SchemaFromTuple<Tuple, N - 1>::MakeSchemaRecursion(names);
254240
auto type = ConversionTraits<Element>::type_singleton();
255-
ret.push_back(field(names[N - 1], type, internal::is_optional_like<Element>::value));
241+
ret.push_back(
242+
field(names[N - 1], type, arrow::internal::is_optional_like<Element>::value));
256243
return ret;
257244
}
258245

@@ -283,8 +270,8 @@ struct SchemaFromTuple {
283270
std::vector<std::shared_ptr<Field>> ret =
284271
SchemaFromTuple<Tuple, N - 1>::MakeSchemaRecursionT(names);
285272
std::shared_ptr<DataType> type = ConversionTraits<Element>::type_singleton();
286-
ret.push_back(
287-
field(get<N - 1>(names), type, internal::is_optional_like<Element>::value));
273+
ret.push_back(field(get<N - 1>(names), type,
274+
arrow::internal::is_optional_like<Element>::value));
288275
return ret;
289276
}
290277

cpp/src/arrow/stl_test.cc

Lines changed: 54 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -176,15 +176,17 @@ TEST(TestSchemaFromTuple, PrimitiveTypesTuple) {
176176
}
177177

178178
TEST(TestSchemaFromTuple, SimpleList) {
179-
Schema expected_schema({field("column1", list(utf8()), false)});
179+
Schema expected_schema({field("column1", list(field("item", utf8(), false)), false)});
180180
std::shared_ptr<Schema> schema =
181181
SchemaFromTuple<std::tuple<std::vector<std::string>>>::MakeSchema({"column1"});
182182

183183
ASSERT_TRUE(expected_schema.Equals(*schema));
184184
}
185185

186186
TEST(TestSchemaFromTuple, NestedList) {
187-
Schema expected_schema({field("column1", list(list(boolean())), false)});
187+
Schema expected_schema(
188+
{field("column1", list(field("item", list(field("item", boolean(), false)), false)),
189+
false)});
188190
std::shared_ptr<Schema> schema =
189191
SchemaFromTuple<std::tuple<std::vector<std::vector<bool>>>>::MakeSchema(
190192
{"column1"});
@@ -230,10 +232,10 @@ TEST(TestTableFromTupleVector, PrimitiveTypes) {
230232
TEST(TestTableFromTupleVector, ListType) {
231233
using tuple_type = std::tuple<std::vector<int64_t>>;
232234

233-
auto expected_schema =
234-
std::make_shared<Schema>(FieldVector{field("column1", list(int64()), false)});
235+
auto expected_schema = std::make_shared<Schema>(
236+
FieldVector{field("column1", list(field("item", int64(), false)), false)});
235237
std::shared_ptr<Array> expected_array =
236-
ArrayFromJSON(list(int64()), "[[1, 1, 2, 34], [2, -4]]");
238+
ArrayFromJSON(list(field("item", int64(), false)), "[[1, 1, 2, 34], [2, -4]]");
237239
std::shared_ptr<Table> expected_table = Table::Make(expected_schema, {expected_array});
238240

239241
std::vector<tuple_type> rows{tuple_type(std::vector<int64_t>{1, 1, 2, 34}),
@@ -248,10 +250,11 @@ TEST(TestTableFromTupleVector, ListType) {
248250
TEST(TestTableFromTupleVector, FixedSizeListType) {
249251
using tuple_type = std::tuple<std::array<int64_t, 4>>;
250252

251-
auto expected_schema = std::make_shared<Schema>(
252-
FieldVector{field("column1", fixed_size_list(int64(), 4), false)});
253+
auto expected_schema = std::make_shared<Schema>(FieldVector{
254+
field("column1", fixed_size_list(field("item", int64(), false), 4), false)});
253255
std::shared_ptr<Array> expected_array =
254-
ArrayFromJSON(fixed_size_list(int64(), 4), "[[1, 1, 2, 34], [2, -4, 1, 1]]");
256+
ArrayFromJSON(fixed_size_list(field("item", int64(), false), 4),
257+
"[[1, 1, 2, 34], [2, -4, 1, 1]]");
255258
std::shared_ptr<Table> expected_table = Table::Make(expected_schema, {expected_array});
256259

257260
std::vector<tuple_type> rows{tuple_type(std::array<int64_t, 4>{1, 1, 2, 34}),
@@ -418,9 +421,9 @@ TEST(TestTableFromTupleVector, AppendingMultipleRows) {
418421
ASSERT_OK(TableFromTupleRange(default_memory_pool(), rows, names, &table));
419422

420423
std::shared_ptr<Schema> expected_schema =
421-
schema({field("column1", list(int32()), false)});
424+
schema({field("column1", list(field("item", int32(), false)), false)});
422425
std::shared_ptr<Array> int_array =
423-
ArrayFromJSON(list(int32()), "[[1, 2, 3], [10, 20, 30]]");
426+
ArrayFromJSON(list(field("item", int32(), false)), "[[1, 2, 3], [10, 20, 30]]");
424427
auto expected_table = Table::Make(expected_schema, {int_array});
425428

426429
ASSERT_TRUE(expected_table->Equals(*table));
@@ -488,6 +491,26 @@ TEST(TestTupleVectorFromTable, ListType) {
488491
ASSERT_EQ(rows, expected_rows);
489492
}
490493

494+
TEST(TestTupleVectorFromTable, ListOptionalType) {
495+
using tuple_type = std::tuple<std::vector<std::optional<int64_t>>>;
496+
497+
compute::ExecContext ctx;
498+
compute::CastOptions cast_options;
499+
auto expected_schema =
500+
std::make_shared<Schema>(FieldVector{field("column1", list(int64()), false)});
501+
std::shared_ptr<Array> expected_array =
502+
ArrayFromJSON(list(int64()), "[[1, null, 2, null], [null, -4]]");
503+
std::shared_ptr<Table> table = Table::Make(expected_schema, {expected_array});
504+
505+
std::vector<tuple_type> expected_rows{
506+
tuple_type(std::vector<std::optional<int64_t>>{1, std::nullopt, 2, std::nullopt}),
507+
tuple_type(std::vector<std::optional<int64_t>>{std::nullopt, -4})};
508+
509+
std::vector<tuple_type> rows(2);
510+
ASSERT_OK(TupleRangeFromTable(*table, cast_options, &ctx, &rows));
511+
ASSERT_EQ(rows, expected_rows);
512+
}
513+
491514
TEST(TestTupleVectorFromTable, FixedSizeListType) {
492515
using tuple_type = std::tuple<std::array<int64_t, 4>>;
493516

@@ -508,6 +531,27 @@ TEST(TestTupleVectorFromTable, FixedSizeListType) {
508531
ASSERT_EQ(rows, expected_rows);
509532
}
510533

534+
TEST(TestTupleVectorFromTable, FixedSizeListOptionalType) {
535+
using tuple_type = std::tuple<std::array<std::optional<int64_t>, 4>>;
536+
537+
compute::ExecContext ctx;
538+
compute::CastOptions cast_options;
539+
auto expected_schema = std::make_shared<Schema>(
540+
FieldVector{field("column1", fixed_size_list(int64(), 4), false)});
541+
std::shared_ptr<Array> expected_array =
542+
ArrayFromJSON(fixed_size_list(int64(), 4), "[[1, null, 2, 34], [2, -4, null, 1]]");
543+
std::shared_ptr<Table> table = Table::Make(expected_schema, {expected_array});
544+
ASSERT_OK(table->ValidateFull());
545+
546+
std::vector<tuple_type> expected_rows{
547+
tuple_type(std::array<std::optional<int64_t>, 4>{1, std::nullopt, 2, 34}),
548+
tuple_type(std::array<std::optional<int64_t>, 4>{2, -4, std::nullopt, 1})};
549+
550+
std::vector<tuple_type> rows(2);
551+
ASSERT_OK(TupleRangeFromTable(*table, cast_options, &ctx, &rows));
552+
ASSERT_EQ(rows, expected_rows);
553+
}
554+
511555
TEST(TestTupleVectorFromTable, CastingNeeded) {
512556
using tuple_type = std::tuple<std::vector<int64_t>>;
513557

cpp/src/arrow/type_traits.h

Lines changed: 94 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,44 @@
2727

2828
namespace arrow {
2929

30+
namespace internal {
31+
32+
template <typename... Ts>
33+
struct make_void {
34+
using type = void;
35+
};
36+
37+
template <typename... Ts>
38+
using void_t = typename make_void<Ts...>::type;
39+
40+
template <typename T, typename = void>
41+
struct is_optional_like : public std::false_type {};
42+
43+
template <typename T, typename = void>
44+
struct is_dereferencable : public std::false_type {};
45+
46+
template <typename T>
47+
struct is_dereferencable<T, arrow::internal::void_t<decltype(*std::declval<T>())>>
48+
: public std::true_type {};
49+
50+
template <typename T>
51+
struct is_optional_like<
52+
T, typename std::enable_if<
53+
std::is_constructible<bool, T>::value && is_dereferencable<T>::value &&
54+
!std::is_array<typename std::remove_reference<T>::type>::value>::type>
55+
: public std::true_type {};
56+
57+
template <typename T, typename Enable = void>
58+
struct has_type_singleton : std::false_type {};
59+
template <typename T>
60+
struct has_type_singleton<T, void_t<decltype(T::type_singleton)>> : std::true_type {};
61+
62+
} // namespace internal
63+
64+
template <typename T, typename R = void>
65+
using enable_if_optional_like =
66+
typename std::enable_if<internal::is_optional_like<T>::value, R>::type;
67+
3068
//
3169
// Per-type id type lookup
3270
//
@@ -98,7 +136,7 @@ struct TypeTraits {};
98136

99137
/// \brief Base template for type traits of C++ types
100138
/// \tparam T A standard C++ type
101-
template <typename T>
139+
template <typename T, typename Enable = void>
102140
struct CTypeTraits {};
103141

104142
/// \addtogroup type-traits
@@ -535,26 +573,75 @@ struct TypeTraits<FixedSizeListType> {
535573
};
536574
/// @}
537575

576+
namespace internal {
577+
578+
template <typename T, typename Enable = void>
579+
struct nested_has_type_singleton_impl : std::false_type {};
580+
template <typename T>
581+
struct nested_has_type_singleton_impl<T, std::enable_if_t<is_optional_like<T>::value>>
582+
: has_type_singleton<
583+
CTypeTraits<typename std::decay<decltype(*std::declval<T>())>::type>> {};
584+
template <typename T>
585+
struct nested_has_type_singleton_impl<T, std::enable_if_t<!is_optional_like<T>::value>>
586+
: has_type_singleton<CTypeTraits<T>> {};
587+
588+
} // namespace internal
589+
590+
template <typename T, typename R = void>
591+
using nested_has_type_singleton = typename internal::nested_has_type_singleton_impl<T, R>;
592+
538593
/// \addtogroup c-type-traits
539594
template <typename CType>
540-
struct CTypeTraits<std::vector<CType>> : public TypeTraits<ListType> {
595+
struct CTypeTraits<std::vector<CType>,
596+
std::enable_if_t<nested_has_type_singleton<CType>::value>>
597+
: public TypeTraits<ListType> {
541598
using ArrowType = ListType;
542-
543-
static inline std::shared_ptr<DataType> type_singleton() {
544-
return list(CTypeTraits<CType>::type_singleton());
599+
static auto type_singleton() {
600+
if constexpr (internal::is_optional_like<CType>::value) {
601+
using OptionalInnerType =
602+
typename std::decay<decltype(*std::declval<CType>())>::type;
603+
return list(CTypeTraits<OptionalInnerType>::type_singleton());
604+
} else {
605+
return list(field("item", CTypeTraits<CType>::type_singleton(), false));
606+
}
545607
}
546608
};
547609

610+
/// \addtogroup c-type-traits
611+
template <typename CType>
612+
struct CTypeTraits<std::vector<CType>,
613+
std::enable_if_t<!nested_has_type_singleton<CType>::value>>
614+
: public TypeTraits<ListType> {
615+
using ArrowType = ListType;
616+
};
617+
548618
/// \addtogroup c-type-traits
549619
template <typename CType, std::size_t N>
550-
struct CTypeTraits<std::array<CType, N>> : public TypeTraits<FixedSizeListType> {
620+
struct CTypeTraits<std::array<CType, N>,
621+
std::enable_if_t<nested_has_type_singleton<CType>::value>>
622+
: public TypeTraits<FixedSizeListType> {
551623
using ArrowType = FixedSizeListType;
552624

553625
static auto type_singleton() {
554-
return fixed_size_list(CTypeTraits<CType>::type_singleton(), N);
626+
if constexpr (internal::is_optional_like<CType>::value) {
627+
using OptionalInnerType =
628+
typename std::decay<decltype(*std::declval<CType>())>::type;
629+
return fixed_size_list(CTypeTraits<OptionalInnerType>::type_singleton(), N);
630+
} else {
631+
return fixed_size_list(field("item", CTypeTraits<CType>::type_singleton(), false),
632+
N);
633+
}
555634
}
556635
};
557636

637+
/// \addtogroup c-type-traits
638+
template <typename CType, std::size_t N>
639+
struct CTypeTraits<std::array<CType, N>,
640+
std::enable_if_t<!nested_has_type_singleton<CType>::value>>
641+
: public TypeTraits<FixedSizeListType> {
642+
using ArrowType = FixedSizeListType;
643+
};
644+
558645
/// \addtogroup type-traits
559646
/// @{
560647
template <>
@@ -596,18 +683,6 @@ struct TypeTraits<ExtensionType> {
596683
};
597684
/// @}
598685

599-
namespace internal {
600-
601-
template <typename... Ts>
602-
struct make_void {
603-
using type = void;
604-
};
605-
606-
template <typename... Ts>
607-
using void_t = typename make_void<Ts...>::type;
608-
609-
} // namespace internal
610-
611686
//
612687
// Useful type predicates
613688
//

0 commit comments

Comments
 (0)