@@ -567,12 +567,21 @@ VariantValue::ObjectInfo VariantValue::getObjectInfo() const {
567
567
568
568
// Looks up `key` in this value: obtains the ObjectInfo via getObjectInfo() and
// forwards to the getObjectValueByKey(key, info) overload, returning its result.
// NOTE(review): the lines marked `-` below drop the explicit
// VariantBasicType::Object check that threw ParquetException. Presumably
// getObjectInfo() rejects non-object values itself -- confirm before merging.
std::optional<VariantValue> VariantValue::getObjectValueByKey (
569
569
std::string_view key) const {
570
- if (getBasicType () != VariantBasicType::Object) {
571
- throw ParquetException (" Not an object type" );
572
- }
573
-
574
570
ObjectInfo info = getObjectInfo ();
575
571
572
+ return getObjectValueByKey (key, info);
573
+ }
574
+
575
+ std::optional<VariantValue> VariantValue::getObjectValueByKey (
576
+ std::string_view key, const VariantValue::ObjectInfo& info) const {
577
+ // TODO(mwish): Currently we just linear search here. The best way here is:
578
+ // 1. check the num_elements
579
+ // 2.1. If the element number is less than 8 (or some other small threshold), we can keep
580
+ // current method.
581
+ // 2.2. If the element number is larger than 8, and metadata.sorted_strings is true,
582
+ // we can first apply binary search on the metadata, and then binary search the
583
+ // field id.
584
+
576
585
for (uint32_t i = 0 ; i < info.num_elements ; ++i) {
577
586
std::string_view field_key;
578
587
std::optional<VariantValue> field_value = getObjectFieldByFieldId (i, &field_key);
@@ -692,18 +701,20 @@ VariantValue::ArrayInfo VariantValue::getArrayInfo() const {
692
701
next_offset = arrow::bit_util::FromLittleEndian (next_offset);
693
702
694
703
if (offset > next_offset) {
695
- throw ParquetException (" Invalid array value: offsets not monotonically increasing" );
704
+ throw ParquetException (
705
+ " Invalid array value: offsets not monotonically increasing: " +
706
+ std::to_string (offset) + " > " + std::to_string (next_offset));
696
707
}
697
708
}
698
709
699
710
return info;
700
711
}
701
712
702
- VariantValue VariantValue::getArrayValueByIndex (uint32_t index) const {
703
- ArrayInfo info = getArrayInfo ();
704
-
713
+ VariantValue VariantValue::getArrayValueByIndex (uint32_t index,
714
+ const ArrayInfo& info) const {
705
715
if (index >= info.num_elements ) {
706
- throw ParquetException (" Array index out of range" );
716
+ throw ParquetException (" Array index out of range: " + std::to_string (index ) +
717
+ " >= " + std::to_string (info.num_elements ));
707
718
}
708
719
709
720
// Read the offset and next offset
@@ -725,4 +736,9 @@ VariantValue VariantValue::getArrayValueByIndex(uint32_t index) const {
725
736
return element_value;
726
737
}
727
738
739
// Single-argument overload: obtains the ArrayInfo via getArrayInfo() and
// forwards to getArrayValueByIndex(index, info), which throws ParquetException
// when index >= info.num_elements.
// getArrayInfo() is invoked on every call; presumably callers reading several
// elements are expected to fetch the ArrayInfo once and use the two-argument
// overload instead -- confirm against the header.
+ VariantValue VariantValue::getArrayValueByIndex (uint32_t index) const {
740
+ ArrayInfo info = getArrayInfo ();
741
+ return getArrayValueByIndex (index , info);
742
+ }
743
+
728
744
} // namespace parquet::variant
0 commit comments