Skip to content

GH-46439: [C++] Address post-merge review comments in PR exposing {Array,...}FromJSON helpers in public API #46447

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions cpp/examples/arrow/from_json_string_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,15 @@ arrow::Status RunExample() {
"[[11, 22], null, [null, 33]]"));

// ChunkedArrayFromJSONString
std::shared_ptr<arrow::ChunkedArray> chunked_array;
ARROW_RETURN_NOT_OK(ChunkedArrayFromJSONString(
arrow::int32(), {"[5, 10]", "[null]", "[16]"}, &chunked_array));
ARROW_ASSIGN_OR_RAISE(
auto chunked_array,
ChunkedArrayFromJSONString(arrow::int32(), {"[5, 10]", "[null]", "[16]"}));

// DictArrayFromJSONString
std::shared_ptr<arrow::Array> dict_array;
ARROW_RETURN_NOT_OK(DictArrayFromJSONString(
dictionary(arrow::int32(), arrow::utf8()), "[0, 1, 0, 2, 0, 3]",
R"(["k1", "k2", "k3", "k4"])", &dict_array));
ARROW_ASSIGN_OR_RAISE(
auto dict_array,
DictArrayFromJSONString(dictionary(arrow::int32(), arrow::utf8()),
"[0, 1, 0, 2, 0, 3]", R"(["k1", "k2", "k3", "k4"])"));

return arrow::Status::OK();
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/dataset/test_util_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -2140,8 +2140,8 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin {
actual_struct = std::dynamic_pointer_cast<Array>(struct_array);
}

auto expected_struct = arrow::ArrayFromJSON(
struct_(expected_physical_schema_->fields()), file_contents->second);
auto expected_struct = ArrayFromJSON(struct_(expected_physical_schema_->fields()),
file_contents->second);

AssertArraysEqual(*expected_struct, *actual_struct, /*verbose=*/true);
}
Expand Down
45 changes: 22 additions & 23 deletions cpp/src/arrow/json/from_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1003,23 +1003,22 @@ Result<std::shared_ptr<Array>> ArrayFromJSONString(const std::shared_ptr<DataTyp
return ArrayFromJSONString(type, std::string_view(json_string));
}

Status ChunkedArrayFromJSONString(const std::shared_ptr<DataType>& type,
const std::vector<std::string>& json_strings,
std::shared_ptr<ChunkedArray>* out) {
Result<std::shared_ptr<ChunkedArray>> ChunkedArrayFromJSONString(
const std::shared_ptr<DataType>& type, const std::vector<std::string>& json_strings) {
ArrayVector out_chunks;
out_chunks.reserve(json_strings.size());
for (const std::string& chunk_json : json_strings) {
out_chunks.emplace_back();
ARROW_ASSIGN_OR_RAISE(out_chunks.back(), ArrayFromJSONString(type, chunk_json));
}
*out = std::make_shared<ChunkedArray>(std::move(out_chunks), type);
return Status::OK();
std::shared_ptr<ChunkedArray> out =
std::make_shared<ChunkedArray>(std::move(out_chunks), type);
return out;
}

Status DictArrayFromJSONString(const std::shared_ptr<DataType>& type,
std::string_view indices_json,
std::string_view dictionary_json,
std::shared_ptr<Array>* out) {
Result<std::shared_ptr<Array>> DictArrayFromJSONString(
const std::shared_ptr<DataType>& type, std::string_view indices_json,
std::string_view dictionary_json) {
if (type->id() != Type::DICTIONARY) {
return Status::TypeError("DictArrayFromJSON requires dictionary type, got ", *type);
}
Expand All @@ -1030,13 +1029,13 @@ Status DictArrayFromJSONString(const std::shared_ptr<DataType>& type,
ArrayFromJSONString(dictionary_type.index_type(), indices_json));
ARROW_ASSIGN_OR_RAISE(auto dictionary, ArrayFromJSONString(dictionary_type.value_type(),
dictionary_json));

return DictionaryArray::FromArrays(type, std::move(indices), std::move(dictionary))
.Value(out);
ARROW_ASSIGN_OR_RAISE(auto out, DictionaryArray::FromArrays(type, std::move(indices),
std::move(dictionary)));
return out;
}

Status ScalarFromJSONString(const std::shared_ptr<DataType>& type,
std::string_view json_string, std::shared_ptr<Scalar>* out) {
Result<std::shared_ptr<Scalar>> ScalarFromJSONString(
const std::shared_ptr<DataType>& type, std::string_view json_string) {
std::shared_ptr<internal::Converter> converter;
RETURN_NOT_OK(GetConverter(type, &converter));

Expand All @@ -1051,28 +1050,28 @@ Status ScalarFromJSONString(const std::shared_ptr<DataType>& type,
RETURN_NOT_OK(converter->AppendValue(json_doc));
RETURN_NOT_OK(converter->Finish(&array));
DCHECK_EQ(array->length(), 1);
return array->GetScalar(0).Value(out);
ARROW_ASSIGN_OR_RAISE(auto out, array->GetScalar(0));
return out;
}

Status DictScalarFromJSONString(const std::shared_ptr<DataType>& type,
std::string_view index_json,
std::string_view dictionary_json,
std::shared_ptr<Scalar>* out) {
Result<std::shared_ptr<Scalar>> DictScalarFromJSONString(
const std::shared_ptr<DataType>& type, std::string_view index_json,
std::string_view dictionary_json) {
if (type->id() != Type::DICTIONARY) {
return Status::TypeError("DictScalarFromJSONString requires dictionary type, got ",
*type);
}

const auto& dictionary_type = checked_cast<const DictionaryType&>(*type);

std::shared_ptr<Scalar> index;
std::shared_ptr<Array> dictionary;
RETURN_NOT_OK(ScalarFromJSONString(dictionary_type.index_type(), index_json, &index));
ARROW_ASSIGN_OR_RAISE(auto index,
ScalarFromJSONString(dictionary_type.index_type(), index_json));
ARROW_ASSIGN_OR_RAISE(
dictionary, ArrayFromJSONString(dictionary_type.value_type(), dictionary_json));

*out = DictionaryScalar::Make(std::move(index), std::move(dictionary));
return Status::OK();
auto out = DictionaryScalar::Make(std::move(index), std::move(dictionary));
return out;
}

} // namespace json
Expand Down
43 changes: 19 additions & 24 deletions cpp/src/arrow/json/from_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,52 +68,47 @@ Result<std::shared_ptr<Array>> ArrayFromJSONString(const std::shared_ptr<DataTyp
/// \brief Create a ChunkedArray from a JSON string
///
/// \code {.cpp}
/// std::shared_ptr<ChunkedArray> chunked_array;
/// ChunkedArrayFromJSONString(
/// int64(), {R"([5, 10])", R"([null])", R"([16])"}, &chunked_array
/// );
/// std::shared_ptr<ChunkedArray> chunked_array =
/// ChunkedArrayFromJSONString(int64(), {R"([5, 10])", R"([null])", R"([16])"})
/// .ValueOrDie();
/// \endcode
ARROW_EXPORT
Status ChunkedArrayFromJSONString(const std::shared_ptr<DataType>& type,
const std::vector<std::string>& json_strings,
std::shared_ptr<ChunkedArray>* out);
Result<std::shared_ptr<ChunkedArray>> ChunkedArrayFromJSONString(
const std::shared_ptr<DataType>& type, const std::vector<std::string>& json_strings);

/// \brief Create a DictionaryArray from a JSON string
///
/// \code {.cpp}
/// std::shared_ptr<Array> array;
/// DictArrayFromJSONString(
/// std::shared_ptr<Array> dict_array =
/// DictArrayFromJSONString(
/// dictionary(int32(), utf8()),
/// "[0, 1, 0, 2, 0, 3]", R"(["k1", "k2", "k3", "k4"])",
/// &array
/// );
/// "[0, 1, 0, 2, 0, 3]", R"(["k1", "k2", "k3", "k4"])").ValueOrDie();
/// \endcode
ARROW_EXPORT
Status DictArrayFromJSONString(const std::shared_ptr<DataType>&,
std::string_view indices_json,
std::string_view dictionary_json,
std::shared_ptr<Array>* out);
ARROW_EXPORT
Result<std::shared_ptr<Array>> DictArrayFromJSONString(const std::shared_ptr<DataType>&,
std::string_view indices_json,
std::string_view dictionary_json);

/// \brief Create a Scalar from a JSON string
/// \code {.cpp}
/// std::shared_ptr<Scalar> scalar;
/// std::shared_ptr<Scalar> scalar =
/// ScalarFromJSONString(float64(), "42").ValueOrDie();
/// \endcode
ARROW_EXPORT
Status ScalarFromJSONString(const std::shared_ptr<DataType>&, std::string_view json,
std::shared_ptr<Scalar>* out);
Result<std::shared_ptr<Scalar>> ScalarFromJSONString(const std::shared_ptr<DataType>&,
std::string_view json);

/// \brief Create a DictionaryScalar from a JSON string
/// \code {.cpp}
/// std::shared_ptr<Scalar> scalar;
/// std::shared_ptr<Scalar> dict_scalar =
/// DictScalarFromJSONString(dictionary(int32(), utf8()), "3",
/// R"(["k1", "k2", "k3", "k4"])").ValueOrDie();
/// \endcode
ARROW_EXPORT
Status DictScalarFromJSONString(const std::shared_ptr<DataType>&,
std::string_view index_json,
std::string_view dictionary_json,
std::shared_ptr<Scalar>* out);
Result<std::shared_ptr<Scalar>> DictScalarFromJSONString(
const std::shared_ptr<DataType>&, std::string_view index_json,
std::string_view dictionary_json);

/// @}

Expand Down
60 changes: 28 additions & 32 deletions cpp/src/arrow/json/from_string_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,9 @@ template <typename T, typename C_TYPE = typename T::c_type>
void AssertJSONScalar(const std::shared_ptr<DataType>& type, const std::string& json,
const bool is_valid, const C_TYPE value) {
SCOPED_TRACE(json);
std::shared_ptr<Scalar> actual, expected;
std::shared_ptr<Scalar> expected;

ASSERT_OK(ScalarFromJSONString(type, json, &actual));
ASSERT_OK_AND_ASSIGN(auto actual, ScalarFromJSONString(type, json));
if (is_valid) {
ASSERT_OK_AND_ASSIGN(expected, MakeScalar(type, value));
} else {
Expand Down Expand Up @@ -1471,30 +1471,29 @@ TEST(TestDictArrayFromJSON, Basics) {

TEST(TestDictArrayFromJSON, Errors) {
auto type = dictionary(int32(), utf8());
std::shared_ptr<Array> array;

ASSERT_RAISES(Invalid, DictArrayFromJSONString(type, "[\"not a valid index\"]",
"[\"\"]", &array));
ASSERT_RAISES(Invalid, DictArrayFromJSONString(type, "[0, 1]", "[1]",
&array)); // dict value isn't string
ASSERT_RAISES(Invalid,
DictArrayFromJSONString(type, "[\"not a valid index\"]", "[\"\"]"));
ASSERT_RAISES(Invalid, DictArrayFromJSONString(type, "[0, 1]",
"[1]")); // dict value isn't string
}

TEST(TestChunkedArrayFromJSON, Basics) {
auto type = int32();
std::shared_ptr<ChunkedArray> chunked_array;
ASSERT_OK(ChunkedArrayFromJSONString(type, {}, &chunked_array));
ASSERT_OK_AND_ASSIGN(auto chunked_array, ChunkedArrayFromJSONString(type, {}));
ASSERT_OK(chunked_array->ValidateFull());
ASSERT_EQ(chunked_array->num_chunks(), 0);
AssertTypeEqual(type, chunked_array->type());

ASSERT_OK(ChunkedArrayFromJSONString(type, {"[1, 2]", "[3, null, 4]"}, &chunked_array));
ASSERT_OK(chunked_array->ValidateFull());
ASSERT_EQ(chunked_array->num_chunks(), 2);
ASSERT_OK_AND_ASSIGN(auto chunked_array_two,
ChunkedArrayFromJSONString(type, {"[1, 2]", "[3, null, 4]"}));
ASSERT_OK(chunked_array_two->ValidateFull());
ASSERT_EQ(chunked_array_two->num_chunks(), 2);
std::shared_ptr<Array> expected_chunk;
ASSERT_OK_AND_ASSIGN(expected_chunk, ArrayFromJSONString(type, "[1, 2]"));
AssertArraysEqual(*expected_chunk, *chunked_array->chunk(0), /*verbose=*/true);
AssertArraysEqual(*expected_chunk, *chunked_array_two->chunk(0), /*verbose=*/true);
ASSERT_OK_AND_ASSIGN(expected_chunk, ArrayFromJSONString(type, "[3, null, 4]"));
AssertArraysEqual(*expected_chunk, *chunked_array->chunk(1), /*verbose=*/true);
AssertArraysEqual(*expected_chunk, *chunked_array_two->chunk(1), /*verbose=*/true);
}

TEST(TestScalarFromJSON, Basics) {
Expand All @@ -1516,25 +1515,23 @@ TEST(TestScalarFromJSON, Basics) {
AssertJSONScalar<BooleanType, bool>(boolean(), "1", true, true);
AssertJSONScalar<DoubleType>(float64(), "1.0", true, 1.0);
AssertJSONScalar<DoubleType>(float64(), "-0.0", true, -0.0);
ASSERT_OK(ScalarFromJSONString(float64(), "NaN", &scalar));
ASSERT_OK(ScalarFromJSONString(float64(), "NaN"));
ASSERT_TRUE(std::isnan(checked_cast<DoubleScalar&>(*scalar).value));
ASSERT_OK(ScalarFromJSONString(float64(), "Inf", &scalar));
ASSERT_OK(ScalarFromJSONString(float64(), "Inf"));
ASSERT_TRUE(std::isinf(checked_cast<DoubleScalar&>(*scalar).value));
}

TEST(TestScalarFromJSON, Errors) {
std::shared_ptr<Scalar> scalar;
ASSERT_RAISES(Invalid, ScalarFromJSONString(int64(), "[0]", &scalar));
ASSERT_RAISES(Invalid, ScalarFromJSONString(int64(), "[9223372036854775808]", &scalar));
ASSERT_RAISES(Invalid,
ScalarFromJSONString(int64(), "[-9223372036854775809]", &scalar));
ASSERT_RAISES(Invalid,
ScalarFromJSONString(uint64(), "[18446744073709551616]", &scalar));
ASSERT_RAISES(Invalid, ScalarFromJSONString(uint64(), "[-1]", &scalar));
ASSERT_RAISES(Invalid, ScalarFromJSONString(binary(), "0", &scalar));
ASSERT_RAISES(Invalid, ScalarFromJSONString(binary(), "[]", &scalar));
ASSERT_RAISES(Invalid, ScalarFromJSONString(boolean(), "0.0", &scalar));
ASSERT_RAISES(Invalid, ScalarFromJSONString(boolean(), "\"true\"", &scalar));
ASSERT_RAISES(Invalid, ScalarFromJSONString(int64(), "[0]"));
ASSERT_RAISES(Invalid, ScalarFromJSONString(int64(), "[9223372036854775808]"));
ASSERT_RAISES(Invalid, ScalarFromJSONString(int64(), "[-9223372036854775809]"));
ASSERT_RAISES(Invalid, ScalarFromJSONString(uint64(), "[18446744073709551616]"));
ASSERT_RAISES(Invalid, ScalarFromJSONString(uint64(), "[-1]"));
ASSERT_RAISES(Invalid, ScalarFromJSONString(binary(), "0"));
ASSERT_RAISES(Invalid, ScalarFromJSONString(binary(), "[]"));
ASSERT_RAISES(Invalid, ScalarFromJSONString(boolean(), "0.0"));
ASSERT_RAISES(Invalid, ScalarFromJSONString(boolean(), "\"true\""));
}

TEST(TestDictScalarFromJSONString, Basics) {
Expand All @@ -1553,12 +1550,11 @@ TEST(TestDictScalarFromJSONString, Basics) {

TEST(TestDictScalarFromJSONString, Errors) {
auto type = dictionary(int32(), utf8());
std::shared_ptr<Scalar> scalar;

ASSERT_RAISES(Invalid, DictScalarFromJSONString(type, "\"not a valid index\"", "[\"\"]",
&scalar));
ASSERT_RAISES(Invalid, DictScalarFromJSONString(type, "0", "[1]",
&scalar)); // dict value isn't string
ASSERT_RAISES(Invalid,
DictScalarFromJSONString(type, "\"not a valid index\"", "[\"\"]"));
ASSERT_RAISES(Invalid,
DictScalarFromJSONString(type, "0", "[1]")); // dict value isn't string
}

} // namespace json
Expand Down
12 changes: 5 additions & 7 deletions cpp/src/arrow/testing/gtest_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -388,14 +388,13 @@ std::shared_ptr<Array> DictArrayFromJSON(const std::shared_ptr<DataType>& type,
std::string_view indices_json,
std::string_view dictionary_json) {
std::shared_ptr<Array> out;
ABORT_NOT_OK(json::DictArrayFromJSONString(type, indices_json, dictionary_json, &out));
ABORT_NOT_OK(json::DictArrayFromJSONString(type, indices_json, dictionary_json));
return out;
}

std::shared_ptr<ChunkedArray> ChunkedArrayFromJSON(const std::shared_ptr<DataType>& type,
const std::vector<std::string>& json) {
std::shared_ptr<ChunkedArray> out;
ABORT_NOT_OK(json::ChunkedArrayFromJSONString(type, json, &out));
EXPECT_OK_AND_ASSIGN(auto out, json::ChunkedArrayFromJSONString(type, json));
return out;
}

Expand All @@ -411,16 +410,15 @@ std::shared_ptr<RecordBatch> RecordBatchFromJSON(const std::shared_ptr<Schema>&

std::shared_ptr<Scalar> ScalarFromJSON(const std::shared_ptr<DataType>& type,
std::string_view json) {
std::shared_ptr<Scalar> out;
ABORT_NOT_OK(json::ScalarFromJSONString(type, json, &out));
EXPECT_OK_AND_ASSIGN(auto out, json::ScalarFromJSONString(type, json));
return out;
}

std::shared_ptr<Scalar> DictScalarFromJSON(const std::shared_ptr<DataType>& type,
std::string_view index_json,
std::string_view dictionary_json) {
std::shared_ptr<Scalar> out;
ABORT_NOT_OK(json::DictScalarFromJSONString(type, index_json, dictionary_json, &out));
EXPECT_OK_AND_ASSIGN(auto out,
json::DictScalarFromJSONString(type, index_json, dictionary_json));
return out;
}

Expand Down
13 changes: 6 additions & 7 deletions python/pyarrow/src/arrow/python/gdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -479,13 +479,12 @@ void TestSession() {
key_value_metadata({"key1", "key2", "key3"}, {"value1", "value2", "value3"}));

// Table
ChunkedArrayVector table_columns{2};
ARROW_CHECK_OK(
ChunkedArrayFromJSONString(int32(), {"[1, 2, 3]", "[4, 5]"}, &table_columns[0]));
ARROW_CHECK_OK(ChunkedArrayFromJSONString(
utf8(), {R"(["abc", null])", R"(["def"])", R"(["ghi", "jkl"])"},
&table_columns[1]));
auto table = Table::Make(batch_schema, table_columns);
ASSERT_OK_AND_ASSIGN(auto col1,
ChunkedArrayFromJSONString(int32(), {"[1, 2, 3]", "[4, 5]"}));
ASSERT_OK_AND_ASSIGN(
auto col2, ChunkedArrayFromJSONString(
utf8(), {R"(["abc", null])", R"(["def"])", R"(["ghi", "jkl"])"}));
auto table = Table::Make(batch_schema, {col1, col2});

// Datum
Datum empty_datum{};
Expand Down
Loading