Skip to content

Commit 3f66796

Browse files
Krishna Paifacebook-github-bot
authored andcommitted
feat(json): Add support for field_names_in_json_cast_enabled when casting rows to json (facebookincubator#13108)
Summary: This diff adds support for the field_names_in_json_cast_enabled session property. When enabled, casting a row to JSON produces a JSON object whose keys are the field names of the row's children, instead of a JSON array. For example: ``` presto:gen_ai> set session field_names_in_json_cast_enabled=true; SET SESSION presto:gen_ai> select cast(cast(row(1, array[1,2], 3.0, row('a', 5)) AS row(x int, y array<int>, z double, r row(a varchar, b int))) as json); _col0 ----------------------------------------------- {"r":{"a":"a","b":5},"x":1,"y":[1,2],"z":3.0} ``` Velox produces canonicalized JSON (object keys sorted by field name), even though Presto Java currently does not. Differential Revision: D73468351
1 parent 49d5cda commit 3f66796

File tree

3 files changed

+157
-11
lines changed

3 files changed

+157
-11
lines changed

velox/core/QueryConfig.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,11 @@ class QueryConfig {
542542
static constexpr const char* kStreamingAggregationEagerFlush =
543543
"streaming_aggregation_eager_flush";
544544

545+
/// Config key. When the value is true, casting a ROW to JSON emits a JSON
546+
/// object keyed by the row's field names. Read through
/// isFieldNamesInJsonCastEnabled(), which defaults to false.
547+
static constexpr const char* kFieldNamesInJsonCastEnabled =
548+
"field_names_in_json_cast_enabled";
549+
545550
bool selectiveNimbleReaderEnabled() const {
546551
return get<bool>(kSelectiveNimbleReaderEnabled, false);
547552
}
@@ -991,6 +996,10 @@ class QueryConfig {
991996
return get<bool>(kStreamingAggregationEagerFlush, false);
992997
}
993998

999+
/// Returns the value configured under kFieldNamesInJsonCastEnabled, or false
/// when the key is not set.
bool isFieldNamesInJsonCastEnabled() const {
1000+
return get<bool>(kFieldNamesInJsonCastEnabled, false);
1001+
}
1002+
9941003
template <typename T>
9951004
T get(const std::string& key, const T& defaultValue) const {
9961005
return config_->get<T>(key, defaultValue);

velox/functions/prestosql/tests/JsonCastTest.cpp

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,9 @@ class JsonCastTest : public functions::test::CastBaseTest {
8282
auto thirdChild =
8383
makeNullableFlatVector<TChild3>(child3, fromType->childAt(2));
8484

85-
auto rowVector = makeRowVector({firstChild, secondChild, thirdChild});
85+
auto names = fromType->asRow().names();
86+
auto rowVector =
87+
makeRowVector(names, {firstChild, secondChild, thirdChild});
8688
auto expectedVector =
8789
makeNullableFlatVector<JsonNativeType>(expected, JSON());
8890

@@ -215,6 +217,12 @@ class JsonCastTest : public functions::test::CastBaseTest {
215217

216218
return vector;
217219
}
220+
221+
// Test helper: overrides the query config of queryCtx_ so that
// kFieldNamesInJsonCastEnabled carries `flag`.
// NOTE(review): the override is passed a map holding only this key;
// presumably any other previously overridden keys are dropped - confirm
// against testingOverrideConfigUnsafe.
void setFieldNamesInJsonCast(bool flag) {
222+
queryCtx_->testingOverrideConfigUnsafe({
223+
// std::to_string on a bool yields "1" or "0".
{core::QueryConfig::kFieldNamesInJsonCastEnabled, std::to_string(flag)},
224+
});
225+
}
218226
};
219227

220228
TEST_F(JsonCastTest, fromInteger) {
@@ -734,6 +742,84 @@ TEST_F(JsonCastTest, fromRow) {
734742
testCast(allNullRow, allNullExpected);
735743
}
736744

745+
// Checks ROW -> JSON casts with kFieldNamesInJsonCastEnabled turned on: the
// expected output is a JSON object keyed by field name, with keys in sorted
// order at every nesting level.
TEST_F(JsonCastTest, fieldNamesInJsonCast) {
746+
// Turn the property on for the body of this test.
setFieldNamesInJsonCast(true);
747+
748+
// Scenario 1: flat row (a BIGINT, b VARCHAR, c DOUBLE) with nulls mixed in.
// A null child is expected to appear as `null` under its field name.
std::vector<std::optional<int64_t>> child1{
749+
std::nullopt, 2, 3, std::nullopt, 5};
750+
std::vector<std::optional<StringView>> child2{
751+
"red"_sv, std::nullopt, "blue"_sv, std::nullopt, "yellow"_sv};
752+
std::vector<std::optional<double>> child3{
753+
1.1, 2.2, std::nullopt, std::nullopt, 5.5};
754+
std::vector<std::optional<JsonNativeType>> expected{
755+
R"({"a":null,"b":"red","c":1.1})",
756+
R"({"a":2,"b":null,"c":2.2})",
757+
R"({"a":3,"b":"blue","c":null})",
758+
R"({"a":null,"b":null,"c":null})",
759+
R"({"a":5,"b":"yellow","c":5.5})"};
760+
761+
testCastFromRow<int64_t, StringView, double>(
762+
ROW({"a", "b", "c"}, {BIGINT(), VARCHAR(), DOUBLE()}),
763+
child1,
764+
child2,
765+
child3,
766+
expected);
767+
768+
// Scenario 2: row with an array child and a nested row child. Field names
// are declared out of order ("xyz", "abc", "mno" and "b", "a") so that the
// expected sorted output differs from declaration order.
769+
auto child1_1 = makeNullableFlatVector<int64_t>({3, 1, 2});
770+
auto child1_2 = makeArrayVectorFromJson<int64_t>({
771+
"[1, 2, 3]",
772+
"[4, 5]",
773+
"[6, 7, 8]",
774+
});
775+
776+
auto child1_3 = makeRowVector(
777+
{"b", "a"},
778+
{makeNullableFlatVector<int64_t>({5, 4, 3}),
779+
makeNullableFlatVector<int64_t>({1, 2, 3})});
780+
781+
auto rowVector =
782+
makeRowVector({"xyz", "abc", "mno"}, {child1_1, child1_2, child1_3});
783+
784+
// Expected canonicalized JSON: keys sorted in the outer and nested objects.
785+
auto expectedVector = makeNullableFlatVector<JsonNativeType>(
786+
{
787+
R"({"abc":[1,2,3],"mno":{"a":1,"b":5},"xyz":3})",
788+
R"({"abc":[4,5],"mno":{"a":2,"b":4},"xyz":1})",
789+
R"({"abc":[6,7,8],"mno":{"a":3,"b":3},"xyz":2})",
790+
},
791+
JSON());
792+
793+
testCast(rowVector, expectedVector);
794+
795+
// Scenario 3: row with a map child and null values. The expected output
// has the map's keys sorted as well (note "A" before "z").
796+
797+
auto child2_1 = makeNullableFlatVector<int64_t>({3, std::nullopt, 2});
798+
auto child2_2 = makeMapVector<std::string, int64_t>(
799+
{{{"x", 2}, {"a", 4}}, {{"y", 6}}, {{"z", 8}, {"A", 10}}});
800+
801+
auto child2_3 = makeRowVector(
802+
{"b", "a"},
803+
{makeNullableFlatVector<int64_t>({5, 4, 3}),
804+
makeNullableFlatVector<int64_t>({1, 2, std::nullopt})});
805+
806+
auto rowVector2 =
807+
makeRowVector({"xyz", "abc", "mno"}, {child2_1, child2_2, child2_3});
808+
809+
// Expected canonicalized JSON for the map scenario.
810+
auto expectedVector2 = makeNullableFlatVector<JsonNativeType>(
811+
{
812+
R"({"abc":{"a":4,"x":2},"mno":{"a":1,"b":5},"xyz":3})",
813+
R"({"abc":{"y":6},"mno":{"a":2,"b":4},"xyz":null})",
814+
R"({"abc":{"A":10,"z":8},"mno":{"a":null,"b":3},"xyz":2})",
815+
},
816+
JSON());
817+
818+
testCast(rowVector2, expectedVector2);
819+
820+
// Turn the property back off before the test ends.
setFieldNamesInJsonCast(false);
821+
}
822+
737823
TEST_F(JsonCastTest, fromNested) {
738824
// Create map of array vector.
739825
auto keyVector = makeNullableFlatVector<StringView>(

velox/functions/prestosql/types/JsonCastOperator.cpp

Lines changed: 61 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -237,8 +237,9 @@ struct AsJson {
237237
const SelectivityVector& rows,
238238
const BufferPtr& elementToTopLevelRows,
239239
const std::shared_ptr<exec::CastHooks>& hooks,
240-
bool isMapKey = false)
241-
: decoded_(context) {
240+
bool isMapKey = false,
241+
std::optional<std::string> fieldName = std::nullopt)
242+
: decoded_(context), fieldName_(std::move(fieldName)) {
242243
VELOX_CHECK(rows.hasSelections());
243244

244245
exec::EvalErrorsPtr oldErrors;
@@ -294,12 +295,20 @@ struct AsJson {
294295
// Null values are inlined as "null".
295296
return 4;
296297
} else {
298+
// If we have field names, we need to add the field name to the length.
299+
if (fieldName_.has_value()) {
300+
return fieldName_->size() + 3 + this->at(i).size();
301+
}
297302
return this->at(i).size();
298303
}
299304
}
300305

301306
// Appends the json string of the value at i to a string writer.
302307
void append(vector_size_t i, exec::StringWriter& proxy) const {
308+
if (fieldName_.has_value()) {
309+
proxy.append(fmt::format("\"{}\":", fieldName_.value()));
310+
}
311+
303312
if (decoded_->isNullAt(i)) {
304313
proxy.append("null");
305314
} else {
@@ -353,6 +362,7 @@ struct AsJson {
353362
exec::LocalDecodedVector decoded_;
354363
VectorPtr json_;
355364
const SimpleVector<StringView>* jsonStrings_;
365+
std::optional<std::string> fieldName_;
356366
};
357367

358368
void castToJsonFromArray(
@@ -537,26 +547,44 @@ void castToJsonFromRow(
537547
const SelectivityVector& rows,
538548
FlatVector<StringView>& flatResult,
539549
const std::shared_ptr<exec::CastHooks>& hooks) {
550+
using NameJsonPair = std::pair<std::string, AsJson>;
540551
// input is guaranteed to be in flat encoding when passed in.
541552
VELOX_CHECK_EQ(input.encoding(), VectorEncoding::Simple::ROW);
542553
auto inputRow = input.as<RowVector>();
543554
auto childrenSize = inputRow->childrenSize();
555+
auto fieldNamesInJsonCastEnabled = context.execCtx()
556+
->queryCtx()
557+
->queryConfig()
558+
.isFieldNamesInJsonCastEnabled();
544559

545560
// Estimates an upperbound of the total length of all Json strings for the
546561
// input according to the length of all children Json strings and the
547562
// delimiters to be added.
548563
size_t childrenStringSize = 0;
549-
std::vector<AsJson> childrenAsJson;
564+
std::vector<NameJsonPair> jsonChildren;
565+
550566
for (int i = 0; i < childrenSize; ++i) {
551-
childrenAsJson.emplace_back(
552-
context, inputRow->childAt(i), rows, nullptr, hooks);
567+
auto name = inputRow->type()->asRow().nameOf(i);
568+
std::optional<std::string> fieldName =
569+
fieldNamesInJsonCastEnabled ? std::optional{name} : std::nullopt;
570+
571+
jsonChildren.emplace_back(
572+
name,
573+
AsJson{
574+
context,
575+
inputRow->childAt(i),
576+
rows,
577+
nullptr,
578+
hooks,
579+
false,
580+
std::move(fieldName)});
553581

554582
context.applyToSelectedNoThrow(rows, [&](auto row) {
555583
if (inputRow->isNullAt(row)) {
556584
// "null" will be inlined in the StringView.
557585
return;
558586
}
559-
childrenStringSize += childrenAsJson[i].lengthAt(row);
587+
childrenStringSize += jsonChildren[i].second.lengthAt(row);
560588
});
561589
}
562590

@@ -565,6 +593,19 @@ void castToJsonFromRow(
565593
rows.countSelected() * (childrenSize > 0 ? childrenSize + 1 : 2);
566594
flatResult.getBufferWithSpace(childrenStringSize);
567595

596+
// Make sure to sort the children based on their field names if
597+
// fieldNamesInJsonCastEnabled is true. This is to make sure the output is
598+
// canonicalized.
599+
600+
if (fieldNamesInJsonCastEnabled) {
601+
std::sort(
602+
jsonChildren.begin(),
603+
jsonChildren.end(),
604+
[](const NameJsonPair& a, const NameJsonPair& b) {
605+
return a.first < b.first;
606+
});
607+
}
608+
568609
// Constructs Json string of each row from Json strings of its children.
569610
context.applyToSelectedNoThrow(rows, [&](auto row) {
570611
if (inputRow->isNullAt(row)) {
@@ -574,14 +615,24 @@ void castToJsonFromRow(
574615

575616
auto proxy = exec::StringWriter(&flatResult, row);
576617

577-
proxy.append("["_sv);
578-
for (int i = 0; i < childrenSize; ++i) {
618+
if (fieldNamesInJsonCastEnabled) {
619+
proxy.append("{"_sv);
620+
} else {
621+
proxy.append("["_sv);
622+
}
623+
624+
for (int i = 0; i < jsonChildren.size(); ++i) {
579625
if (i > 0) {
580626
proxy.append(","_sv);
581627
}
582-
childrenAsJson[i].append(row, proxy);
628+
jsonChildren[i].second.append(row, proxy);
629+
}
630+
631+
if (fieldNamesInJsonCastEnabled) {
632+
proxy.append("}"_sv);
633+
} else {
634+
proxy.append("]"_sv);
583635
}
584-
proxy.append("]"_sv);
585636

586637
proxy.finalize();
587638
});

0 commit comments

Comments
 (0)