Skip to content

Commit 831c4d9

Browse files
Krishna Paifacebook-github-bot
authored andcommitted
fix(json): Bug with duplicate keys leading to incorrect writes (facebookincubator#13445)
Summary: Currently when we cast json to row's if we encounter a duplicate key, we throw, however this throw isnt caught in the json cast row calling function , instead the throw is subsumed by the applyToselectedNoThrow call. This causes a problem because the previous write operation is not cancelled and is still valid, often leading to invalid data being read. This diff fix this bug. This bug is very evident when the key duplicated has a string value , it might be harder to replicate if the key duplicated is against a complex type , however the fix should work for all types. Reviewed By: kevinwilfong Differential Revision: D75251964
1 parent bbf1f34 commit 831c4d9

File tree

2 files changed

+40
-15
lines changed

2 files changed

+40
-15
lines changed

velox/functions/prestosql/tests/JsonCastTest.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1347,6 +1347,27 @@ TEST_F(JsonCastTest, toRowDuplicateKey) {
13471347
expected->setNull(2, true);
13481348

13491349
testCast(data, expected, true /*try_cast*/);
1350+
1351+
// Duplicate keys with strings
1352+
jsonStrings = {
1353+
R"({"c0": "abc", "c1": "xyz", "c0": "mno"})",
1354+
R"({"c0": "123", "c1": "hjk"})",
1355+
};
1356+
1357+
expected = makeRowVector({
1358+
makeFlatVector<std::string>({"", "123"}),
1359+
makeFlatVector<std::string>({"", "hjk"}),
1360+
});
1361+
expected->setNull(0, true);
1362+
1363+
data = makeNullableFlatVector<std::string>(jsonStrings, JSON());
1364+
testCast(data, expected, true);
1365+
1366+
testThrow<std::string>(
1367+
JSON(),
1368+
ROW({"c0", "c1"}, {VARCHAR(), VARCHAR()}),
1369+
jsonStrings,
1370+
"Duplicate field: c0");
13501371
}
13511372

13521373
TEST_F(JsonCastTest, toRow) {

velox/functions/prestosql/types/JsonCastOperator.cpp

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1229,21 +1229,25 @@ void JsonCastOperator::castFromJson(
12291229
maxSize = std::max(maxSize, input.size());
12301230
});
12311231
paddedInput_.resize(maxSize + simdjson::SIMDJSON_PADDING);
1232-
context.applyToSelectedNoThrow(rows, [&](auto row) {
1233-
writer.setOffset(row);
1234-
if (inputVector->isNullAt(row)) {
1235-
writer.commitNull();
1236-
return;
1237-
}
1238-
auto& input = inputVector->valueAt(row);
1239-
memcpy(paddedInput_.data(), input.data(), input.size());
1240-
simdjson::padded_string_view paddedInput(
1241-
paddedInput_.data(), input.size(), paddedInput_.size());
1242-
if (auto error = castFromJsonOneRow<kind>(paddedInput, writer)) {
1243-
context.setVeloxExceptionError(row, errors_[error]);
1244-
writer.commitNull();
1245-
}
1246-
});
1232+
context.applyToSelectedNoThrow(
1233+
rows,
1234+
[&](auto row) INLINE_LAMBDA {
1235+
writer.setOffset(row);
1236+
if (inputVector->isNullAt(row)) {
1237+
writer.commitNull();
1238+
return;
1239+
}
1240+
auto& input = inputVector->valueAt(row);
1241+
memcpy(paddedInput_.data(), input.data(), input.size());
1242+
simdjson::padded_string_view paddedInput(
1243+
paddedInput_.data(), input.size(), paddedInput_.size());
1244+
if (auto error = castFromJsonOneRow<kind>(paddedInput, writer)) {
1245+
context.setVeloxExceptionError(row, errors_[error]);
1246+
writer.commitNull();
1247+
}
1248+
},
1249+
[&](vector_size_t row) INLINE_LAMBDA { writer.commitNull(); });
1250+
12471251
writer.finish();
12481252
}
12491253

0 commit comments

Comments
 (0)