Skip to content

Commit 09a51bf

Browse files
committed
Preserve Legacy Behavior for Duplicate Fields in Nested Rows
This change makes it so that nested rows keep only the first of duplicate fields. This is different from top-level struct fields and maps, which both keep the last value. I'd like to have a SerDe property to manage this behavior but don't want to add any more complexity at this point.
1 parent f44e4dc commit 09a51bf

File tree

2 files changed

+22
-20
lines changed

2 files changed

+22
-20
lines changed

Diff for: lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderFactory.java

+2-5
Original file line numberDiff line numberDiff line change
@@ -235,13 +235,10 @@ private void decode(IonReader ionReader, IntFunction<BlockBuilder> blockSelector
235235
continue;
236236
}
237237
final BlockBuilder blockBuilder = blockSelector.apply(fieldIndex);
238-
if (encountered[fieldIndex]) {
239-
blockBuilder.resetTo(blockBuilder.getPositionCount() - 1);
240-
}
241-
else {
238+
if (!encountered[fieldIndex]) {
239+
fieldDecoders.get(fieldIndex).decode(ionReader, blockBuilder);
242240
encountered[fieldIndex] = true;
243241
}
244-
fieldDecoders.get(fieldIndex).decode(ionReader, blockBuilder);
245242
}
246243

247244
for (int i = 0; i < encountered.length; i++) {

Diff for: lib/trino-hive-formats/src/test/java/io/trino/hive/formats/ion/TestIonFormat.java

+20-15
Original file line numberDiff line numberDiff line change
@@ -292,20 +292,6 @@ public void testStructWithNullAndMissingValues()
292292
Arrays.asList(null, null));
293293
}
294294

295-
@Test
296-
public void testStructWithDuplicateKeys()
297-
throws IOException
298-
{
299-
// this test is not making a value judgement; capturing the last
300-
// is not necessarily the "right" behavior. the test just
301-
// documents what the behavior is, which is based on the behavior
302-
// of the hive serde, and is consistent with the trino json parser.
303-
assertValues(
304-
RowType.rowType(field("foo", INTEGER)),
305-
"{ foo: 17, foo: 31, foo: 53 } { foo: 67 }",
306-
List.of(53), List.of(67));
307-
}
308-
309295
@Test
310296
public void testNestedList()
311297
throws IOException
@@ -330,6 +316,25 @@ public void testNestedStruct()
330316
List.of(List.of("Woody", "Guthrie")));
331317
}
332318

319+
/**
320+
* The Ion Hive SerDe captures the last value for fields with
321+
* duplicate keys. There is different behavior for nested Rows,
322+
* which you can see below.
323+
*/
324+
@Test
325+
public void testTopLevelStructWithDuplicateKeys()
326+
throws IOException
327+
{
328+
assertValues(
329+
RowType.rowType(field("foo", INTEGER)),
330+
"{ foo: 17, foo: 31, foo: 53 } { foo: 67 }",
331+
List.of(53), List.of(67));
332+
}
333+
334+
/**
335+
* The Ion Hive SerDe captures the first value for duplicate fields in
336+
* nested Rows, so that is what we default to here.
337+
*/
333338
@Test
334339
public void testNestedStructWithDuplicateAndMissingKeys()
335340
throws IOException
@@ -340,7 +345,7 @@ public void testNestedStructWithDuplicateAndMissingKeys()
340345
field("first", VARCHAR),
341346
field("last", VARCHAR)))),
342347
"""
343-
{ name: { last: Godfrey, last: Guthrie } }
348+
{ name: { last: Guthrie, last: Godfrey } }
344349
{ name: { first: Joan, last: Baez } }
345350
""",
346351
List.of(Arrays.asList(null, "Guthrie")),

0 commit comments

Comments
 (0)