44
55import com .fasterxml .jackson .annotation .JsonIgnoreProperties ;
66import com .fasterxml .jackson .annotation .JsonProperty ;
7+ import com .fasterxml .jackson .annotation .JsonSetter ;
8+ import com .fasterxml .jackson .annotation .Nulls ;
9+ import com .fasterxml .jackson .core .JsonParser ;
10+ import com .fasterxml .jackson .core .JsonToken ;
11+ import com .fasterxml .jackson .databind .DeserializationContext ;
12+ import com .fasterxml .jackson .databind .annotation .JsonDeserialize ;
13+ import com .fasterxml .jackson .databind .deser .std .StdDeserializer ;
14+ import java .io .IOException ;
715import java .util .Collections ;
816import java .util .List ;
917import java .util .Map ;
@@ -106,10 +114,16 @@ public static class SourceReport {
106114 @ Nullable
107115 private Map <String , Double > ingestionHighStageSeconds ;
108116
117+ @ JsonDeserialize (contentUsing = LogEntryDeserializer .class )
118+ @ JsonSetter (contentNulls = Nulls .SKIP )
109119 private List <LogEntry > warnings = Collections .emptyList ();
110120
121+ @ JsonDeserialize (contentUsing = LogEntryDeserializer .class )
122+ @ JsonSetter (contentNulls = Nulls .SKIP )
111123 private List <LogEntry > failures = Collections .emptyList ();
112124
125+ @ JsonDeserialize (contentUsing = LogEntryDeserializer .class )
126+ @ JsonSetter (contentNulls = Nulls .SKIP )
113127 private List <LogEntry > infos = Collections .emptyList ();
114128 }
115129
@@ -143,6 +157,8 @@ public static class SinkReport {
143157
144158 @ Nullable private String mode ;
145159
160+ @ JsonDeserialize (contentUsing = LogEntryDeserializer .class )
161+ @ JsonSetter (contentNulls = Nulls .SKIP )
146162 private List <LogEntry > failures = Collections .emptyList ();
147163 }
148164
@@ -160,4 +176,44 @@ public static class LogEntry {
160176 @ Nullable
161177 private String logCategory ;
162178 }
179+
180+ /**
181+ * Custom element deserializer for {@code List<LogEntry>} fields.
182+ *
183+ * <p>The Python ingestion framework's {@code LossyList} caps log-entry arrays at 10 items and
184+ * appends a plain-string sentinel (e.g. {@code "... sampled of 1246 total elements"}) as the 11th
185+ * element when the original list was larger. Without this deserializer, Jackson throws a {@code
186+ * MismatchedInputException} when it encounters that sentinel string while trying to construct a
187+ * {@link LogEntry} object.
188+ *
189+ * <p>Strategy: annotate every {@code List<LogEntry>} field with
190+ * {@code @JsonDeserialize(contentUsing = LogEntryDeserializer.class)}. When Jackson processes
191+ * each list element it dispatches here:
192+ *
193+ * <ul>
194+ * <li>String token → return {@code null}; the {@code @JsonSetter(contentNulls = Nulls.SKIP)}
195+ * on each annotated list field then leaves the sentinel out of the list entirely.
196+ * <li>Object token → delegate to standard bean deserialization via {@code ctxt.readValue(p,
197+ * LogEntry.class)}, which is safe because {@link LogEntry} itself does <em>not</em> carry
198+ * {@code @JsonDeserialize} — so there is no recursion.
199+ * </ul>
200+ */
201+ public static class LogEntryDeserializer extends StdDeserializer <LogEntry > {
202+
203+ public LogEntryDeserializer () {
204+ super (LogEntry .class );
205+ }
206+
207+ @ Override
208+ public LogEntry deserialize (JsonParser p , DeserializationContext ctxt ) throws IOException {
209+ if (p .currentToken () == JsonToken .VALUE_STRING ) {
210+ // Python LossyList sentinel string — return null so @JsonSetter(contentNulls = Nulls.SKIP)
211+ // excludes it from the list entirely. This keeps list.size() == real entry count.
212+ return null ;
213+ }
214+ // Standard bean deserialization. No recursion: LogEntry itself has no @JsonDeserialize,
215+ // so ctxt.readValue uses the default BeanDeserializer, not this class.
216+ return ctxt .readValue (p , LogEntry .class );
217+ }
218+ }
163219}
0 commit comments