@@ -105,78 +105,160 @@ public void initializeAndWriteRecords(TableName tableName, List<ConvertedRecord>
105
105
logger .debug ("Sending {} records to write Api Application stream {}" , rows .size (), streamName );
106
106
RecordBatches <ConvertedRecord > batches = new RecordBatches <>(rows );
107
107
StreamWriter writer = streamWriter (tableName , streamName , rows );
108
- do {
109
- try {
110
- List <ConvertedRecord > batch = batches .currentBatch ();
111
- JSONArray jsonRecords = getJsonRecords (batch );
112
- logger .trace ("Sending records to Storage API writer for batch load" );
113
- ApiFuture <AppendRowsResponse > response = writer .appendRows (jsonRecords );
114
- AppendRowsResponse writeResult = response .get ();
115
- logger .trace ("Received response from Storage API writer batch" );
116
-
117
- if (writeResult .hasUpdatedSchema ()) {
118
- logger .warn ("Sent records schema does not match with table schema, will attempt to update schema" );
119
- if (!canAttemptSchemaUpdate ()) {
120
- throw new BigQueryStorageWriteApiConnectException ("Connector is not configured to perform schema updates." );
108
+ while (!batches .completed ()) {
109
+ List <ConvertedRecord > batch = batches .currentBatch ();
110
+
111
+ while (!batch .isEmpty ()) {
112
+ try {
113
+ writeBatch (writer , batch , retryHandler , tableName );
114
+ batch = Collections .emptyList (); // Can't do batch.clear(); it'll mess with the batch tracking logic in RecordBatches
115
+ } catch (RetryException e ) {
116
+ retryHandler .maybeRetry ("write to table " + tableName );
117
+ if (e .getMessage () != null ) {
118
+ logger .warn (e .getMessage () + " Retry attempt " + retryHandler .getAttempt ());
121
119
}
122
- retryHandler .attemptTableOperation (schemaManager ::updateSchema );
123
- } else if (writeResult .hasError ()) {
124
- Status errorStatus = writeResult .getError ();
125
- String errorMessage = String .format ("Failed to write rows on table %s due to %s" , tableName , writeResult .getError ().getMessage ());
126
- retryHandler .setMostRecentException (new BigQueryStorageWriteApiConnectException (errorMessage ));
127
- if (BigQueryStorageWriteApiErrorResponses .isMalformedRequest (errorMessage )) {
128
- rows = maybeHandleDlqRoutingAndFilterRecords (rows , convertToMap (writeResult .getRowErrorsList ()), tableName .getTable ());
129
- if (rows .isEmpty ()) {
130
- writer .onSuccess ();
131
- return ;
120
+ } catch (BatchTooLargeException e ) {
121
+ if (batch .size () <= 1 ) {
122
+ Map <Integer , String > rowErrorMapping = Collections .singletonMap (
123
+ 0 , e .getMessage ()
124
+ );
125
+ batch = maybeHandleDlqRoutingAndFilterRecords (batch , rowErrorMapping , tableName .getTable ());
126
+ if (!batch .isEmpty ()) {
127
+ retryHandler .maybeRetry ("write to table " + tableName );
132
128
}
133
- } else if (!BigQueryStorageWriteApiErrorResponses .isRetriableError (errorStatus .getMessage ())) {
134
- failTask (retryHandler .getMostRecentException ());
129
+ } else {
130
+ int previousSize = batch .size ();
131
+ batches .reduceBatchSize ();
132
+ batch = batches .currentBatch ();
133
+ logger .debug ("Reducing batch size for table {} from {} to {}" , tableName , previousSize , batch .size ());
135
134
}
136
- logger .warn (errorMessage + " Retry attempt " + retryHandler .getAttempt ());
137
- } else {
138
- if (!writeResult .hasAppendResult ()) {
139
- logger .warn (
140
- "Write result did not report any errors, but also did not succeed. "
141
- + "This may be indicative of a bug in the BigQuery Java client library or back end; "
142
- + "please report it to the maintainers of the connector to investigate."
143
- );
135
+ } catch (MalformedRowsException e ) {
136
+ batch = maybeHandleDlqRoutingAndFilterRecords (batch , e .getRowErrorMapping (), tableName .getTable ());
137
+ if (!batch .isEmpty ()) {
138
+ // TODO: Does this actually make sense? Should we count this as part of our retry logic?
139
+ // As long as we're guaranteed that the number of rows in the batch is decreasing, it
140
+ // may make sense to skip the maybeRetry invocation
141
+ retryHandler .maybeRetry ("write to table " + tableName );
144
142
}
145
- logger .trace ("Append call completed successfully on stream {}" , streamName );
146
- writer .onSuccess ();
147
- return ;
148
143
}
149
- } catch (BigQueryStorageWriteApiConnectException exception ) {
150
- throw exception ;
151
- } catch (Exception e ) {
152
- String message = e .getMessage ();
153
- String errorMessage = String .format ("Failed to write rows on table %s due to %s" , tableName , message );
154
- retryHandler .setMostRecentException (new BigQueryStorageWriteApiConnectException (errorMessage , e ));
155
-
156
- if (shouldHandleSchemaMismatch (e )) {
157
- logger .warn ("Sent records schema does not match with table schema, will attempt to update schema" );
158
- retryHandler .attemptTableOperation (schemaManager ::updateSchema );
159
- } else if (BigQueryStorageWriteApiErrorResponses .isMalformedRequest (message )) {
160
- rows = maybeHandleDlqRoutingAndFilterRecords (rows , getRowErrorMapping (e ), tableName .getTable ());
161
- if (rows .isEmpty ()) {
162
- writer .onSuccess ();
163
- return ;
164
- }
165
- } else if (BigQueryStorageWriteApiErrorResponses .isStreamClosed (message )) {
166
- writer .refresh ();
167
- } else if (BigQueryStorageWriteApiErrorResponses .isTableMissing (message ) && getAutoCreateTables ()) {
168
- retryHandler .attemptTableOperation (schemaManager ::createTable );
169
- } else if (!BigQueryStorageWriteApiErrorResponses .isRetriableError (e .getMessage ())
170
- && BigQueryStorageWriteApiErrorResponses .isNonRetriableStorageError (e )
171
- ) {
144
+ }
145
+
146
+ batches .advanceToNextBatch ();
147
+ }
148
+
149
+ writer .onSuccess ();
150
+ }
151
+
152
+ private void writeBatch (
153
+ StreamWriter writer ,
154
+ List <ConvertedRecord > batch ,
155
+ StorageWriteApiRetryHandler retryHandler ,
156
+ TableName tableName
157
+ ) throws BatchTooLargeException , MalformedRowsException , RetryException {
158
+ try {
159
+ JSONArray jsonRecords = getJsonRecords (batch );
160
+ logger .trace ("Sending records to Storage API writer for batch load" );
161
+ ApiFuture <AppendRowsResponse > response = writer .appendRows (jsonRecords );
162
+ AppendRowsResponse writeResult = response .get ();
163
+ logger .trace ("Received response from Storage API writer batch" );
164
+
165
+ if (writeResult .hasUpdatedSchema ()) {
166
+ logger .warn ("Sent records schema does not match with table schema, will attempt to update schema" );
167
+ if (!canAttemptSchemaUpdate ()) {
168
+ throw new BigQueryStorageWriteApiConnectException ("Connector is not configured to perform schema updates." );
169
+ }
170
+ retryHandler .attemptTableOperation (schemaManager ::updateSchema );
171
+ throw new RetryException ();
172
+ } else if (writeResult .hasError ()) {
173
+ Status errorStatus = writeResult .getError ();
174
+ String errorMessage = String .format ("Failed to write rows on table %s due to %s" , tableName , writeResult .getError ().getMessage ());
175
+ retryHandler .setMostRecentException (new BigQueryStorageWriteApiConnectException (errorMessage ));
176
+ if (BigQueryStorageWriteApiErrorResponses .isMalformedRequest (errorMessage )) {
177
+ throw new MalformedRowsException (convertToMap (writeResult .getRowErrorsList ()));
178
+ } else if (!BigQueryStorageWriteApiErrorResponses .isRetriableError (errorStatus .getMessage ())) {
172
179
failTask (retryHandler .getMostRecentException ());
173
180
}
174
- logger .warn (errorMessage + " Retry attempt " + retryHandler .getAttempt ());
181
+ throw new RetryException (errorMessage );
182
+ } else {
183
+ if (!writeResult .hasAppendResult ()) {
184
+ logger .warn (
185
+ "Write result did not report any errors, but also did not succeed. "
186
+ + "This may be indicative of a bug in the BigQuery Java client library or back end; "
187
+ + "please report it to the maintainers of the connector to investigate."
188
+ );
189
+ }
190
+ logger .trace ("Append call completed successfully on stream {}" , writer .streamName ());
191
+ }
192
+ } catch (BigQueryStorageWriteApiConnectException | BatchWriteException exception ) {
193
+ throw exception ;
194
+ } catch (Exception e ) {
195
+ String message = e .getMessage ();
196
+ String errorMessage = String .format ("Failed to write rows on table %s due to %s" , tableName , message );
197
+ retryHandler .setMostRecentException (new BigQueryStorageWriteApiConnectException (errorMessage , e ));
198
+
199
+ if (shouldHandleSchemaMismatch (e )) {
200
+ logger .warn ("Sent records schema does not match with table schema, will attempt to update schema" );
201
+ retryHandler .attemptTableOperation (schemaManager ::updateSchema );
202
+ } else if (BigQueryStorageWriteApiErrorResponses .isMessageTooLargeError (message )) {
203
+ throw new BatchTooLargeException (errorMessage );
204
+ } else if (BigQueryStorageWriteApiErrorResponses .isMalformedRequest (message )) {
205
+ throw new MalformedRowsException (getRowErrorMapping (e ));
206
+ } else if (BigQueryStorageWriteApiErrorResponses .isStreamClosed (message )) {
207
+ writer .refresh ();
208
+ } else if (BigQueryStorageWriteApiErrorResponses .isTableMissing (message ) && getAutoCreateTables ()) {
209
+ retryHandler .attemptTableOperation (schemaManager ::createTable );
210
+ } else if (!BigQueryStorageWriteApiErrorResponses .isRetriableError (e .getMessage ())
211
+ && BigQueryStorageWriteApiErrorResponses .isNonRetriableStorageError (e )
212
+ ) {
213
+ failTask (retryHandler .getMostRecentException ());
175
214
}
176
- } while (retryHandler .maybeRetry ());
177
- throw new BigQueryStorageWriteApiConnectException (
178
- String .format ("Exceeded %s attempts to write to table %s " , retryHandler .getAttempt (), tableName ),
179
- retryHandler .getMostRecentException ());
215
+ throw new RetryException (errorMessage );
216
+ }
217
+ }
218
+
219
+ private abstract static class BatchWriteException extends Exception {
220
+
221
+ protected BatchWriteException () {
222
+ super ();
223
+ }
224
+
225
+ protected BatchWriteException (String message ) {
226
+ super (message );
227
+ }
228
+
229
+ }
230
+
231
+ private static class BatchTooLargeException extends BatchWriteException {
232
+
233
+ public BatchTooLargeException (String message ) {
234
+ super (message );
235
+ }
236
+
237
+ }
238
+
239
+ private static class MalformedRowsException extends BatchWriteException {
240
+
241
+ private final Map <Integer , String > rowErrorMapping ;
242
+
243
+ public MalformedRowsException (Map <Integer , String > rowErrorMapping ) {
244
+ this .rowErrorMapping = rowErrorMapping ;
245
+ }
246
+
247
+ public Map <Integer , String > getRowErrorMapping () {
248
+ return rowErrorMapping ;
249
+ }
250
+
251
+ }
252
+
253
+ private static class RetryException extends BatchWriteException {
254
+
255
+ public RetryException () {
256
+ super ();
257
+ }
258
+
259
+ public RetryException (String message ) {
260
+ super (message );
261
+ }
180
262
}
181
263
182
264
/**
0 commit comments