|
52 | 52 | import org.apache.kafka.test.TestUtils;
|
53 | 53 | import org.junit.After;
|
54 | 54 | import org.junit.Before;
|
| 55 | +import org.junit.Ignore; |
55 | 56 | import org.junit.Test;
|
56 | 57 | import org.junit.experimental.categories.Category;
|
57 | 58 | import org.slf4j.Logger;
|
@@ -382,6 +383,9 @@ public void testAvroLargeBatches() throws InterruptedException {
|
382 | 383 | final String topic = suffixedTableOrTopic("storage-api-append-large-batches" + System.nanoTime());
|
383 | 384 | final String table = sanitizedTable(topic);
|
384 | 385 |
|
| 386 | + // pre-create the table |
| 387 | + createPerformanceTestingTable(table); |
| 388 | + |
385 | 389 | int tasksMax = 1;
|
386 | 390 | long numRecords = 100_000;
|
387 | 391 |
|
@@ -440,6 +444,97 @@ public void testAvroLargeBatches() throws InterruptedException {
|
440 | 444 | );
|
441 | 445 | }
|
442 | 446 |
|
| 447 | + @Test |
| 448 | + @Ignore("TODO: Handle 'java.lang.RuntimeException: Request has waited in inflight queue for <duration> for writer <writer>, which is over maximum wait time PT5M'") |
| 449 | + public void testAvroHighThroughput() throws InterruptedException { |
| 450 | + // create topic in Kafka |
| 451 | + final String topic = suffixedTableOrTopic("storage-api-append-high-throughput" + System.nanoTime()); |
| 452 | + final String table = sanitizedTable(topic); |
| 453 | + |
| 454 | + // pre-create the table |
| 455 | + createPerformanceTestingTable(table); |
| 456 | + |
| 457 | + int tasksMax = 10; |
| 458 | + long numRecords = 10_000_000; |
| 459 | + |
| 460 | + // create topic |
| 461 | + connect.kafka().createTopic(topic, tasksMax); |
| 462 | + |
| 463 | + // clean table |
| 464 | + TableClearer.clearTables(bigQuery, dataset(), table); |
| 465 | + |
| 466 | + // Instantiate the converters we'll use to send records to the connector |
| 467 | + initialiseAvroConverters(); |
| 468 | + |
| 469 | + // produce records, each with a 100-byte value |
| 470 | + produceAvroRecords(topic, numRecords, 100); |
| 471 | + |
| 472 | + // setup props for the sink connector |
| 473 | + Map<String, String> props = configs(topic); |
| 474 | + props.put(TASKS_MAX_CONFIG, Integer.toString(tasksMax)); |
| 475 | + |
| 476 | + // the Storage Write API allows for 10MB per write; try to get close to that |
| 477 | + // for each poll |
| 478 | + props.put( |
| 479 | + CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX + FETCH_MAX_BYTES_CONFIG, |
| 480 | + Integer.toString(9 * 1024 * 1024) |
| 481 | + ); |
| 482 | + props.put( |
| 483 | + CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX + MAX_PARTITION_FETCH_BYTES_CONFIG, |
| 484 | + Integer.toString(Integer.MAX_VALUE) |
| 485 | + ); |
| 486 | + props.put( |
| 487 | + CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX + MAX_POLL_RECORDS_CONFIG, |
| 488 | + Long.toString(numRecords) |
| 489 | + ); |
| 490 | + |
| 491 | + // start a sink connector |
| 492 | + connect.configureConnector(CONNECTOR_NAME, props); |
| 493 | + |
| 494 | + // wait for tasks to spin up |
| 495 | + waitForConnectorToStart(CONNECTOR_NAME, tasksMax); |
| 496 | + |
| 497 | + // wait for tasks to write to BigQuery and commit offsets for their records |
| 498 | + waitForCommittedRecords( |
| 499 | + CONNECTOR_NAME, |
| 500 | + Collections.singleton(topic), |
| 501 | + numRecords, |
| 502 | + tasksMax, |
| 503 | + TimeUnit.MINUTES.toMillis(10) |
| 504 | + ); |
| 505 | + |
| 506 | + connect.deleteConnector(CONNECTOR_NAME); |
| 507 | + |
| 508 | + final AtomicLong numRows = new AtomicLong(); |
| 509 | + TestUtils.waitForCondition( |
| 510 | + () -> { |
| 511 | + numRows.set(countRows(bigQuery, table)); |
| 512 | + assertEquals(numRecords, numRows.get()); |
| 513 | + return true; |
| 514 | + }, |
| 515 | + 10_000L, |
| 516 | + () -> "Table should contain " + numRecords |
| 517 | + + " rows, but has " + numRows.get() + " instead" |
| 518 | + ); |
| 519 | + } |
| 520 | + |
| 521 | + private void createPerformanceTestingTable(String table) { |
| 522 | + // pre-create the table |
| 523 | + com.google.cloud.bigquery.Schema tableSchema = com.google.cloud.bigquery.Schema.of( |
| 524 | + Field.of("f1", StandardSQLTypeName.STRING), |
| 525 | + Field.of(KAFKA_FIELD_NAME, StandardSQLTypeName.STRUCT, Field.of("k1", StandardSQLTypeName.STRING)) |
| 526 | + ); |
| 527 | + try { |
| 528 | + BigQueryTestUtils.createPartitionedTable(bigQuery, dataset(), table, tableSchema); |
| 529 | + } catch (BigQueryException ex) { |
| 530 | + if (!ex.getError().getReason().equalsIgnoreCase("duplicate")) { |
| 531 | + throw new ConnectException("Failed to create table: ", ex); |
| 532 | + } else { |
| 533 | + logger.info("Table {} already exists", table); |
| 534 | + } |
| 535 | + } |
| 536 | + } |
| 537 | + |
443 | 538 | private void createTable(String table, boolean incompleteSchema) {
|
444 | 539 | com.google.cloud.bigquery.Schema tableSchema;
|
445 | 540 | if (incompleteSchema) {
|
@@ -509,7 +604,6 @@ private void produceAvroRecords(String topic) {
|
509 | 604 | produceAvroRecords(topic, NUM_RECORDS_PRODUCED);
|
510 | 605 | }
|
511 | 606 |
|
512 |
| - |
513 | 607 | private void produceAvroRecords(String topic, long numRecords) {
|
514 | 608 | produceAvroRecords(
|
515 | 609 | topic,
|
|