Commit a56ca25

re-introduce the fail fast mechanic with fixes for documented bugs.
1 parent 56f1aa1 commit a56ca25

File tree: 4 files changed, +252 −4 lines changed


sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java

Lines changed: 18 additions & 1 deletion
@@ -744,6 +744,9 @@ public abstract static class Read<K, V>
     @Pure
     public abstract long getConsumerPollingTimeout();
 
+    @Pure
+    public abstract @Nullable Boolean getLogTopicVerification();
+
     abstract Builder<K, V> toBuilder();
 
     @AutoValue.Builder

@@ -810,6 +813,8 @@ Builder<K, V> setCheckStopReadingFn(
 
     abstract Builder<K, V> setConsumerPollingTimeout(long consumerPollingTimeout);
 
+    abstract Builder<K, V> setLogTopicVerification(@Nullable Boolean logTopicVerification);
+
     abstract Read<K, V> build();
 
     static <K, V> void setupExternalBuilder(

@@ -1483,6 +1488,10 @@ public Read<K, V> withGCPApplicationDefaultCredentials() {
               "org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule required;"));
     }
 
+    public Read<K, V> withTopicVerificationLogging(boolean logTopicVerification) {
+      return toBuilder().setLogTopicVerification(logTopicVerification).build();
+    }
+
     /** Returns a {@link PTransform} for PCollection of {@link KV}, dropping Kafka metatdata. */
     public PTransform<PBegin, PCollection<KV<K, V>>> withoutMetadata() {
       return new TypedWithoutMetadata<>(this);

@@ -1910,7 +1919,15 @@ public void processElement(OutputReceiver<KafkaSourceDescriptor> receiver) {
         }
       } else {
         for (String topic : topics) {
-          for (PartitionInfo p : consumer.partitionsFor(topic)) {
+          List<PartitionInfo> partitionInfoList = consumer.partitionsFor(topic);
+          checkState(
+              partitionInfoList != null && !partitionInfoList.isEmpty(),
+              "Could not find any partitions info for topic "
+                  + topic
+                  + ". Please check Kafka configuration and make sure "
+                  + "that provided topics exist.");
+
+          for (PartitionInfo p : partitionInfoList) {
             partitions.add(new TopicPartition(p.topic(), p.partition()));
           }
         }
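
For orientation, a hedged usage sketch of the new withTopicVerificationLogging option added above; the bootstrap address and topic name are placeholders, not values from this commit:

```java
import java.util.Arrays;
import org.apache.beam.sdk.io.kafka.KafkaIO;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;

public class TopicVerificationLoggingExample {

  // Configures a Kafka read that logs a warning instead of failing fast when a
  // configured topic or partition cannot be verified against the cluster.
  static KafkaIO.Read<byte[], byte[]> loggingRead() {
    return KafkaIO.<byte[], byte[]>read()
        .withBootstrapServers("localhost:9092") // placeholder address
        .withTopics(Arrays.asList("my_topic")) // placeholder topic
        .withKeyDeserializer(ByteArrayDeserializer.class)
        .withValueDeserializer(ByteArrayDeserializer.class)
        .withTopicVerificationLogging(true);
  }
}
```

Passing true downgrades topic/partition verification from a hard failure to a logged warning; leaving the flag unset (or false) keeps the fail-fast behavior.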

sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaUnboundedSource.java

Lines changed: 50 additions & 2 deletions
@@ -23,9 +23,12 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.regex.Pattern;
+import java.util.stream.Collectors;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.extensions.avro.coders.AvroCoder;
 import org.apache.beam.sdk.io.UnboundedSource;

@@ -37,6 +40,7 @@
 import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList;
 import org.apache.kafka.clients.consumer.Consumer;
 import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.common.KafkaException;
 import org.apache.kafka.common.PartitionInfo;
 import org.apache.kafka.common.TopicPartition;
 import org.checkerframework.checker.nullness.qual.Nullable;

@@ -89,7 +93,7 @@ public List<KafkaUnboundedSource<K, V>> split(int desiredNumSplits, PipelineOpti
       for (String topic : topics) {
         List<PartitionInfo> partitionInfoList = consumer.partitionsFor(topic);
         checkState(
-            partitionInfoList != null,
+            partitionInfoList != null && !partitionInfoList.isEmpty(),
             "Could not find any partitions info. Please check Kafka configuration and make sure "
                 + "that provided topics exist.");
         for (PartitionInfo p : partitionInfoList) {

@@ -100,8 +104,52 @@ public List<KafkaUnboundedSource<K, V>> split(int desiredNumSplits, PipelineOpti
         }
       }
     } else {
+      final Map<String, List<Integer>> topicsAndPartitions = new HashMap<>();
       for (TopicPartition p : partitions) {
-        Lineage.getSources().add("kafka", ImmutableList.of(bootStrapServers, p.topic()));
+        topicsAndPartitions.computeIfAbsent(p.topic(), k -> new ArrayList<>()).add(p.partition());
+      }
+
+      try (Consumer<?, ?> consumer = spec.getConsumerFactoryFn().apply(spec.getConsumerConfig())) {
+        for (Map.Entry<String, List<Integer>> e : topicsAndPartitions.entrySet()) {
+          final String providedTopic = e.getKey();
+          final List<Integer> providedPartitions = e.getValue();
+          final Set<Integer> partitionsForTopic;
+          try {
+            partitionsForTopic =
+                consumer.partitionsFor(providedTopic).stream()
+                    .map(PartitionInfo::partition)
+                    .collect(Collectors.toSet());
+            if (spec.getLogTopicVerification() == null || !spec.getLogTopicVerification()) {
+              for (Integer p : providedPartitions) {
+                checkState(
+                    partitionsForTopic.contains(p),
+                    "Partition "
+                        + p
+                        + " does not exist for topic "
+                        + providedTopic
+                        + ". Please check Kafka configuration.");
+              }
+            } else {
+              for (Integer p : providedPartitions) {
+                if (!partitionsForTopic.contains(p)) {
+                  LOG.warn(
+                      "Partition {} does not exist for topic {}. Please check Kafka configuration.",
+                      p,
+                      providedTopic);
+                }
+              }
+            }
+          } catch (KafkaException exception) {
+            LOG.warn("Unable to access cluster. Skipping fail fast checks.");
+          }
+          Lineage.getSources().add("kafka", ImmutableList.of(bootStrapServers, providedTopic));
+        }
+      } catch (KafkaException exception) {
+        LOG.warn(
+            "WARN: Failed to connect to kafka for running pre-submit validation of kafka "
+                + "topic and partition configuration. This may be due to local permissions or "
+                + "connectivity to the kafka bootstrap server, or due to misconfiguration of "
+                + "KafkaIO. This validation is not required, and this warning may be ignored "
+                + "if the Beam job runs successfully.");
       }
     }
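
The branch added above chooses between failing fast and merely logging based on getLogTopicVerification(). Below is a minimal standalone sketch of that decision logic, using plain collections in place of a live Kafka consumer; the class, helper method, and topic name are hypothetical:

```java
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class VerificationModeDemo {

  // Mirrors the branch above: a null or false logTopicVerification flag means
  // fail fast with an exception; true downgrades the check to a warning.
  static void verify(
      Set<Integer> existingPartitions, List<Integer> providedPartitions, Boolean logTopicVerification) {
    boolean failFast = logTopicVerification == null || !logTopicVerification;
    for (Integer p : providedPartitions) {
      if (!existingPartitions.contains(p)) {
        String msg = "Partition " + p + " does not exist for topic demo_topic.";
        if (failFast) {
          throw new IllegalStateException(msg);
        }
        System.out.println("WARN: " + msg);
      }
    }
  }

  public static void main(String[] args) {
    Set<Integer> existing = new HashSet<>(Arrays.asList(0, 1, 2));
    verify(existing, Arrays.asList(0, 1000), true); // logs a warning and continues
    verify(existing, Arrays.asList(0, 1000), null); // throws IllegalStateException
  }
}
```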

sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/WatchForKafkaTopicPartitions.java

Lines changed: 9 additions & 1 deletion
@@ -18,6 +18,7 @@
 package org.apache.beam.sdk.io.kafka;
 
 import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.MoreObjects.firstNonNull;
+import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState;
 
 import java.util.ArrayList;
 import java.util.List;

@@ -189,7 +190,14 @@ static List<TopicPartition> getAllTopicPartitions(
         kafkaConsumerFactoryFn.apply(kafkaConsumerConfig)) {
       if (topics != null && !topics.isEmpty()) {
         for (String topic : topics) {
-          for (PartitionInfo partition : kafkaConsumer.partitionsFor(topic)) {
+          List<PartitionInfo> partitionInfoList = kafkaConsumer.partitionsFor(topic);
+          checkState(
+              partitionInfoList != null && !partitionInfoList.isEmpty(),
+              "Could not find any partitions info for topic "
+                  + topic
+                  + ". Please check Kafka configuration and make sure "
+                  + "that provided topics exist.");
+          for (PartitionInfo partition : partitionInfoList) {
             current.add(new TopicPartition(topic, partition.partition()));
           }
         }
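
As in the KafkaIO.java hunk, this guard relies on Guava's checkState, which throws IllegalStateException carrying the supplied message when its condition is false (Beam imports it from its vendored Guava copy). A minimal illustration against plain Guava:

```java
import static com.google.common.base.Preconditions.checkState;

import java.util.Collections;
import java.util.List;

public class CheckStateDemo {
  public static void main(String[] args) {
    // An empty list stands in for consumer.partitionsFor() returning no
    // partition info for a missing topic.
    List<String> partitionInfoList = Collections.emptyList();
    // Throws IllegalStateException with the given message, since the
    // condition is false.
    checkState(
        partitionInfoList != null && !partitionInfoList.isEmpty(),
        "Could not find any partitions info for topic demo_topic.");
  }
}
```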

sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java

Lines changed: 175 additions & 0 deletions
@@ -38,6 +38,7 @@
 import java.util.function.BiFunction;
 import java.util.stream.Collectors;
 import java.util.stream.LongStream;
+import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.PipelineResult;
 import org.apache.beam.sdk.coders.ByteArrayCoder;
 import org.apache.beam.sdk.coders.NullableCoder;

@@ -90,13 +91,15 @@
 import org.apache.beam.sdk.values.Row;
 import org.apache.beam.sdk.values.TypeDescriptors;
 import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList;
 import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap;
 import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet;
 import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists;
 import org.apache.kafka.clients.admin.AdminClient;
 import org.apache.kafka.clients.admin.NewPartitions;
 import org.apache.kafka.clients.admin.NewTopic;
 import org.apache.kafka.clients.producer.ProducerConfig;
+import org.apache.kafka.common.KafkaException;
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.serialization.ByteArrayDeserializer;
 import org.apache.kafka.common.serialization.ByteArraySerializer;

@@ -114,6 +117,7 @@
 import org.junit.Ignore;
 import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.ExpectedException;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;
 import org.slf4j.Logger;

@@ -171,6 +175,8 @@ public class KafkaIOIT {
 
   @Rule public TestPipeline readPipeline = TestPipeline.create();
 
+  @Rule public ExpectedException thrown = ExpectedException.none();
+
   private static ExperimentalOptions sdfPipelineOptions;
 
   static {

@@ -215,6 +221,175 @@ public static void afterClass() {
     }
   }
 
+  @Test
+  public void testKafkaIOFailsFastWithInvalidPartitions() throws IOException {
+    thrown.expect(Pipeline.PipelineExecutionException.class);
+    thrown.expectMessage(
+        "Partition 1000 does not exist for topic "
+            + options.getKafkaTopic()
+            + ". Please check Kafka configuration.");
+
+    // Use streaming pipeline to read Kafka records.
+    readPipeline.getOptions().as(Options.class).setStreaming(true);
+    TopicPartition invalidPartition = new TopicPartition(options.getKafkaTopic(), 1000);
+    readPipeline.apply(
+        "Read from unbounded Kafka",
+        readFromKafka().withTopicPartitions(ImmutableList.of(invalidPartition)));
+
+    PipelineResult readResult = readPipeline.run();
+    PipelineResult.State readState =
+        readResult.waitUntilFinish(Duration.standardSeconds(options.getReadTimeout()));
+
+    // call asynchronous deleteTopics first since cancelIfTimeouted is blocking.
+    tearDownTopic(options.getKafkaTopic());
+    cancelIfTimeouted(readResult, readState);
+  }
+
+  @Test
+  public void testKafkaIOFailsFastWithInvalidPartitionsAndFlagExplicitlySet() throws IOException {
+    thrown.expect(Pipeline.PipelineExecutionException.class);
+    thrown.expectMessage(
+        "Partition 1000 does not exist for topic "
+            + options.getKafkaTopic()
+            + ". Please check Kafka configuration.");
+
+    // Use streaming pipeline to read Kafka records.
+    readPipeline.getOptions().as(Options.class).setStreaming(true);
+    TopicPartition invalidPartition = new TopicPartition(options.getKafkaTopic(), 1000);
+    readPipeline.apply(
+        "Read from unbounded Kafka",
+        readFromKafka()
+            .withTopicPartitions(ImmutableList.of(invalidPartition))
+            .withTopicVerificationLogging(false));
+
+    PipelineResult readResult = readPipeline.run();
+    PipelineResult.State readState =
+        readResult.waitUntilFinish(Duration.standardSeconds(options.getReadTimeout()));
+
+    // call asynchronous deleteTopics first since cancelIfTimeouted is blocking.
+    tearDownTopic(options.getKafkaTopic());
+    cancelIfTimeouted(readResult, readState);
+  }
+
+  @Test
+  public void testKafkaIODoesNotFailFastWithInvalidPartitionsAndFlagExplicitlyNotSet()
+      throws IOException {
+
+    // Expect a different error, thrown at runtime, because we disable the
+    // fail-fast check by setting the logging flag to true.
+    thrown.expect(java.lang.RuntimeException.class);
+
+    // Use streaming pipeline to read Kafka records.
+    readPipeline.getOptions().as(Options.class).setStreaming(true);
+    TopicPartition invalidPartition = new TopicPartition(options.getKafkaTopic(), 1000);
+    readPipeline.apply(
+        "Read from unbounded Kafka",
+        readFromKafka()
+            .withTopicPartitions(ImmutableList.of(invalidPartition))
+            .withTopicVerificationLogging(true));
+
+    PipelineResult readResult = readPipeline.run();
+    PipelineResult.State readState =
+        readResult.waitUntilFinish(Duration.standardSeconds(options.getReadTimeout()));
+
+    // call asynchronous deleteTopics first since cancelIfTimeouted is blocking.
+    tearDownTopic(options.getKafkaTopic());
+    cancelIfTimeouted(readResult, readState);
+  }
+
+  @Test
+  public void testKafkaIOFailsFastWithInvalidTopics() throws IOException {
+    // This test will fail on versions before 2.3.0 due to the non-existence of
+    // the allow.auto.create.topics flag. This can be removed when/if support
+    // for this older version is dropped.
+    String actualVer = AppInfoParser.getVersion();
+    assumeFalse(actualVer.compareTo("2.0.0") >= 0 && actualVer.compareTo("2.3.0") < 0);
+
+    thrown.expect(Pipeline.PipelineExecutionException.class);
+    thrown.expectMessage(
+        "Could not find any partitions info for topic invalid_topic. Please check Kafka configuration"
+            + " and make sure that provided topics exist.");
+
+    // Use streaming pipeline to read Kafka records.
+    sdfReadPipeline.getOptions().as(Options.class).setStreaming(true);
+    String invalidTopic = "invalid_topic";
+    sdfReadPipeline.apply(
+        "Read from unbounded Kafka",
+        KafkaIO.<byte[], byte[]>read()
+            .withConsumerConfigUpdates(ImmutableMap.of("allow.auto.create.topics", "false"))
+            .withBootstrapServers(options.getKafkaBootstrapServerAddresses())
+            .withTopics(ImmutableList.of(invalidTopic))
+            .withKeyDeserializer(ByteArrayDeserializer.class)
+            .withValueDeserializer(ByteArrayDeserializer.class));
+
+    PipelineResult readResult = sdfReadPipeline.run();
+    PipelineResult.State readState =
+        readResult.waitUntilFinish(Duration.standardSeconds(options.getReadTimeout()));
+
+    // call asynchronous deleteTopics first since cancelIfTimeouted is blocking.
+    tearDownTopic(options.getKafkaTopic());
+    cancelIfTimeouted(readResult, readState);
+  }
+
+  @Test
+  public void testKafkaIOFailsFastWithInvalidTopicsAndDynamicRead() throws IOException {
+    // This test will fail on versions before 2.3.0 due to the non-existence of
+    // the allow.auto.create.topics flag. This can be removed when/if support
+    // for this older version is dropped.
+    String actualVer = AppInfoParser.getVersion();
+    assumeFalse(actualVer.compareTo("2.0.0") >= 0 && actualVer.compareTo("2.3.0") < 0);
+
+    thrown.expect(Pipeline.PipelineExecutionException.class);
+    thrown.expectMessage(
+        "Could not find any partitions info for topic invalid_topic. Please check Kafka configuration"
+            + " and make sure that provided topics exist.");
+
+    // Use streaming pipeline to read Kafka records.
+    sdfReadPipeline.getOptions().as(Options.class).setStreaming(true);
+    String invalidTopic = "invalid_topic";
+    sdfReadPipeline.apply(
+        "Read from unbounded Kafka",
+        KafkaIO.<byte[], byte[]>read()
+            .withConsumerConfigUpdates(ImmutableMap.of("allow.auto.create.topics", "false"))
+            .withBootstrapServers(options.getKafkaBootstrapServerAddresses())
+            .withTopics(ImmutableList.of(invalidTopic))
+            .withDynamicRead(Duration.standardSeconds(5))
+            .withKeyDeserializer(ByteArrayDeserializer.class)
+            .withValueDeserializer(ByteArrayDeserializer.class));
+
+    PipelineResult readResult = sdfReadPipeline.run();
+    PipelineResult.State readState =
+        readResult.waitUntilFinish(Duration.standardSeconds(options.getReadTimeout()));
+
+    // call asynchronous deleteTopics first since cancelIfTimeouted is blocking.
+    tearDownTopic(options.getKafkaTopic());
+    cancelIfTimeouted(readResult, readState);
+  }
+
+  @Test
+  public void testKafkaIODoesNotErrorAtValidationWithBadBootstrapServer() throws IOException {
+    // Expect an error during execution that the bootstrap server is bad, not
+    // during validation steps in KafkaUnboundedSource.
+    thrown.expect(KafkaException.class);
+    // Use streaming pipeline to read Kafka records.
+    readPipeline.getOptions().as(Options.class).setStreaming(true);
+    TopicPartition invalidPartition = new TopicPartition(options.getKafkaTopic(), 1000);
+    readPipeline.apply(
+        "Read from unbounded Kafka",
+        KafkaIO.readBytes()
+            .withBootstrapServers("bootstrap.invalid-name.fake-region.bad-project:invalid-port")
+            .withConsumerConfigUpdates(ImmutableMap.of("auto.offset.reset", "earliest"))
+            .withTopicPartitions(ImmutableList.of(invalidPartition)));
+
+    PipelineResult readResult = readPipeline.run();
+    PipelineResult.State readState =
+        readResult.waitUntilFinish(Duration.standardSeconds(options.getReadTimeout()));
+
+    // call asynchronous deleteTopics first since cancelIfTimeouted is blocking.
+    tearDownTopic(options.getKafkaTopic());
+    cancelIfTimeouted(readResult, readState);
+  }
+
   @Test
   public void testKafkaIOReadsAndWritesCorrectlyInStreaming() throws IOException {
     // Use batch pipeline to write records.
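
The new tests are built on JUnit 4's ExpectedException rule: expectations are registered before the pipeline runs, and the test passes only if a matching exception surfaces. A minimal self-contained sketch of the pattern, not part of the commit:

```java
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;

public class ExpectedExceptionDemo {

  @Rule public ExpectedException thrown = ExpectedException.none();

  @Test
  public void failsWithExpectedMessage() {
    // Expectations are declared first; expectMessage matches a substring.
    thrown.expect(IllegalStateException.class);
    thrown.expectMessage("does not exist");

    // The test passes because this throws the expected exception type
    // with a matching message.
    throw new IllegalStateException("Partition 1000 does not exist for topic demo_topic.");
  }
}
```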
