|
29 | 29 | import org.apache.kafka.connect.header.Headers; |
30 | 30 | import org.apache.kafka.connect.source.SourceRecord; |
31 | 31 | import org.apache.kafka.connect.source.SourceTask; |
32 | | -import org.apache.kafka.common.errors.OffsetOutOfRangeException; // Add this |
| 32 | +// import org.apache.kafka.common.errors.OffsetOutOfRangeException; |
33 | 33 |
|
34 | 34 | import org.slf4j.Logger; |
35 | 35 | import org.slf4j.LoggerFactory; |
36 | 36 |
|
37 | 37 | import java.time.Duration; |
38 | 38 | import java.util.ArrayList; |
| 39 | +import java.util.Collections; |
39 | 40 | import java.util.List; |
40 | 41 | import java.util.Map; |
41 | 42 | import java.util.Set; |
|
49 | 50 | public class MirrorSourceTask extends SourceTask { |
50 | 51 |
|
51 | 52 | private static final Logger log = LoggerFactory.getLogger(MirrorSourceTask.class); |
| 53 | + private final java.util.Map<TopicPartition, Long> lastExpectedOffsets = new java.util.HashMap<>(); |
52 | 54 |
|
53 | 55 | private KafkaConsumer<byte[], byte[]> consumer; |
54 | 56 | private String sourceClusterAlias; |
@@ -131,47 +133,63 @@ public String version() { |
131 | 133 |
|
132 | 134 | @Override |
133 | 135 | public List<SourceRecord> poll() { |
134 | | - if (!consumerAccess.tryAcquire()) { |
135 | | - return null; |
136 | | - } |
| 136 | + if (!consumerAccess.tryAcquire()) return null; |
| 137 | + |
137 | 138 | if (stopping) { |
138 | 139 | consumerAccess.release(); |
139 | 140 | return null; |
140 | 141 | } |
| 142 | + |
141 | 143 | try { |
142 | | - // REMOVED: validateSourceTopicState() from here to save network overhead |
143 | | - |
144 | 144 | ConsumerRecords<byte[], byte[]> records = consumer.poll(pollTimeout); |
| 145 | + // Validate partitions AFTER poll |
| 146 | + for (TopicPartition tp : consumer.assignment()) { |
| 147 | + long nextOffset = consumer.position(tp); |
| 148 | + Map<TopicPartition, Long> beginningOffsets = |
| 149 | + consumer.beginningOffsets(Collections.singleton(tp)); |
| 150 | + |
| 151 | + Map<TopicPartition, Long> endOffsets = |
| 152 | + consumer.endOffsets(Collections.singleton(tp)); |
| 153 | + |
| 154 | + long beginningOffset = beginningOffsets.get(tp); |
| 155 | + long endOffset = endOffsets.get(tp); |
| 156 | + |
| 157 | + verifyPartitionState( |
| 158 | + tp, |
| 159 | + nextOffset, |
| 160 | + beginningOffset, |
| 161 | + endOffset |
| 162 | + ); |
| 163 | + } |
145 | 164 |
|
146 | 165 | List<SourceRecord> sourceRecords = new ArrayList<>(records.count()); |
147 | 166 | for (ConsumerRecord<byte[], byte[]> record : records) { |
148 | | - SourceRecord converted = convertRecord(record); |
149 | | - sourceRecords.add(converted); |
150 | | - TopicPartition topicPartition = new TopicPartition(converted.topic(), converted.kafkaPartition()); |
151 | | - metrics.recordAge(topicPartition, System.currentTimeMillis() - record.timestamp()); |
152 | | - metrics.recordBytes(topicPartition, byteSize(record.value())); |
| 167 | + sourceRecords.add(convertRecord(record)); |
153 | 168 | } |
154 | | - if (sourceRecords.isEmpty()) { |
155 | | - return null; |
156 | | - } else { |
157 | | - return sourceRecords; |
158 | | - } |
159 | | - } catch (org.apache.kafka.common.errors.WakeupException e) { |
160 | | - return null; |
161 | | - } catch (OffsetOutOfRangeException e) { |
162 | | - // ================================================================= |
163 | | - // RECOVERY & FAIL-FAST ROUTER ON EXCEPTION |
164 | | - // ================================================================= |
165 | | - log.warn("Consumer offset out of bounds. Evaluating cluster state to differentiate truncation vs reset..."); |
166 | | - handleOffsetBreach(consumer.assignment()); |
167 | | - return null; |
168 | | - } catch (KafkaException e) { |
169 | | - throw e; |
| 169 | + return sourceRecords.isEmpty() ? null : sourceRecords; |
| 170 | + |
| 171 | + } catch (org.apache.kafka.common.errors.OffsetOutOfRangeException e) { |
| 172 | + log.error("Source log truncation detected", e); |
| 173 | + throw new org.apache.kafka.connect.errors.ConnectException( |
| 174 | + "Fail-Fast: Source log truncation detected.", |
| 175 | + e |
| 176 | + ); |
| 177 | + |
170 | 178 | } finally { |
171 | 179 | consumerAccess.release(); |
172 | 180 | } |
173 | 181 | } |
174 | 182 |
|
| 183 | + // // Helper to keep poll() clean |
| 184 | + // private List<SourceRecord> processRecords(ConsumerRecords<byte[], byte[]> records) { |
| 185 | + // List<SourceRecord> sourceRecords = new ArrayList<>(records.count()); |
| 186 | + // for (ConsumerRecord<byte[], byte[]> record : records) { |
| 187 | + // sourceRecords.add(convertRecord(record)); |
| 188 | + // } |
| 189 | + // return sourceRecords.isEmpty() ? null : sourceRecords; |
| 190 | + // } |
| 191 | + |
| 192 | + /* |
175 | 193 | private void handleOffsetBreach(Set<TopicPartition> breachedPartitions) { |
176 | 194 | if (breachedPartitions == null || breachedPartitions.isEmpty()) return; |
177 | 195 |
|
@@ -219,6 +237,7 @@ private void handleOffsetBreach(Set<TopicPartition> breachedPartitions) { |
219 | 237 | } |
220 | 238 | } |
221 | 239 | } |
| 240 | + */ |
222 | 241 |
|
223 | 242 | @Override |
224 | 243 | public void commitRecord(SourceRecord record, RecordMetadata metadata) { |
@@ -264,22 +283,16 @@ private Long loadOffset(TopicPartition topicPartition) { |
264 | 283 | return MirrorUtils.unwrapOffset(wrappedOffset); |
265 | 284 | } |
266 | 285 |
|
267 | | - // visible for testing |
268 | 286 | void initializeConsumer(Set<TopicPartition> taskTopicPartitions) { |
269 | 287 | Map<TopicPartition, Long> topicPartitionOffsets = loadOffsets(taskTopicPartitions); |
| 288 | + |
| 289 | + // Use standard assign, no listener here |
270 | 290 | consumer.assign(topicPartitionOffsets.keySet()); |
271 | | - log.info("Starting with {} previously uncommitted partitions.", topicPartitionOffsets.values().stream() |
272 | | - .filter(this::isUncommitted).count()); |
273 | | - |
| 291 | + |
274 | 292 | topicPartitionOffsets.forEach((topicPartition, offset) -> { |
275 | | - // Do not call seek on partitions that don't have an existing offset committed. |
276 | | - if (isUncommitted(offset)) { |
277 | | - log.trace("Skipping seeking offset for topicPartition: {}", topicPartition); |
278 | | - return; |
| 293 | + if (!isUncommitted(offset)) { |
| 294 | + consumer.seek(topicPartition, offset + 1L); |
279 | 295 | } |
280 | | - long nextOffsetToCommittedOffset = offset + 1L; |
281 | | - log.trace("Seeking to offset {} for topicPartition: {}", nextOffsetToCommittedOffset, topicPartition); |
282 | | - consumer.seek(topicPartition, nextOffsetToCommittedOffset); |
283 | 296 | }); |
284 | 297 | } |
285 | 298 |
|
@@ -319,4 +332,20 @@ private static int byteSize(byte[] bytes) { |
319 | 332 | private boolean isUncommitted(Long offset) { |
320 | 333 | return offset == null || offset < 0; |
321 | 334 | } |
| 335 | + |
| 336 | + private void verifyPartitionState(TopicPartition tp, long nextOffset, long beginningOffset, long endOffset) { |
| 337 | + // 1. True Log Truncation (Scenario 2: Data was chopped out from underneath MM2) |
| 338 | + if (nextOffset < beginningOffset) { |
| 339 | + log.error("CRITICAL: Source log truncation detected for {}! MM2 position is {}, but log starts at {}.", |
| 340 | + tp, nextOffset, beginningOffset); |
| 341 | + throw new org.apache.kafka.connect.errors.ConnectException("Fail-Fast: Hard log truncation detected."); |
| 342 | + } |
| 343 | + |
| 344 | + // 2. True Topic Reset/Purge (Scenario 3: Topic was wiped clean, log reset back to 0) |
| 345 | + if (beginningOffset == 0 && nextOffset > endOffset) { |
| 346 | + log.warn("Detected intentional source topic purge/reset for {}. Re-aligning consumer position to 0L.", tp); |
| 347 | + consumer.seekToBeginning(Collections.singleton(tp)); |
| 348 | + } |
| 349 | + } |
322 | 350 | } |
| 351 | + |
0 commit comments