 */
package io.aiven.inkless.produce;

-import org.apache.kafka.common.InvalidRecordException;
import org.apache.kafka.common.TopicIdPartition;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.compress.Compression;
-import org.apache.kafka.common.errors.CorruptRecordException;
-import org.apache.kafka.common.errors.OffsetOutOfRangeException;
import org.apache.kafka.common.errors.RecordTooLargeException;
import org.apache.kafka.common.protocol.Errors;
-import org.apache.kafka.common.record.CompressionType;
import org.apache.kafka.common.record.MemoryRecords;
-import org.apache.kafka.common.record.MutableRecordBatch;
-import org.apache.kafka.common.record.Record;
import org.apache.kafka.common.record.RecordBatch;
-import org.apache.kafka.common.record.RecordValidationStats;
import org.apache.kafka.common.record.TimestampType;
import org.apache.kafka.common.requests.ProduceResponse;
import org.apache.kafka.common.utils.PrimitiveRef;
import org.apache.kafka.server.common.RequestLocal;
import org.apache.kafka.server.record.BrokerCompressionType;
import org.apache.kafka.storage.internals.log.AppendOrigin;
-import org.apache.kafka.storage.internals.log.LeaderHwChange;
-import org.apache.kafka.storage.internals.log.LocalLog;
import org.apache.kafka.storage.internals.log.LogAppendInfo;
import org.apache.kafka.storage.internals.log.LogConfig;
import org.apache.kafka.storage.internals.log.LogValidator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

-import java.nio.ByteBuffer;
-import java.util.Collections;
import java.util.Map;
-import java.util.Optional;
-import java.util.stream.Collectors;
-import java.util.stream.StreamSupport;

import static org.apache.kafka.storage.internals.log.UnifiedLog.UNKNOWN_OFFSET;
+import static org.apache.kafka.storage.internals.log.UnifiedLog.analyzeAndValidateRecords;
+import static org.apache.kafka.storage.internals.log.UnifiedLog.trimInvalidBytes;

-// TODO: This method is being migrated to Java and this is a placeholder for when it becomes available
-// on UnifiedLog.java from apache/kafka#19030
+// Stub to keep code that may eventually be moved to upstream UnifiedLog
class UnifiedLog {
    private static final Logger LOGGER = LoggerFactory.getLogger(UnifiedLog.class);

@@ -67,169 +54,6 @@ class UnifiedLog {
    // Using 0 as for inkless the leader epoch is not used
    public static final int LEADER_EPOCH = LeaderAndIsr.INITIAL_LEADER_EPOCH;

-    /**
-     * Validate the following:
-     * <ol>
-     * <li> each message matches its CRC
-     * <li> each message size is valid (if ignoreRecordSize is false)
-     * <li> that the sequence numbers of the incoming record batches are consistent with the existing state and with each other
-     * <li> that the offsets are monotonically increasing (if requireOffsetsMonotonic is true)
-     * </ol>
-     * <p>
-     * Also compute the following quantities:
-     * <ol>
-     * <li> First offset in the message set
-     * <li> Last offset in the message set
-     * <li> Number of messages
-     * <li> Number of valid bytes
-     * <li> Whether the offsets are monotonically increasing
-     * <li> Whether any compression codec is used (if many are used, then the last one is given)
-     * </ol>
-     */
-    static LogAppendInfo analyzeAndValidateRecords(TopicPartition topicPartition,
-                                                   LogConfig config,
-                                                   MemoryRecords records,
-                                                   long logStartOffset,
-                                                   AppendOrigin origin,
-                                                   boolean ignoreRecordSize,
-                                                   boolean requireOffsetsMonotonic,
-                                                   int leaderEpoch,
-                                                   BrokerTopicStats brokerTopicStats) {
-        int validBytesCount = 0;
-        long firstOffset = LocalLog.UNKNOWN_OFFSET;
-        long lastOffset = -1L;
-        int lastLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH;
-        CompressionType sourceCompression = CompressionType.NONE;
-        boolean monotonic = true;
-        long maxTimestamp = RecordBatch.NO_TIMESTAMP;
-        boolean readFirstMessage = false;
-        long lastOffsetOfFirstBatch = -1L;
-        boolean skipRemainingBatches = false;
-
-        for (MutableRecordBatch batch : records.batches()) {
-            // we only validate V2 and higher to avoid potential compatibility issues with older clients
-            if (batch.magic() >= RecordBatch.MAGIC_VALUE_V2 && batch.baseOffset() != 0) {
-                throw new InvalidRecordException("The baseOffset of the record batch in the append to " + topicPartition + " should " +
-                    "be 0, but it is " + batch.baseOffset());
-            }
-
-            /* During replication of uncommitted data it is possible for the remote replica to send record batches after it lost
-             * leadership. This can happen if sending FETCH responses is slow. There is a race between sending the FETCH
-             * response and the replica truncating and appending to the log. The replicating replica resolves this issue by only
-             * persisting up to the current leader epoch used in the fetch request. See KAFKA-18723 for more details.
-             */
-            skipRemainingBatches = skipRemainingBatches || hasHigherPartitionLeaderEpoch(batch, origin, leaderEpoch);
-            if (skipRemainingBatches) {
-                LOGGER.info("Skipping batch {} from an origin of {} because its partition leader epoch {} is higher than the replica's current leader epoch {}",
-                    batch, origin, batch.partitionLeaderEpoch(), leaderEpoch);
-            } else {
-                // update the first offset if on the first message. For magic versions older than 2, we use the last offset
-                // to avoid the need to decompress the data (the last offset can be obtained directly from the wrapper message).
-                // For magic version 2, we can get the first offset directly from the batch header.
-                // When appending to the leader, we will update LogAppendInfo.baseOffset with the correct value. In the follower
-                // case, validation will be more lenient.
-                // Also indicate whether we have the accurate first offset or not
-                if (!readFirstMessage) {
-                    if (batch.magic() >= RecordBatch.MAGIC_VALUE_V2) {
-                        firstOffset = batch.baseOffset();
-                    }
-                    lastOffsetOfFirstBatch = batch.lastOffset();
-                    readFirstMessage = true;
-                }
-
-                // check that offsets are monotonically increasing
-                if (lastOffset >= batch.lastOffset()) {
-                    monotonic = false;
-                }
-
-                // update the last offset seen
-                lastOffset = batch.lastOffset();
-                lastLeaderEpoch = batch.partitionLeaderEpoch();
-
-                // Check if the message sizes are valid.
-                int batchSize = batch.sizeInBytes();
-                if (!ignoreRecordSize && batchSize > config.maxMessageSize()) {
-                    brokerTopicStats.topicStats(topicPartition.topic()).bytesRejectedRate().mark(records.sizeInBytes());
-                    brokerTopicStats.allTopicsStats().bytesRejectedRate().mark(records.sizeInBytes());
-                    throw new RecordTooLargeException("The record batch size in the append to " + topicPartition + " is " + batchSize + " bytes " +
-                        "which exceeds the maximum configured value of " + config.maxMessageSize() + ").");
-                }
-
-                // check the validity of the message by checking CRC
-                if (!batch.isValid()) {
-                    brokerTopicStats.allTopicsStats().invalidMessageCrcRecordsPerSec().mark();
-                    throw new CorruptRecordException("Record is corrupt (stored crc = " + batch.checksum() + ") in topic partition " + topicPartition + ".");
-                }
-
-                if (batch.maxTimestamp() > maxTimestamp) {
-                    maxTimestamp = batch.maxTimestamp();
-                }
-
-                validBytesCount += batchSize;
-
-                CompressionType batchCompression = CompressionType.forId(batch.compressionType().id);
-                // sourceCompression is only used on the leader path, which only contains one batch if version is v2 or messages are compressed
-                if (batchCompression != CompressionType.NONE) {
-                    sourceCompression = batchCompression;
-                }
-            }
-
-            if (requireOffsetsMonotonic && !monotonic) {
-                throw new OffsetOutOfRangeException("Out of order offsets found in append to " + topicPartition + ": " +
-                    StreamSupport.stream(records.records().spliterator(), false)
-                        .map(Record::offset)
-                        .map(String::valueOf)
-                        .collect(Collectors.joining(",")));
-            }
-        }
-        Optional<Integer> lastLeaderEpochOpt = (lastLeaderEpoch != RecordBatch.NO_PARTITION_LEADER_EPOCH)
-            ? Optional.of(lastLeaderEpoch)
-            : Optional.empty();
-
-        return new LogAppendInfo(firstOffset, lastOffset, lastLeaderEpochOpt, maxTimestamp,
-            RecordBatch.NO_TIMESTAMP, logStartOffset, RecordValidationStats.EMPTY, sourceCompression,
-            validBytesCount, lastOffsetOfFirstBatch, Collections.emptyList(), LeaderHwChange.NONE);
-    }
-
-    /**
-     * Return true if the record batch has a higher leader epoch than the specified leader epoch
-     *
-     * @param batch the batch to validate
-     * @param origin the reason for appending the record batch
-     * @param leaderEpoch the epoch to compare
-     * @return true if the append reason is replication and the batch's partition leader epoch is
-     *         greater than the specified leaderEpoch, otherwise false
-     */
-    private static boolean hasHigherPartitionLeaderEpoch(RecordBatch batch, AppendOrigin origin, int leaderEpoch) {
-        return origin == AppendOrigin.REPLICATION
-            && batch.partitionLeaderEpoch() != RecordBatch.NO_PARTITION_LEADER_EPOCH
-            && batch.partitionLeaderEpoch() > leaderEpoch;
-    }
-
-    /**
-     * Trim any invalid bytes from the end of this message set (if there are any)
-     *
-     * @param records The records to trim
-     * @param info The general information of the message set
-     * @return A trimmed message set. This may be the same as what was passed in, or it may not.
-     */
-    static MemoryRecords trimInvalidBytes(TopicPartition topicPartition, MemoryRecords records, LogAppendInfo info) {
-        int validBytes = info.validBytes();
-        if (validBytes < 0) {
-            throw new CorruptRecordException("Cannot append record batch with illegal length " + validBytes + " to " +
-                "log for " + topicPartition + ". A possible cause is a corrupted produce request.");
-        }
-        if (validBytes == records.sizeInBytes()) {
-            return records;
-        } else {
-            // trim invalid bytes
-            ByteBuffer validByteBuffer = records.buffer().duplicate();
-            validByteBuffer.limit(validBytes);
-            return MemoryRecords.readableRecords(validByteBuffer);
-        }
-    }
-
-
    // Similar to UnifiedLog.append(...)
    static LogAppendInfo validateAndAppendBatch(
        final Time time,
@@ -252,6 +76,7 @@ static LogAppendInfo validateAndAppendBatch(
            false,
            true, // ensures that offsets across batches on the same partition grow monotonically
            LEADER_EPOCH,
+            LOGGER,
            brokerTopicStats);

        if (appendInfo.validBytes() <= 0) {
0 commit comments