Skip to content

Commit 1167cf2

Browse files
committed
CLDSRV-863: Update TrailingChecksumTransform to use an FSM
1 parent 4ace027 commit 1167cf2

File tree

2 files changed

+463
-286
lines changed

2 files changed

+463
-286
lines changed

lib/auth/streamingV4/trailingChecksumTransform.js

Lines changed: 207 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,202 @@ const { maximumAllowedPartSize } = require('../../../constants');
55
const incompleteBodyError = errorInstances.IncompleteBody.customizeDescription(
66
'The request body terminated unexpectedly');
77

8+
// Byte values of the CRLF framing delimiters used by aws-chunked encoding.
const CR = 0x0d; // '\r'
const LF = 0x0a; // '\n'

/**
 * Parser states for the AWS chunked-upload framing FSM.
 * Values are sequential integers assigned in declaration order; they are
 * only ever compared symbolically (never serialized), aside from appearing
 * in diagnostic log fields.
 */
const State = Object.freeze(Object.fromEntries([
    'READ_CHUNK_LEN',
    'READ_CHUNK_LEN_LF',
    'READ_CHUNK_DATA',
    'READ_CHUNK_DATA_CR',
    'READ_CHUNK_DATA_LF',
    'COMPLETED_NO_TRAILER',
    'READ_TRAILER_CHECKSUM',
    'READ_TRAILER_CHECKSUM_LF',
    'COMPLETED_WITH_TRAILER',
].map((name, index) => [name, index])));
22+
23+
class Fsm {
    constructor() {
        // Current parser state; starts by reading the first chunk-length field.
        this.state = State.READ_CHUNK_LEN;
        // Shared scratch buffer for the chunk-length field and the trailer line.
        this.buffer = Buffer.alloc(1024);
        // Number of scratch bytes currently accumulated.
        this.bufferOffset = 0;
        // Object-data bytes still owed by the chunk being forwarded.
        this.chunkRemaining = 0;
    }

    /**
     * Decode the hex chunk-length field accumulated in the scratch buffer.
     *
     * @param {object} log - request logger
     * @return {{err: (ArsenalError|null), len: number}} `err` set on invalid
     * input, otherwise `len` is the decoded chunk length in bytes
     */
    _decodeChunkLen(log) {
        const chunkLenStr = this.buffer.toString('ascii', 0, this.bufferOffset).trim();
        if (chunkLenStr.length === 0) {
            log.error('empty chunk length field');
            return { err: errors.InvalidArgument, len: 0 };
        }
        // AWS does not do this check, it returns 500 if it is too large.
        if (chunkLenStr.length > 9) {
            log.error('chunk length field too large', { chunkLenStr });
            return { err: errors.InvalidArgument, len: 0 };
        }
        // After trimming, the field must be pure hexadecimal digits.
        if (!/^[0-9a-f]+$/i.test(chunkLenStr)) {
            log.error('invalid chunk size', { chunkLenStr });
            return { err: errors.InvalidArgument, len: 0 };
        }
        const len = Number.parseInt(chunkLenStr, 16);
        if (len > maximumAllowedPartSize) {
            log.error('chunk size too big', { chunkLen: len });
            return { err: errors.EntityTooLarge, len: 0 };
        }
        return { err: null, len };
    }

    /**
     * Parse the accumulated trailer line ("name:value") and signal it.
     *
     * @param {function} emit - bound Transform.emit, called as
     * emit('trailer', name, value)
     * @param {object} log - request logger
     * @return {ArsenalError|null} incompleteBodyError when no ":" separator
     * is present, null on success
     */
    _flushTrailerLine(emit, log) {
        const trailerLine = this.buffer.toString('ascii', 0, this.bufferOffset).trim();
        const sep = trailerLine.indexOf(':');
        if (sep <= 0) {
            log.error('incomplete trailer missing ":"', { trailerLine });
            return incompleteBodyError;
        }
        emit('trailer', trailerLine.slice(0, sep).trim(), trailerLine.slice(sep + 1).trim());
        return null;
    }

    /**
     * Advance the FSM by one input buffer.
     *
     * Walks the AWS chunked-upload framing byte-by-byte, forwarding raw
     * object data via `push` and signalling the trailing checksum line via
     * `emit`. The FSM keeps its position across calls, so a chunk or header
     * may span multiple input buffers.
     *
     * @param {Buffer} data - incoming bytes from the transport stream
     * @param {function} push - bound Transform.push; receives each slice of
     * decoded object data
     * @param {function} emit - bound Transform.emit; called as
     * emit('trailer', name, value) once the trailing checksum line is parsed
     * @param {object} log - request logger
     * @return {ArsenalError|null} an Arsenal error if the framing is invalid,
     * or null on success
     */
    step(data, push, emit, log) {
        let cursor = 0;
        const total = data.byteLength;

        while (cursor < total) {
            switch (this.state) {
            case State.READ_CHUNK_LEN: {
                const b = data[cursor++];
                if (b === CR) {
                    this.state = State.READ_CHUNK_LEN_LF;
                } else {
                    // Accumulate all bytes: AWS accepts whitespace around the
                    // hex digits. This bound is only a safety-net against
                    // excessively long fields; the real digit-count limit
                    // (> 9 trimmed chars) is enforced by _decodeChunkLen.
                    this.buffer[this.bufferOffset++] = b;
                    if (this.bufferOffset >= this.buffer.length) {
                        log.error('chunk length field too large');
                        return errors.InvalidArgument;
                    }
                }
                continue;
            }
            case State.READ_CHUNK_LEN_LF: {
                const b = data[cursor++];
                if (b !== LF) {
                    log.error('expected LF after chunk length CR', { byte: b });
                    return errors.InvalidArgument;
                }

                const { err, len } = this._decodeChunkLen(log);
                if (err) {
                    return err;
                }

                this.bufferOffset = 0;
                if (len === 0) {
                    // "0\r\n" marks the final chunk.
                    this.state = State.COMPLETED_NO_TRAILER;
                } else {
                    this.chunkRemaining = len;
                    this.state = State.READ_CHUNK_DATA;
                }
                continue;
            }
            case State.READ_CHUNK_DATA: {
                // subarray clamps to the input bounds, so the slice may be
                // shorter than chunkRemaining when a chunk spans several
                // _transform calls.
                const slice = data.subarray(cursor, cursor + this.chunkRemaining);
                push(slice);
                cursor += slice.byteLength;
                this.chunkRemaining -= slice.byteLength;
                if (this.chunkRemaining === 0) {
                    this.state = State.READ_CHUNK_DATA_CR;
                }
                continue;
            }
            case State.READ_CHUNK_DATA_CR: {
                const b = data[cursor++];
                if (b !== CR) {
                    log.error('expected CR after chunk data', { byte: b });
                    return errors.InvalidArgument;
                }
                this.state = State.READ_CHUNK_DATA_LF;
                continue;
            }
            case State.READ_CHUNK_DATA_LF: {
                const b = data[cursor++];
                if (b !== LF) {
                    log.error('expected LF after chunk data CR', { byte: b });
                    return errors.InvalidArgument;
                }
                // Clean slate before reading the next chunk header.
                this.bufferOffset = 0;
                this.chunkRemaining = 0;
                this.state = State.READ_CHUNK_LEN;
                continue;
            }
            case State.COMPLETED_NO_TRAILER:
                // More bytes arrived after "0\r\n": a trailer line follows.
                // Reset the shared buffer so the trailer accumulates cleanly.
                // No byte is consumed by this transition.
                this.bufferOffset = 0;
                this.state = State.READ_TRAILER_CHECKSUM;
                continue;
            case State.READ_TRAILER_CHECKSUM: {
                const b = data[cursor++];
                if (b === CR) {
                    this.state = State.READ_TRAILER_CHECKSUM_LF;
                } else {
                    // Accumulate all bytes, AWS accepts white spaces before
                    // and after the CRLF.
                    this.buffer[this.bufferOffset++] = b;
                    if (this.bufferOffset >= this.buffer.length) {
                        log.error('trailer field too large');
                        return errors.MalformedTrailerError;
                    }
                }
                continue;
            }
            case State.READ_TRAILER_CHECKSUM_LF: {
                const b = data[cursor++];
                if (b !== LF) {
                    log.error('expected LF after trailer CR', { byte: b });
                    return errors.InvalidArgument;
                }

                // An empty line (bufferOffset === 0) is the final CRLF of the
                // stream and carries no trailer to parse.
                if (this.bufferOffset > 0) {
                    const err = this._flushTrailerLine(emit, log);
                    if (err) {
                        return err;
                    }
                }

                this.state = State.COMPLETED_WITH_TRAILER;
                continue;
            }
            case State.COMPLETED_WITH_TRAILER:
                // Trailing checksum successfully parsed; discard extra data.
                return null;
            }
        }

        return null;
    }
}
203+
8204
/**
9205
* This class handles the chunked-upload body format used by
10206
* STREAMING-UNSIGNED-PAYLOAD-TRAILER requests. It strips the chunk-size
@@ -20,14 +216,8 @@ class TrailingChecksumTransform extends Transform {
20216
constructor(log) {
21217
super({});
22218
this.log = log;
23-
this.chunkSizeBuffer = Buffer.alloc(0);
24-
this.bytesToDiscard = 0; // when trailing \r\n are present, we discard them but they can be in different chunks
25-
this.bytesToRead = 0; // when a chunk is advertised, the size is put here and we forward all bytes
26-
this.streamClosed = false;
27-
this.readingTrailer = false;
28-
this.trailerBuffer = Buffer.alloc(0);
29-
this.trailerName = null;
30-
this.trailerValue = null;
219+
this.log.addDefaultFields({ component: 'TrailingChecksumTransform' });
220+
this.fsm = new Fsm();
31221
}
32222

33223
/**
@@ -38,17 +228,14 @@ class TrailingChecksumTransform extends Transform {
38228
* @return {function} executes callback with err if applicable
39229
*/
40230
_flush(callback) {
41-
if (!this.streamClosed && this.readingTrailer && this.trailerBuffer.length === 0) {
42-
// Nothing came after "0\r\n", don't fail.
43-
// If the x-amz-trailer header was present then the trailer is required and ChecksumTransform will fail.
44-
return callback();
45-
} else if (!this.streamClosed && this.readingTrailer && this.trailerBuffer.length !== 0) {
46-
this.log.error('stream ended without trailer "\r\n"');
47-
return callback(incompleteBodyError);
48-
} else if (!this.streamClosed && !this.readingTrailer) {
49-
this.log.error('stream ended without closing chunked encoding');
231+
// COMPLETED means we saw "0\r\n" but no trailer bytes after,
232+
// ChecksumTransform will enforce the trailer if x-amz-trailer was present.
233+
if (this.fsm.state !== State.COMPLETED_WITH_TRAILER && this.fsm.state !== State.COMPLETED_NO_TRAILER) {
234+
this.log.error('stream ended without closing chunked encoding',
235+
{ state: this.fsm.state });
50236
return callback(incompleteBodyError);
51237
}
238+
52239
return callback();
53240
}
54241

@@ -61,115 +248,10 @@ class TrailingChecksumTransform extends Transform {
61248
* @return {function} executes callback with err if applicable
62249
*/
63250
_transform(chunkInput, encoding, callback) {
64-
let chunk = chunkInput;
65-
while (chunk.byteLength > 0 && !this.streamClosed) {
66-
if (this.bytesToDiscard > 0) {
67-
const toDiscard = Math.min(this.bytesToDiscard, chunk.byteLength);
68-
chunk = chunk.subarray(toDiscard);
69-
this.bytesToDiscard -= toDiscard;
70-
continue;
71-
}
72-
// forward up to bytesToRead bytes from the chunk, restart processing on leftover
73-
if (this.bytesToRead > 0) {
74-
const toRead = Math.min(this.bytesToRead, chunk.byteLength);
75-
this.push(chunk.subarray(0, toRead));
76-
chunk = chunk.subarray(toRead);
77-
this.bytesToRead -= toRead;
78-
if (this.bytesToRead === 0) {
79-
this.bytesToDiscard = 2;
80-
}
81-
continue;
82-
}
83-
84-
// after the 0-size chunk, read the trailer line (e.g. "x-amz-checksum-crc32:YABb/g==")
85-
if (this.readingTrailer) {
86-
const combined = Buffer.concat([this.trailerBuffer, chunk]);
87-
const lineBreakIndex = combined.indexOf('\r\n');
88-
if (lineBreakIndex === -1) {
89-
if (combined.byteLength > 1024) {
90-
this.log.error('trailer line too long');
91-
return callback(errors.MalformedTrailerError);
92-
}
93-
// The trailer is not complete yet, continue.
94-
this.trailerBuffer = combined;
95-
return callback();
96-
}
97-
this.trailerBuffer = Buffer.alloc(0);
98-
const fullTrailer = combined.subarray(0, lineBreakIndex);
99-
if (fullTrailer.length === 0) {
100-
// The trailer is empty, stop reading.
101-
this.readingTrailer = false;
102-
this.streamClosed = true;
103-
return callback();
104-
}
105-
let trailerLine = fullTrailer.toString();
106-
// Some clients terminate the trailer with \n\r\n instead of
107-
// just \r\n, producing a trailing \n in the parsed line.
108-
if (trailerLine.endsWith('\n')) {
109-
trailerLine = trailerLine.slice(0, -1);
110-
}
111-
const colonIndex = trailerLine.indexOf(':');
112-
if (colonIndex > 0) {
113-
this.trailerName = trailerLine.slice(0, colonIndex).trim();
114-
this.trailerValue = trailerLine.slice(colonIndex + 1).trim();
115-
this.emit('trailer', this.trailerName, this.trailerValue);
116-
} else {
117-
this.log.error('incomplete trailer missing ":"', { trailerLine });
118-
return callback(incompleteBodyError);
119-
}
120-
this.readingTrailer = false;
121-
this.streamClosed = true;
122-
// The trailer \r\n is the last bytes of the stream per the AWS
123-
// chunked upload format, so any remaining bytes are discarded.
124-
return callback();
125-
}
126-
127-
// we are now looking for the chunk size field
128-
// no need to look further than 10 bytes since the field cannot be bigger: the max
129-
// chunk size is 5GB (see constants.maximumAllowedPartSize)
130-
const lineBreakIndex = chunk.subarray(0, 10).indexOf('\r');
131-
const bytesToKeep = lineBreakIndex === -1 ? chunk.byteLength : lineBreakIndex;
132-
if (this.chunkSizeBuffer.byteLength + bytesToKeep > 10) {
133-
this.log.error('chunk size field too big', {
134-
chunkSizeBuffer: this.chunkSizeBuffer.subarray(0, 11).toString('hex'),
135-
chunkSizeBufferLength: this.chunkSizeBuffer.length,
136-
truncatedChunk: chunk.subarray(0, 10).toString('hex'),
137-
});
138-
// if bigger, the chunk would be over 5 GB
139-
// returning early to avoid a DoS by memory exhaustion
140-
return callback(errors.InvalidArgument);
141-
}
142-
if (lineBreakIndex === -1) {
143-
// no delimiter, we'll keep the chunk for later
144-
this.chunkSizeBuffer = Buffer.concat([this.chunkSizeBuffer, chunk]);
145-
return callback();
146-
}
147-
148-
this.chunkSizeBuffer = Buffer.concat([this.chunkSizeBuffer, chunk.subarray(0, lineBreakIndex)]);
149-
chunk = chunk.subarray(lineBreakIndex);
150-
151-
// chunk-size is sent in hex
152-
const chunkSizeStr = this.chunkSizeBuffer.toString();
153-
const dataSize = parseInt(chunkSizeStr, 16);
154-
// we check that the parsing is correct (parseInt returns a partial parse when it fails)
155-
if (isNaN(dataSize) || dataSize.toString(16) !== chunkSizeStr.toLowerCase()) {
156-
this.log.error('invalid chunk size', { chunkSizeBuffer: chunkSizeStr });
157-
return callback(errors.InvalidArgument);
158-
}
159-
this.chunkSizeBuffer = Buffer.alloc(0);
160-
if (dataSize === 0) {
161-
// last chunk, no more data to read; enter trailer-reading mode
162-
// bytesToDiscard = 2 below will consume the \r\n after "0"
163-
this.readingTrailer = true;
164-
}
165-
if (dataSize > maximumAllowedPartSize) {
166-
this.log.error('chunk size too big', { dataSize });
167-
return callback(errors.EntityTooLarge);
168-
}
169-
this.bytesToRead = dataSize;
170-
this.bytesToDiscard = 2;
251+
const err = this.fsm.step(chunkInput, this.push.bind(this), this.emit.bind(this), this.log);
252+
if (err) {
253+
return callback(err);
171254
}
172-
173255
return callback();
174256
}
175257
}

0 commit comments

Comments
 (0)