Skip to content

Commit 1167cf2

Browse files
committed
CLDSRV-863: Update TrailingChecksumTransform to use an FSM
1 parent 4ace027 commit 1167cf2

File tree

2 files changed

+463
-286
lines changed

2 files changed

+463
-286
lines changed

lib/auth/streamingV4/trailingChecksumTransform.js

Lines changed: 207 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,202 @@ const { maximumAllowedPartSize } = require('../../../constants');
55
const incompleteBodyError = errorInstances.IncompleteBody.customizeDescription(
66
'The request body terminated unexpectedly');
77

8+
// Byte values of the CRLF framing delimiters used by aws-chunked encoding.
const CR = 0x0d; // '\r'
const LF = 0x0a; // '\n'

/**
 * Parser states for the AWS chunked-upload framing FSM.
 * Values are sequential integers assigned in declaration order; they are
 * only ever compared symbolically (never serialized), aside from appearing
 * in diagnostic log fields.
 */
const State = Object.freeze(Object.fromEntries([
    'READ_CHUNK_LEN',
    'READ_CHUNK_LEN_LF',
    'READ_CHUNK_DATA',
    'READ_CHUNK_DATA_CR',
    'READ_CHUNK_DATA_LF',
    'COMPLETED_NO_TRAILER',
    'READ_TRAILER_CHECKSUM',
    'READ_TRAILER_CHECKSUM_LF',
    'COMPLETED_WITH_TRAILER',
].map((name, index) => [name, index])));
22+
23+
class Fsm {
    constructor() {
        // Current parser state; starts by reading the first chunk-length field.
        this.state = State.READ_CHUNK_LEN;
        // Shared scratch buffer for the chunk-length field and the trailer line.
        this.buffer = Buffer.alloc(1024);
        // Number of scratch bytes currently accumulated.
        this.bufferOffset = 0;
        // Object-data bytes still owed by the chunk being forwarded.
        this.chunkRemaining = 0;
    }

    /**
     * Decode the hex chunk-length field accumulated in the scratch buffer.
     *
     * @param {object} log - request logger
     * @return {{err: (ArsenalError|null), len: number}} `err` set on invalid
     * input, otherwise `len` is the decoded chunk length in bytes
     */
    _decodeChunkLen(log) {
        const chunkLenStr = this.buffer.toString('ascii', 0, this.bufferOffset).trim();
        if (chunkLenStr.length === 0) {
            log.error('empty chunk length field');
            return { err: errors.InvalidArgument, len: 0 };
        }
        // AWS does not do this check, it returns 500 if it is too large.
        if (chunkLenStr.length > 9) {
            log.error('chunk length field too large', { chunkLenStr });
            return { err: errors.InvalidArgument, len: 0 };
        }
        // After trimming, the field must be pure hexadecimal digits.
        if (!/^[0-9a-f]+$/i.test(chunkLenStr)) {
            log.error('invalid chunk size', { chunkLenStr });
            return { err: errors.InvalidArgument, len: 0 };
        }
        const len = Number.parseInt(chunkLenStr, 16);
        if (len > maximumAllowedPartSize) {
            log.error('chunk size too big', { chunkLen: len });
            return { err: errors.EntityTooLarge, len: 0 };
        }
        return { err: null, len };
    }

    /**
     * Parse the accumulated trailer line ("name:value") and signal it.
     *
     * @param {function} emit - bound Transform.emit, called as
     * emit('trailer', name, value)
     * @param {object} log - request logger
     * @return {ArsenalError|null} incompleteBodyError when no ":" separator
     * is present, null on success
     */
    _flushTrailerLine(emit, log) {
        const trailerLine = this.buffer.toString('ascii', 0, this.bufferOffset).trim();
        const sep = trailerLine.indexOf(':');
        if (sep <= 0) {
            log.error('incomplete trailer missing ":"', { trailerLine });
            return incompleteBodyError;
        }
        emit('trailer', trailerLine.slice(0, sep).trim(), trailerLine.slice(sep + 1).trim());
        return null;
    }

    /**
     * Advance the FSM by one input buffer.
     *
     * Walks the AWS chunked-upload framing byte-by-byte, forwarding raw
     * object data via `push` and signalling the trailing checksum line via
     * `emit`. The FSM keeps its position across calls, so a chunk or header
     * may span multiple input buffers.
     *
     * @param {Buffer} data - incoming bytes from the transport stream
     * @param {function} push - bound Transform.push; receives each slice of
     * decoded object data
     * @param {function} emit - bound Transform.emit; called as
     * emit('trailer', name, value) once the trailing checksum line is parsed
     * @param {object} log - request logger
     * @return {ArsenalError|null} an Arsenal error if the framing is invalid,
     * or null on success
     */
    step(data, push, emit, log) {
        let cursor = 0;
        const total = data.byteLength;

        while (cursor < total) {
            switch (this.state) {
            case State.READ_CHUNK_LEN: {
                const b = data[cursor++];
                if (b === CR) {
                    this.state = State.READ_CHUNK_LEN_LF;
                } else {
                    // Accumulate all bytes: AWS accepts whitespace around the
                    // hex digits. This bound is only a safety-net against
                    // excessively long fields; the real digit-count limit
                    // (> 9 trimmed chars) is enforced by _decodeChunkLen.
                    this.buffer[this.bufferOffset++] = b;
                    if (this.bufferOffset >= this.buffer.length) {
                        log.error('chunk length field too large');
                        return errors.InvalidArgument;
                    }
                }
                continue;
            }
            case State.READ_CHUNK_LEN_LF: {
                const b = data[cursor++];
                if (b !== LF) {
                    log.error('expected LF after chunk length CR', { byte: b });
                    return errors.InvalidArgument;
                }

                const { err, len } = this._decodeChunkLen(log);
                if (err) {
                    return err;
                }

                this.bufferOffset = 0;
                if (len === 0) {
                    // "0\r\n" marks the final chunk.
                    this.state = State.COMPLETED_NO_TRAILER;
                } else {
                    this.chunkRemaining = len;
                    this.state = State.READ_CHUNK_DATA;
                }
                continue;
            }
            case State.READ_CHUNK_DATA: {
                // subarray clamps to the input bounds, so the slice may be
                // shorter than chunkRemaining when a chunk spans several
                // _transform calls.
                const slice = data.subarray(cursor, cursor + this.chunkRemaining);
                push(slice);
                cursor += slice.byteLength;
                this.chunkRemaining -= slice.byteLength;
                if (this.chunkRemaining === 0) {
                    this.state = State.READ_CHUNK_DATA_CR;
                }
                continue;
            }
            case State.READ_CHUNK_DATA_CR: {
                const b = data[cursor++];
                if (b !== CR) {
                    log.error('expected CR after chunk data', { byte: b });
                    return errors.InvalidArgument;
                }
                this.state = State.READ_CHUNK_DATA_LF;
                continue;
            }
            case State.READ_CHUNK_DATA_LF: {
                const b = data[cursor++];
                if (b !== LF) {
                    log.error('expected LF after chunk data CR', { byte: b });
                    return errors.InvalidArgument;
                }
                // Clean slate before reading the next chunk header.
                this.bufferOffset = 0;
                this.chunkRemaining = 0;
                this.state = State.READ_CHUNK_LEN;
                continue;
            }
            case State.COMPLETED_NO_TRAILER:
                // More bytes arrived after "0\r\n": a trailer line follows.
                // Reset the shared buffer so the trailer accumulates cleanly.
                // No byte is consumed by this transition.
                this.bufferOffset = 0;
                this.state = State.READ_TRAILER_CHECKSUM;
                continue;
            case State.READ_TRAILER_CHECKSUM: {
                const b = data[cursor++];
                if (b === CR) {
                    this.state = State.READ_TRAILER_CHECKSUM_LF;
                } else {
                    // Accumulate all bytes, AWS accepts white spaces before
                    // and after the CRLF.
                    this.buffer[this.bufferOffset++] = b;
                    if (this.bufferOffset >= this.buffer.length) {
                        log.error('trailer field too large');
                        return errors.MalformedTrailerError;
                    }
                }
                continue;
            }
            case State.READ_TRAILER_CHECKSUM_LF: {
                const b = data[cursor++];
                if (b !== LF) {
                    log.error('expected LF after trailer CR', { byte: b });
                    return errors.InvalidArgument;
                }

                // An empty line (bufferOffset === 0) is the final CRLF of the
                // stream and carries no trailer to parse.
                if (this.bufferOffset > 0) {
                    const err = this._flushTrailerLine(emit, log);
                    if (err) {
                        return err;
                    }
                }

                this.state = State.COMPLETED_WITH_TRAILER;
                continue;
            }
            case State.COMPLETED_WITH_TRAILER:
                // Trailing checksum successfully parsed; discard extra data.
                return null;
            }
        }

        return null;
    }
}
203+
8204
/**
9205
* This class handles the chunked-upload body format used by
10206
* STREAMING-UNSIGNED-PAYLOAD-TRAILER requests. It strips the chunk-size
@@ -20,14 +216,8 @@ class TrailingChecksumTransform extends Transform {
20216
constructor(log) {
21217
super({});
22218
this.log = log;
23-
this.chunkSizeBuffer = Buffer.alloc(0);
24-
this.bytesToDiscard = 0; // when trailing \r\n are present, we discard them but they can be in different chunks
25-
this.bytesToRead = 0; // when a chunk is advertised, the size is put here and we forward all bytes
26-
this.streamClosed = false;
27-
this.readingTrailer = false;
28-
this.trailerBuffer = Buffer.alloc(0);
29-
this.trailerName = null;
30-
this.trailerValue = null;
219+
this.log.addDefaultFields({ component: 'TrailingChecksumTransform' });
220+
this.fsm = new Fsm();
31221
}
32222

33223
/**
@@ -38,17 +228,14 @@ class TrailingChecksumTransform extends Transform {
38228
* @return {function} executes callback with err if applicable
39229
*/
40230
_flush(callback) {
41-
if (!this.streamClosed && this.readingTrailer && this.trailerBuffer.length === 0) {
42-
// Nothing came after "0\r\n", don't fail.
43-
// If the x-amz-trailer header was present then the trailer is required and ChecksumTransform will fail.
44-
return callback();
45-
} else if (!this.streamClosed && this.readingTrailer && this.trailerBuffer.length !== 0) {
46-
this.log.error('stream ended without trailer "\r\n"');
47-
return callback(incompleteBodyError);
48-
} else if (!this.streamClosed && !this.readingTrailer) {
49-
this.log.error('stream ended without closing chunked encoding');
231+
// COMPLETED means we saw "0\r\n" but no trailer bytes after,
232+
// ChecksumTransform will enforce the trailer if x-amz-trailer was present.
233+
if (this.fsm.state !== State.COMPLETED_WITH_TRAILER && this.fsm.state !== State.COMPLETED_NO_TRAILER) {
234+
this.log.error('stream ended without closing chunked encoding',
235+
{ state: this.fsm.state });
50236
return callback(incompleteBodyError);
51237
}
238+
52239
return callback();
53240
}
54241

@@ -61,115 +248,10 @@ class TrailingChecksumTransform extends Transform {
61248
* @return {function} executes callback with err if applicable
62249
*/
63250
_transform(chunkInput, encoding, callback) {
64-
let chunk = chunkInput;
65-
while (chunk.byteLength > 0 && !this.streamClosed) {
66-
if (this.bytesToDiscard > 0) {
67-
const toDiscard = Math.min(this.bytesToDiscard, chunk.byteLength);
68-
chunk = chunk.subarray(toDiscard);
69-
this.bytesToDiscard -= toDiscard;
70-
continue;
71-
}
72-
// forward up to bytesToRead bytes from the chunk, restart processing on leftover
73-
if (this.bytesToRead > 0) {
74-
const toRead = Math.min(this.bytesToRead, chunk.byteLength);
75-
this.push(chunk.subarray(0, toRead));
76-
chunk = chunk.subarray(toRead);
77-
this.bytesToRead -= toRead;
78-
if (this.bytesToRead === 0) {
79-
this.bytesToDiscard = 2;
80-
}
81-
continue;
82-
}
83-
84-
// after the 0-size chunk, read the trailer line (e.g. "x-amz-checksum-crc32:YABb/g==")
85-
if (this.readingTrailer) {
86-
const combined = Buffer.concat([this.trailerBuffer, chunk]);
87-
const lineBreakIndex = combined.indexOf('\r\n');
88-
if (lineBreakIndex === -1) {
89-
if (combined.byteLength > 1024) {
90-
this.log.error('trailer line too long');
91-
return callback(errors.MalformedTrailerError);
92-
}
93-
// The trailer is not complete yet, continue.
94-
this.trailerBuffer = combined;
95-
return callback();
96-
}
97-
this.trailerBuffer = Buffer.alloc(0);
98-
const fullTrailer = combined.subarray(0, lineBreakIndex);
99-
if (fullTrailer.length === 0) {
100-
// The trailer is empty, stop reading.
101-
this.readingTrailer = false;
102-
this.streamClosed = true;
103-
return callback();
104-
}
105-
let trailerLine = fullTrailer.toString();
106-
// Some clients terminate the trailer with \n\r\n instead of
107-
// just \r\n, producing a trailing \n in the parsed line.
108-
if (trailerLine.endsWith('\n')) {
109-
trailerLine = trailerLine.slice(0, -1);
110-
}
111-
const colonIndex = trailerLine.indexOf(':');
112-
if (colonIndex > 0) {
113-
this.trailerName = trailerLine.slice(0, colonIndex).trim();
114-
this.trailerValue = trailerLine.slice(colonIndex + 1).trim();
115-
this.emit('trailer', this.trailerName, this.trailerValue);
116-
} else {
117-
this.log.error('incomplete trailer missing ":"', { trailerLine });
118-
return callback(incompleteBodyError);
119-
}
120-
this.readingTrailer = false;
121-
this.streamClosed = true;
122-
// The trailer \r\n is the last bytes of the stream per the AWS
123-
// chunked upload format, so any remaining bytes are discarded.
124-
return callback();
125-
}
126-
127-
// we are now looking for the chunk size field
128-
// no need to look further than 10 bytes since the field cannot be bigger: the max
129-
// chunk size is 5GB (see constants.maximumAllowedPartSize)
130-
const lineBreakIndex = chunk.subarray(0, 10).indexOf('\r');
131-
const bytesToKeep = lineBreakIndex === -1 ? chunk.byteLength : lineBreakIndex;
132-
if (this.chunkSizeBuffer.byteLength + bytesToKeep > 10) {
133-
this.log.error('chunk size field too big', {
134-
chunkSizeBuffer: this.chunkSizeBuffer.subarray(0, 11).toString('hex'),
135-
chunkSizeBufferLength: this.chunkSizeBuffer.length,
136-
truncatedChunk: chunk.subarray(0, 10).toString('hex'),
137-
});
138-
// if bigger, the chunk would be over 5 GB
139-
// returning early to avoid a DoS by memory exhaustion
140-
return callback(errors.InvalidArgument);
141-
}
142-
if (lineBreakIndex === -1) {
143-
// no delimiter, we'll keep the chunk for later
144-
this.chunkSizeBuffer = Buffer.concat([this.chunkSizeBuffer, chunk]);
145-
return callback();
146-
}
147-
148-
this.chunkSizeBuffer = Buffer.concat([this.chunkSizeBuffer, chunk.subarray(0, lineBreakIndex)]);
149-
chunk = chunk.subarray(lineBreakIndex);
150-
151-
// chunk-size is sent in hex
152-
const chunkSizeStr = this.chunkSizeBuffer.toString();
153-
const dataSize = parseInt(chunkSizeStr, 16);
154-
// we check that the parsing is correct (parseInt returns a partial parse when it fails)
155-
if (isNaN(dataSize) || dataSize.toString(16) !== chunkSizeStr.toLowerCase()) {
156-
this.log.error('invalid chunk size', { chunkSizeBuffer: chunkSizeStr });
157-
return callback(errors.InvalidArgument);
158-
}
159-
this.chunkSizeBuffer = Buffer.alloc(0);
160-
if (dataSize === 0) {
161-
// last chunk, no more data to read; enter trailer-reading mode
162-
// bytesToDiscard = 2 below will consume the \r\n after "0"
163-
this.readingTrailer = true;
164-
}
165-
if (dataSize > maximumAllowedPartSize) {
166-
this.log.error('chunk size too big', { dataSize });
167-
return callback(errors.EntityTooLarge);
168-
}
169-
this.bytesToRead = dataSize;
170-
this.bytesToDiscard = 2;
251+
const err = this.fsm.step(chunkInput, this.push.bind(this), this.emit.bind(this), this.log);
252+
if (err) {
253+
return callback(err);
171254
}
172-
173255
return callback();
174256
}
175257
}

0 commit comments

Comments
 (0)