@@ -5,6 +5,202 @@ const { maximumAllowedPartSize } = require('../../../constants');
// Error returned when the stream ends before the chunked framing is complete
// (missing final "0\r\n" chunk or an unterminated trailer line).
const incompleteBodyError = errorInstances.IncompleteBody.customizeDescription(
    'The request body terminated unexpectedly');
77
// Byte values of the CRLF terminators used by the aws-chunked framing.
const [CR, LF] = ['\r', '\n'].map(c => c.charCodeAt(0));

// States of the chunked-upload framing parser, numbered in the order the
// grammar visits them (chunk header -> chunk data -> zero chunk -> trailer).
const State = Object.freeze(Object.fromEntries([
    'READ_CHUNK_LEN',
    'READ_CHUNK_LEN_LF',
    'READ_CHUNK_DATA',
    'READ_CHUNK_DATA_CR',
    'READ_CHUNK_DATA_LF',
    'COMPLETED_NO_TRAILER',
    'READ_TRAILER_CHECKSUM',
    'READ_TRAILER_CHECKSUM_LF',
    'COMPLETED_WITH_TRAILER',
].map((name, index) => [name, index])));
22+
/**
 * Finite-state machine that parses the AWS chunked-upload ("aws-chunked")
 * body framing one byte at a time: it strips the hex chunk-size lines and
 * CRLF separators, forwards the raw payload bytes, and parses the optional
 * trailing checksum line (e.g. "x-amz-checksum-crc32:YABb/g==").
 *
 * The instance carries its parse position across successive step() calls,
 * so a chunk header, payload or trailer may be split across transport
 * buffers.
 */
class Fsm {
    constructor() {
        // Current position in the framing grammar; one of the State values.
        this.state = State.READ_CHUNK_LEN;
        // Scratch buffer shared by the chunk-length and trailer fields;
        // its size (1024) is the hard cap on either field's length.
        this.buffer = Buffer.alloc(1024);
        // Number of bytes accumulated so far in `buffer`.
        this.bufferOffset = 0;
        // Payload bytes of the current chunk still to be forwarded.
        this.chunkRemaining = 0;
    }

    /**
     * Advance the FSM by one input buffer.
     *
     * Parses the AWS chunked-upload framing byte-by-byte, forwarding raw object
     * data via `push` and signalling the trailer via `emit`.
     *
     * @param {Buffer} data - incoming bytes from the transport stream
     * @param {function} push - bound Transform.push; called with each slice of
     *   decoded object data
     * @param {function} emit - bound Transform.emit; called as
     *   emit('trailer', name, value) when the trailing checksum line is parsed
     * @param {object} log - request logger
     * @return {ArsenalError|null} an Arsenal error if the framing is invalid,
     *   or null on success
     */
    step(data, push, emit, log) {
        let idx = 0;

        while (idx < data.byteLength) {
            switch (this.state) {
            case State.READ_CHUNK_LEN: {
                const byte = data[idx++];
                if (byte === CR) {
                    this.state = State.READ_CHUNK_LEN_LF;
                    continue;
                }

                // Accumulate all bytes. AWS accepts whitespace before and after the hex digits.
                // This is a safety-net against excessively long fields; the real digit-count
                // limit (> 9 trimmed chars) is enforced in READ_CHUNK_LEN_LF after trim().
                this.buffer[this.bufferOffset++] = byte;
                if (this.bufferOffset >= this.buffer.length) {
                    log.error('chunk length field too large');
                    return errors.InvalidArgument;
                }

                continue;
            }
            case State.READ_CHUNK_LEN_LF: {
                const byte = data[idx++];
                if (byte !== LF) {
                    log.error('expected LF after chunk length CR', { byte });
                    return errors.InvalidArgument;
                }

                const chunkLenStr = this.buffer.toString('ascii', 0, this.bufferOffset).trim();
                if (chunkLenStr.length === 0) {
                    log.error('empty chunk length field');
                    return errors.InvalidArgument;
                }
                // 9 hex digits already exceeds the 5 GB part-size limit.
                // AWS does not do this check, it returns 500 if it is too large.
                if (chunkLenStr.length > 9) {
                    log.error('chunk length field too large', { chunkLenStr });
                    return errors.InvalidArgument;
                }

                // Check it is HEX (parseInt alone would accept partial garbage).
                if (!/^[0-9a-fA-F]+$/.test(chunkLenStr)) {
                    log.error('invalid chunk size', { chunkLenStr });
                    return errors.InvalidArgument;
                }
                const chunkLen = parseInt(chunkLenStr, 16);

                if (chunkLen > maximumAllowedPartSize) {
                    log.error('chunk size too big', { chunkLen });
                    return errors.EntityTooLarge;
                }

                this.bufferOffset = 0;

                // A zero-length chunk ("0\r\n") marks the end of the payload.
                if (chunkLen === 0) {
                    this.state = State.COMPLETED_NO_TRAILER;
                    continue;
                }

                this.chunkRemaining = chunkLen;
                this.state = State.READ_CHUNK_DATA;

                continue;
            }
            case State.READ_CHUNK_DATA: {
                // subarray clamps to buffer bounds, so toPush may be shorter than chunkRemaining
                // when the chunk spans multiple _transform calls.
                const toPush = data.subarray(idx, idx + this.chunkRemaining);
                push(toPush);
                this.chunkRemaining -= toPush.byteLength;
                idx += toPush.byteLength;
                if (this.chunkRemaining === 0) {
                    this.state = State.READ_CHUNK_DATA_CR;
                }
                continue;
            }
            case State.READ_CHUNK_DATA_CR: {
                const byte = data[idx++];
                if (byte !== CR) {
                    log.error('expected CR after chunk data', { byte });
                    return errors.InvalidArgument;
                }

                this.state = State.READ_CHUNK_DATA_LF;

                continue;
            }
            case State.READ_CHUNK_DATA_LF: {
                const byte = data[idx++];
                if (byte !== LF) {
                    log.error('expected LF after chunk data CR', { byte });
                    return errors.InvalidArgument;
                }

                // Reset buffer state before reading the next chunk header.
                this.bufferOffset = 0;
                this.chunkRemaining = 0;
                this.state = State.READ_CHUNK_LEN;

                continue;
            }
            case State.COMPLETED_NO_TRAILER:
                // A byte arrived after "0\r\n" — transition to trailer reading.
                // The byte is intentionally NOT consumed here (no idx++): it is
                // re-examined by READ_TRAILER_CHECKSUM on the next iteration.
                // bufferOffset is reset here so the shared buffer is clean for the trailer.
                this.state = State.READ_TRAILER_CHECKSUM;
                this.bufferOffset = 0;
                continue;
            case State.READ_TRAILER_CHECKSUM: {
                const byte = data[idx++];
                if (byte === CR) {
                    this.state = State.READ_TRAILER_CHECKSUM_LF;
                    continue;
                }

                // Accumulate all bytes, AWS accepts white spaces before and after the CRLF
                this.buffer[this.bufferOffset++] = byte;
                if (this.bufferOffset === this.buffer.length) {
                    log.error('trailer field too large');
                    return errors.MalformedTrailerError;
                }

                continue;
            }
            case State.READ_TRAILER_CHECKSUM_LF: {
                const byte = data[idx++];
                if (byte !== LF) {
                    log.error('expected LF after trailer CR', { byte });
                    return errors.InvalidArgument;
                }

                // An empty line right after "0\r\n" is a valid end-of-body
                // with no trailer; only parse/emit when bytes were collected.
                // A stray leading '\n' from clients ending with "\n\r\n" is
                // removed by trim() below.
                if (this.bufferOffset > 0) {
                    const trailerLine = this.buffer.toString('ascii', 0, this.bufferOffset).trim();
                    const colonIndex = trailerLine.indexOf(':');
                    if (colonIndex > 0) {
                        const trailerName = trailerLine.slice(0, colonIndex).trim();
                        const trailerValue = trailerLine.slice(colonIndex + 1).trim();
                        emit('trailer', trailerName, trailerValue);
                    } else {
                        log.error('incomplete trailer missing ":"', { trailerLine });
                        return incompleteBodyError;
                    }
                }

                this.state = State.COMPLETED_WITH_TRAILER;

                continue;
            }
            case State.COMPLETED_WITH_TRAILER:
                // We successfully parsed the trailing checksum, discard all extra data.
                return null;
            }
        }

        return null;
    }
}
203+
8204/**
9205 * This class handles the chunked-upload body format used by
10206 * STREAMING-UNSIGNED-PAYLOAD-TRAILER requests. It strips the chunk-size
@@ -20,14 +216,8 @@ class TrailingChecksumTransform extends Transform {
20216 constructor ( log ) {
21217 super ( { } ) ;
22218 this . log = log ;
23- this . chunkSizeBuffer = Buffer . alloc ( 0 ) ;
24- this . bytesToDiscard = 0 ; // when trailing \r\n are present, we discard them but they can be in different chunks
25- this . bytesToRead = 0 ; // when a chunk is advertised, the size is put here and we forward all bytes
26- this . streamClosed = false ;
27- this . readingTrailer = false ;
28- this . trailerBuffer = Buffer . alloc ( 0 ) ;
29- this . trailerName = null ;
30- this . trailerValue = null ;
219+ this . log . addDefaultFields ( { component : 'TrailingChecksumTransform' } ) ;
220+ this . fsm = new Fsm ( ) ;
31221 }
32222
33223 /**
@@ -38,17 +228,14 @@ class TrailingChecksumTransform extends Transform {
38228 * @return {function } executes callback with err if applicable
39229 */
40230 _flush ( callback ) {
41- if ( ! this . streamClosed && this . readingTrailer && this . trailerBuffer . length === 0 ) {
42- // Nothing came after "0\r\n", don't fail.
43- // If the x-amz-trailer header was present then the trailer is required and ChecksumTransform will fail.
44- return callback ( ) ;
45- } else if ( ! this . streamClosed && this . readingTrailer && this . trailerBuffer . length !== 0 ) {
46- this . log . error ( 'stream ended without trailer "\r\n"' ) ;
47- return callback ( incompleteBodyError ) ;
48- } else if ( ! this . streamClosed && ! this . readingTrailer ) {
49- this . log . error ( 'stream ended without closing chunked encoding' ) ;
231+ // COMPLETED means we saw "0\r\n" but no trailer bytes after,
232+ // ChecksumTransform will enforce the trailer if x-amz-trailer was present.
233+ if ( this . fsm . state !== State . COMPLETED_WITH_TRAILER && this . fsm . state !== State . COMPLETED_NO_TRAILER ) {
234+ this . log . error ( 'stream ended without closing chunked encoding' ,
235+ { state : this . fsm . state } ) ;
50236 return callback ( incompleteBodyError ) ;
51237 }
238+
52239 return callback ( ) ;
53240 }
54241
@@ -61,115 +248,10 @@ class TrailingChecksumTransform extends Transform {
61248 * @return {function } executes callback with err if applicable
62249 */
63250 _transform ( chunkInput , encoding , callback ) {
64- let chunk = chunkInput ;
65- while ( chunk . byteLength > 0 && ! this . streamClosed ) {
66- if ( this . bytesToDiscard > 0 ) {
67- const toDiscard = Math . min ( this . bytesToDiscard , chunk . byteLength ) ;
68- chunk = chunk . subarray ( toDiscard ) ;
69- this . bytesToDiscard -= toDiscard ;
70- continue ;
71- }
72- // forward up to bytesToRead bytes from the chunk, restart processing on leftover
73- if ( this . bytesToRead > 0 ) {
74- const toRead = Math . min ( this . bytesToRead , chunk . byteLength ) ;
75- this . push ( chunk . subarray ( 0 , toRead ) ) ;
76- chunk = chunk . subarray ( toRead ) ;
77- this . bytesToRead -= toRead ;
78- if ( this . bytesToRead === 0 ) {
79- this . bytesToDiscard = 2 ;
80- }
81- continue ;
82- }
83-
84- // after the 0-size chunk, read the trailer line (e.g. "x-amz-checksum-crc32:YABb/g==")
85- if ( this . readingTrailer ) {
86- const combined = Buffer . concat ( [ this . trailerBuffer , chunk ] ) ;
87- const lineBreakIndex = combined . indexOf ( '\r\n' ) ;
88- if ( lineBreakIndex === - 1 ) {
89- if ( combined . byteLength > 1024 ) {
90- this . log . error ( 'trailer line too long' ) ;
91- return callback ( errors . MalformedTrailerError ) ;
92- }
93- // The trailer is not complete yet, continue.
94- this . trailerBuffer = combined ;
95- return callback ( ) ;
96- }
97- this . trailerBuffer = Buffer . alloc ( 0 ) ;
98- const fullTrailer = combined . subarray ( 0 , lineBreakIndex ) ;
99- if ( fullTrailer . length === 0 ) {
100- // The trailer is empty, stop reading.
101- this . readingTrailer = false ;
102- this . streamClosed = true ;
103- return callback ( ) ;
104- }
105- let trailerLine = fullTrailer . toString ( ) ;
106- // Some clients terminate the trailer with \n\r\n instead of
107- // just \r\n, producing a trailing \n in the parsed line.
108- if ( trailerLine . endsWith ( '\n' ) ) {
109- trailerLine = trailerLine . slice ( 0 , - 1 ) ;
110- }
111- const colonIndex = trailerLine . indexOf ( ':' ) ;
112- if ( colonIndex > 0 ) {
113- this . trailerName = trailerLine . slice ( 0 , colonIndex ) . trim ( ) ;
114- this . trailerValue = trailerLine . slice ( colonIndex + 1 ) . trim ( ) ;
115- this . emit ( 'trailer' , this . trailerName , this . trailerValue ) ;
116- } else {
117- this . log . error ( 'incomplete trailer missing ":"' , { trailerLine } ) ;
118- return callback ( incompleteBodyError ) ;
119- }
120- this . readingTrailer = false ;
121- this . streamClosed = true ;
122- // The trailer \r\n is the last bytes of the stream per the AWS
123- // chunked upload format, so any remaining bytes are discarded.
124- return callback ( ) ;
125- }
126-
127- // we are now looking for the chunk size field
128- // no need to look further than 10 bytes since the field cannot be bigger: the max
129- // chunk size is 5GB (see constants.maximumAllowedPartSize)
130- const lineBreakIndex = chunk . subarray ( 0 , 10 ) . indexOf ( '\r' ) ;
131- const bytesToKeep = lineBreakIndex === - 1 ? chunk . byteLength : lineBreakIndex ;
132- if ( this . chunkSizeBuffer . byteLength + bytesToKeep > 10 ) {
133- this . log . error ( 'chunk size field too big' , {
134- chunkSizeBuffer : this . chunkSizeBuffer . subarray ( 0 , 11 ) . toString ( 'hex' ) ,
135- chunkSizeBufferLength : this . chunkSizeBuffer . length ,
136- truncatedChunk : chunk . subarray ( 0 , 10 ) . toString ( 'hex' ) ,
137- } ) ;
138- // if bigger, the chunk would be over 5 GB
139- // returning early to avoid a DoS by memory exhaustion
140- return callback ( errors . InvalidArgument ) ;
141- }
142- if ( lineBreakIndex === - 1 ) {
143- // no delimiter, we'll keep the chunk for later
144- this . chunkSizeBuffer = Buffer . concat ( [ this . chunkSizeBuffer , chunk ] ) ;
145- return callback ( ) ;
146- }
147-
148- this . chunkSizeBuffer = Buffer . concat ( [ this . chunkSizeBuffer , chunk . subarray ( 0 , lineBreakIndex ) ] ) ;
149- chunk = chunk . subarray ( lineBreakIndex ) ;
150-
151- // chunk-size is sent in hex
152- const chunkSizeStr = this . chunkSizeBuffer . toString ( ) ;
153- const dataSize = parseInt ( chunkSizeStr , 16 ) ;
154- // we check that the parsing is correct (parseInt returns a partial parse when it fails)
155- if ( isNaN ( dataSize ) || dataSize . toString ( 16 ) !== chunkSizeStr . toLowerCase ( ) ) {
156- this . log . error ( 'invalid chunk size' , { chunkSizeBuffer : chunkSizeStr } ) ;
157- return callback ( errors . InvalidArgument ) ;
158- }
159- this . chunkSizeBuffer = Buffer . alloc ( 0 ) ;
160- if ( dataSize === 0 ) {
161- // last chunk, no more data to read; enter trailer-reading mode
162- // bytesToDiscard = 2 below will consume the \r\n after "0"
163- this . readingTrailer = true ;
164- }
165- if ( dataSize > maximumAllowedPartSize ) {
166- this . log . error ( 'chunk size too big' , { dataSize } ) ;
167- return callback ( errors . EntityTooLarge ) ;
168- }
169- this . bytesToRead = dataSize ;
170- this . bytesToDiscard = 2 ;
251+ const err = this . fsm . step ( chunkInput , this . push . bind ( this ) , this . emit . bind ( this ) , this . log ) ;
252+ if ( err ) {
253+ return callback ( err ) ;
171254 }
172-
173255 return callback ( ) ;
174256 }
175257}
0 commit comments