Skip to content

Commit 35de30b

Browse files
authored
Merge pull request #38 from sebastian-nagel/jwarc-33-chunk-size-opt-space
Allow optional space after chunk-size in chunked transfer-encoding, fixes #33
2 parents 65e68ac + c7cb0e1 commit 35de30b

File tree

3 files changed

+82
-57
lines changed

3 files changed

+82
-57
lines changed

src/org/netpreserve/jwarc/ChunkedBody.java

+59-55
Original file line number | Diff line number | Diff line change
@@ -217,9 +217,9 @@ private static byte[] init__chunked_actions_0()
217217
private static short[] init__chunked_key_offsets_0()
218218
{
219219
return new short [] {
220-
0, 0, 7, 16, 17, 33, 34, 42, 43, 44, 45, 60,
221-
76, 91, 108, 115, 117, 123, 139, 145, 146, 164, 165, 180,
222-
196, 211, 228, 235, 237, 243, 250
220+
0, 0, 7, 18, 21, 22, 38, 39, 49, 52, 53, 54,
221+
55, 70, 86, 101, 120, 127, 131, 137, 153, 159, 160, 178,
222+
179, 194, 210, 225, 244, 251, 255, 261, 268
223223
};
224224
}
225225

@@ -229,27 +229,29 @@ private static short[] init__chunked_key_offsets_0()
229229
private static char[] init__chunked_trans_keys_0()
230230
{
231231
return new char [] {
232-
48, 49, 57, 65, 70, 97, 102, 13, 48, 59, 49, 57,
233-
65, 70, 97, 102, 10, 13, 33, 124, 126, 35, 39, 42,
234-
43, 45, 46, 48, 57, 65, 90, 94, 122, 10, 13, 59,
235-
48, 57, 65, 70, 97, 102, 10, 13, 10, 33, 124, 126,
232+
48, 49, 57, 65, 70, 97, 102, 9, 13, 32, 48, 59,
233+
49, 57, 65, 70, 97, 102, 9, 13, 32, 10, 13, 33,
234+
124, 126, 35, 39, 42, 43, 45, 46, 48, 57, 65, 90,
235+
94, 122, 10, 9, 13, 32, 59, 48, 57, 65, 70, 97,
236+
102, 9, 13, 32, 10, 13, 10, 33, 124, 126, 35, 39,
237+
42, 43, 45, 46, 48, 57, 65, 90, 94, 122, 33, 61,
238+
124, 126, 35, 39, 42, 43, 45, 46, 48, 57, 65, 90,
239+
94, 122, 34, 124, 126, 33, 39, 42, 43, 45, 46, 48,
240+
57, 65, 90, 94, 122, 9, 13, 32, 33, 59, 124, 126,
236241
35, 39, 42, 43, 45, 46, 48, 57, 65, 90, 94, 122,
237-
33, 61, 124, 126, 35, 39, 42, 43, 45, 46, 48, 57,
238-
65, 90, 94, 122, 34, 124, 126, 33, 39, 42, 43, 45,
239-
46, 48, 57, 65, 90, 94, 122, 13, 33, 59, 124, 126,
240-
35, 39, 42, 43, 45, 46, 48, 57, 65, 90, 94, 122,
241-
9, 34, 92, 32, 126, 128, 255, 13, 59, 0, 9, 11,
242-
12, 14, 127, 33, 58, 124, 126, 35, 39, 42, 43, 45,
243-
46, 48, 57, 65, 90, 94, 122, 9, 13, 32, 126, 128,
244-
255, 10, 9, 13, 32, 33, 124, 126, 35, 39, 42, 43,
245-
45, 46, 48, 57, 65, 90, 94, 122, 10, 33, 124, 126,
246-
35, 39, 42, 43, 45, 46, 48, 57, 65, 90, 94, 122,
247-
33, 61, 124, 126, 35, 39, 42, 43, 45, 46, 48, 57,
248-
65, 90, 94, 122, 34, 124, 126, 33, 39, 42, 43, 45,
249-
46, 48, 57, 65, 90, 94, 122, 13, 33, 59, 124, 126,
250-
35, 39, 42, 43, 45, 46, 48, 57, 65, 90, 94, 122,
251-
9, 34, 92, 32, 126, 128, 255, 13, 59, 0, 9, 11,
252-
12, 14, 127, 48, 49, 57, 65, 70, 97, 102, 0
242+
9, 34, 92, 32, 126, 128, 255, 9, 13, 32, 59, 0,
243+
9, 11, 12, 14, 127, 33, 58, 124, 126, 35, 39, 42,
244+
43, 45, 46, 48, 57, 65, 90, 94, 122, 9, 13, 32,
245+
126, 128, 255, 10, 9, 13, 32, 33, 124, 126, 35, 39,
246+
42, 43, 45, 46, 48, 57, 65, 90, 94, 122, 10, 33,
247+
124, 126, 35, 39, 42, 43, 45, 46, 48, 57, 65, 90,
248+
94, 122, 33, 61, 124, 126, 35, 39, 42, 43, 45, 46,
249+
48, 57, 65, 90, 94, 122, 34, 124, 126, 33, 39, 42,
250+
43, 45, 46, 48, 57, 65, 90, 94, 122, 9, 13, 32,
251+
33, 59, 124, 126, 35, 39, 42, 43, 45, 46, 48, 57,
252+
65, 90, 94, 122, 9, 34, 92, 32, 126, 128, 255, 9,
253+
13, 32, 59, 0, 9, 11, 12, 14, 127, 48, 49, 57,
254+
65, 70, 97, 102, 0
253255
};
254256
}
255257

@@ -259,9 +261,9 @@ private static char[] init__chunked_trans_keys_0()
259261
private static byte[] init__chunked_single_lengths_0()
260262
{
261263
return new byte [] {
262-
0, 1, 3, 1, 4, 1, 2, 1, 1, 1, 3, 4,
263-
3, 5, 3, 2, 0, 4, 2, 1, 6, 1, 3, 4,
264-
3, 5, 3, 2, 0, 1, 0
264+
0, 1, 5, 3, 1, 4, 1, 4, 3, 1, 1, 1,
265+
3, 4, 3, 7, 3, 4, 0, 4, 2, 1, 6, 1,
266+
3, 4, 3, 7, 3, 4, 0, 1, 0
265267
};
266268
}
267269

@@ -271,9 +273,9 @@ private static byte[] init__chunked_single_lengths_0()
271273
private static byte[] init__chunked_range_lengths_0()
272274
{
273275
return new byte [] {
274-
0, 3, 3, 0, 6, 0, 3, 0, 0, 0, 6, 6,
275-
6, 6, 2, 0, 3, 6, 2, 0, 6, 0, 6, 6,
276-
6, 6, 2, 0, 3, 3, 0
276+
0, 3, 3, 0, 0, 6, 0, 3, 0, 0, 0, 0,
277+
6, 6, 6, 6, 2, 0, 3, 6, 2, 0, 6, 0,
278+
6, 6, 6, 6, 2, 0, 3, 3, 0
277279
};
278280
}
279281

@@ -283,9 +285,9 @@ private static byte[] init__chunked_range_lengths_0()
283285
private static short[] init__chunked_index_offsets_0()
284286
{
285287
return new short [] {
286-
0, 0, 5, 12, 14, 25, 27, 33, 35, 37, 39, 49,
287-
60, 70, 82, 88, 91, 95, 106, 111, 113, 126, 128, 138,
288-
149, 159, 171, 177, 180, 184, 189
288+
0, 0, 5, 14, 18, 20, 31, 33, 41, 45, 47, 49,
289+
51, 61, 72, 82, 96, 102, 107, 111, 122, 127, 129, 142,
290+
144, 154, 165, 175, 189, 195, 200, 204, 209
289291
};
290292
}
291293

@@ -295,22 +297,24 @@ private static short[] init__chunked_index_offsets_0()
295297
private static byte[] init__chunked_indicies_0()
296298
{
297299
return new byte [] {
298-
0, 2, 2, 2, 1, 3, 0, 4, 2, 2, 2, 1,
299-
5, 1, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7,
300-
1, 8, 1, 9, 10, 2, 2, 2, 1, 11, 1, 12,
301-
1, 13, 1, 14, 14, 14, 14, 14, 14, 14, 14, 14,
302-
1, 14, 15, 14, 14, 14, 14, 14, 14, 14, 14, 1,
303-
17, 16, 16, 16, 16, 16, 16, 16, 16, 1, 9, 16,
304-
10, 16, 16, 16, 16, 16, 16, 16, 16, 1, 17, 18,
305-
19, 17, 17, 1, 9, 10, 1, 17, 17, 17, 1, 7,
306-
20, 7, 7, 7, 7, 7, 7, 7, 7, 1, 20, 21,
307-
20, 20, 1, 22, 1, 20, 23, 20, 7, 7, 7, 7,
308-
7, 7, 7, 7, 7, 1, 24, 1, 25, 25, 25, 25,
309-
25, 25, 25, 25, 25, 1, 25, 26, 25, 25, 25, 25,
310-
25, 25, 25, 25, 1, 28, 27, 27, 27, 27, 27, 27,
311-
27, 27, 1, 3, 27, 4, 27, 27, 27, 27, 27, 27,
312-
27, 27, 1, 28, 29, 30, 28, 28, 1, 3, 4, 1,
313-
28, 28, 28, 1, 0, 2, 2, 2, 1, 1, 0
300+
0, 2, 2, 2, 1, 3, 4, 3, 0, 5, 2, 2,
301+
2, 1, 3, 4, 3, 1, 6, 1, 7, 8, 8, 8,
302+
8, 8, 8, 8, 8, 8, 1, 9, 1, 10, 11, 10,
303+
12, 2, 2, 2, 1, 10, 11, 10, 1, 13, 1, 14,
304+
1, 15, 1, 16, 16, 16, 16, 16, 16, 16, 16, 16,
305+
1, 16, 17, 16, 16, 16, 16, 16, 16, 16, 16, 1,
306+
19, 18, 18, 18, 18, 18, 18, 18, 18, 1, 10, 11,
307+
10, 18, 12, 18, 18, 18, 18, 18, 18, 18, 18, 1,
308+
19, 20, 21, 19, 19, 1, 10, 11, 10, 12, 1, 19,
309+
19, 19, 1, 8, 22, 8, 8, 8, 8, 8, 8, 8,
310+
8, 1, 22, 23, 22, 22, 1, 24, 1, 22, 25, 22,
311+
8, 8, 8, 8, 8, 8, 8, 8, 8, 1, 26, 1,
312+
27, 27, 27, 27, 27, 27, 27, 27, 27, 1, 27, 28,
313+
27, 27, 27, 27, 27, 27, 27, 27, 1, 30, 29, 29,
314+
29, 29, 29, 29, 29, 29, 1, 3, 4, 3, 29, 5,
315+
29, 29, 29, 29, 29, 29, 29, 29, 1, 30, 31, 32,
316+
30, 30, 1, 3, 4, 3, 5, 1, 30, 30, 30, 1,
317+
0, 2, 2, 2, 1, 1, 0
314318
};
315319
}
316320

@@ -320,9 +324,9 @@ private static byte[] init__chunked_indicies_0()
320324
private static byte[] init__chunked_trans_targs_0()
321325
{
322326
return new byte [] {
323-
2, 0, 6, 3, 22, 4, 5, 17, 29, 7, 10, 8,
324-
9, 1, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21,
325-
30, 23, 24, 25, 26, 27, 28
327+
2, 0, 7, 3, 4, 24, 5, 6, 19, 31, 8, 9,
328+
12, 10, 11, 1, 13, 14, 15, 16, 17, 18, 20, 21,
329+
22, 23, 32, 25, 26, 27, 28, 29, 30
326330
};
327331
}
328332

@@ -332,17 +336,17 @@ private static byte[] init__chunked_trans_targs_0()
332336
private static byte[] init__chunked_trans_actions_0()
333337
{
334338
return new byte [] {
335-
1, 0, 1, 0, 0, 3, 0, 0, 5, 0, 0, 3,
336-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
337-
5, 0, 0, 0, 0, 0, 0
339+
1, 0, 1, 0, 0, 0, 3, 0, 0, 5, 0, 0,
340+
0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
341+
0, 0, 5, 0, 0, 0, 0, 0, 0
338342
};
339343
}
340344

341345
private static final byte _chunked_trans_actions[] = init__chunked_trans_actions_0();
342346

343347

344348
static final int chunked_start = 1;
345-
static final int chunked_first_final = 29;
349+
static final int chunked_first_final = 31;
346350
static final int chunked_error = 0;
347351

348352
static final int chunked_en_chunks = 1;

src/org/netpreserve/jwarc/ChunkedBody.rl

+2-2
Original file line number | Diff line number | Diff line change
@@ -108,9 +108,9 @@ class ChunkedBody extends MessageBody {
108108
chunk_ext_val = token | quoted_string;
109109
chunk_extension = ';' token '=' chunk_ext_val;
110110
chunk_length = hexdigit+ $add_length;
111-
chunk_header = chunk_length chunk_extension* CRLF @end_header;
111+
chunk_header = chunk_length chunk_extension* WS* CRLF @end_header;
112112
chunk = chunk_header CRLF;
113-
last_chunk = "0"+ chunk_extension* CRLF;
113+
last_chunk = "0"+ chunk_extension* WS* CRLF;
114114
chunks := chunk* last_chunk named_fields @end_final;
115115
}%%
116116

test/org/netpreserve/jwarc/ChunkedBodyTest.java

+21
Original file line number | Diff line number | Diff line change
@@ -69,4 +69,25 @@ public void testBypassInternalBuffer() throws IOException {
6969
assertFalse(initBuf.hasRemaining());
7070
assertEquals(bodyString, new String(Arrays.copyOf(buf.array(), buf.position()), US_ASCII));
7171
}
72+
73+
/** Test trailing whitespace after chunk length (#33) */
74+
@Test
75+
public void testChunkLengthTrailingWhiteSpace() throws IOException {
76+
String bodyString = "hello world, hello world!";
77+
byte[] body = ("19 \r\n" + bodyString + "\r\n00000\r\n\r\n").getBytes(US_ASCII);
78+
ByteBuffer buf = ByteBuffer.allocate(8192);
79+
ByteBuffer initBuf = ByteBuffer.allocate(8192);
80+
initBuf.flip();
81+
ReadableByteChannel chan = Channels.newChannel(new ByteArrayInputStream(body));
82+
ChunkedBody decoder = new ChunkedBody(chan, initBuf);
83+
while (true) {
84+
int n = decoder.read(buf);
85+
assertNotEquals(0, n);
86+
if (n < 0) {
87+
break;
88+
}
89+
}
90+
assertFalse(initBuf.hasRemaining());
91+
assertEquals(bodyString, new String(Arrays.copyOf(buf.array(), buf.position()), US_ASCII));
92+
}
7293
}

0 commit comments

Comments
 (0)