Skip to content

Commit e7c9390

Browse files
jlacoline authored and asvetlov committed
Feature/bugfix: make the HTTP client able to return HTTP chunks when chunked transfer encoding is used (#2150)
* implement http chunk parsing in http parser (C version)
* http chunk decoding: implement chunk signals in Python parser
* StreamReader: add tests for [begin|end]_chunk_receiving methods
* update documentation to clarify the difference between iter_any() and iter_chunks()
* add tests for http chunks parsing
* add changelog file for PR 2150
* http chunk parsing: readchunk() now returns tuples of (data, end_of_http_chunk)
* http chunk parsing: adapt iterchunks() generator to new return format
* streams.py: use parentheses for line wrapping instead of backslash
* add unit tests for ChunkTupleAsyncStreamIterator
* do not catch EofStream in ChunkTupleAsyncStreamIterator
* change the behaviour of stream.readchunk when searching for the next http chunk
* add tests to the stream.readchunk() method
* http_parser.py: remove useless blank line
* update documentation in streams.rst
* update documentation in docs/client_reference.rst
* minor change to test_streams.py
* change formatting in streams.rst
* fix spelling errors in documentation
* streams.rst: replace 'boolean' with :class:`bool`
1 parent 42361c5 commit e7c9390

File tree

10 files changed

+261
-23
lines changed

10 files changed

+261
-23
lines changed

aiohttp/_http_parser.pyx

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ cdef class HttpParser:
117117
self._csettings.on_body = cb_on_body
118118
self._csettings.on_message_begin = cb_on_message_begin
119119
self._csettings.on_message_complete = cb_on_message_complete
120+
self._csettings.on_chunk_header = cb_on_chunk_header
121+
self._csettings.on_chunk_complete = cb_on_chunk_complete
120122

121123
self._last_error = None
122124

@@ -208,6 +210,11 @@ cdef class HttpParser:
208210
self._payload.feed_eof()
209211
self._payload = None
210212

213+
cdef _on_chunk_header(self):
214+
self._payload.begin_http_chunk_receiving()
215+
216+
cdef _on_chunk_complete(self):
217+
self._payload.end_http_chunk_receiving()
211218

212219
### Public API ###
213220

@@ -436,6 +443,28 @@ cdef int cb_on_message_complete(cparser.http_parser* parser) except -1:
436443
return 0
437444

438445

446+
cdef int cb_on_chunk_header(cparser.http_parser* parser) except -1:
    # C-level trampoline registered as ``on_chunk_header`` in the parser
    # settings: forwards the event to the owning HttpParser instance.
    #
    # Returns 0 when the Python handler succeeds.  If the handler raises,
    # the exception is stored on ``_last_error`` and -1 is returned instead
    # of letting the exception propagate out of this callback.
    cdef HttpParser pyparser = <HttpParser>parser.data
    try:
        pyparser._on_chunk_header()
    except BaseException as exc:
        pyparser._last_error = exc
        return -1
    return 0
455+
456+
457+
cdef int cb_on_chunk_complete(cparser.http_parser* parser) except -1:
    # C-level trampoline registered as ``on_chunk_complete`` in the parser
    # settings: forwards the event to the owning HttpParser instance.
    #
    # Returns 0 when the Python handler succeeds.  If the handler raises,
    # the exception is stored on ``_last_error`` and -1 is returned instead
    # of letting the exception propagate out of this callback.
    cdef HttpParser pyparser = <HttpParser>parser.data
    try:
        pyparser._on_chunk_complete()
    except BaseException as exc:
        pyparser._last_error = exc
        return -1
    return 0
466+
467+
439468
cdef parser_error_from_errno(cparser.http_errno errno):
440469
cdef bytes desc = cparser.http_errno_description(errno)
441470

aiohttp/http_parser.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,7 @@ def feed_data(self, chunk, SEP=b'\r\n', CHUNK_EXT=b';'):
538538
else:
539539
self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
540540
self._chunk_size = size
541+
self.payload.begin_http_chunk_receiving()
541542
else:
542543
self._chunk_tail = chunk
543544
return False, None
@@ -547,18 +548,16 @@ def feed_data(self, chunk, SEP=b'\r\n', CHUNK_EXT=b';'):
547548
required = self._chunk_size
548549
chunk_len = len(chunk)
549550

550-
if required >= chunk_len:
551+
if required > chunk_len:
551552
self._chunk_size = required - chunk_len
552-
if self._chunk_size == 0:
553-
self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
554-
555553
self.payload.feed_data(chunk, chunk_len)
556554
return False, None
557555
else:
558556
self._chunk_size = 0
559557
self.payload.feed_data(chunk[:required], required)
560558
chunk = chunk[required:]
561559
self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
560+
self.payload.end_http_chunk_receiving()
562561

563562
# toss the CRLF at the end of the chunk
564563
if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
@@ -644,6 +643,12 @@ def feed_eof(self):
644643

645644
self.out.feed_eof()
646645

646+
def begin_http_chunk_receiving(self):
647+
self.out.begin_http_chunk_receiving()
648+
649+
def end_http_chunk_receiving(self):
650+
self.out.end_http_chunk_receiving()
651+
647652

648653
HttpRequestParser = HttpRequestParserPy
649654
HttpResponseParser = HttpResponseParserPy

aiohttp/streams.py

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,14 @@ def __anext__(self):
4040
raise StopAsyncIteration # NOQA
4141
return rv
4242

43+
class ChunkTupleAsyncStreamIterator(AsyncStreamIterator):
    """Asynchronous iterator over ``(data, end_of_http_chunk)`` tuples.

    Each step awaits ``read_func`` and hands its result back unchanged.
    Iteration ends when ``read_func`` produces the ``(b'', False)`` tuple.
    """

    @asyncio.coroutine
    def __anext__(self):
        chunk_tuple = yield from self.read_func()
        if chunk_tuple != (b'', False):
            return chunk_tuple
        # Empty data that is not flagged as an HTTP chunk end terminates
        # the iteration.
        raise StopAsyncIteration  # NOQA
50+
4351

4452
class AsyncStreamReaderMixin:
4553

@@ -58,20 +66,21 @@ def iter_chunked(self, n):
5866
return AsyncStreamIterator(lambda: self.read(n))
5967

6068
def iter_any(self):
61-
"""Returns an asynchronous iterator that yields slices of data
62-
as they come.
69+
"""Returns an asynchronous iterator that yields all the available
70+
data as soon as it is received
6371
6472
Python-3.5 available for Python 3.5+ only
6573
"""
6674
return AsyncStreamIterator(self.readany)
6775

6876
def iter_chunks(self):
69-
"""Returns an asynchronous iterator that yields chunks of the
70-
size as received by the server.
77+
"""Returns an asynchronous iterator that yields chunks of data
78+
as they are received by the server. The yielded objects are tuples
79+
of (bytes, bool) as returned by the StreamReader.readchunk method.
7180
7281
Python-3.5 available for Python 3.5+ only
7382
"""
74-
return AsyncStreamIterator(self.readchunk)
83+
return ChunkTupleAsyncStreamIterator(self.readchunk)
7584

7685

7786
class StreamReader(AsyncStreamReaderMixin):
@@ -96,6 +105,8 @@ def __init__(self, limit=DEFAULT_LIMIT, timer=None, loop=None):
96105
loop = asyncio.get_event_loop()
97106
self._loop = loop
98107
self._size = 0
108+
self._cursor = 0
109+
self._http_chunk_splits = None
99110
self._buffer = collections.deque()
100111
self._buffer_offset = 0
101112
self._eof = False
@@ -200,6 +211,7 @@ def unread_data(self, data):
200211
self._buffer[0] = self._buffer[0][self._buffer_offset:]
201212
self._buffer_offset = 0
202213
self._size += len(data)
214+
self._cursor -= len(data)
203215
self._buffer.appendleft(data)
204216

205217
def feed_data(self, data):
@@ -218,6 +230,18 @@ def feed_data(self, data):
218230
if not waiter.done():
219231
waiter.set_result(False)
220232

233+
def begin_http_chunk_receiving(self):
    """Signal that an HTTP chunk is about to be fed to the reader.

    The first call creates the (initially empty) list of chunk split
    positions; later calls leave the existing list untouched.
    """
    if self._http_chunk_splits is not None:
        return
    self._http_chunk_splits = []
236+
237+
def end_http_chunk_receiving(self):
    """Record that the HTTP chunk currently being fed is complete.

    Appends the current ``total_bytes`` value to the list of chunk split
    positions, unless that value is already the last recorded split (so
    a repeated call at the same position does not add a duplicate).

    Raises:
        RuntimeError: if the split list was never initialised, i.e.
            ``begin_http_chunk_receiving`` has not been called.
    """
    splits = self._http_chunk_splits
    if splits is None:
        # Name the real methods so the message matches what a caller can
        # actually find on the class.
        raise RuntimeError("Called end_http_chunk_receiving without calling "
                           "begin_http_chunk_receiving first")
    position = self.total_bytes
    if not splits or splits[-1] != position:
        splits.append(position)
244+
221245
@asyncio.coroutine
222246
def _wait(self, func_name):
223247
# StreamReader uses a future to link the protocol feed_data() method
@@ -320,16 +344,34 @@ def readany(self):
320344

321345
@asyncio.coroutine
322346
def readchunk(self):
347+
"""Returns a tuple of (data, end_of_http_chunk). When chunked transfer
348+
encoding is used, end_of_http_chunk is a boolean indicating if the end
349+
of the data corresponds to the end of a HTTP chunk , otherwise it is
350+
always False.
351+
"""
323352
if self._exception is not None:
324353
raise self._exception
325354

326355
if not self._buffer and not self._eof:
356+
if (self._http_chunk_splits and
357+
self._cursor == self._http_chunk_splits[0]):
358+
# end of http chunk without available data
359+
self._http_chunk_splits = self._http_chunk_splits[1:]
360+
return (b"", True)
327361
yield from self._wait('readchunk')
328362

329-
if self._buffer:
330-
return self._read_nowait_chunk(-1)
363+
if not self._buffer:
364+
# end of file
365+
return (b"", False)
366+
elif self._http_chunk_splits is not None:
367+
while self._http_chunk_splits:
368+
pos = self._http_chunk_splits[0]
369+
self._http_chunk_splits = self._http_chunk_splits[1:]
370+
if pos > self._cursor:
371+
return (self._read_nowait(pos-self._cursor), True)
372+
return (self._read_nowait(-1), False)
331373
else:
332-
return b""
374+
return (self._read_nowait_chunk(-1), False)
333375

334376
@asyncio.coroutine
335377
def readexactly(self, n):
@@ -378,6 +420,7 @@ def _read_nowait_chunk(self, n):
378420
data = self._buffer.popleft()
379421

380422
self._size -= len(data)
423+
self._cursor += len(data)
381424
return data
382425

383426
def _read_nowait(self, n):
@@ -438,7 +481,7 @@ def readany(self):
438481

439482
@asyncio.coroutine
440483
def readchunk(self):
441-
return b''
484+
return (b'', False)
442485

443486
@asyncio.coroutine
444487
def readexactly(self, n):

changes/2150.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Make the HTTP client able to return HTTP chunks when chunked transfer encoding is used.

docs/client_reference.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -991,7 +991,10 @@ Response object
991991

992992
.. attribute:: content
993993

994-
Payload stream, contains response's BODY (:class:`StreamReader`).
994+
Payload stream, which contains response's BODY (:class:`StreamReader`).
995+
It supports various reading methods depending on the expected format.
996+
When chunked transfer encoding is used by the server, it also allows
997+
retrieving the actual HTTP chunks.
995998

996999
Reading from the stream may raise
9971000
:exc:`aiohttp.ClientPayloadError` if the response object is

docs/streams.rst

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,21 @@ Reading Methods
7474
:return bytes: the given line
7575

7676

77+
.. comethod:: StreamReader.readchunk()
78+
79+
Read a chunk of data as it was received by the server.
80+
81+
Returns a tuple of (data, end_of_HTTP_chunk).
82+
83+
When chunked transfer encoding is used, end_of_HTTP_chunk is a :class:`bool`
84+
indicating if the end of the data corresponds to the end of an HTTP chunk,
85+
otherwise it is always ``False``.
86+
87+
:return tuple[bytes, bool]: a chunk of data and a :class:`bool` that is ``True``
88+
when the end of the returned chunk corresponds
89+
to the end of an HTTP chunk.
90+
91+
7792
Asynchronous Iteration Support
7893
------------------------------
7994

@@ -109,9 +124,20 @@ size limit and over any available data.
109124

110125
Iterates over data chunks as received from the server::
111126

112-
async for data in response.content.iter_chunks():
127+
async for data, _ in response.content.iter_chunks():
113128
print(data)
114129

130+
If chunked transfer encoding is used, the original HTTP chunk boundaries
131+
can be recovered by reading the second element of the returned tuples::
132+
133+
buffer = b""
134+
135+
async for data, end_of_http_chunk in response.content.iter_chunks():
136+
buffer += data
137+
if end_of_http_chunk:
138+
print(buffer)
139+
buffer = b""
140+
115141

116142
Helpers
117143
-------

tests/test_flowcontrol_streams.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,16 +71,18 @@ def test_readany_resume_paused(self):
7171
def test_readchunk(self):
7272
r = self._make_one()
7373
r.feed_data(b'data', 4)
74-
res = self.loop.run_until_complete(r.readchunk())
74+
res, end_of_http_chunk = self.loop.run_until_complete(r.readchunk())
7575
self.assertEqual(res, b'data')
76+
self.assertFalse(end_of_http_chunk)
7677
self.assertFalse(r._protocol.resume_reading.called)
7778

7879
def test_readchunk_resume_paused(self):
7980
r = self._make_one()
8081
r._protocol._reading_paused = True
8182
r.feed_data(b'data', 4)
82-
res = self.loop.run_until_complete(r.readchunk())
83+
res, end_of_http_chunk = self.loop.run_until_complete(r.readchunk())
8384
self.assertEqual(res, b'data')
85+
self.assertFalse(end_of_http_chunk)
8486
self.assertTrue(r._protocol.resume_reading.called)
8587

8688
def test_readexactly(self):

tests/test_http_parser.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,7 @@ def test_http_request_chunked_payload(parser):
488488
parser.feed_data(b'4\r\ndata\r\n4\r\nline\r\n0\r\n\r\n')
489489

490490
assert b'dataline' == b''.join(d for d in payload._buffer)
491+
assert [4, 8] == payload._http_chunk_splits
491492
assert payload.is_eof()
492493

493494

@@ -502,6 +503,7 @@ def test_http_request_chunked_payload_and_next_message(parser):
502503
b'transfer-encoding: chunked\r\n\r\n')
503504

504505
assert b'dataline' == b''.join(d for d in payload._buffer)
506+
assert [4, 8] == payload._http_chunk_splits
505507
assert payload.is_eof()
506508

507509
assert len(messages) == 1
@@ -521,14 +523,17 @@ def test_http_request_chunked_payload_chunks(parser):
521523
parser.feed_data(b'\n4')
522524
parser.feed_data(b'\r')
523525
parser.feed_data(b'\n')
524-
parser.feed_data(b'line\r\n0\r\n')
526+
parser.feed_data(b'li')
527+
parser.feed_data(b'ne\r\n0\r\n')
525528
parser.feed_data(b'test: test\r\n')
526529

527530
assert b'dataline' == b''.join(d for d in payload._buffer)
531+
assert [4, 8] == payload._http_chunk_splits
528532
assert not payload.is_eof()
529533

530534
parser.feed_data(b'\r\n')
531535
assert b'dataline' == b''.join(d for d in payload._buffer)
536+
assert [4, 8] == payload._http_chunk_splits
532537
assert payload.is_eof()
533538

534539

@@ -541,6 +546,7 @@ def test_parse_chunked_payload_chunk_extension(parser):
541546
b'4;test\r\ndata\r\n4\r\nline\r\n0\r\ntest: test\r\n\r\n')
542547

543548
assert b'dataline' == b''.join(d for d in payload._buffer)
549+
assert [4, 8] == payload._http_chunk_splits
544550
assert payload.is_eof()
545551

546552

tests/test_py35/test_streams_35.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,24 @@ async def test_stream_reader_iter(loop):
8181
async for raw in create_stream(loop):
8282
assert raw == next(it)
8383
pytest.raises(StopIteration, next, it)
84+
85+
86+
async def test_stream_reader_iter_chunks_no_chunked_encoding(loop):
87+
it = iter([b'line1\nline2\nline3\n'])
88+
async for data, end_of_chunk in create_stream(loop).iter_chunks():
89+
assert (data, end_of_chunk) == (next(it), False)
90+
pytest.raises(StopIteration, next, it)
91+
92+
93+
async def test_stream_reader_iter_chunks_chunked_encoding(loop):
94+
stream = streams.StreamReader(loop=loop)
95+
for line in DATA.splitlines(keepends=True):
96+
stream.begin_http_chunk_receiving()
97+
stream.feed_data(line)
98+
stream.end_http_chunk_receiving()
99+
stream.feed_eof()
100+
101+
it = iter([b'line1\n', b'line2\n', b'line3\n'])
102+
async for data, end_of_chunk in stream.iter_chunks():
103+
assert (data, end_of_chunk) == (next(it), True)
104+
pytest.raises(StopIteration, next, it)

0 commit comments

Comments
 (0)