Skip to content

Commit e8b4d2d

Browse files
Mantisus and vdusek authored
fix: Fix serialization payload in Request. Fix Docs for Post Request (#683)
### Description

The `payload` field of `Request` is currently serialized incorrectly. `payload` is represented as a byte string, but bytes cannot be serialized to JSON. As a result, when a request is saved to and loaded from the queue (`_persist_single_request_to_storage`, `_json_to_request`), we get strings of the form "b'test'".

The documentation was also corrected: when filling in forms, the data should not be passed in JSON format, and the request must carry the appropriate header.

### Issues

- Closes: #668

### Testing

Updated `test_request_state_serialization` to also take `payload` into account when serializing and deserializing `Request`.

### Checklist

- [x] CI passed

---------

Co-authored-by: Vlada Dusek <[email protected]>
1 parent 33be41e commit e8b4d2d

File tree

4 files changed

+14
-16
lines changed

4 files changed

+14
-16
lines changed

docs/examples/code/fill_and_submit_web_form_crawler.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import asyncio
2-
import json
2+
from urllib.parse import urlencode
33

44
from crawlee import Request
55
from crawlee.http_crawler import HttpCrawler, HttpCrawlingContext
@@ -19,7 +19,8 @@ async def request_handler(context: HttpCrawlingContext) -> None:
1919
request = Request.from_url(
2020
url='https://httpbin.org/post',
2121
method='POST',
22-
payload=json.dumps(
22+
headers={'content-type': 'application/x-www-form-urlencoded'},
23+
payload=urlencode(
2324
{
2425
'custname': 'John Doe',
2526
'custtel': '1234567890',

docs/examples/code/fill_and_submit_web_form_request.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
1-
import json
1+
from urllib.parse import urlencode
22

33
from crawlee import Request
44

55
# Prepare a POST request to the form endpoint.
66
request = Request.from_url(
77
url='https://httpbin.org/post',
88
method='POST',
9-
payload=json.dumps(
9+
headers={'content-type': 'application/x-www-form-urlencoded'},
10+
payload=urlencode(
1011
{
1112
'custname': 'John Doe',
1213
'custtel': '1234567890',

src/crawlee/_request.py

+6-10
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,7 @@
88
from enum import IntEnum
99
from typing import Annotated, Any, cast
1010

11-
from pydantic import (
12-
BaseModel,
13-
BeforeValidator,
14-
ConfigDict,
15-
Field,
16-
PlainSerializer,
17-
PlainValidator,
18-
TypeAdapter,
19-
)
11+
from pydantic import BaseModel, BeforeValidator, ConfigDict, Field, PlainSerializer, PlainValidator, TypeAdapter
2012
from typing_extensions import Self
2113

2214
from crawlee._types import EnqueueStrategy, HttpHeaders, HttpMethod, HttpPayload, JsonSerializable
@@ -143,7 +135,11 @@ class BaseRequestData(BaseModel):
143135
headers: Annotated[HttpHeaders, Field(default_factory=HttpHeaders)] = HttpHeaders()
144136
"""HTTP request headers."""
145137

146-
payload: HttpPayload | None = None
138+
payload: Annotated[
139+
HttpPayload | None,
140+
BeforeValidator(lambda v: v.encode() if isinstance(v, str) else v),
141+
PlainSerializer(lambda v: v.decode() if isinstance(v, bytes) else None),
142+
] = None
147143
"""HTTP request payload."""
148144

149145
user_data: Annotated[

tests/unit/_memory_storage_client/test_request_queue_client.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -93,19 +93,19 @@ async def test_list_head(request_queue_client: RequestQueueClient) -> None:
9393

9494

9595
async def test_request_state_serialization(request_queue_client: RequestQueueClient) -> None:
96-
request = Request.from_url('https://crawlee.dev')
96+
request = Request.from_url('https://crawlee.dev', payload=b'test')
9797
request.state = RequestState.UNPROCESSED
9898

9999
await request_queue_client.add_request(request)
100100

101101
result = await request_queue_client.list_head()
102-
103102
assert len(result.items) == 1
104103
assert result.items[0] == request
105104

106105
got_request = await request_queue_client.get_request(request.id)
107106

108107
assert request == got_request
108+
assert request.payload == got_request.payload
109109

110110

111111
async def test_add_record(request_queue_client: RequestQueueClient) -> None:

0 commit comments

Comments
 (0)