
Commit ab63b98

Merge pull request #44 from dunderrrrrr/get-ad
feat: get_ad
2 parents (a837d3a + e757cd1), commit ab63b98

9 files changed (+290, -13 lines)

README.md

Lines changed: 9 additions & 3 deletions
@@ -38,16 +38,16 @@ from blocket_api import (
 
 api = BlocketAPI()
 
-# will search all of blocket
-result = api.search(
+# search all of blocket
+api.search(
     "Vinterdäck Audi",
     sort_order=SortOrder.PRICE_ASC,
     locations=[Location.STOCKHOLM, Location.UPPSALA],
     category=Category.FORDONSTILLBEHOR,
 )
 
 # search for cars only
-result = api.search_car(
+api.search_car(
     "Audi", # query is optional
     sort_order=CarSortOrder.MILEAGE_ASC,
     models=[CarModel.AUDI],
@@ -57,6 +57,12 @@ result = api.search_car(
     transmission=CarTransmission.MANUAL,
     locations=[Location.STOCKHOLM],
 )
+
+# Get a recommerce or car ad
+from blocket_api import CarAd, RecommerceAd
+
+api.get_ad(RecommerceAd(12345678))
+api.get_ad(CarAd(12345678))
 ```
 
 ## 📝 Notes

blocket_api/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -1,3 +1,5 @@
+from .ad_parser import CarAd as CarAd
+from .ad_parser import RecommerceAd as RecommerceAd
 from .blocket import BlocketAPI as BlocketAPI
 from .blocket import Location as Location
 from .constants import CarColor as CarColor
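
These aliased imports simply re-export the new ad types at the package root, so callers can import them the way the README example does (illustration, not part of the diff):

from blocket_api import CarAd, RecommerceAd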

blocket_api/ad_parser.py

Lines changed: 182 additions & 0 deletions
@@ -0,0 +1,182 @@
+import json
+import re
+from dataclasses import dataclass
+from typing import Any
+
+from bs4 import BeautifulSoup, Tag
+from httpx import Response
+
+from .constants import SITE_URL
+
+
+@dataclass(frozen=True)
+class RecommerceAd:
+    id: int
+
+    @property
+    def url(self) -> str:
+        return f"{SITE_URL}/recommerce/forsale/item/{self.id}"
+
+    def parse(self, response: Response) -> Any:
+        soup = BeautifulSoup(response.content, "html.parser")
+        json_script_tag = soup.select_one(
+            'script:-soup-contains("window.__staticRouterHydrationData")'
+        )
+        if not json_script_tag:
+            return {}
+
+        if match := re.search(r'JSON\.parse\("(.+)"\)', json_script_tag.text):
+            raw_json = match.group(1)
+            escaped = raw_json.encode("utf-8").decode("unicode_escape")
+            return json.loads(escaped.encode("latin1").decode("utf-8"))
+        else:
+            return {}
+
+
+@dataclass(frozen=True)
+class CarAd:
+    id: int
+
+    @property
+    def url(self) -> str:
+        return f"{SITE_URL}/mobility/item/{self.id}"
+
+    def parse(self, response: Response) -> dict:
+        soup = BeautifulSoup(response.content, "html.parser")
+        grid = soup.find("div", class_="grid grid-cols-1 md:grid-cols-3 md:gap-x-32")
+
+        if not grid:
+            return {}
+
+        data: dict[str, Any] = {"url": self.url}
+
+        self._extract_title_and_subtitle(grid, data)
+        self._extract_quick_specs(grid, data)
+        self._extract_price(grid, data)
+        self._extract_description(grid, data)
+        self._extract_specifications(grid, data)
+        self._extract_equipment(grid, data)
+        self._extract_seller_type(soup, data)
+        self._extract_ad_id(soup, data)
+
+        return data
+
+    def _extract_title_and_subtitle(self, grid: Tag, data: dict[str, Any]) -> None:
+        if title := grid.find("h1", class_=lambda x: x and "t1" in x):
+            data["title"] = title.get_text(strip=True)
+
+        if subtitle := grid.find(
+            "p", class_=lambda x: x and "s-text-subtle" in x and "mt-8" in x
+        ):
+            data["subtitle"] = subtitle.get_text(strip=True)
+
+    def _extract_quick_specs(self, grid: Tag, data: dict[str, Any]) -> None:
+        if specs_grid := grid.find(
+            "div", class_=lambda x: x and "grid" in x and "gap-24" in x
+        ):
+            spec_items = specs_grid.find_all("div", class_="flex gap-16 hyphens-auto")
+            for item in spec_items:
+                label = item.find("span", class_="s-text-subtle")
+                value = item.find("p", class_="m-0 font-bold")
+                if label and value:
+                    label_text = label.get_text(strip=True)
+                    value_text = value.get_text(strip=True)
+
+                    key_mapping = {
+                        "Modellår": "model_year",
+                        "Miltal": "mileage",
+                        "Växellåda": "transmission",
+                        "Drivmedel": "fuel",
+                    }
+                    key = key_mapping.get(
+                        label_text, label_text.lower().replace(" ", "_")
+                    )
+                    data[key] = value_text
+
+    def _extract_price(self, grid: Tag, data: dict[str, Any]) -> None:
+        if price_section := grid.find("div", class_="border-t pt-40 mt-40"):
+            price_labels = price_section.find_all("p", class_="s-text-subtle mb-0")
+            for price_label in price_labels:
+                label_text = price_label.get_text(strip=True).lower()
+                if "pris" in label_text:
+                    price_elem = price_section.find("span", class_="t2")
+                    if price_elem:
+                        data["price"] = price_elem.get_text(strip=True)
+                    break
+                elif "månadskostnad" in label_text:
+                    monthly_elem = price_section.find("h2", class_="t2")
+                    if monthly_elem:
+                        data["monthly_cost"] = monthly_elem.get_text(strip=True)
+                    break
+
+    def _extract_description(self, grid: Tag, data: dict[str, Any]) -> None:
+        desc_sections = grid.find_all("section", class_="border-t mt-40 pt-40")
+        for section in desc_sections:
+            h2 = section.find("h2", class_="t3 mb-0")
+            if h2 and "beskrivning" in h2.get_text(strip=True).lower():
+                desc_div = section.find("div", class_="whitespace-pre-wrap")
+                if desc_div:
+                    data["description"] = desc_div.get_text(strip=True)
+                break
+
+    def _extract_specifications(self, grid: Tag, data: dict[str, Any]) -> None:
+        specs_section = grid.find("section", class_="key-info-section")
+        if not specs_section:
+            return
+
+        dl = specs_section.find("dl")
+        if not dl:
+            return
+
+        specifications: dict[str, str] = {}
+        divs = dl.find_all("div", style="break-inside:avoid-column")
+        for div in divs:
+            dt = div.find("dt")
+            dd = div.find("dd")
+            if dt and dd:
+                key_text = dt.get_text(strip=True)
+                value_text = dd.get_text(strip=True)
+                specifications[key_text] = value_text
+
+        if specifications:
+            data["specifications"] = specifications
+
+    def _extract_equipment(self, grid: Tag, data: dict[str, Any]) -> None:
+        equipment_section: Tag | None = None
+        for section in grid.find_all("section", class_="border-t pt-40 mt-40"):
+            h2 = section.find("h2", class_="t3 mb-0")
+            if h2 and "utrustning" in h2.get_text(strip=True).lower():
+                equipment_section = section
+                break
+
+        if not equipment_section:
+            return
+
+        equipment_list = equipment_section.find("ul")
+        if equipment_list:
+            equipment_items: list[str] = [
+                li.get_text(strip=True) for li in equipment_list.find_all("li")
+            ]
+            if equipment_items:
+                data["equipment"] = equipment_items
+
+    def _extract_seller_type(self, soup: BeautifulSoup, data: dict[str, Any]) -> None:
+        seller_type = (
+            "dealer"
+            if soup.find("div", class_=lambda x: x and "dealer" in str(x).lower())
+            else "private"
+        )
+        data["seller_type"] = seller_type
+
+    def _extract_ad_id(self, soup: BeautifulSoup, data: dict[str, Any]) -> None:
+        ad_info_divs = soup.find_all(
+            "div", class_="text-m flex md:flex-row flex-col md:gap-x-56 gap-y-16"
+        )
+        for div in ad_info_divs:
+            ad_id_labels = div.find_all("p", class_="s-text-subtle mb-0")
+            for ad_id_label in ad_id_labels:
+                if "Annons-ID" in ad_id_label.get_text(strip=True):
+                    ad_id_elem = ad_id_label.find_next_sibling("p")
+                    if ad_id_elem:
+                        data["ad_id"] = ad_id_elem.get_text(strip=True)
+                    break
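
For context, a minimal usage sketch of the parser classes on their own, outside BlocketAPI.get_ad(). It is not part of this commit; it assumes HEADERS is importable from blocket_api.constants (as blocket.py does) and uses a hypothetical ad id.

import httpx

from blocket_api.ad_parser import CarAd, RecommerceAd
from blocket_api.constants import HEADERS

ad = RecommerceAd(12345678)  # hypothetical ad id
response = httpx.get(ad.url, headers=HEADERS)  # ad.url is SITE_URL/recommerce/forsale/item/<id>
response.raise_for_status()

data = ad.parse(response)  # hydration JSON as a dict, or {} if the script tag is missing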

blocket_api/blocket.py

Lines changed: 10 additions & 4 deletions
@@ -4,7 +4,9 @@
 from typing import Any
 
 import httpx
+from httpx import Response
 
+from .ad_parser import CarAd, RecommerceAd
 from .constants import (
     HEADERS,
     SITE_URL,
@@ -29,14 +31,14 @@ def _request(
     *,
     url: str,
     params: list[QueryParam],
-) -> Any:
+) -> Response:
     response = httpx.get(
         url,
         headers=HEADERS,
         params=[(param.name, param.value) for param in params],
     )
     response.raise_for_status()
-    return response.json()
+    return response
 
 
 @dataclass(frozen=True)
@@ -62,7 +64,7 @@ def search(
             *([QueryParam("sub_category", sub_category.value)] if sub_category else []),
         ]
 
-        return _request(url=url, params=params)
+        return _request(url=url, params=params).json()
 
     def search_car(
         self,
@@ -96,4 +98,8 @@ def search_car(
             *[QueryParam("transmission", t.value) for t in transmission],
         ]
 
-        return _request(url=url, params=params)
+        return _request(url=url, params=params).json()
+
+    def get_ad(self, ad: RecommerceAd | CarAd) -> dict:
+        response = _request(url=ad.url, params=[])
+        return ad.parse(response)
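
A short sketch of the resulting call flow, not part of the diff: _request() now returns the raw httpx Response, search() and search_car() call .json() on it themselves, and get_ad() hands the Response to the ad object's own parse(). Assuming hypothetical ad ids:

from blocket_api import BlocketAPI, CarAd, RecommerceAd

api = BlocketAPI()

car_data = api.get_ad(CarAd(12345678))                # parsed dict, or {} if the expected markup is absent
recommerce_data = api.get_ad(RecommerceAd(12345678))  # hydration JSON, or {} if not found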

blocket_api/constants.py

Lines changed: 1 addition & 5 deletions
@@ -48,24 +48,20 @@ class Location(StrEnum):
     VASTERBOTTEN = "0.300024"
     VASTERNORRLAND = "0.300022"
     VASTMANLAND = "0.300019"
-    VASTRA_GÖTALAND = "0.300014"
+    VASTRA_GOTALAND = "0.300014"
     OREBRO = "0.300018"
     OSTERGOTLAND = "0.300005"
 
 
 class Category(StrEnum):
     AFFARSVERKSAMHET = "0.91"
-    # BIL_OCH_HUSVAGN = None
-    # BAT = None
     DJUR_OCH_TILLBEHOR = "0.77"
     ELEKTRONIK_OCH_VITVAROR = "0.93"
-    # ENTREPRENAD__OCH_LANTBRUKSMASKINER = None
     FORDONSTILLBEHOR = "0.90"
     FRITID_HOBBY_OCH_UNDERHALLNING = "0.86"
     FORALDRAR_OCH_BARN = "0.68"
     KLADER_KOSMETIKA_OCH_ACCESSOARER = "0.71"
     KONST_OCH_ANTIKT = "0.76"
-    # MOTORCYKLAR = None
     MOBLER_OCH_INREDNING = "0.78"
     SPORT_OCH_FRITID = "0.69"
     TRADGARD_OCH_RENOVERING = "0.67"

pyproject.toml

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ dependencies = [
     "mkdocs-material>=9.6.21",
     "mkdocs-swagger-ui-tag>=0.7.2",
     "websockets>=15.0.1",
+    "beautifulsoup4>=4.14.2",
 ]

requirements.txt

Lines changed: 3 additions & 1 deletion
@@ -9,7 +9,9 @@ babel==2.17.0
 backrefs==5.9
     # via mkdocs-material
 beautifulsoup4==4.14.2
-    # via mkdocs-swagger-ui-tag
+    # via
+    #   blocket-api (pyproject.toml)
+    #   mkdocs-swagger-ui-tag
 certifi==2025.6.15
     # via
     #   httpcore
