Skip to content

Commit bc0f5f4

Browse files
Author: Adriano Sanges
Commit message: Add garage information parsing to property scraping and database schema
1 parent: 4b48427 · commit: bc0f5f4

File tree

4 files changed: 25 additions (+25) and 11 deletions (-11)

4 files changed

+25
-11
lines changed

real-estate-etl/database.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,8 @@ def clean_properties(con) -> None:
1313
neighbourhood TEXT,
1414
road TEXT,
1515
square_meters INTEGER,
16-
floor TEXT
16+
floor TEXT,
17+
garage_info TEXT
1718
);
1819
CREATE OR REPLACE TABLE main.new_properties (
1920
url TEXT PRIMARY KEY,
@@ -24,13 +25,14 @@ def clean_properties(con) -> None:
2425
neighbourhood TEXT,
2526
road TEXT,
2627
square_meters INTEGER,
27-
floor TEXT
28+
floor TEXT,
29+
garage_info TEXT
2830
);
2931
"""
3032
con.sql(create_table_query)
3133
insert_query = """
32-
INSERT INTO main.cleaned_properties (url, title, price, city, neighbourhood, road, square_meters, floor)
33-
SELECT url, title, price, city, neighbourhood, road, square_meters, floor
34+
INSERT INTO main.cleaned_properties (url, title, price, city, neighbourhood, road, square_meters, floor, garage_info)
35+
SELECT url, title, price, city, neighbourhood, road, square_meters, floor, garage_info
3436
FROM main.properties
3537
WHERE NOT EXISTS (
3638
SELECT 1
@@ -39,8 +41,8 @@ def clean_properties(con) -> None:
3941
);
4042
"""
4143
insert_query_only_new = """
42-
INSERT INTO main.new_properties (url, title, price, city, neighbourhood, road, square_meters, floor)
43-
SELECT url, title, price, city, neighbourhood, road, square_meters, floor
44+
INSERT INTO main.new_properties (url, title, price, city, neighbourhood, road, square_meters, floor, garage_info)
45+
SELECT url, title, price, city, neighbourhood, road, square_meters, floor, garage_info
4446
FROM main.properties
4547
WHERE NOT EXISTS (
4648
SELECT 1

real-estate-etl/scan_properties.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,4 @@
1-
import requests
21
from bs4 import BeautifulSoup
3-
import re
4-
import logging
52
import polars
63
import duckdb
74
from scraper import parse_listing

real-estate-etl/scraper.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,16 @@ def parse_page(url):
2929

3030
floor_match = re.search(r'Piano\s(\d+)', soup.text)
3131
floor = int(floor_match.group(1)) if floor_match else None
32+
33+
# Find the feature item related to parking/garage
34+
garage_feature = listing.find('dt', class_='re-featuresItem__title', string="Box, posti auto")
35+
36+
if garage_feature:
37+
# Get the associated description (dd)
38+
garage_description = garage_feature.find_next('dd', class_='re-featuresItem__description')
39+
garage_info = garage_description.get_text(strip=True) if garage_description else None
40+
else:
41+
garage_info = None
3242

3343
data = {
3444
"url": url,
@@ -40,6 +50,7 @@ def parse_page(url):
4050
"road": road.text.strip() if road else None,
4151
"square_meters": square_meters,
4252
"floor": floor,
53+
"garage_info": garage_info,
4354
}
4455

4556
return data

real-estate-etl/telegram_api.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,15 @@
11
import telegram
22
import os
33

4+
from dotenv import load_dotenv
45

5-
TELEGRAM_BOT_API_KEY = os.getenv('telegram_bot_api_key')
6+
load_dotenv()
7+
8+
telegram_bot_api_key = os.getenv('telegram_bot_api_key')
69
chat_id = os.getenv('chat_id')
710
chat_tag = os.getenv('chat_tag')
8-
bot = telegram.Bot(TELEGRAM_BOT_API_KEY)
11+
12+
bot = telegram.Bot(telegram_bot_api_key)
913

1014

1115
# Function to format the message

0 commit comments

Comments
 (0)