Skip to content

Commit b66be53

Browse files
author
Adriano Sanges
committed
Refactor scan_properties.py to simplify script structure and remove type annotations
- Remove main() function and move code directly under `__main__` block
- Simplify imports and remove unused type hints
- Update DataFrame iteration method to use `iter_rows(named=True)`
- Streamline variable assignments and message sending logic
1 parent a8205c2 commit b66be53

File tree

1 file changed

+20
-22
lines changed

1 file changed

+20
-22
lines changed

real-estate-etl/scan_properties.py

Lines changed: 20 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1,46 +1,44 @@
1-
from typing import List, Dict, Any
21
from bs4 import BeautifulSoup
3-
import polars as pl
2+
import polars
43
import duckdb
54
from scraper import parse_listing
65
from database import clean_properties, get_new_properties
76
from dotenv import load_dotenv
87
import os
98
from telegram_api import send_message, format_property_message
109

11-
def main() -> None:
12-
"""Main function to scrape properties and send messages."""
13-
load_dotenv()
1410

15-
url: str = os.getenv("scrape_url")
16-
warehouse_name: str = os.getenv("warehouse_name")
17-
motherduck_token: str = os.getenv("motherduck_token")
1811

19-
if not url or not warehouse_name or not motherduck_token:
20-
raise ValueError("Environment variables for URL, warehouse name, or token are not set.")
12+
if __name__ == "__main__":
13+
14+
15+
load_dotenv()
2116

22-
data: List[Dict[str, Any]] = parse_listing(url)
17+
url = os.getenv("scrape_url")
18+
warehouse_name = os.getenv("warehouse_name")
19+
motherduck_token = os.getenv("motherduck_token")
2320

24-
polars_df: pl.DataFrame = pl.DataFrame(data)
21+
data = parse_listing(url)
22+
23+
polars_df = polars.DataFrame(data)
2524

2625
con = duckdb.connect(f"md:{warehouse_name}?motherduck_token={motherduck_token}")
2726

2827
con.sql("create table if not exists main.properties as select * from polars_df")
2928

3029
clean_properties(con)
3130

32-
new_properties: pl.DataFrame = get_new_properties(con)
33-
34-
# Format and send messages
35-
messages: List[str] = [format_property_message(row) for row in new_properties.to_dicts()]
31+
new_properties = get_new_properties(con)
32+
# Iterate over the DataFrame and format each property
33+
messages = [format_property_message(row) for row in new_properties.iter_rows(named=True)]
3634

37-
# Send messages in chunks of two
35+
# Send messages in chunks of two
3836
for i in range(0, len(messages), 2):
39-
message_chunk: List[str] = messages[i:i+2]
40-
full_message: str = "\n\n".join(message_chunk)
37+
# Get the current chunk of two messages
38+
message_chunk = messages[i:i+2]
39+
# Join the two messages with a separator
40+
full_message = "\n\n".join(message_chunk)
41+
# Send the combined message
4142
send_message(full_message)
4243

43-
if __name__ == "__main__":
44-
main()
45-
4644

0 commit comments

Comments
 (0)