|
1 | | -from typing import List, Dict, Any |
2 | 1 | from bs4 import BeautifulSoup |
3 | | -import polars as pl |
| 2 | +import polars |
4 | 3 | import duckdb |
5 | 4 | from scraper import parse_listing |
6 | 5 | from database import clean_properties, get_new_properties |
7 | 6 | from dotenv import load_dotenv |
8 | 7 | import os |
9 | 8 | from telegram_api import send_message, format_property_message |
10 | 9 |
|
11 | | -def main() -> None: |
12 | | - """Main function to scrape properties and send messages.""" |
13 | | - load_dotenv() |
14 | 10 |
|
15 | | - url: str = os.getenv("scrape_url") |
16 | | - warehouse_name: str = os.getenv("warehouse_name") |
17 | | - motherduck_token: str = os.getenv("motherduck_token") |
18 | 11 |
|
19 | | - if not url or not warehouse_name or not motherduck_token: |
20 | | - raise ValueError("Environment variables for URL, warehouse name, or token are not set.") |
| 12 | +if __name__ == "__main__": |
| 13 | + |
| 14 | + |
| 15 | + load_dotenv() |
21 | 16 |
|
22 | | - data: List[Dict[str, Any]] = parse_listing(url) |
| 17 | + url = os.getenv("scrape_url") |
| 18 | + warehouse_name = os.getenv("warehouse_name") |
| 19 | + motherduck_token = os.getenv("motherduck_token") |
23 | 20 |
|
24 | | - polars_df: pl.DataFrame = pl.DataFrame(data) |
| 21 | + data = parse_listing(url) |
| 22 | + |
| 23 | + polars_df = polars.DataFrame(data) |
25 | 24 |
|
26 | 25 | con = duckdb.connect(f"md:{warehouse_name}?motherduck_token={motherduck_token}") |
27 | 26 |
|
28 | 27 | con.sql("create table if not exists main.properties as select * from polars_df") |
29 | 28 |
|
30 | 29 | clean_properties(con) |
31 | 30 |
|
32 | | - new_properties: pl.DataFrame = get_new_properties(con) |
33 | | - |
34 | | - # Format and send messages |
35 | | - messages: List[str] = [format_property_message(row) for row in new_properties.to_dicts()] |
| 31 | + new_properties = get_new_properties(con) |
| 32 | + # Iterate over the DataFrame and format each property |
| 33 | + messages = [format_property_message(row) for row in new_properties.iter_rows(named=True)] |
36 | 34 |
|
37 | | - # Send messages in chunks of two |
| 35 | +# Send messages in chunks of two |
38 | 36 | for i in range(0, len(messages), 2): |
39 | | - message_chunk: List[str] = messages[i:i+2] |
40 | | - full_message: str = "\n\n".join(message_chunk) |
| 37 | + # Get the current chunk of two messages |
| 38 | + message_chunk = messages[i:i+2] |
| 39 | + # Join the two messages with a separator |
| 40 | + full_message = "\n\n".join(message_chunk) |
| 41 | + # Send the combined message |
41 | 42 | send_message(full_message) |
42 | 43 |
|
43 | | -if __name__ == "__main__": |
44 | | - main() |
45 | | - |
46 | 44 |
|
0 commit comments