Skip to content

Commit b0bf887

Browse files
committed
refactor logging paths, fix django async issue
1 parent: ab09afd · commit: b0bf887

File tree

6 files changed

+15
-13
lines changed

6 files changed

+15
-13
lines changed

.github/workflows/process-single-user.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ jobs:
7575
with:
7676
name: logs-${{ github.run_number }}
7777
path: |
78-
backend/scraping/logs/events_scraped.csv
79-
backend/scraping/logs/scraping.log
78+
backend/scraping/events_scraped.csv
79+
backend/scraping/scraping.log
8080
backend/scraping/apify_raw_results.json
8181
if-no-files-found: 'ignore'

.github/workflows/update-events-data.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ jobs:
7373
with:
7474
name: logs-${{ github.run_number }}
7575
path: |
76-
backend/scraping/logs/events_scraped.csv
77-
backend/scraping/logs/scraping.log
76+
backend/scraping/events_scraped.csv
77+
backend/scraping/scraping.log
7878
backend/scraping/apify_raw_results.json
7979
if-no-files-found: 'ignore'

backend/scraping/event_processor.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
import asyncio
22
import os
33
import sys
4-
from functools import lru_cache
54

65
# Set up Django
76
if "django" not in sys.modules:
@@ -35,9 +34,8 @@ def __init__(self, concurrency=5):
3534
self.concurrency = concurrency
3635
self.semaphore = asyncio.Semaphore(concurrency)
3736

38-
@staticmethod
39-
@lru_cache(maxsize=512)
40-
def _get_club_type(ig_handle):
37+
@sync_to_async(thread_sensitive=True)
38+
def _get_club_type(self, ig_handle):
4139
try:
4240
return Clubs.objects.get(ig=ig_handle).club_type
4341
except Clubs.DoesNotExist:

backend/scraping/logging_config.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import sys
33
from pathlib import Path
44

5-
LOG_DIR = Path("logs")
5+
LOG_DIR = Path(".")
66
LOG_DIR.mkdir(exist_ok=True)
77
LOG_FILE = LOG_DIR / "scraping.log"
88

backend/scraping/main.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,9 @@
11
import asyncio
2+
import json
23
import os
34
import sys
45
from datetime import timedelta
6+
from pathlib import Path
57

68
# 1. Setup Django
79
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -45,6 +47,10 @@ def main():
4547
else:
4648
posts = scraper.scrape(targets)
4749

50+
raw_path = Path(__file__).parent / "apify_raw_results.json"
51+
with raw_path.open("w", encoding="utf-8") as f:
52+
json.dump(posts, f, ensure_ascii=False, indent=2)
53+
4854
if not posts:
4955
logger.info("No posts retrieved. Exiting.")
5056
sys.exit(0)

backend/utils/scraping_utils.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -207,10 +207,8 @@ def append_event_to_csv(
207207
added_to_db="success",
208208
club_type=None,
209209
):
210-
logs_dir = Path(__file__).parent / "logs"
211-
logs_dir.mkdir(parents=True, exist_ok=True)
212-
csv_file = logs_dir / "events_scraped.csv"
213-
210+
csv_file = Path(__file__).parent.parent / "scraping" / "events_scraped.csv"
211+
csv_file.parent.mkdir(parents=True, exist_ok=True)
214212
file_exists = csv_file.exists()
215213

216214
occurrences = event_data.get("occurrences", []) or []

0 commit comments

Comments
 (0)