Skip to content

Commit c4909d3

Browse files
committed
add ignore_cutoff=true variable for manual workflow triggers
1 parent f8744ad commit c4909d3

File tree

2 files changed

+14
-3
lines changed

2 files changed

+14
-3
lines changed

.github/workflows/process-single-user.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ jobs:
4343
# --- Apify Token ---
4444
APIFY_API_TOKEN: ${{ secrets.APIFY_API_TOKEN }}
4545
TARGET_USERNAME: ${{ github.event.client_payload.username || inputs.username }}
46+
IGNORE_CUTOFF: ${{ github.event_name == 'workflow_dispatch' && 'true' || 'false' }}
4647
MAX_CONCURRENT_TASKS: 1
4748

4849
steps:

backend/scraping/main.py

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -71,9 +71,16 @@ def main():
7171
processor = EventProcessor(concurrency=5)
7272

7373
# Configure run based on mode
74+
ignore_cutoff = os.getenv("IGNORE_CUTOFF", "false").lower() == "true"
75+
7476
if mode == "single":
75-
# Single user: 1 day lookback, 1 post limit
76-
posts = scraper.scrape(targets[0], results_limit=1, cutoff_days=1)
77+
# Single user
78+
if ignore_cutoff:
79+
logger.info("Ignoring old post cutoff...")
80+
posts = scraper.scrape(targets[0], results_limit=1, cutoff_days=365 * 5)
81+
else:
82+
# Standard: 1 day lookback
83+
posts = scraper.scrape(targets[0], results_limit=1, cutoff_days=1)
7784
else:
7885
# Batch mode: 4 days lookback, 1 post per account
7986
posts = scraper.scrape(targets, results_limit=1, cutoff_days=4)
@@ -88,7 +95,10 @@ def main():
8895
logger.info("No posts retrieved. Exiting.")
8996
sys.exit(0)
9097

91-
cutoff_date = timezone.now() - timedelta(days=1)
98+
if ignore_cutoff:
99+
cutoff_date = timezone.now() - timedelta(days=365 * 5)
100+
else:
101+
cutoff_date = timezone.now() - timedelta(days=1)
92102
try:
93103
saved_count = asyncio.run(processor.process(posts, cutoff_date))
94104

0 commit comments

Comments
 (0)