fix: scraping multiple URLs with disabled saveSnapshots (#91)

* fix: scraping multiple URLs with disabled `saveSnapshots`
* docs: update changelog
apify-projects · Dec 30, 2024 · b304920 · b304920
1 parent 226a7e9
commit b304920
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 2 deletions.
diff --git a/code/src/routes/crawl-route.ts b/code/src/routes/crawl-route.ts
@@ -119,10 +119,9 @@ export const crawlRoute = async (context: PlaywrightCrawlingContext<CrawlRouteUs
     const contentMaxTokens = model.modelConfig.maxTokens * 0.9 - instructionTokenLength; // 10% buffer for answer
     const pageContent = maybeShortsTextByTokenLength(originPageContent, contentMaxTokens);
 
-    let snapshotKey: string | undefined;
+    const snapshotKey = Date.now().toString();
     let sentContentKey: string | undefined;
     if (saveSnapshots) {
-        snapshotKey = Date.now().toString();
         sentContentKey = `${snapshotKey}-sentContent.${pageFormat === PAGE_FORMAT.MARKDOWN ? 'md' : 'html'}`;
         await utils.puppeteer.saveSnapshot(page, {
             key: snapshotKey,

diff --git a/shared/CHANGELOG.md b/shared/CHANGELOG.md
@@ -1,5 +1,9 @@
 This changelog tracks updates to both GPT Scraper and Extended GPT Scraper actors.
 
+# 2024-12-30
+*Fixes*
+- Fixed extraction of multiple URLs when the `saveSnapshots` option is disabled.
+
 # 2024-11-17
 *Features*
 - Improved GPT call handling, which should parallelize the calls together with the crawling better.