From 51f2432a042001ad5358fb413147d6c2bb0da86f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Pr=C5=AF=C5=A1a?= <87543374+Patai5@users.noreply.github.com> Date: Mon, 7 Oct 2024 23:49:53 +0200 Subject: [PATCH] fix: use initial cookies hook (#77) * fix: use initial cookies hook * docs: update changelog --- code/src/crawler.ts | 2 ++ shared/CHANGELOG.md | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/code/src/crawler.ts b/code/src/crawler.ts index 6040d12..c9bd37a 100644 --- a/code/src/crawler.ts +++ b/code/src/crawler.ts @@ -1,5 +1,6 @@ import { Dataset, NonRetryableError, PlaywrightCrawler, createRequestDebugInfo, log } from 'crawlee'; +import { initialCookiesHook } from './hooks/initial-cookies.js'; import { crawlRoute } from './routes/crawl-route.js'; import { Config } from './types/config.js'; import { CrawlerState } from './types/crawler-state.js'; @@ -26,6 +27,7 @@ export const createCrawler = async (config: Config) => { maxRequestsPerCrawl: maxPagesPerCrawl, requestHandler: crawlRoute, preNavigationHooks: [ + initialCookiesHook, async () => { const state = await crawler.useState(); if (state.pagesOpened >= maxPagesPerCrawl) { diff --git a/shared/CHANGELOG.md b/shared/CHANGELOG.md index 8850913..fd5a4b2 100644 --- a/shared/CHANGELOG.md +++ b/shared/CHANGELOG.md @@ -1,5 +1,9 @@ This changelog tracks updates to both GTP Scraper and Extended GPT Scraper actors. +# 2024-10-07 +*Fixes* +- Fixed initial cookies not being set correctly from input. + # 2024-09-22 *Fixes* - Fixed a bug where HTML minimization was failing on some specific websites.