Skip to content

Commit 50f85a3

Browse files
authored
Merge pull request #185 from ZephyrZenn/main
fix: correctly set cookies
2 parents 2ca876e + 90c446e commit 50f85a3

File tree

2 files changed

+12
-10
lines changed

2 files changed

+12
-10
lines changed

CHANGELOG.md

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
# [1.5.0](https://github.com/BuilderIO/gpt-crawler/compare/v1.4.0...v1.5.0) (2024-07-05)
22

3-
43
### Features
54

6-
* git clone depth limit in docker ([87767db](https://github.com/BuilderIO/gpt-crawler/commit/87767dbda99b3259d44ec2c02dceb3a59bb2ca3c))
5+
- git clone depth limit in docker ([87767db](https://github.com/BuilderIO/gpt-crawler/commit/87767dbda99b3259d44ec2c02dceb3a59bb2ca3c))
76

87
# [1.4.0](https://github.com/BuilderIO/gpt-crawler/compare/v1.3.0...v1.4.0) (2024-01-15)
98

src/core.ts

+11-8
Original file line numberDiff line numberDiff line change
@@ -104,25 +104,28 @@ export async function crawl(config: Config) {
104104
// Uncomment this option to see the browser window.
105105
// headless: false,
106106
preNavigationHooks: [
107-
// Abort requests for certain resource types
108-
async ({ request, page, log }) => {
109-
// If there are no resource exclusions, return
110-
const RESOURCE_EXCLUSTIONS = config.resourceExclusions ?? [];
111-
if (RESOURCE_EXCLUSTIONS.length === 0) {
112-
return;
113-
}
107+
// Abort requests for certain resource types and add cookies
108+
async (crawlingContext, _gotoOptions) => {
109+
const { request, page, log } = crawlingContext;
110+
// Add cookies to the page
111+
// The crawler has not navigated to the page yet, so request.loadedUrl is always undefined at this point; use request.url instead.
114112
if (config.cookie) {
115113
const cookies = (
116114
Array.isArray(config.cookie) ? config.cookie : [config.cookie]
117115
).map((cookie) => {
118116
return {
119117
name: cookie.name,
120118
value: cookie.value,
121-
url: request.loadedUrl,
119+
url: request.url,
122120
};
123121
});
124122
await page.context().addCookies(cookies);
125123
}
124+
const RESOURCE_EXCLUSTIONS = config.resourceExclusions ?? [];
125+
// If there are no resource exclusions, return
126+
if (RESOURCE_EXCLUSTIONS.length === 0) {
127+
return;
128+
}
126129
await page.route(
127130
`**\/*.{${RESOURCE_EXCLUSTIONS.join()}}`,
128131
(route) => route.abort("aborted"),

0 commit comments

Comments
 (0)