Skip to content

Commit f7d2c06

Browse files
authored
feat: add support for AI crawl bots (#577)
1 parent 771dfb2 commit f7d2c06

File tree

4 files changed

+603
-8
lines changed

4 files changed

+603
-8
lines changed

src/constants.js

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,27 +8,39 @@ export const BROWSER_ALIASES_MAP = {
88
Bada: 'bada',
99
BingCrawler: 'bingcrawler',
1010
BlackBerry: 'blackberry',
11+
'ChatGPT-User': 'chatgpt_user',
1112
Chrome: 'chrome',
13+
ClaudeBot: 'claudebot',
1214
Chromium: 'chromium',
15+
Diffbot: 'diffbot',
1316
DuckDuckBot: 'duckduckbot',
1417
Electron: 'electron',
1518
Epiphany: 'epiphany',
19+
FacebookExternalHit: 'facebookexternalhit',
1620
Firefox: 'firefox',
1721
Focus: 'focus',
1822
Generic: 'generic',
1923
'Google Search': 'google_search',
2024
Googlebot: 'googlebot',
25+
GPTBot: 'gptbot',
2126
'Internet Explorer': 'ie',
2227
InternetArchiveCrawler: 'internetarchivecrawler',
2328
'K-Meleon': 'k_meleon',
2429
Maxthon: 'maxthon',
25-
MetaWebCrawler: 'metawebcrawler',
30+
'Meta-ExternalAds': 'meta_externalads',
31+
'Meta-ExternalAgent': 'meta_externalagent',
32+
'Meta-ExternalFetcher': 'meta_externalfetcher',
33+
'Meta-WebIndexer': 'meta_webindexer',
2634
'Microsoft Edge': 'edge',
2735
'MZ Browser': 'mz',
2836
'NAVER Whale Browser': 'naver',
37+
'OAI-SearchBot': 'oai_searchbot',
38+
Omgilibot: 'omgilibot',
2939
Opera: 'opera',
3040
'Opera Coast': 'opera_coast',
3141
'Pale Moon': 'pale_moon',
42+
PerplexityBot: 'perplexitybot',
43+
'Perplexity-User': 'perplexity_user',
3244
PhantomJS: 'phantomjs',
3345
PingdomBot: 'pingdombot',
3446
Puffin: 'puffin',
@@ -50,6 +62,7 @@ export const BROWSER_ALIASES_MAP = {
5062
YahooSlurp: 'yahooslurp',
5163
'Yandex Browser': 'yandex',
5264
YandexBot: 'yandexbot',
65+
YouBot: 'youbot',
5366
};
5467

5568
export const BROWSER_MAP = {
@@ -60,27 +73,39 @@ export const BROWSER_MAP = {
6073
bada: 'Bada',
6174
bingcrawler: 'BingCrawler',
6275
blackberry: 'BlackBerry',
76+
chatgpt_user: 'ChatGPT-User',
6377
chrome: 'Chrome',
78+
claudebot: 'ClaudeBot',
6479
chromium: 'Chromium',
80+
diffbot: 'Diffbot',
6581
duckduckbot: 'DuckDuckBot',
6682
edge: 'Microsoft Edge',
6783
electron: 'Electron',
6884
epiphany: 'Epiphany',
85+
facebookexternalhit: 'FacebookExternalHit',
6986
firefox: 'Firefox',
7087
focus: 'Focus',
7188
generic: 'Generic',
7289
google_search: 'Google Search',
7390
googlebot: 'Googlebot',
91+
gptbot: 'GPTBot',
7492
ie: 'Internet Explorer',
7593
internetarchivecrawler: 'InternetArchiveCrawler',
7694
k_meleon: 'K-Meleon',
7795
maxthon: 'Maxthon',
78-
metawebcrawler: 'MetaWebCrawler',
96+
meta_externalads: 'Meta-ExternalAds',
97+
meta_externalagent: 'Meta-ExternalAgent',
98+
meta_externalfetcher: 'Meta-ExternalFetcher',
99+
meta_webindexer: 'Meta-WebIndexer',
79100
mz: 'MZ Browser',
80101
naver: 'NAVER Whale Browser',
102+
oai_searchbot: 'OAI-SearchBot',
103+
omgilibot: 'Omgilibot',
81104
opera: 'Opera',
82105
opera_coast: 'Opera Coast',
83106
pale_moon: 'Pale Moon',
107+
perplexitybot: 'PerplexityBot',
108+
perplexity_user: 'Perplexity-User',
84109
phantomjs: 'PhantomJS',
85110
pingdombot: 'PingdomBot',
86111
puffin: 'Puffin',
@@ -102,6 +127,7 @@ export const BROWSER_MAP = {
102127
yahooslurp: 'YahooSlurp',
103128
yandex: 'Yandex Browser',
104129
yandexbot: 'YandexBot',
130+
youbot: 'YouBot',
105131
};
106132

107133
export const PLATFORMS_MAP = {

src/parser-browsers.js

Lines changed: 223 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,227 @@ import Utils from './utils.js';
2828
const commonVersionIdentifier = /version\/(\d+(\.?_?\d+)+)/i;
2929

3030
const browsersList = [
31+
/* GPTBot */
32+
{
33+
test: [/gptbot/i],
34+
describe(ua) {
35+
const browser = {
36+
name: 'GPTBot',
37+
};
38+
const version = Utils.getFirstMatch(/gptbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
39+
40+
if (version) {
41+
browser.version = version;
42+
}
43+
44+
return browser;
45+
},
46+
},
47+
48+
/* ChatGPT-User */
49+
{
50+
test: [/chatgpt-user/i],
51+
describe(ua) {
52+
const browser = {
53+
name: 'ChatGPT-User',
54+
};
55+
const version = Utils.getFirstMatch(/chatgpt-user\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
56+
57+
if (version) {
58+
browser.version = version;
59+
}
60+
61+
return browser;
62+
},
63+
},
64+
65+
/* OAI-SearchBot */
66+
{
67+
test: [/oai-searchbot/i],
68+
describe(ua) {
69+
const browser = {
70+
name: 'OAI-SearchBot',
71+
};
72+
const version = Utils.getFirstMatch(/oai-searchbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
73+
74+
if (version) {
75+
browser.version = version;
76+
}
77+
78+
return browser;
79+
},
80+
},
81+
82+
/* ClaudeBot */
83+
{
84+
test: [/claudebot/i, /claude-web/i, /claude-user/i, /claude-searchbot/i],
85+
describe(ua) {
86+
const browser = {
87+
name: 'ClaudeBot',
88+
};
89+
const version = Utils.getFirstMatch(/(?:claudebot|claude-web|claude-user|claude-searchbot)\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
90+
91+
if (version) {
92+
browser.version = version;
93+
}
94+
95+
return browser;
96+
},
97+
},
98+
99+
/* Omgilibot */
100+
{
101+
test: [/omgilibot/i, /webzio-extended/i],
102+
describe(ua) {
103+
const browser = {
104+
name: 'Omgilibot',
105+
};
106+
const version = Utils.getFirstMatch(/(?:omgilibot|webzio-extended)\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
107+
108+
if (version) {
109+
browser.version = version;
110+
}
111+
112+
return browser;
113+
},
114+
},
115+
116+
/* Diffbot */
117+
{
118+
test: [/diffbot/i],
119+
describe(ua) {
120+
const browser = {
121+
name: 'Diffbot',
122+
};
123+
const version = Utils.getFirstMatch(/diffbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
124+
125+
if (version) {
126+
browser.version = version;
127+
}
128+
129+
return browser;
130+
},
131+
},
132+
133+
/* PerplexityBot */
134+
{
135+
test: [/perplexitybot/i],
136+
describe(ua) {
137+
const browser = {
138+
name: 'PerplexityBot',
139+
};
140+
const version = Utils.getFirstMatch(/perplexitybot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
141+
142+
if (version) {
143+
browser.version = version;
144+
}
145+
146+
return browser;
147+
},
148+
},
149+
150+
/* Perplexity-User */
151+
{
152+
test: [/perplexity-user/i],
153+
describe(ua) {
154+
const browser = {
155+
name: 'Perplexity-User',
156+
};
157+
const version = Utils.getFirstMatch(/perplexity-user\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
158+
159+
if (version) {
160+
browser.version = version;
161+
}
162+
163+
return browser;
164+
},
165+
},
166+
167+
/* YouBot */
168+
{
169+
test: [/youbot/i],
170+
describe(ua) {
171+
const browser = {
172+
name: 'YouBot',
173+
};
174+
const version = Utils.getFirstMatch(/youbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
175+
176+
if (version) {
177+
browser.version = version;
178+
}
179+
180+
return browser;
181+
},
182+
},
183+
184+
/* Meta-WebIndexer */
185+
{
186+
test: [/meta-webindexer/i],
187+
describe(ua) {
188+
const browser = {
189+
name: 'Meta-WebIndexer',
190+
};
191+
const version = Utils.getFirstMatch(/meta-webindexer\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
192+
193+
if (version) {
194+
browser.version = version;
195+
}
196+
197+
return browser;
198+
},
199+
},
200+
201+
/* Meta-ExternalAds */
202+
{
203+
test: [/meta-externalads/i],
204+
describe(ua) {
205+
const browser = {
206+
name: 'Meta-ExternalAds',
207+
};
208+
const version = Utils.getFirstMatch(/meta-externalads\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
209+
210+
if (version) {
211+
browser.version = version;
212+
}
213+
214+
return browser;
215+
},
216+
},
217+
218+
/* Meta-ExternalAgent */
219+
{
220+
test: [/meta-externalagent/i],
221+
describe(ua) {
222+
const browser = {
223+
name: 'Meta-ExternalAgent',
224+
};
225+
const version = Utils.getFirstMatch(/meta-externalagent\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
226+
227+
if (version) {
228+
browser.version = version;
229+
}
230+
231+
return browser;
232+
},
233+
},
234+
235+
/* Meta-ExternalFetcher */
236+
{
237+
test: [/meta-externalfetcher/i],
238+
describe(ua) {
239+
const browser = {
240+
name: 'Meta-ExternalFetcher',
241+
};
242+
const version = Utils.getFirstMatch(/meta-externalfetcher\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
243+
244+
if (version) {
245+
browser.version = version;
246+
}
247+
248+
return browser;
249+
},
250+
},
251+
31252
/* Googlebot */
32253
{
33254
test: [/googlebot/i],
@@ -130,12 +351,12 @@ const browsersList = [
130351
},
131352
},
132353

133-
/* MetaWebCrawler */
354+
/* FacebookExternalHit */
134355
{
135356
test: [/facebookexternalhit/i, /facebookcatalog/i],
136357
describe() {
137358
return {
138-
name: 'MetaWebCrawler',
359+
name: 'FacebookExternalHit',
139360
};
140361
},
141362
},

0 commit comments

Comments
 (0)