@@ -2,6 +2,34 @@ const isSpider = (userAgent) => {
22 if ( typeof userAgent !== 'string' ) {
33 return false ;
44 }
5+
6+ // This list is in large part based on https://github.com/monperrus/crawler-user-agents/blob/master/crawler-user-agents.json
7+ // which is under this license:
8+ // The MIT License (MIT)
9+ // Copyright (c) 2017 Martin Monperrus
10+ //
11+ // Permission is hereby granted, free of charge, to any person obtaining a copy of
12+ // this software and associated documentation files (the "Software"), to deal in
13+ // the Software without restriction, including without limitation the rights to
14+ // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
15+ // the Software, and to permit persons to whom the Software is furnished to do so,
16+ // subject to the following conditions:
17+ //
18+ // The above copyright notice and this permission notice shall be included in all
19+ // copies or substantial portions of the Software.
20+ //
21+ // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22+ // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
23+ // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
24+ // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
25+ // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26+ // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27+ //
28+ // There are some differences though; this list is much smaller because we only want to include
29+ // crawlers where the extra open graph tags we put in will actually be useful to end users.
30+ // So we want things like search engines, social media, and chat apps generating link previews,
31+ // but don't need to waste time handling AI bots slurping up the entire internet; we have no
32+ // useful data for them anyway.
533 return (
634 userAgent . includes ( 'Googlebot' ) ||
735 userAgent . includes ( 'Discordbot' ) ||
@@ -20,7 +48,17 @@ const isSpider = (userAgent) => {
2048 userAgent . includes ( 'DuckDuckBot' ) ||
2149 userAgent . includes ( 'facebookexternalhit' ) ||
2250 userAgent . includes ( 'Applebot' ) ||
23- userAgent . includes ( 'MojeekBot' )
51+ userAgent . includes ( 'MojeekBot' ) ||
52+ userAgent . includes ( 'Bluesky' ) ||
53+ userAgent . includes ( 'Skype' ) ||
54+ userAgent . includes ( 'Pinterest' ) ||
55+ userAgent . includes ( 'search.marginalia.nu' ) ||
56+ userAgent . includes ( 'Valve/Steam' ) ||
57+ userAgent . includes ( 'Iframely' ) ||
58+ userAgent . includes ( 'opengraph' ) ||
59+ userAgent . includes ( 'OpenGraph' ) ||
60+ userAgent . includes ( 'GroupMeBot' ) ||
61+ userAgent . includes ( 'KeybaseBot' )
2462 ) ;
2563} ;
2664
0 commit comments