Skip to content

Commit 89bf42a

Browse files
committed
format
1 parent 413fea5 commit 89bf42a

File tree

1 file changed

+60
-60
lines changed

1 file changed

+60
-60
lines changed

apps/docs/lib/ai-agent-detection.ts

Lines changed: 60 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -18,84 +18,84 @@
1818
// Layer 1: Known AI agent UA substrings (lowercase).
1919
const AI_AGENT_UA_PATTERNS = [
2020
// Anthropic — https://support.claude.com/en/articles/8896518
21-
"claudebot",
22-
"claude-searchbot",
23-
"claude-user",
24-
"anthropic-ai",
25-
"claude-web",
21+
'claudebot',
22+
'claude-searchbot',
23+
'claude-user',
24+
'anthropic-ai',
25+
'claude-web',
2626

2727
// OpenAI — https://platform.openai.com/docs/bots
28-
"chatgpt",
29-
"gptbot",
30-
"oai-searchbot",
31-
"openai",
28+
'chatgpt',
29+
'gptbot',
30+
'oai-searchbot',
31+
'openai',
3232

3333
// Google AI
34-
"gemini",
35-
"bard",
36-
"google-cloudvertexbot",
37-
"google-extended",
34+
'gemini',
35+
'bard',
36+
'google-cloudvertexbot',
37+
'google-extended',
3838

3939
// Meta
40-
"meta-externalagent",
41-
"meta-externalfetcher",
42-
"meta-webindexer",
40+
'meta-externalagent',
41+
'meta-externalfetcher',
42+
'meta-webindexer',
4343

4444
// Search/Research AI
45-
"perplexity",
46-
"youbot",
47-
"you.com",
48-
"deepseekbot",
45+
'perplexity',
46+
'youbot',
47+
'you.com',
48+
'deepseekbot',
4949

5050
// Coding assistants
51-
"cursor",
52-
"github-copilot",
53-
"codeium",
54-
"tabnine",
55-
"sourcegraph",
51+
'cursor',
52+
'github-copilot',
53+
'codeium',
54+
'tabnine',
55+
'sourcegraph',
5656

5757
// Other AI agents / data scrapers (low-harm to serve markdown)
58-
"cohere-ai",
59-
"bytespider",
60-
"amazonbot",
61-
"ai2bot",
62-
"diffbot",
63-
"omgili",
64-
"omgilibot",
58+
'cohere-ai',
59+
'bytespider',
60+
'amazonbot',
61+
'ai2bot',
62+
'diffbot',
63+
'omgili',
64+
'omgilibot',
6565
];
6666

6767
// Layer 2: Known AI service URLs in the Signature-Agent header (sent alongside RFC 9421 HTTP Message Signatures).
68-
const SIGNATURE_AGENT_DOMAINS = ["chatgpt.com"];
68+
const SIGNATURE_AGENT_DOMAINS = ['chatgpt.com'];
6969

7070
// Layer 3: Traditional bot exclusion list — bots that should NOT trigger
7171
// the heuristic layer (they're search engine crawlers, social previews, or
7272
// monitoring tools, not AI agents).
7373
const TRADITIONAL_BOT_PATTERNS = [
74-
"googlebot",
75-
"bingbot",
76-
"yandexbot",
77-
"baiduspider",
78-
"duckduckbot",
79-
"slurp",
80-
"msnbot",
81-
"facebot",
82-
"twitterbot",
83-
"linkedinbot",
84-
"whatsapp",
85-
"telegrambot",
86-
"pingdom",
87-
"uptimerobot",
88-
"newrelic",
89-
"datadog",
90-
"statuspage",
91-
"site24x7",
92-
"applebot",
74+
'googlebot',
75+
'bingbot',
76+
'yandexbot',
77+
'baiduspider',
78+
'duckduckbot',
79+
'slurp',
80+
'msnbot',
81+
'facebot',
82+
'twitterbot',
83+
'linkedinbot',
84+
'whatsapp',
85+
'telegrambot',
86+
'pingdom',
87+
'uptimerobot',
88+
'newrelic',
89+
'datadog',
90+
'statuspage',
91+
'site24x7',
92+
'applebot',
9393
];
9494

9595
// Broad regex for bot-like UA strings (used only in Layer 3 heuristic).
9696
const BOT_LIKE_REGEX = /bot|agent|fetch|crawl|spider|search/i;
9797

98-
export type DetectionMethod = "ua-match" | "signature-agent" | "heuristic";
98+
export type DetectionMethod = 'ua-match' | 'signature-agent' | 'heuristic';
9999

100100
export interface DetectionResult {
101101
detected: boolean;
@@ -111,36 +111,36 @@ export interface DetectionResult {
111111
export function isAIAgent(request: {
112112
headers: { get(name: string): string | null };
113113
}): DetectionResult {
114-
const userAgent = request.headers.get("user-agent");
114+
const userAgent = request.headers.get('user-agent');
115115

116116
// Layer 1: Known UA pattern match
117117
if (userAgent) {
118118
const lowerUA = userAgent.toLowerCase();
119119
if (AI_AGENT_UA_PATTERNS.some((pattern) => lowerUA.includes(pattern))) {
120-
return { detected: true, method: "ua-match" };
120+
return { detected: true, method: 'ua-match' };
121121
}
122122
}
123123

124124
// Layer 2: Signature-Agent header (accompanies RFC 9421 HTTP Message Signatures; used by ChatGPT agent)
125-
const signatureAgent = request.headers.get("signature-agent");
125+
const signatureAgent = request.headers.get('signature-agent');
126126
if (signatureAgent) {
127127
const lowerSig = signatureAgent.toLowerCase();
128128
if (SIGNATURE_AGENT_DOMAINS.some((domain) => lowerSig.includes(domain))) {
129-
return { detected: true, method: "signature-agent" };
129+
return { detected: true, method: 'signature-agent' };
130130
}
131131
}
132132

133133
// Layer 3: Missing browser fingerprint heuristic
134134
// Real browsers (Chrome 76+, Firefox 90+, Safari 16.4+) send sec-fetch-mode
135135
// on navigation requests. Its absence signals a programmatic client.
136-
const secFetchMode = request.headers.get("sec-fetch-mode");
136+
const secFetchMode = request.headers.get('sec-fetch-mode');
137137
if (!secFetchMode && userAgent && BOT_LIKE_REGEX.test(userAgent)) {
138138
const lowerUA = userAgent.toLowerCase();
139139
const isTraditionalBot = TRADITIONAL_BOT_PATTERNS.some((pattern) =>
140-
lowerUA.includes(pattern)
140+
lowerUA.includes(pattern),
141141
);
142142
if (!isTraditionalBot) {
143-
return { detected: true, method: "heuristic" };
143+
return { detected: true, method: 'heuristic' };
144144
}
145145
}
146146

0 commit comments

Comments
 (0)