@@ -4,16 +4,21 @@ import type { NextRequest } from 'next/server';
44import { middleware } from '../middleware' ;
55
66const createRequest = ( url : string , userAgent : string = '' ) => {
7- const parsedUrl = new URL ( url ) ;
7+ const u = new URL ( url ) ;
8+ const nextUrl = {
9+ // minimal NextURL-ish shape the middleware actually uses
10+ pathname : u . pathname ,
11+ search : u . search ,
12+ searchParams : new URLSearchParams ( u . search ) ,
13+ hash : u . hash ,
14+ href : u . href ,
15+ origin : u . origin ,
16+ clone : ( ) => new URL ( u . href ) ,
17+ toString : ( ) => u . toString ( ) ,
18+ } ;
19+
820 return {
9- nextUrl : {
10- ...parsedUrl ,
11- clone : ( ) => new URL ( parsedUrl . href ) ,
12- pathname : parsedUrl . pathname ,
13- search : parsedUrl . search ,
14- searchParams : parsedUrl . searchParams ,
15- hash : parsedUrl . hash ,
16- } ,
21+ nextUrl,
1722 headers : {
1823 get : ( name : string ) => ( name . toLowerCase ( ) === 'user-agent' ? userAgent : null ) ,
1924 } ,
@@ -25,9 +30,6 @@ const botUAs = [
2530 'Mozilla/5.0 (compatible; bingbot/2.0)' ,
2631 'GPTBot/1.0' ,
2732 'Claude-Web/1.0' ,
28- 'ChatGPT-User/1.0' ,
29- 'facebookexternalhit/1.1' ,
30- 'Twitterbot/1.0' ,
3133 'googlebot' ,
3234 'GOOGLEBOT' ,
3335 'GoogleBot' ,
@@ -39,8 +41,22 @@ const humanUAs = [
3941 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/121.0' ,
4042] ;
4143
42- describe ( 'Middleware - Bot Redirects' , ( ) => {
43- it ( 'redirects bots from / to /bot' , ( ) => {
44+ const scriptedClients = [ // User-Agent strings of command-line tools and HTTP client libraries (not browsers); the suite expects each one to be redirected to /bot/*
45+ 'curl/8.5.0' ,
46+ 'Wget/1.21.3' ,
47+ 'python-requests/2.32.3' ,
48+ 'Apache-HttpClient/4.5.14' ,
49+ 'okhttp/4.12.0' ,
50+ 'GuzzleHttp/7' ,
51+ 'libwww-perl/6.68' ,
52+ 'Go-http-client/1.1' ,
53+ 'Java/11.0.24' ,
54+ 'node-fetch/1.0' ,
55+ 'axios/1.7.0' ,
56+ ] ;
57+
58+ describe ( 'Middleware - Bot/Agent Redirects with AFL heuristics' , ( ) => {
59+ it ( 'redirects known bot UAs from / to /bot' , ( ) => {
4460 for ( const ua of botUAs ) {
4561 const res = middleware ( createRequest ( 'http://localhost:3000/' , ua ) ) ;
4662 expect ( res . status ) . toBe ( 302 ) ;
@@ -51,7 +67,7 @@ describe('Middleware - Bot Redirects', () => {
5167 }
5268 } ) ;
5369
54- it ( 'redirects bots from any page to /bot/* (no bot /bot loops)' , ( ) => {
70+ it ( 'redirects known bot UAs from any page to /bot/* (no /bot loops)' , ( ) => {
5571 for ( const ua of botUAs ) {
5672 const res = middleware ( createRequest ( 'http://localhost:3000/gallery' , ua ) ) ;
5773 expect ( res . status ) . toBe ( 302 ) ;
@@ -62,29 +78,25 @@ describe('Middleware - Bot Redirects', () => {
6278 } ) ;
6379
6480 it ( 'preserves query strings on redirect' , ( ) => {
65- const res = middleware (
66- createRequest ( 'http://localhost:3000/about?ref=twitter&x=1' , 'Googlebot' )
67- ) ;
81+ const res = middleware ( createRequest ( 'http://localhost:3000/about?ref=twitter&x=1' , 'Googlebot' ) ) ;
6882 expect ( res . status ) . toBe ( 302 ) ;
6983 const loc = new URL ( res . headers . get ( 'Location' ) || '' ) ;
7084 expect ( loc . pathname ) . toBe ( '/bot/about' ) ;
7185 expect ( loc . searchParams . get ( 'ref' ) ) . toBe ( 'twitter' ) ;
7286 expect ( loc . searchParams . get ( 'x' ) ) . toBe ( '1' ) ;
7387 } ) ;
7488
75- it ( 'does not redirect already /bot/* paths (for bots or humans)' , ( ) => {
89+ it ( 'does not redirect already /bot/* paths (bots or humans)' , ( ) => {
7690 const botRes = middleware ( createRequest ( 'http://localhost:3000/bot/gallery' , 'Googlebot' ) ) ;
7791 expect ( botRes . headers . get ( 'Location' ) ) . toBeNull ( ) ;
7892 expect ( botRes . status ) . toBe ( 200 ) ;
7993
80- const humanRes = middleware (
81- createRequest ( 'http://localhost:3000/bot/gallery' , humanUAs [ 0 ] )
82- ) ;
94+ const humanRes = middleware ( createRequest ( 'http://localhost:3000/bot/gallery' , humanUAs [ 0 ] ) ) ;
8395 expect ( humanRes . headers . get ( 'Location' ) ) . toBeNull ( ) ;
8496 expect ( humanRes . status ) . toBe ( 200 ) ;
8597 } ) ;
8698
87- it ( 'does not redirect API, _next, static, or asset files even for bots' , ( ) => {
99+ it ( 'bypasses API, _next, static, and asset files even for bots' , ( ) => {
88100 const skipPaths = [
89101 'http://localhost:3000/api/test' ,
90102 'http://localhost:3000/_next/static/chunk.js' ,
@@ -105,32 +117,55 @@ describe('Middleware - Bot Redirects', () => {
105117 }
106118 } ) ;
107119
108- it ( 'does not redirect human user agents ' , ( ) => {
120+ it ( 'does not redirect normal human browsers ' , ( ) => {
109121 for ( const ua of humanUAs ) {
110122 const res = middleware ( createRequest ( 'http://localhost:3000/gallery' , ua ) ) ;
111123 expect ( res . headers . get ( 'Location' ) ) . toBeNull ( ) ;
112124 expect ( res . status ) . toBe ( 200 ) ;
113125 }
114126 } ) ;
115127
116- it ( 'handles deep nested paths correctly' , ( ) => {
117- const res = middleware (
118- createRequest ( 'http://localhost:3000/deep/nested/path' , 'Googlebot' )
119- ) ;
128+ it ( 'redirects generic crawler terms (substring backstop)' , ( ) => {
129+ const ua = 'MyAwesomeCrawler/1.0 (+https://example.com)' ;
130+ const res = middleware ( createRequest ( 'http://localhost:3000/deep/nested/path' , ua ) ) ;
120131 expect ( res . status ) . toBe ( 302 ) ;
121132 const loc = new URL ( res . headers . get ( 'Location' ) || '' ) ;
122133 expect ( loc . pathname ) . toBe ( '/bot/deep/nested/path' ) ;
123134 } ) ;
124135
125- it ( 'treats missing/empty user-agent as human (no redirect)' , ( ) => {
126- const res1 = middleware ( createRequest ( 'http://localhost:3000/gallery' ) ) ;
127- expect ( res1 . headers . get ( 'Location' ) ) . toBeNull ( ) ;
128- expect ( res1 . status ) . toBe ( 200 ) ;
136+ it ( 'redirects scripted HTTP clients to /bot/*' , ( ) => { // every UA in scriptedClients must be treated like a bot
137+ for ( const ua of scriptedClients ) {
138+ const res = middleware ( createRequest ( 'http://localhost:3000/gallery' , ua ) ) ;
139+ expect ( res . status ) . toBe ( 302 ) ;
140+ const loc = new URL ( res . headers . get ( 'Location' ) || '' ) ; // '' fallback makes new URL() throw when the Location header is absent, failing the test
141+ expect ( loc . pathname ) . toBe ( '/bot/gallery' ) ; // original path is expected to be re-rooted under /bot
142+ }
143+ } ) ;
144+
145+ it ( 'treats empty or missing user-agent as scripted client (redirects to /bot/*)' , ( ) => {
146+ const res1 = middleware ( createRequest ( 'http://localhost:3000/gallery' ) ) ;
147+ expect ( res1 . status ) . toBe ( 302 ) ;
148+ const loc1 = new URL ( res1 . headers . get ( 'Location' ) || '' ) ;
149+ expect ( loc1 . pathname ) . toBe ( '/bot/gallery' ) ;
129150
130- const res2 = middleware ( createRequest ( 'http://localhost:3000/gallery' , ' ' ) ) ;
131- expect ( res2 . headers . get ( 'Location' ) ) . toBeNull ( ) ;
132- expect ( res2 . status ) . toBe ( 200 ) ;
133- } ) ;
151+ const res2 = middleware ( createRequest ( 'http://localhost:3000/gallery' , ' ' ) ) ;
152+ expect ( res2 . status ) . toBe ( 302 ) ;
153+ const loc2 = new URL ( res2 . headers . get ( 'Location' ) || '' ) ;
154+ expect ( loc2 . pathname ) . toBe ( '/bot/gallery' ) ;
155+ } ) ;
134156
135-
157+ it ( 'override: ?afl=human prevents redirect even for bots' , ( ) => { // query override is expected to win over User-Agent detection
158+ const res = middleware ( createRequest ( 'http://localhost:3000/gallery?afl=human' , 'Googlebot' ) ) ; // bot UA combined with the human override
159+ expect ( res . headers . get ( 'Location' ) ) . toBeNull ( ) ; // no redirect target may be set
160+ expect ( res . status ) . toBe ( 200 ) ;
161+ } ) ;
162+
163+ it ( 'override: ?afl=bot forces redirect even for humans and strips the override key' , ( ) => { // mirror of the ?afl=human case
164+ const res = middleware ( createRequest ( 'http://localhost:3000/gallery?afl=bot&ref=x' , humanUAs [ 0 ] ) ) ; // humanUAs[0] is presumably a browser UA (head of the list is not visible here; confirm)
165+ expect ( res . status ) . toBe ( 302 ) ;
166+ const loc = new URL ( res . headers . get ( 'Location' ) || '' ) ; // '' fallback makes new URL() throw when the Location header is absent, failing the test
167+ expect ( loc . pathname ) . toBe ( '/bot/gallery' ) ;
168+ expect ( loc . searchParams . get ( 'ref' ) ) . toBe ( 'x' ) ; // unrelated query params must survive the redirect
169+ expect ( loc . searchParams . get ( 'afl' ) ) . toBeNull ( ) ; // the override key itself must not appear in the redirect target
170+ } ) ;
136171} ) ;
0 commit comments