remote-madeira/proxy.ts at main · notflip/remote-madeira · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import { NextResponse } from 'next/server'
import type { NextRequest } from 'next/server'

// Site URL that API requests are expected to come from, read from the
// NEXT_PUBLIC_SANITY_STUDIO_SITE_URL environment variable. It is undefined
// when the variable is unset, in which case isAllowedOrigin() skips its check.
const ALLOWED_ORIGIN = process.env.NEXT_PUBLIC_SANITY_STUDIO_SITE_URL

// Lower-case substrings that flag a User-Agent as an unwanted automated client.
// isBot() tests each one with `includes` against the lower-cased header, so an
// entry matches anywhere inside the User-Agent string.
const BOT_PATTERNS = [
  // Generic keywords
  'bot', 'crawler', 'spider', 'scraper',

  // HTTP libraries and command-line clients
  'curl', 'wget', 'python-requests', 'python-urllib',
  'go-http-client', 'java/', 'libwww', 'httpunit',

  // Specific crawlers and crawler operators
  'nutch', 'phpcrawl', 'msnbot', 'jyxobot',
  'fast-webcrawler', 'convera', 'gigabot', 'yandex',
  'seznambot', 'ahrefsbot', 'semrushbot', 'dotbot',
  'mj12bot', 'blexbot', 'petalbot', 'dataforseo',
  'bytespider', 'gptbot', 'claudebot', 'anthropic',
  'ccbot',
]

// Lower-case User-Agent substrings for automated clients that are let through.
// isBot() consults this list before BOT_PATTERNS, so a match here wins even
// when the same User-Agent also contains a blocked substring such as 'bot'.
const ALLOWED_BOTS = [
  // Search-engine crawlers ('slurp' is Yahoo)
  'googlebot', 'bingbot', 'duckduckbot', 'slurp',

  // Social and messaging agents
  'facebot', 'twitterbot', 'linkedinbot', 'whatsapp', 'telegrambot',
]

/**
 * Classifies a User-Agent header as an unwanted bot.
 *
 * @param userAgent - Raw User-Agent header value, or null when absent.
 * @returns true when the header is missing/empty or contains a BOT_PATTERNS
 *   substring; false when it contains an ALLOWED_BOTS substring or matches
 *   neither list. Matching is case-insensitive.
 */
function isBot(userAgent: string | null): boolean {
  // A missing or empty header is treated as suspicious.
  if (!userAgent) return true

  const normalized = userAgent.toLowerCase()
  const appearsInAgent = (needle: string): boolean => normalized.includes(needle)

  // The allow-list takes precedence over the block-list.
  if (ALLOWED_BOTS.some(appearsInAgent)) return false

  return BOT_PATTERNS.some(appearsInAgent)
}

/**
 * Decides whether a request comes from the configured site.
 *
 * The Referer and Origin headers are parsed as URLs and their origin
 * (scheme + host + port) is compared for exact equality with the origin of
 * ALLOWED_ORIGIN. A plain string-prefix comparison is deliberately avoided:
 * with an allowed value of `https://example.com` it would also accept
 * `https://example.com.evil.net/` and `https://example.com@evil.net/`.
 * Only the origin of ALLOWED_ORIGIN is used; any path or trailing slash in
 * the configured value is ignored.
 *
 * @param request - Incoming request whose headers and pathname are inspected.
 * @returns true when ALLOWED_ORIGIN is not configured, when the Referer or
 *   Origin header has the same origin as ALLOWED_ORIGIN, or when the request
 *   targets `/api/revalidate`; false otherwise. If ALLOWED_ORIGIN is set but
 *   is not a parsable absolute URL, no header can match and only the
 *   `/api/revalidate` exception applies.
 */
function isAllowedOrigin(request: NextRequest): boolean {
  if (!ALLOWED_ORIGIN) return true // If not configured, skip this check

  // Reduces a URL string to its origin. Returns null for missing or
  // unparsable values and for URLs with an opaque origin, which the URL API
  // serializes as the literal string 'null'.
  const toOrigin = (value: string | null | undefined): string | null => {
    if (!value) return null
    try {
      const { origin } = new URL(value)
      return origin === 'null' ? null : origin
    } catch {
      return null
    }
  }

  const allowedOrigin = toOrigin(ALLOWED_ORIGIN)

  if (allowedOrigin !== null) {
    // Exact origin equality, not a prefix match on the raw header text.
    if (toOrigin(request.headers.get('referer')) === allowedOrigin) return true
    if (toOrigin(request.headers.get('origin')) === allowedOrigin) return true
  }

  // Also allow Sanity webhook calls to /api/revalidate (they come from Sanity servers).
  // These are authenticated via signature, so we allow them through.
  return request.nextUrl.pathname === '/api/revalidate'
}

/**
 * Request gate for API routes.
 *
 * Requests outside `/api/`, and requests to `/api/revalidate` (whose route
 * handler authenticates by signature), are passed through untouched. Every
 * other `/api/` request must pass, in order: the origin check, the bot
 * User-Agent check, a minimum User-Agent length of 10 characters, and the
 * presence of at least one of the Accept-Language / Accept headers. The
 * first failing check produces a plain-text 403 "Forbidden" response.
 *
 * @param request - Incoming request.
 * @returns A pass-through response, or a 403 response when a check fails.
 */
export function proxy(request: NextRequest) {
  const path = request.nextUrl.pathname

  // Nothing to enforce for non-API paths or for the signed webhook endpoint.
  if (!path.startsWith('/api/') || path === '/api/revalidate') {
    return NextResponse.next()
  }

  const forbidden = () => new NextResponse('Forbidden', { status: 403 })

  // 1. The request has to originate from the allowed site.
  if (!isAllowedOrigin(request)) return forbidden()

  // 2. Known bot User-Agents are rejected outright.
  const agent = request.headers.get('user-agent')
  if (isBot(agent)) return forbidden()

  // 3. A missing or implausibly short User-Agent is rejected.
  if (!agent || agent.length < 10) return forbidden()

  // 4. A client sending neither Accept-Language nor Accept is likely a script.
  const languageHeader = request.headers.get('accept-language')
  const acceptHeader = request.headers.get('accept')
  if (!(languageHeader || acceptHeader)) return forbidden()

  return NextResponse.next()
}

// Matcher pattern covering /api/ and every path beneath it, the same prefix
// proxy() checks for internally.
// NOTE(review): presumably read by Next.js to decide which requests invoke
// proxy(); confirm against the framework docs before changing its shape.
export const config = {
  matcher: '/api/:path*',
}