@@ -16,6 +16,38 @@ upstream webnodes {
1616 server web_haproxy:7072;
1717}
1818
19+ map $http_user_agent $is_sus_user_agent { # 1 when the User-Agent matches a known scraper or an implausibly old browser
20+ default 0;
21+ "~*bytespider" 1; # case-insensitive: the crawler's UA token is spelled "Bytespider", which "~ByteSpider" never matched
22+ # Check for ancient browser user agents (Firefox 1-7, Chrome 1-10)
23+ "~Firefox/([1-7]\.)" 1;
24+ "~Chrome/([1-9]|10)\." 1;
25+ }
26+
27+ # Flag requests under /authors/ whose URI carries sort=random_ (likely bots caught in the /authors random loop).
28+ map $request_uri$args $is_sus_random_sort { # NOTE(review): $request_uri normally already includes the query string, so the appended $args looks redundant - confirm
29+ default 0;
30+ "~^/authors/.*sort=random_" 1; # case-sensitive regex, anchored at the start of the URI
31+ }
32+
33+ # Check empty referer: $is_empty_referer is 1 when $http_referer is the empty string, else 0.
34+ map $http_referer $is_empty_referer {
35+ default 0;
36+ "" 1; # exact match on the empty string, i.e. no Referer value
37+ }
38+
39+ map $request_uri$args $requires_referer { # 1 for URIs that are expected to arrive with a Referer header
40+ default 0;
41+ "~*^/(qrcode|admin|wp-login|show-records)" 1;
42+ "~*/edit" 1;
43+ "~*[?&](v|m|action|redirect)=" 1; # anchored on ?/& so only these exact parameter names match, not e.g. "from=" or "rev="
44+ }
45+
46+ map $is_empty_referer$requires_referer $is_sus_referer { # key is the two 0/1 flags concatenated into a two-character string
47+ default 0;
48+ "11" 1; # both flags set: empty Referer on a URI that requires one
49+ }
50+
1951# Keep in sync with covers_nginx.conf
2052server {
2153 listen 80 default;
@@ -85,38 +117,19 @@ server {
85117 limit_req zone=web_limit burst=100 delay=10;
86118 limit_req_status 429;
87119
88- if ($http_user_agent ~ (Bytespider) ) {
89- return 444;
90- }
91-
92-
93- # ===========================================
94- # Block certain patterns when no referer set:
95- # ===========================================
96-
97- # Create a variable to track if referer is empty
98- set $suspect_arg 0;
99-
100- # These requests should not be hit without referrer
101- if ($request_uri ~* "/(qrcode|admin|wp-login|show-records|edit)") {
102- set $suspect_arg 1;
103- }
104- if ($args ~* "(v=|m=|action=)") {
105- set $suspect_arg 1;
120+ # For returning 200 when someone tries to randomly sort author results.
121+ if ($is_sus_random_sort) {
122+ return 200;
106123 }
107124
108- # AND if the referer is set...
109- if ($http_referer = "" ) {
110- set $suspect_arg "${suspect_arg}1";
125+ if ($is_sus_user_agent) {
126+ return 403;
111127 }
112128
113- # Block requests with m= v= or action= parameters and empty referer
114- if ($suspect_arg = "11") {
129+ if ($is_sus_referer) {
115130 return 444;
116131 }
117132
118- # -------------------------------------------
119-
120133 proxy_pass http://webnodes;
121134 proxy_set_header Host $http_host;
122135
@@ -143,27 +156,6 @@ server {
143156 proxy_set_header X-Scheme $scheme;
144157 }
145158
146- # Log likely bots caught in /authors random loop.
147- location ~* ^/authors/.* {
148- # Web rate limit.
149- limit_req zone=web_limit burst=100 delay=10;
150- limit_req_status 429;
151-
152- # randomly sorting will be removed. For now just return 200
153- if ($is_random_sort = 1) {
154- return 200 "";
155- }
156-
157- if ($http_user_agent ~ (Bytespider) ) {
158- return 444;
159- }
160-
161- proxy_pass http://webnodes;
162- proxy_set_header Host $http_host;
163- proxy_set_header X-Forwarded-For $remote_addr;
164- proxy_set_header X-Scheme $scheme;
165- }
166-
167159 location ^~ /.well-known/acme-challenge/ {
168160 default_type "text/plain";
169161 root /openlibrary/static;
0 commit comments