Skip to content

Commit d76dd03

Browse files
authored
Merge pull request #11605 from cdrini/fix/counter-link-improvements
Tweak tagger/nginx rate limit logic
2 parents 43e1a7e + b6dd68d commit d76dd03

File tree

3 files changed

+15
-10
lines changed

3 files changed

+15
-10
lines changed

docker/covers_nginx.conf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ server {
6666

6767
# Covers rate limit.
6868
limit_req zone=cover_limit burst=400 nodelay;
69+
limit_req zone=global_crawler_cover_limit nodelay;
6970
limit_req_status 429;
7071
}
7172

docker/nginx.conf

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ http {
6161
default 0; # All other traffic
6262
}
6363

64-
# Provides $is_blessed_ua
65-
include /olsystem/etc/nginx/is_blessed_ua.map;
64+
include /olsystem/etc/nginx/is_blessed_ua.conf; # Provides $is_blessed_ua
65+
include /olsystem/etc/nginx/is_sus_ip.conf; # Provides $is_sus_ip
6666

6767
map "$is_blessed_ip:$is_blessed_ua" $rate_limit_key {
6868
"0:0" $binary_remote_addr; # Rate-limit by IP
@@ -83,13 +83,16 @@ http {
8383
js_set $has_hit_crawler_links tagger.check;
8484

8585
# The only crawlers we want to limit are the ones that don't identify themselves as such
86-
map "$is_blessed_ip:$is_identifying_ua:$has_hit_crawler_links" $global_nonidentifying_crawler_rate_limit_key {
86+
map "$is_blessed_ip:$is_identifying_ua:$has_hit_crawler_links:$is_sus_ip" $global_nonidentifying_crawler_rate_limit_key {
8787
default ''; # No shared rate limiting
88-
"0:0:1" '1'; # Shared rate limit
88+
# Shared rate limit
89+
"0:0:1:0" 1;
90+
"0:0:1:1" 1;
91+
"0:0:0:1" 1;
8992
}
9093

9194
# Globally limit the crawlers that scrape links but don't identify themselves
92-
limit_req_zone $global_nonidentifying_crawler_rate_limit_key zone=global_crawler_limit:5m rate=15r/s;
95+
limit_req_zone $global_nonidentifying_crawler_rate_limit_key zone=global_crawler_limit:5m rate=17r/s;
9396

9497
# Matches other sites
9598
limit_req_zone $rate_limit_key zone=web_limit:10m rate=1r/s;
@@ -98,6 +101,7 @@ http {
98101
limit_req_zone $rate_limit_key zone=api_limit:10m rate=180r/m;
99102
# Set a more permissive limit for covers because some pages might load 20+ covers.
100103
limit_req_zone $rate_limit_key zone=cover_limit:10m rate=400r/m;
104+
limit_req_zone $global_nonidentifying_crawler_rate_limit_key zone=global_crawler_cover_limit:5m rate=150r/s;
101105

102106
# Things are mounted into here by the docker compose file
103107
include /etc/nginx/sites-enabled/*;

docker/web_nginx.conf

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ map $request_uri$args $is_sus_random_sort {
1616
# 1. Detect empty referer
1717
map $http_referer $is_empty_referer {
1818
default 0;
19+
"-" 1;
1920
"" 1;
2021
}
2122

@@ -26,13 +27,16 @@ map $cookie_session $is_logged_out {
2627
}
2728

2829
# 3. Check if path requires referer or login
29-
map $request_uri$args $requires_referer {
30+
map $request_uri $requires_referer {
3031
default 0;
3132
"~*^/(qrcode|admin|wp-login|show-records)" 1;
3233
"~*/edit(/|$|\?)" 1;
3334
"~*(v|m|action|redirect)=" 1;
35+
"~*^/(subjects|authors)/" 1;
36+
"~*^/search\?" 1;
3437
}
3538

39+
# $is_sus_referer is used by olsystem etc/nginx/tagger.js
3640
map "$is_empty_referer$is_logged_out$requires_referer" $is_sus_referer {
3741
default 0;
3842
"111" 1;
@@ -120,10 +124,6 @@ server {
120124
return 403;
121125
}
122126

123-
if ($is_sus_referer) {
124-
return 403;
125-
}
126-
127127
# Haproxy to better handle load/traffic
128128
proxy_pass http://web_haproxy:7072;
129129
proxy_set_header Host $http_host;

0 commit comments

Comments
 (0)