Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ git_list_dirt: check_env
@$(call list_dirt)

%.json: %.json.template.py check_python .FORCE
python $< $@
python -m azul.template $< $@
.FORCE:

# The template output file depends on the template file, of course, as well as
Expand Down
14 changes: 0 additions & 14 deletions deployments/prod/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -1842,20 +1842,6 @@ def env() -> Mapping[str, Optional[str]]:

'AZUL_ENABLE_REPLICAS': '1',

# HCA allocates a daily budget for file downloads. To avoid exceeding
# that budget, we limit the download rate as follows:
#
# r = b/d/f/24/60*w
#
# where `r` is the rate limit (downloads/window), `b` is the daily
# download budget (dollars/day), `d` is the download cost (dollars/
# gibibyte/download), `f` is the average file size (gibibytes), and `w`
# is the evaluation window (minutes) (=10). The value for `d` varies by
# region, so a weighted average is calculated based on the observed
# number of daily downloads per region.
#
# 'azul_waf_download_rate_limit': '59/600@2.9'

'AZUL_ENABLE_VERBATIM_RELATIONS': '0',

'AZUL_ENABLE_MIRRORING': '1',
Expand Down
16 changes: 0 additions & 16 deletions environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -950,22 +950,6 @@ def env() -> Mapping[str, Optional[str]]:
#
'azul_it_flags': None,

# A global rate limit on file downloads across all regions and IP
# addresses, enforced by AWS WAF.
#
# The syntax is `<limit>/<window>@<concurrency>` where `<limit>` is the
# maximum allowed number of download requests made every `<window>`
# seconds, and `<concurrency>` is the expected number of distinct IPs
# making at least one download request during that time. The concurrency
# does not need to be an integer. See
#
# https://docs.aws.amazon.com/waf/latest/developerguide/waf-rule-statement-type-rate-based-high-level-settings.html
#
# for restrictions on the supported values for `<limit>` ("Rate limit")
# and `<window>` ("Evaluation window").
#
'azul_waf_download_rate_limit': None,

# Whether to enable bot control in AWS WAF. Setting this to 1 will enable
# two rules aimed at blocking requests from suspected and verified bots.
# As of January 2024, this will incur monthly cost of $10 per ACL plus
Expand Down
4 changes: 1 addition & 3 deletions lambdas/indexer/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,7 @@ def log_controller(self) -> LogForwardingController:

def __init__(self):
super().__init__(app_name=config.indexer_name,
app_module_path=__file__,
# see LocalAppTestCase.setUpClass()
unit_test=globals().get('unit_test', False),
globals=globals(),
spec=spec)

def log_forwarder(self, prefix: str):
Expand Down
3 changes: 1 addition & 2 deletions lambdas/layer/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
# package and is removed from the final result.

app = AzulChaliceApp(app_name=config.qualified_resource_name('dependencies'),
app_module_path=__file__,
unit_test=False,
globals=globals(),
spec={})


Expand Down
4 changes: 1 addition & 3 deletions lambdas/service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,9 +355,7 @@ def synthetic_fields(self) -> Sequence[str]:

def __init__(self):
super().__init__(app_name=config.service_name,
app_module_path=__file__,
# see LocalAppTestCase.setUpClass()
unit_test=globals().get('unit_test', False),
globals=globals(),
spec=spec)

@attr.s(kw_only=True, auto_attribs=True, frozen=True)
Expand Down
2 changes: 1 addition & 1 deletion requirements.all.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ google-crc32c==1.7.1
google-resumable-media==2.7.2
googleapis-common-protos==1.70.0
greenlet==3.2.3
grpcio==1.73.1
grpcio==1.74.0rc1
grpcio-status==1.62.3
http-message-signatures==0.6.1
http_sfv==0.9.9
Expand Down
2 changes: 1 addition & 1 deletion requirements.trans.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ google-cloud-core==2.4.3
google-crc32c==1.7.1
google-resumable-media==2.7.2
googleapis-common-protos==1.70.0
grpcio==1.73.1
grpcio==1.74.0rc1
grpcio-status==1.62.3
http_sfv==0.9.9
idna==3.10
Expand Down
80 changes: 48 additions & 32 deletions src/azul/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1772,45 +1772,61 @@ def docker_image_gists_path(self) -> Path:

blocked_user_agents_custom_regex_term = 'blocked_user_agents_custom'

waf_rate_rule_name = 'RateRule'
waf_rate_rule_name = 'rate_limit'

waf_rate_alarm_rule_name = 'RateAlarmRule'

waf_rate_rule_period = 300 # seconds; this value is fixed by AWS

waf_rate_rule_retry_after = 30 # seconds

waf_rate_rule_limit = 1000
waf_rate_alarm_rule_name = 'rate_limit_alarm'

@frozen(kw_only=True)
class FileDownloadLimit:
rate_limit: int
evaluation_window: int
assumed_request_concurrency: float
class RateLimit:
#: Name of the WAF rule
name: str

@classmethod
def parse(cls, s: str) -> Self:
rate, s = s.split('/')
window, concurrency = s.split('@')
return cls(rate_limit=int(rate),
evaluation_window=int(window),
assumed_request_concurrency=float(concurrency))
#: Number of requests per evaluation window
value: int

@property
def retry_after(self) -> int:
return round(self.evaluation_window /
self.rate_limit *
self.assumed_request_concurrency)
#: WAF rate limit evaluation window in seconds
period: int

@property
def waf_file_download_limit(self) -> FileDownloadLimit | None:
value = self.environ.get('azul_waf_download_rate_limit')
if value is None:
return None
else:
return self.FileDownloadLimit.parse(value)
#: Value of the Retry-After response header in seconds
retry_after: int

assert 100 <= waf_rate_rule_limit <= 2_000_000_000 # mandated by AWS
def __attrs_post_init__(self):
# Allowed range of the rate limit mandated by AWS
assert 10 <= self.value <= 2_000_000_000, R(
'Rate limit out of range', self)
# Valid values for the evaluation window mandated by AWS
assert self.period in [60, 120, 300, 600], R(
'Invalid period', self)

#: The rate limit per IP before WAF starts rejecting requests
waf_rate_limit = RateLimit(name='rate_limit',
value=1000,
period=5 * 60,
retry_after=30)

#: The rate limit per IP before a CloudWatch alarm is raised
waf_rate_limit_alarm = evolve(waf_rate_limit,
name='rate_limit_alarm',
value=waf_rate_limit.value * 2)

#: The rate limit per IP for requests that trigger a manifest generation
#:
waf_rate_limit_manifests = RateLimit(name='rate_limit_manifests',
value=10,
period=10 * 60,
retry_after=30)

#: The rate limit for file download requests
#:
#: We aim for a global limit of 60 file downloads per 10 minutes. Based on
#: an observed average of 2.9 distinct IPs concurrently downloading files
#: in any 10-minute window, the maximum per-IP request rate we can allow is
#: 20/10min, or 10/5min.
#:
waf_rate_limit_files = RateLimit(name='rate_limit_files',
value=10,
period=5 * 60,
retry_after=30)

@property
def waf_bot_control(self) -> bool:
Expand Down
16 changes: 7 additions & 9 deletions src/azul/chalice.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
Callable,
Iterator,
Literal,
Mapping,
Self,
Sequence,
)
Expand Down Expand Up @@ -69,6 +70,9 @@
from azul.logging import (
http_body_log_message,
)
from azul.modules import (
module_loaded_dynamically,
)
from azul.openapi import (
format_description,
params,
Expand Down Expand Up @@ -132,14 +136,14 @@ class AzulChaliceApp(Chalice):

def __init__(self,
app_name: str,
app_module_path: str,
globals: Mapping[str, Any],
*,
unit_test: bool = False,
spec: JSON):
self._patch_event_source_handler()
app_module_path = globals['__file__']
require(app_module_path.endswith('/app.py'), app_module_path)
self.app_module_path = app_module_path
self.unit_test = unit_test
self.loaded_dynamically = module_loaded_dynamically(globals)
self.non_interactive_routes: set[tuple[str, str]] = set()
reject('paths' in spec, 'The top-level spec must not define paths')
self._specs = self._add_contact_to_spec(spec)
Expand Down Expand Up @@ -331,12 +335,6 @@ def decorator(view_func):
else:
return lambda view_func: view_func

def test_route(self, *args, **kwargs):
"""
A route that's only enabled during unit tests.
"""
return self.route(*args, enabled=self.unit_test, **kwargs)

def spec(self) -> JSON:
"""
Return the final OpenAPI spec, stripping out unused tags.
Expand Down
33 changes: 26 additions & 7 deletions src/azul/deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
wraps,
)
import inspect
from ipaddress import (
IPv4Address,
)
import json
import logging
import os
Expand All @@ -22,7 +25,6 @@
Callable,
TYPE_CHECKING,
Tuple,
TypeVar,
cast,
)
from unittest.mock import (
Expand All @@ -37,16 +39,22 @@
import botocore.credentials
import botocore.session
import botocore.utils
from furl import (
furl,
)
from more_itertools import (
one,
)

from azul import (
Netloc,
R,
cache,
cached_property,
config,
reject,
)
from azul.http import (
http_client,
)
from azul.logging import (
azul_boto3_log as boto3_log,
Expand Down Expand Up @@ -119,10 +127,8 @@

log = logging.getLogger(__name__)

R = TypeVar('R')


def _cache(func: Callable[..., R]) -> Callable[..., R]:
def _cache[R](func: Callable[..., R]) -> Callable[..., R]:
"""
Methods and properties whose return values depend on the currently active
AWS credentials must be cached under the currently active Boto3 session.
Expand Down Expand Up @@ -721,8 +727,9 @@ def s3_access_log_bucket_policy(self,
}
]

def _validate_bucket_path_prefix(self, path_prefix):
reject(path_prefix.startswith('/') or path_prefix.endswith('/'), path_prefix)
def _validate_bucket_path_prefix(self, prefix: str) -> None:
assert not (prefix.startswith('/') or prefix.endswith('/')), R(
'Path prefix must not start or end in slash', prefix)

@property
def monitoring_topic_name(self):
Expand All @@ -741,3 +748,15 @@ def sqs_queue(self, queue_name: str) -> 'Queue':
aws = AWS()
del AWS
del _cache


def public_ip() -> IPv4Address:
    """
    Look up the public IPv4 address of the host executing this code by
    sending a GET request to https://checkip.amazonaws.com.

    :return: the address parsed from the body of the response

    :raises AssertionError: if the response status is anything but 200
    """
    endpoint = str(furl('https://checkip.amazonaws.com'))
    response = http_client(log).request('GET', endpoint)
    assert response.status == 200, R('Unexpected response', response)
    # Strip surrounding whitespace from the decoded body before parsing it
    body = response.data.decode()
    return IPv4Address(body.strip())
2 changes: 1 addition & 1 deletion src/azul/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def filter(self, record):

def configure_app_logging(app: 'AzulChaliceApp', *loggers):
_configure_log_levels(app.log, *loggers)
if not app.unit_test:
if not app.loaded_dynamically:
# Environment is not unit test
root_logger = logging.getLogger()
if root_logger.hasHandlers():
Expand Down
Loading