diff --git a/bin/wheels/runtime/grpcio-1.73.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl b/bin/wheels/runtime/grpcio-1.73.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl deleted file mode 100644 index bfeb86c362..0000000000 Binary files a/bin/wheels/runtime/grpcio-1.73.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl and /dev/null differ diff --git a/bin/wheels/runtime/grpcio-1.74.0rc1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl b/bin/wheels/runtime/grpcio-1.74.0rc1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl new file mode 100644 index 0000000000..26fd66aa40 Binary files /dev/null and b/bin/wheels/runtime/grpcio-1.74.0rc1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl differ diff --git a/common.mk b/common.mk index 648071d704..329e022af1 100644 --- a/common.mk +++ b/common.mk @@ -116,7 +116,7 @@ git_list_dirt: check_env @$(call list_dirt) %.json: %.json.template.py check_python .FORCE - python $< $@ + python -m azul.template $< $@ .FORCE: # The template output file depends on the template file, of course, as well as diff --git a/deployments/prod/environment.py b/deployments/prod/environment.py index 1e348f82de..1dc22ea3d0 100644 --- a/deployments/prod/environment.py +++ b/deployments/prod/environment.py @@ -1842,20 +1842,6 @@ def env() -> Mapping[str, Optional[str]]: 'AZUL_ENABLE_REPLICAS': '1', - # HCA allocates a daily budget for file downloads. To avoid exceeding - # that budget, we limit the download rate as follows: - # - # r = b/d/f/24/60*w - # - # where `r` is the rate limit (downloads/window), `b` is the daily - # download budget (dollars/day), `d` is the download cost (dollars/ - # gibibyte/download), `f` is the average file size (gibibytes), and `w` - # is the evaluation window (minutes) (=10). The value for `d` varies by - # region, so a weighted average is calculated based on the observed - # number of daily downloads per region. - # - # 'azul_waf_download_rate_limit': '59/600@2.9' - 'AZUL_ENABLE_VERBATIM_RELATIONS': '0', 'AZUL_ENABLE_MIRRORING': '1', diff --git a/environment.py b/environment.py index 92e7498258..f9f1b6a16f 100644 --- a/environment.py +++ b/environment.py @@ -950,22 +950,6 @@ def env() -> Mapping[str, Optional[str]]: # 'azul_it_flags': None, - # A global rate limit on file downloads across all regions and IP - # addresses, enforced by AWS WAF. - # - # The syntax is `/@` where `` is the - # maximum allowed number of download requests made every `` - # seconds, and `` is the expected number of distinct IPs - # making at least one download request during that time. The concurrency - # does not need to be an integer. See - # - # https://docs.aws.amazon.com/waf/latest/developerguide/waf-rule-statement-type-rate-based-high-level-settings.html - # - # for restrictions on the supported values for `` ("Rate limit") - # and `` ("Evaluation window"). - # - 'azul_waf_download_rate_limit': None, - # Wether to enable bot control in AWS WAF. Setting this to 1 will enable # two rules aimed at blocking requests from suspected and verified bots. # As of January 2024, this will incur monthly cost of $10 per ACL plus diff --git a/lambdas/indexer/app.py b/lambdas/indexer/app.py index b2ef0ec2d0..10ff2ec656 100644 --- a/lambdas/indexer/app.py +++ b/lambdas/indexer/app.py @@ -77,9 +77,7 @@ def log_controller(self) -> LogForwardingController: def __init__(self): super().__init__(app_name=config.indexer_name, - app_module_path=__file__, - # see LocalAppTestCase.setUpClass() - unit_test=globals().get('unit_test', False), + globals=globals(), spec=spec) def log_forwarder(self, prefix: str): diff --git a/lambdas/layer/app.py b/lambdas/layer/app.py index 7feec21189..f9865c4d82 100644 --- a/lambdas/layer/app.py +++ b/lambdas/layer/app.py @@ -9,8 +9,7 @@ # package and is removed from the final result. app = AzulChaliceApp(app_name=config.qualified_resource_name('dependencies'), - app_module_path=__file__, - unit_test=False, + globals=globals(), spec={}) diff --git a/lambdas/service/app.py b/lambdas/service/app.py index ab6d8ea87a..0b36a6f5fc 100644 --- a/lambdas/service/app.py +++ b/lambdas/service/app.py @@ -355,9 +355,7 @@ def synthetic_fields(self) -> Sequence[str]: def __init__(self): super().__init__(app_name=config.service_name, - app_module_path=__file__, - # see LocalAppTestCase.setUpClass() - unit_test=globals().get('unit_test', False), + globals=globals(), spec=spec) @attr.s(kw_only=True, auto_attribs=True, frozen=True) diff --git a/requirements.all.txt b/requirements.all.txt index 48f3f95fa2..c8b81041dd 100644 --- a/requirements.all.txt +++ b/requirements.all.txt @@ -45,7 +45,7 @@ google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 greenlet==3.2.3 -grpcio==1.73.1 +grpcio==1.74.0rc1 grpcio-status==1.62.3 http-message-signatures==0.6.1 http_sfv==0.9.9 diff --git a/requirements.trans.txt b/requirements.trans.txt index ca30cdc4a0..34824d6381 100644 --- a/requirements.trans.txt +++ b/requirements.trans.txt @@ -9,7 +9,7 @@ google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.73.1 +grpcio==1.74.0rc1 grpcio-status==1.62.3 http_sfv==0.9.9 idna==3.10 diff --git a/src/azul/__init__.py b/src/azul/__init__.py index 9fd84c1b17..b90de6f5d1 100644 --- a/src/azul/__init__.py +++ b/src/azul/__init__.py @@ -1772,45 +1772,61 @@ def docker_image_gists_path(self) -> Path: blocked_user_agents_custom_regex_term = 'blocked_user_agents_custom' - waf_rate_rule_name = 'RateRule' + waf_rate_rule_name = 'rate_limit' - waf_rate_alarm_rule_name = 'RateAlarmRule' - - waf_rate_rule_period = 300 # seconds; this value is fixed by AWS - - waf_rate_rule_retry_after = 30 # seconds - - waf_rate_rule_limit = 1000 + waf_rate_alarm_rule_name = 'rate_limit_alarm' @frozen(kw_only=True) - class FileDownloadLimit: - rate_limit: int - evaluation_window: int - assumed_request_concurrency: float + class RateLimit: + #: Name of the WAF rule + name: str - @classmethod - def parse(cls, s: str) -> Self: - rate, s = s.split('/') - window, concurrency = s.split('@') - return cls(rate_limit=int(rate), - evaluation_window=int(window), - assumed_request_concurrency=float(concurrency)) + #: Number of requests per evaluation window + value: int - @property - def retry_after(self) -> int: - return round(self.evaluation_window / - self.rate_limit * - self.assumed_request_concurrency) + #: WAF rate limit evaluation window in seconds + period: int - @property - def waf_file_download_limit(self) -> FileDownloadLimit | None: - value = self.environ.get('azul_waf_download_rate_limit') - if value is None: - return None - else: - return self.FileDownloadLimit.parse(value) + #: Value of the Retry-After response header in seconds + retry_after: int - assert 100 <= waf_rate_rule_limit <= 2_000_000_000 # mandated by AWS + def __attrs_post_init__(self): + # Allowed range of the rate limit mandated by AWS + assert 10 <= self.value <= 2_000_000_000, R( + 'Rate limit out of range', self) + # Valid values for the evaluation window mandated by AWS + assert self.period in [60, 120, 300, 600], R( + 'Invalid period', self) + + #: The rate limit per IP before WAF starts rejecting requests + waf_rate_limit = RateLimit(name='rate_limit', + value=1000, + period=5 * 60, + retry_after=30) + + #: The rate limit per IP before a CloudWatch alarm is raised + waf_rate_limit_alarm = evolve(waf_rate_limit, + name='rate_limit_alarm', + value=waf_rate_limit.value * 2) + + #: The rate limit per IP for requests that trigger a manifest generation + #: + waf_rate_limit_manifests = RateLimit(name='rate_limit_manifests', + value=10, + period=10 * 60, + retry_after=30) + + #: The rate limit for file download requests + #: + #: We aim for a global limit of 60 file downloads per 10 minutes. Based on + #: an observed average of 2.9 distinct IPs concurrently downloading files + #: in any 10-minute window, the maximum per-IP request rate we can allow is + #: 20/10min, or 10/5min. + #: + waf_rate_limit_files = RateLimit(name='rate_limit_files', + value=10, + period=5 * 60, + retry_after=30) @property def waf_bot_control(self) -> bool: diff --git a/src/azul/chalice.py b/src/azul/chalice.py index bc03cf18ca..cd18d85279 100644 --- a/src/azul/chalice.py +++ b/src/azul/chalice.py @@ -17,6 +17,7 @@ Callable, Iterator, Literal, + Mapping, Self, Sequence, ) @@ -69,6 +70,9 @@ from azul.logging import ( http_body_log_message, ) +from azul.modules import ( + module_loaded_dynamically, +) from azul.openapi import ( format_description, params, @@ -132,14 +136,14 @@ class AzulChaliceApp(Chalice): def __init__(self, app_name: str, - app_module_path: str, + globals: Mapping[str, Any], *, - unit_test: bool = False, spec: JSON): self._patch_event_source_handler() + app_module_path = globals['__file__'] require(app_module_path.endswith('/app.py'), app_module_path) self.app_module_path = app_module_path - self.unit_test = unit_test + self.loaded_dynamically = module_loaded_dynamically(globals) self.non_interactive_routes: set[tuple[str, str]] = set() reject('paths' in spec, 'The top-level spec must not define paths') self._specs = self._add_contact_to_spec(spec) @@ -331,12 +335,6 @@ def decorator(view_func): else: return lambda view_func: view_func - def test_route(self, *args, **kwargs): - """ - A route that's only enabled during unit tests. - """ - return self.route(*args, enabled=self.unit_test, **kwargs) - def spec(self) -> JSON: """ Return the final OpenAPI spec, stripping out unused tags. diff --git a/src/azul/deployment.py b/src/azul/deployment.py index 29f5747857..0d58f0b940 100644 --- a/src/azul/deployment.py +++ b/src/azul/deployment.py @@ -8,6 +8,9 @@ wraps, ) import inspect +from ipaddress import ( + IPv4Address, +) import json import logging import os @@ -22,7 +25,6 @@ Callable, TYPE_CHECKING, Tuple, - TypeVar, cast, ) from unittest.mock import ( @@ -37,16 +39,22 @@ import botocore.credentials import botocore.session import botocore.utils +from furl import ( + furl, +) from more_itertools import ( one, ) from azul import ( Netloc, + R, cache, cached_property, config, - reject, +) +from azul.http import ( + http_client, ) from azul.logging import ( azul_boto3_log as boto3_log, @@ -119,10 +127,8 @@ log = logging.getLogger(__name__) -R = TypeVar('R') - -def _cache(func: Callable[..., R]) -> Callable[..., R]: +def _cache[R](func: Callable[..., R]) -> Callable[..., R]: """ Methods and properties whose return values depend on the currently active AWS credentials must be cached under the currently active Boto3 session. @@ -721,8 +727,9 @@ def s3_access_log_bucket_policy(self, } ] - def _validate_bucket_path_prefix(self, path_prefix): - reject(path_prefix.startswith('/') or path_prefix.endswith('/'), path_prefix) + def _validate_bucket_path_prefix(self, prefix: str) -> None: + assert not (prefix.startswith('/') or prefix.endswith('/')), R( + 'Path prefix must not start or end in slash', prefix) @property def monitoring_topic_name(self): @@ -741,3 +748,15 @@ def sqs_queue(self, queue_name: str) -> 'Queue': aws = AWS() del AWS del _cache + + +def public_ip() -> IPv4Address: + """ + Return the public IPv4 address of the machine running this code. + """ + url = furl('https://checkip.amazonaws.com') + http = http_client(log) + response = http.request('GET', str(url)) + assert response.status == 200, R('Unexpected response', response) + ip_address = response.data.decode().strip() + return IPv4Address(ip_address) diff --git a/src/azul/logging.py b/src/azul/logging.py index c97f27cef2..069cf6440b 100644 --- a/src/azul/logging.py +++ b/src/azul/logging.py @@ -57,7 +57,7 @@ def filter(self, record): def configure_app_logging(app: 'AzulChaliceApp', *loggers): _configure_log_levels(app.log, *loggers) - if not app.unit_test: + if not app.loaded_dynamically: # Environment is not unit test root_logger = logging.getLogger() if root_logger.hasHandlers(): diff --git a/src/azul/modules.py b/src/azul/modules.py index 997f9f5307..e02dcb409b 100644 --- a/src/azul/modules.py +++ b/src/azul/modules.py @@ -6,6 +6,9 @@ ) import importlib.util import os +from pathlib import ( + Path, +) from typing import ( Any, ) @@ -14,11 +17,12 @@ R, config, ) +from azul.types import ( + not_none, +) -def load_module(path: str, - module_name: str, - module_attributes: Mapping[str, Any] | None = None): +def load_module(path: str, module_name: str): """ Load a module from the .py file at the given path without affecting `sys.path` or `sys.modules`. @@ -38,23 +42,37 @@ def load_module(path: str, """ spec = importlib.util.spec_from_file_location(module_name, path) assert spec is not None, R('Unable to load module', module_name, path) - module = importlib.util.module_from_spec(spec) - if module_attributes is not None: - for k, v in module_attributes.items(): - setattr(module, k, v) assert isinstance(spec.loader, Loader) - spec.loader.exec_module(module) - assert path == module.__file__ + module = importlib.util.module_from_spec(spec) + setattr(module, _loaded_dynamically, True) + assert Path(path).samefile(not_none(module.__file__)) assert module.__name__ == module_name + spec.loader.exec_module(module) return module -def load_app_module(lambda_name, **module_attributes): +def load_app_module(lambda_name): path = os.path.join(config.project_root, 'lambdas', lambda_name, 'app.py') # Changing the module name here will break doctest discoverability - return load_module(path, f'lambdas.{lambda_name}.app', module_attributes) + return load_module(path, f'lambdas.{lambda_name}.app') def load_script(script_name: str): path = os.path.join(config.project_root, 'scripts', f'{script_name}.py') return load_module(path, script_name) + + +_loaded_dynamically = '__azul_loaded_dynamically__' + + +def module_loaded_dynamically(module_globals: Mapping[str, Any]) -> bool: + """ + Determine if a module was loaded dynamically + + :param module_globals: The return value of globals() when invoked from + within the module in question + + :return: True, if the module with the given globals was loaded dynamically + via a facility in this module, False otherwise + """ + return module_globals.get(_loaded_dynamically, False) diff --git a/src/azul/queues.py b/src/azul/queues.py index 044742bd3d..be5fc7785c 100644 --- a/src/azul/queues.py +++ b/src/azul/queues.py @@ -494,7 +494,7 @@ def functions_by_queue(self) -> dict[str, str]: Returns a dictionary that maps queues to the Lambda function triggered by the queue. The keys and values are fully qualified resource names. """ - indexer = load_app_module('indexer', unit_test=True) + indexer = load_app_module('indexer') functions_by_queue = { handler.queue: config.indexer_function_name(handler.name) for handler in indexer.app.handler_map.values() diff --git a/src/azul/service/manifest_service.py b/src/azul/service/manifest_service.py index 08d0bebb62..cbf4862c68 100644 --- a/src/azul/service/manifest_service.py +++ b/src/azul/service/manifest_service.py @@ -1398,13 +1398,14 @@ def command_lines(cls, '--location', '--fail', ] + rate_limit = config.waf_rate_limit # Some options are added to the command-line instead of the curl # manifest so that the user can more easily customize them. file_options = [ # We want curl to make enough retries so that it waits a total of - # one and a half times the evaluation period of the WAF rate rule, + # one and a half times the evaluation window of the WAF rate rule, # long enough for the tripped rule to clear. - f'--retry {ceil(config.waf_rate_rule_period * 1.5 / config.waf_rate_rule_retry_after)}', + f'--retry {ceil(rate_limit.period * 1.5 / rate_limit.retry_after)}', # Curl will respect the 'Retry-After' header if given in a response, # like the one returned when the WAF rate rule is tripped. Otherwise, # curl will wait for the number of seconds specified here. diff --git a/src/azul/template.py b/src/azul/template/__init__.py similarity index 100% rename from src/azul/template.py rename to src/azul/template/__init__.py diff --git a/src/azul/template/__main__.py b/src/azul/template/__main__.py new file mode 100644 index 0000000000..e32723781d --- /dev/null +++ b/src/azul/template/__main__.py @@ -0,0 +1,35 @@ +""" +Usage: python -m azul.template foo.json.template.py foo.json + +Same as ``python foo.json.template.py foo.json`` but configures script logging +""" +import logging +import sys + +from azul.logging import ( + configure_script_logging, +) +from azul.modules import ( + load_module, +) + +# This module is the real __main__ +# +assert __name__ == '__main__' + +# Even though we don't directly use the logger here, we need to instantiate and +# configure it. If we called configure_script_logging() without passing the +# logger, any logger instantiated by the template script would not be considered +# an Azul logger +# +log = logging.getLogger(__name__) +configure_script_logging(log) + +# Shift the arguments so that the output file name becomes sys.argv[1] as +# expected by the emit… functions in the sibling __init__.py +# +template = sys.argv.pop(1) + +# Invoke the template script and pretend that its module name is also __main__ +# +load_module(template, __name__) diff --git a/terraform/api_gateway.tf.json.template.py b/terraform/api_gateway.tf.json.template.py index 442924fe8e..cc1f8d8a91 100644 --- a/terraform/api_gateway.tf.json.template.py +++ b/terraform/api_gateway.tf.json.template.py @@ -19,6 +19,7 @@ ) from azul.deployment import ( aws, + public_ip, ) from azul.modules import ( load_app_module, @@ -32,6 +33,7 @@ vpc, ) from azul.types import ( + JSON, JSONs, ) @@ -144,7 +146,37 @@ def check_waf_rules(rules: JSONs) -> JSONs: 'status': '$context.status' } -file_download_limit = config.waf_file_download_limit + +def waf_match_method(http_method: str) -> JSON: + return { + 'byte_match_statement': { + 'field_to_match': { + 'method': {} + }, + 'positional_constraint': 'EXACTLY', + 'search_string': http_method, + 'text_transformation': { + 'priority': 0, + 'type': 'NONE' + } + } + } + + +def waf_match_path(path_regex: str) -> JSON: + return { + 'regex_match_statement': { + 'regex_string': path_regex, + 'field_to_match': { + 'uri_path': {} + }, + 'text_transformation': { + 'priority': 0, + 'type': 'NONE' + } + } + } + emit_tf({ 'data': [ @@ -225,6 +257,27 @@ def check_waf_rules(rules: JSONs) -> JSONs: ], 'resource': [ { + 'aws_wafv2_ip_set': { + # The IPs in this set are exempt from the rate limit on service + # API requests so as to prevent integration tests from tripping + # them. In the set, we include the IP of the GitLab instance and + # that of the machine deploying the set because those are the + # machines most likely to run integration tests. + # + 'it_v4_ips': { + 'name': config.qualified_resource_name('it_v4_ips'), + 'scope': 'REGIONAL', + 'ip_address_version': 'IPV4', + 'addresses': [ + f'{public_ip()}/32', + *[ + # Data source defined in data_sources.tf.json + f'${{data.aws_nat_gateway.gitlab_{zone}.public_ip}}/32' + for zone in range(vpc.num_zones) + ] + ] + } + }, 'aws_wafv2_web_acl': { 'api_gateway': { 'name': config.qualified_resource_name('api_gateway'), @@ -234,52 +287,12 @@ def check_waf_rules(rules: JSONs) -> JSONs: 'rule': check_waf_rules([ {**rule, 'priority': i} for i, rule in enumerate([ - *([] if file_download_limit is None else [ - { - 'name': 'FileDownloadRateLimit', - 'statement': { - 'rate_based_statement': { - 'limit': file_download_limit.rate_limit, - 'evaluation_window_sec': file_download_limit.evaluation_window, - 'aggregate_key_type': 'CONSTANT', - 'scope_down_statement': { - 'regex_match_statement': { - 'regex_string': '^(/fetch)?/repository/files', - 'field_to_match': {'uri_path': {}}, - 'text_transformation': { - 'priority': 0, - 'type': 'NONE' - } - } - } - } - }, - 'action': { - 'block': { - 'custom_response': { - 'response_code': 429, - 'response_header': [ - { - 'name': 'Retry-After', - 'value': str(file_download_limit.retry_after), - } - ] - } - } - }, - 'visibility_config': { - 'metric_name': 'FileDownloadRateLimit', - 'sampled_requests_enabled': True, - 'cloudwatch_metrics_enabled': True - } - } - ]), *[ { 'name': name, 'statement': { 'ip_set_reference_statement': { - 'arn': '${data.aws_wafv2_ip_set.%s.arn}' % ip_set_term + 'arn': '${data.aws_wafv2_ip_set.%s.arn}' % name } }, 'action': { @@ -291,7 +304,7 @@ def check_waf_rules(rules: JSONs) -> JSONs: 'name': config.blocked_v4_ips_term } } - if ip_set_term == config.blocked_v4_ips_term else + if name == config.blocked_v4_ips_term else {} ), 'visibility_config': { @@ -300,13 +313,13 @@ def check_waf_rules(rules: JSONs) -> JSONs: 'cloudwatch_metrics_enabled': True } } - for name, action, ip_set_term in [ - ('BlockedIPs', 'block', config.blocked_v4_ips_term), - ('AllowedIPs', 'allow', config.allowed_v4_ips_term) + for name, action in [ + (config.blocked_v4_ips_term, 'block'), + (config.allowed_v4_ips_term, 'allow') ] ], { - 'name': 'BlockedUserAgents', + 'name': 'blocked_user_agents', 'statement': { 'or_statement': { 'statement': [ @@ -338,54 +351,13 @@ def check_waf_rules(rules: JSONs) -> JSONs: 'name': config.blocked_user_agents_regex_term }, 'visibility_config': { - 'metric_name': 'BlockedUserAgents', + 'metric_name': 'blocked_user_agents', 'sampled_requests_enabled': True, 'cloudwatch_metrics_enabled': True } }, - *[ - { - 'name': name, - 'statement': { - 'rate_based_statement': { - 'limit': limit, - 'aggregate_key_type': 'IP' - } - }, - 'action': { - 'block': { - 'custom_response': { - 'response_code': 429, - 'response_header': [ - { - 'name': 'Retry-After', - 'value': str(config.waf_rate_rule_retry_after) - } - ] - } - } - }, - 'visibility_config': { - 'metric_name': name, - 'sampled_requests_enabled': True, - 'cloudwatch_metrics_enabled': True - } - } - # We use two rate rules, one with a lower - # threshold that will block requests, and one - # with a higher threshold that will block - # requests and trigger an alarm. Note, the rules - # need to be defined in order of descending - # threshold size since once a rate rule is - # tripped, it will prevent evaluation of any - # following rules. - for name, limit in [ - (config.waf_rate_alarm_rule_name, config.waf_rate_rule_limit * 2), - (config.waf_rate_rule_name, config.waf_rate_rule_limit), - ] - ], { - 'name': 'AWS-CommonRuleSet', + 'name': 'aws_common_rule_set', 'statement': { 'managed_rule_group_statement': { 'name': 'AWSManagedRulesCommonRuleSet', @@ -427,13 +399,13 @@ def check_waf_rules(rules: JSONs) -> JSONs: 'none': {} }, 'visibility_config': { - 'metric_name': 'AWS-CommonRuleSet', + 'metric_name': 'aws_common_rule_set', 'sampled_requests_enabled': True, 'cloudwatch_metrics_enabled': True } }, { - 'name': 'AWS-AmazonIpReputationList', + 'name': 'aws_amazon_ip_reputation_list', 'statement': { 'managed_rule_group_statement': { 'name': 'AWSManagedRulesAmazonIpReputationList', @@ -444,13 +416,13 @@ def check_waf_rules(rules: JSONs) -> JSONs: 'none': {} }, 'visibility_config': { - 'metric_name': 'AWS-AmazonIpReputationList', + 'metric_name': 'aws_amazon_ip_reputation_list', 'sampled_requests_enabled': True, 'cloudwatch_metrics_enabled': True } }, { - 'name': 'AWS-UnixRuleSet', + 'name': 'aws_unix_rule_set', 'statement': { 'managed_rule_group_statement': { 'name': 'AWSManagedRulesUnixRuleSet', @@ -461,14 +433,14 @@ def check_waf_rules(rules: JSONs) -> JSONs: 'none': {} }, 'visibility_config': { - 'metric_name': 'AWS-UnixRuleSet', + 'metric_name': 'aws_unix_rule_set', 'sampled_requests_enabled': True, 'cloudwatch_metrics_enabled': True } }, *iif(config.waf_bot_control, [ { - 'name': 'AWS-AWSManagedRulesBotControlRuleSet', + 'name': 'aws_managed_rules_bot_control_rule_set', 'statement': { 'managed_rule_group_statement': { 'name': 'AWSManagedRulesBotControlRuleSet', @@ -476,18 +448,9 @@ def check_waf_rules(rules: JSONs) -> JSONs: 'version': 'Version_3.1', 'scope_down_statement': { 'not_statement': { - 'statement': { - 'regex_match_statement': { - # Keep consistent with the rules in the response of the - # /robots.txt route in src/azul/chalice.py - 'regex_string': r'^/($|swagger/|robots.txt$)', - 'field_to_match': {'uri_path': {}}, - 'text_transformation': { - 'priority': 0, - 'type': 'NONE' - } - } - } + # Keep consistent with the rules in the response of the + # /robots.txt route in src/azul/chalice.py + 'statement': waf_match_path(r'^/($|swagger/|robots.txt$)') } }, 'managed_rule_group_configs': [ @@ -516,7 +479,7 @@ def check_waf_rules(rules: JSONs) -> JSONs: 'none': {} }, 'visibility_config': { - 'metric_name': 'AWS-AWSManagedRulesBotControlRuleSet', + 'metric_name': 'aws_managed_rules_bot_control_rule_set', 'sampled_requests_enabled': True, 'cloudwatch_metrics_enabled': True } @@ -529,7 +492,7 @@ def check_waf_rules(rules: JSONs) -> JSONs: # requests. The managed rule is scoped down # to URLs dissallowed in robots.txt, so this # rule shouldn't affect well-behaved bot. - 'name': 'BlockVerifiedBotsRule', + 'name': 'block_verified_bots_rule', 'statement': { 'label_match_statement': { 'scope': 'LABEL', @@ -540,12 +503,125 @@ def check_waf_rules(rules: JSONs) -> JSONs: 'block': {} }, "visibility_config": { - 'metric_name': 'BlockVerifiedBotsRule', + 'metric_name': 'block_verified_bots_rule', 'sampled_requests_enabled': True, 'cloudwatch_metrics_enabled': True } } - ]) + ]), + *[ + { + 'name': rate_limit.name, + 'statement': { + 'rate_based_statement': { + 'limit': rate_limit.value, + 'evaluation_window_sec': rate_limit.period, + 'aggregate_key_type': 'IP' + } + }, + 'action': { + 'block': { + 'custom_response': { + 'response_code': 429, + 'response_header': [ + { + 'name': 'Retry-After', + 'value': str(rate_limit.retry_after) + } + ] + } + } + }, + 'visibility_config': { + 'metric_name': rate_limit.name, + 'sampled_requests_enabled': True, + 'cloudwatch_metrics_enabled': True + } + } + # We use two rate rules, one with a lower + # threshold that will block requests, and one + # with a higher threshold that will block + # requests and trigger an alarm. Note, the rules + # need to be defined in order of descending + # threshold size since once a rate rule is + # tripped, it will prevent evaluation of any + # following rules. + for rate_limit in [ + config.waf_rate_limit_alarm, + config.waf_rate_limit, + ] + ], + { + # See it_v4_ips above + 'name': 'allow_it_requests', + 'statement': { + 'and_statement': [ + { + 'statement': [ + { + 'ip_set_reference_statement': { + 'arn': '${aws_wafv2_ip_set.%s.arn}' % 'it_v4_ips' + } + }, + waf_match_method('PUT'), + waf_match_path('^(/fetch)?/manifest/files') + ] + } + ] + }, + 'action': { + 'allow': {} + }, + 'visibility_config': { + 'metric_name': 'allow_it_requests', + 'sampled_requests_enabled': True, + 'cloudwatch_metrics_enabled': True + } + }, + *[ + { + 'name': limit.name, + 'statement': { + 'rate_based_statement': { + 'limit': limit.value, + 'evaluation_window_sec': limit.period, + 'aggregate_key_type': 'IP', + 'scope_down_statement': { + 'and_statement': [ + { + 'statement': [ + waf_match_method(method), + waf_match_path(path) + ] + } + ] + } + } + }, + 'action': { + 'block': { + 'custom_response': { + 'response_code': 429, + 'response_header': [ + { + 'name': 'Retry-After', + 'value': str(limit.retry_after) + } + ] + } + } + }, + 'visibility_config': { + 'metric_name': limit.name, + 'sampled_requests_enabled': True, + 'cloudwatch_metrics_enabled': True + } + } + for method, path, limit in [ + ('GET', '^(/fetch)?/repository/files', config.waf_rate_limit_files), + ('PUT', '^(/fetch)?/manifest/files', config.waf_rate_limit_manifests) + ] + ] ]) ]), 'scope': 'REGIONAL', diff --git a/terraform/cloudwatch.tf.json.template.py b/terraform/cloudwatch.tf.json.template.py index 9ec246a465..adfe3b517b 100644 --- a/terraform/cloudwatch.tf.json.template.py +++ b/terraform/cloudwatch.tf.json.template.py @@ -49,17 +49,6 @@ def dashboard_body(name: str) -> str: } }, { - 'aws_nat_gateway': { - **{ - f'gitlab_{zone}': { - 'filter': { - 'name': 'tag:Name', - 'values': [f'azul-gitlab_{zone}'] - }, - } - for zone in range(vpc.num_zones) - } - }, 'aws_ec2_client_vpn_endpoint': { 'gitlab': { 'filter': { @@ -141,7 +130,7 @@ def dashboard_body(name: str) -> str: 'ok_actions': ['${data.aws_sns_topic.monitoring.arn}'], # CloudWatch uses an unconfigurable "evaluation range" when missing # data is involved. In practice this means that an alarm on the - # absence of logs with an evaluation period of ten minutes would + # absence of logs with an evaluation window of ten minutes would # require thirty minutes of no logs before the alarm is raised. # Using a metric query we can fill in missing datapoints with a # value of zero and avoid the need for the evaluation range. @@ -189,6 +178,7 @@ def dashboard_body(name: str) -> str: 'id': f'm{zone}', 'metric': { 'dimensions': { + # Data source defined in data_sources.tf.json 'NatGatewayId': f'${{data.aws_nat_gateway.gitlab_{zone}.id}}' }, 'namespace': 'AWS/NATGateway', @@ -316,7 +306,7 @@ def dashboard_body(name: str) -> str: 'dimensions': { 'WebACL': '${aws_wafv2_web_acl.api_gateway.name}', 'Region': config.region, - 'Rule': config.waf_rate_alarm_rule_name + 'Rule': config.waf_rate_limit.name }, 'alarm_actions': ['${data.aws_sns_topic.monitoring.arn}'], 'ok_actions': ['${data.aws_sns_topic.monitoring.arn}'], diff --git a/terraform/data_sources.tf.json.template.py b/terraform/data_sources.tf.json.template.py new file mode 100644 index 0000000000..159b45735f --- /dev/null +++ b/terraform/data_sources.tf.json.template.py @@ -0,0 +1,22 @@ +from azul.terraform import ( + emit_tf, + vpc, +) + +emit_tf({ + 'data': [ + { + 'aws_nat_gateway': { + **{ + f'gitlab_{zone}': { + 'filter': { + 'name': 'tag:Name', + 'values': [f'azul-gitlab_{zone}'] + }, + } + for zone in range(vpc.num_zones) + } + } + } + ] +}) diff --git a/terraform/shared/shared.tf.json.template.py b/terraform/shared/shared.tf.json.template.py index 9d041b7d63..15181de574 100644 --- a/terraform/shared/shared.tf.json.template.py +++ b/terraform/shared/shared.tf.json.template.py @@ -580,7 +580,7 @@ def conformance_pack(name: str) -> str: 'ok_actions': ['${aws_sns_topic.monitoring.arn}'], # CloudWatch uses an unconfigurable "evaluation range" when missing # data is involved. In practice this means that an alarm on the - # absence of logs with an evaluation period of ten minutes would + # absence of logs with an evaluation window of ten minutes would # require thirty minutes of no logs before the alarm is raised. # Using a metric query we can fill in missing datapoints with a # value of zero and avoid the need for the evaluation range. diff --git a/test/app_test_case.py b/test/app_test_case.py index d35f452381..8f289f1193 100644 --- a/test/app_test_case.py +++ b/test/app_test_case.py @@ -100,7 +100,7 @@ def setUpClass(cls): # app modules from different lambdas loaded by different concrete # subclasses. It does, however, violate this one invariant: # `sys.modules[module.__name__] == module` - cls.app_module = load_app_module(cls.lambda_name(), unit_test=True) + cls.app_module = load_app_module(cls.lambda_name()) @classmethod def tearDownClass(cls): diff --git a/test/health_check_test_case.py b/test/health_check_test_case.py index 7ef0b71b76..a8ef810bb8 100644 --- a/test/health_check_test_case.py +++ b/test/health_check_test_case.py @@ -137,7 +137,7 @@ def test_cached_health(self): # A successful response is obtained when all the systems are functional self._create_mock_queues() - app = load_app_module(self.lambda_name(), unit_test=True) + app = load_app_module(self.lambda_name()) with self._mock(): app.update_health_cache(MagicMock(), MagicMock()) response = self._test('/health/cached') diff --git a/test/integration_test.py b/test/integration_test.py index 60b49c1737..c683f5a4f5 100644 --- a/test/integration_test.py +++ b/test/integration_test.py @@ -693,7 +693,8 @@ def _test_manifest_tagging_race(self, catalog: CatalogName): # racy state. However, we still have to throttle the # requests in order to prevent tripping the WAF rate # limit. - time.sleep(config.waf_rate_rule_period / config.waf_rate_rule_limit) + rate_limit = config.waf_rate_limit + time.sleep(rate_limit.period / rate_limit.value) elif response.status == 302: responses.append(response) method, manifest_url = GET, furl(response.headers['Location']) @@ -1833,7 +1834,7 @@ class AzulChaliceLocalIntegrationTest(AzulTestCase): @classmethod def setUpClass(cls) -> None: super().setUpClass() - app_module = load_app_module('service', unit_test=True) + app_module = load_app_module('service') app_dir = os.path.dirname(app_module.__file__) factory = chalice.cli.factory.CLIFactory(app_dir) config = factory.create_config_obj() diff --git a/test/test_app_logging.py b/test/test_app_logging.py index c34d31381f..f927ddc7ee 100644 --- a/test/test_app_logging.py +++ b/test/test_app_logging.py @@ -52,7 +52,9 @@ def test(self): with mock.patch.dict(os.environ, AZUL_DEBUG=str(debug)): with self.subTest(debug=debug): log_level = azul_log_level() - app = AzulChaliceApp(__name__, '/app.py', unit_test=True, spec={}) + app = AzulChaliceApp(app_name=__name__, + globals={'__file__': '/app.py'}, + spec={}) path = '/fail/path' @app.route(path, spec={}) diff --git a/test/test_openapi.py b/test/test_openapi.py index e2a33d4d86..a66f287141 100644 --- a/test/test_openapi.py +++ b/test/test_openapi.py @@ -37,7 +37,9 @@ def setUpModule(): class TestAppSpecs(AzulUnitTestCase): def app(self, spec): - return AzulChaliceApp('testing', '/app.py', spec=spec) + return AzulChaliceApp(app_name='testing', + globals={'__file__': '/app.py'}, + spec=spec) def test_top_level_spec(self): spec = {'foo': 'bar'}