Skip to content

Commit 75d3313

Browse files
committed
Bump version to 0.2.7 and add unanchored_regex plugin to detect regex patterns without anchors
1 parent 14ba2b9 commit 75d3313

File tree

15 files changed

+439
-187
lines changed

15 files changed

+439
-187
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ Right now Gixy can find:
4242
* [[proxy_pass_normalized] `proxy_pass` will decode and normalize paths when specified with a path](https://joshua.hu/proxy-pass-nginx-decoding-normalizing-url-path-dangerous#nginx-proxy_pass)
4343
* [[worker_rlimit_nofile_vs_connections] `worker_rlimit_nofile` must be at least twice `worker_connections`](https://gixy.getpagespeed.com/en/plugins/worker_rlimit_nofile_vs_connections/)
4444
* [[error_log_off] `error_log` set to `off`](https://gixy.getpagespeed.com/en/plugins/error_log_off/)
45+
* [[unanchored_regex] Regular expression without anchors](https://gixy.getpagespeed.com/en/plugins/unanchored_regex/)
4546

4647
You can find things that Gixy is learning to detect at [Issues labeled with "new plugin"](https://github.com/dvershinin/gixy/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+plugin%22)
4748

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# [unanchored_regex]: Regular expression without anchors
2+
3+
In NGINX, when defining a location with a regular expression, it's recommended to anchor the regex to at least the beginning or the end of the string.
4+
Otherwise, the regex will match any part of the string, which may lead to unexpected behavior or decreased performance.
5+
6+
For example, the following location block will match any URL that contains `/v1/`:
7+
8+
```nginx
9+
location ~ /v1/ {
10+
# ...
11+
}
12+
```
13+
14+
This will match:
15+
16+
- `/v1/`
17+
- `/v1/foo`
18+
- `/foo/v1/bar`
19+
- `/foo/v1/`
20+
21+
To match only URLs that start with `/v1/`, the regex should be anchored:
22+
23+
```nginx
24+
location ~ ^/v1/ {
25+
# ...
26+
}
27+
```
28+
29+
This way, the regex will match only URLs that start with `/v1/`.
30+
31+
For matching file extensions, e.g., PHP files, the regex should be anchored at the end of the string.
32+
33+
Incorrect:
34+
35+
```nginx
36+
location ~ \.php {
37+
# ...
38+
}
39+
```
40+
41+
It will match any URL that contains `.php` anywhere in the path: not only `/foo.php`, but also `/foo.phpanything`, which is usually not intended.
42+
43+
Correct:
44+
45+
```nginx
46+
location ~ \.php$ {
47+
# ...
48+
}
49+
```
50+
51+
This way, the regex will match only URLs that end with `.php`.

gixy/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22

33
from gixy.core import severity
44

5-
version = "0.2.6"
5+
version = "0.2.7"

gixy/cli/__main__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1+
"""Entry point for the CLI."""
2+
13
from gixy.cli.main import main
24

35
main()
4-

gixy/cli/argparser.py

Lines changed: 74 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
"""This module provides a custom argument parser for Gixy."""
2+
13
# flake8: noqa
24

35
from configargparse import *
@@ -6,54 +8,61 @@
68
from gixy.core.plugins_manager import PluginsManager
79

810
# used while parsing args to keep track of where they came from
9-
_COMMAND_LINE_SOURCE_KEY = 'command_line'
10-
_ENV_VAR_SOURCE_KEY = 'environment_variables'
11-
_CONFIG_FILE_SOURCE_KEY = 'config_file'
12-
_DEFAULTS_SOURCE_KEY = 'defaults'
11+
_COMMAND_LINE_SOURCE_KEY = "command_line"
12+
_ENV_VAR_SOURCE_KEY = "environment_variables"
13+
_CONFIG_FILE_SOURCE_KEY = "config_file"
14+
_DEFAULTS_SOURCE_KEY = "defaults"
1315

1416

1517
class GixyConfigParser(DefaultConfigFileParser):
1618
def get_syntax_description(self):
17-
return ''
19+
return ""
1820

1921
def parse(self, stream):
2022
"""Parses the keys + values from a config file."""
2123

2224
items = OrderedDict()
23-
prefix = ''
25+
prefix = ""
26+
27+
# Precompile regex patterns for performance.
28+
white_space = r"\s*"
29+
key_pattern = r"(?P<key>[^:=;#\s]+?)"
30+
value_pattern = white_space + r"[:=\s]" + white_space + r"(?P<value>.+?)"
31+
comment_pattern = white_space + r"(?P<comment>\s[;#].*)?"
32+
regex_key_only = re.compile(r"^" + key_pattern + comment_pattern + r"$")
33+
regex_key_value = re.compile(
34+
r"^" + key_pattern + value_pattern + comment_pattern + r"$"
35+
)
36+
2437
for i, line in enumerate(stream):
2538
line = line.strip()
26-
if not line or line[0] in ['#', ';'] or line.startswith('---'):
39+
if not line or line[0] in ["#", ";"] or line.startswith("---"):
2740
continue
28-
if line[0] == '[':
29-
prefix = '%s-' % line[1:-1].replace('_', '-')
41+
if line[0] == "[":
42+
prefix = f"{line[1:-1].replace('_', '-')}-"
3043
continue
3144

32-
white_space = '\\s*'
33-
key = r'(?P<key>[^:=;#\s]+?)'
34-
value = white_space + r'[:=\s]' + white_space + r'(?P<value>.+?)'
35-
comment = white_space + r'(?P<comment>\s[;#].*)?'
36-
37-
key_only_match = re.match('^' + key + comment + '$', line)
45+
key_only_match = regex_key_only.match(line)
3846
if key_only_match:
39-
key = key_only_match.group('key')
40-
items[key] = 'true'
47+
key = key_only_match.group("key")
48+
items[key] = "true"
4149
continue
4250

43-
key_value_match = re.match('^' + key + value + comment + '$', line)
51+
key_value_match = regex_key_value.match(line)
4452
if key_value_match:
45-
key = key_value_match.group('key')
46-
value = key_value_match.group('value')
53+
key = key_value_match.group("key")
54+
value = key_value_match.group("value")
4755

48-
if value.startswith('[') and value.endswith(']'):
49-
# handle special case of lists
50-
value = [elem.strip() for elem in value[1:-1].split(',')]
56+
if value.startswith("[") and value.endswith("]"):
57+
# handle a special case of lists
58+
value = [elem.strip() for elem in value[1:-1].split(",")]
5159

5260
items[prefix + key] = value
5361
continue
5462

55-
raise ConfigFileParserException('Unexpected line %s in %s: %s' % (i,
56-
getattr(stream, 'name', 'stream'), line))
63+
raise ConfigFileParserException(
64+
f"Unexpected line {i} in {getattr(stream, 'name', 'stream')}: {line}"
65+
)
5766
return items
5867

5968
def serialize(self, items):
@@ -62,45 +71,50 @@ def serialize(self, items):
6271
"""
6372
r = StringIO()
6473
for key, value in items.items():
65-
if type(value) == OrderedDict:
66-
r.write('\n[%s]\n' % key)
74+
if isinstance(value, OrderedDict):
75+
r.write(f"\n[{key}]\n")
6776
r.write(self.serialize(value))
6877
else:
6978
value, help = value
7079
if help:
71-
r.write('; %s\n' % help)
72-
r.write('%s = %s\n' % (key, value))
80+
r.write(f"; {help}\n")
81+
r.write(f"{key} = {value}\n")
7382
return r.getvalue()
7483

7584

7685
class GixyHelpFormatter(HelpFormatter):
86+
"""Custom help formatter for Gixy."""
87+
7788
def format_help(self):
7889
manager = PluginsManager()
7990
help_message = super(GixyHelpFormatter, self).format_help()
80-
if 'plugins options:' in help_message:
91+
if "plugins options:" in help_message:
8192
# Print available plugins _only_ if we print options for them
82-
plugins = '\n'.join('\t' + plugin.__name__ for plugin in manager.plugins_classes)
83-
help_message = '{orig}\n\navailable plugins:\n{plugins}\n'.format(orig=help_message, plugins=plugins)
93+
plugins = "\n".join(
94+
"\t" + plugin.__name__ for plugin in manager.plugins_classes
95+
)
96+
help_message = f"{help_message}\n\navailable plugins:\n{plugins}\n"
8497
return help_message
8598

8699

87100
class ArgsParser(ArgumentParser):
101+
"""Custom argument parser for Gixy."""
102+
88103
def get_possible_config_keys(self, action):
89104
"""This method decides which actions can be set in a config file and
90105
what their keys will be. It returns a list of zero or more config keys that
91106
can be used to set the given action's value in a config file.
92107
"""
93108
keys = []
94109
for arg in action.option_strings:
95-
if arg in ['--config', '--write-config', '--version']:
110+
if arg in ["--config", "--write-config", "--version"]:
96111
continue
97112
if any([arg.startswith(2 * c) for c in self.prefix_chars]):
98113
keys += [arg[2:], arg] # eg. for '--bla' return ['bla', '--bla']
99114

100115
return keys
101116

102-
def get_items_for_config_file_output(self, source_to_settings,
103-
parsed_namespace):
117+
def get_items_for_config_file_output(self, source_to_settings, parsed_namespace):
104118
"""Converts the given settings back to a dictionary that can be passed
105119
to ConfigFormatParser.serialize(..).
106120
@@ -114,29 +128,36 @@ def get_items_for_config_file_output(self, source_to_settings,
114128
config_file_items = OrderedDict()
115129
for source, settings in source_to_settings.items():
116130
if source == _COMMAND_LINE_SOURCE_KEY:
117-
_, existing_command_line_args = settings['']
131+
_, existing_command_line_args = settings[""]
118132
for action in self._actions:
119133
config_file_keys = self.get_possible_config_keys(action)
120-
if config_file_keys and not action.is_positional_arg and \
121-
already_on_command_line(existing_command_line_args,
122-
action.option_strings):
134+
if (
135+
config_file_keys
136+
and not action.is_positional_arg
137+
and already_on_command_line(
138+
existing_command_line_args, action.option_strings
139+
)
140+
):
123141
value = getattr(parsed_namespace, action.dest, None)
124142
if value is not None:
125143
if type(value) is bool:
126144
value = str(value).lower()
127-
if ':' in action.dest:
128-
section, key = action.dest.split(':', 2)
129-
key = key.replace('_', '-')
145+
if ":" in action.dest:
146+
section, key = action.dest.split(":", 2)
147+
key = key.replace("_", "-")
130148
if section not in config_file_items:
131149
config_file_items[section] = OrderedDict()
132150
config_file_items[section][key] = (value, action.help)
133151
else:
134-
config_file_items[config_file_keys[0]] = (value, action.help)
152+
config_file_items[config_file_keys[0]] = (
153+
value,
154+
action.help,
155+
)
135156
elif source.startswith(_CONFIG_FILE_SOURCE_KEY):
136157
for key, (action, value) in settings.items():
137-
if ':' in action.dest:
138-
section, key = action.dest.split(':', 2)
139-
key = key.replace('_', '-')
158+
if ":" in action.dest:
159+
section, key = action.dest.split(":", 2)
160+
key = key.replace("_", "-")
140161
if section not in config_file_items:
141162
config_file_items[section] = OrderedDict()
142163
config_file_items[section][key] = (value, action.help)
@@ -146,14 +167,15 @@ def get_items_for_config_file_output(self, source_to_settings,
146167

147168

148169
def create_parser():
170+
"""Create an argument parser for Gixy."""
149171
return ArgsParser(
150-
description='Gixy - a Nginx configuration [sec]analyzer\n\n',
172+
description="Gixy - a Nginx configuration [sec]analyzer\n\n",
151173
formatter_class=GixyHelpFormatter,
152174
config_file_parser_class=GixyConfigParser,
153-
auto_env_var_prefix='GIXY_',
175+
auto_env_var_prefix="GIXY_",
154176
add_env_var_help=False,
155-
default_config_files=['/etc/gixy/gixy.cfg', '~/.config/gixy/gixy.conf'],
156-
args_for_setting_config_path=['-c', '--config'],
157-
args_for_writing_out_config_file=['--write-config'],
158-
add_config_file_help=False
177+
default_config_files=["/etc/gixy/gixy.cfg", "~/.config/gixy/gixy.conf"],
178+
args_for_setting_config_path=["-c", "--config"],
179+
args_for_writing_out_config_file=["--write-config"],
180+
add_config_file_help=False,
159181
)

0 commit comments

Comments
 (0)