Skip to content

Commit fa4a00a

Browse files
authored
Check for CREATE/DROP INDEX in schema deltas (#18440)
As these should be background updates.
1 parent 7d4c3b6 commit fa4a00a

File tree

2 files changed

+99
-29
lines changed

2 files changed

+99
-29
lines changed

changelog.d/18440.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add lint to ensure we don't add a `CREATE/DROP INDEX` in a schema delta.

scripts-dev/check_schema_delta.py

Lines changed: 98 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#!/usr/bin/env python3
22

33
# Check that no schema deltas have been added to the wrong version.
4+
#
5+
# Also checks that schema deltas do not try and create or drop indices.
46

57
import re
68
from typing import Any, Dict, List
@@ -9,6 +11,13 @@
911
import git
1012

1113
SCHEMA_FILE_REGEX = re.compile(r"^synapse/storage/schema/(.*)/delta/(.*)/(.*)$")
14+
INDEX_CREATION_REGEX = re.compile(r"CREATE .*INDEX .*ON ([a-z_]+)", flags=re.IGNORECASE)
15+
INDEX_DELETION_REGEX = re.compile(r"DROP .*INDEX ([a-z_]+)", flags=re.IGNORECASE)
16+
TABLE_CREATION_REGEX = re.compile(r"CREATE .*TABLE ([a-z_]+)", flags=re.IGNORECASE)
17+
18+
# The base branch we want to check against. We use the main development branch
19+
# on the assumption that is what we are developing against.
20+
DEVELOP_BRANCH = "develop"
1221

1322

1423
@click.command()
@@ -20,6 +29,9 @@
2029
help="Always output ANSI colours",
2130
)
2231
def main(force_colors: bool) -> None:
32+
# Return code. Set to non-zero when we encounter an error
33+
return_code = 0
34+
2335
click.secho(
2436
"+++ Checking schema deltas are in the right folder",
2537
fg="green",
@@ -30,17 +42,17 @@ def main(force_colors: bool) -> None:
3042
click.secho("Updating repo...")
3143

3244
repo = git.Repo()
33-
repo.remote().fetch()
45+
repo.remote().fetch(refspec=DEVELOP_BRANCH)
3446

3547
click.secho("Getting current schema version...")
3648

37-
r = repo.git.show("origin/develop:synapse/storage/schema/__init__.py")
49+
r = repo.git.show(f"origin/{DEVELOP_BRANCH}:synapse/storage/schema/__init__.py")
3850

3951
locals: Dict[str, Any] = {}
4052
exec(r, locals)
4153
current_schema_version = locals["SCHEMA_VERSION"]
4254

43-
diffs: List[git.Diff] = repo.remote().refs.develop.commit.diff(None)
55+
diffs: List[git.Diff] = repo.remote().refs[DEVELOP_BRANCH].commit.diff(None)
4456

4557
# Get the schema version of the local file to check against current schema on develop
4658
with open("synapse/storage/schema/__init__.py") as file:
@@ -53,7 +65,7 @@ def main(force_colors: bool) -> None:
5365
# local schema version must be +/-1 the current schema version on develop
5466
if abs(local_schema_version - current_schema_version) != 1:
5567
click.secho(
56-
"The proposed schema version has diverged more than one version from develop, please fix!",
68+
f"The proposed schema version has diverged more than one version from {DEVELOP_BRANCH}, please fix!",
5769
fg="red",
5870
bold=True,
5971
color=force_colors,
@@ -67,21 +79,28 @@ def main(force_colors: bool) -> None:
6779
click.secho(f"Current schema version: {current_schema_version}")
6880

6981
seen_deltas = False
70-
bad_files = []
82+
bad_delta_files = []
83+
changed_delta_files = []
7184
for diff in diffs:
72-
if not diff.new_file or diff.b_path is None:
85+
if diff.b_path is None:
86+
# We don't lint deleted files.
7387
continue
7488

7589
match = SCHEMA_FILE_REGEX.match(diff.b_path)
7690
if not match:
7791
continue
7892

93+
changed_delta_files.append(diff.b_path)
94+
95+
if not diff.new_file:
96+
continue
97+
7998
seen_deltas = True
8099

81100
_, delta_version, _ = match.groups()
82101

83102
if delta_version != str(current_schema_version):
84-
bad_files.append(diff.b_path)
103+
bad_delta_files.append(diff.b_path)
85104

86105
if not seen_deltas:
87106
click.secho(
@@ -92,41 +111,91 @@ def main(force_colors: bool) -> None:
92111
)
93112
return
94113

95-
if not bad_files:
114+
if bad_delta_files:
115+
bad_delta_files.sort()
116+
96117
click.secho(
97-
f"All deltas are in the correct folder: {current_schema_version}!",
98-
fg="green",
118+
"Found deltas in the wrong folder!",
119+
fg="red",
99120
bold=True,
100121
color=force_colors,
101122
)
102-
return
103123

104-
bad_files.sort()
105-
106-
click.secho(
107-
"Found deltas in the wrong folder!",
108-
fg="red",
109-
bold=True,
110-
color=force_colors,
111-
)
124+
for f in bad_delta_files:
125+
click.secho(
126+
f"\t{f}",
127+
fg="red",
128+
bold=True,
129+
color=force_colors,
130+
)
112131

113-
for f in bad_files:
132+
click.secho()
114133
click.secho(
115-
f"\t{f}",
134+
f"Please move these files to delta/{current_schema_version}/",
116135
fg="red",
117136
bold=True,
118137
color=force_colors,
119138
)
120139

121-
click.secho()
122-
click.secho(
123-
f"Please move these files to delta/{current_schema_version}/",
124-
fg="red",
125-
bold=True,
126-
color=force_colors,
127-
)
140+
else:
141+
click.secho(
142+
f"All deltas are in the correct folder: {current_schema_version}!",
143+
fg="green",
144+
bold=True,
145+
color=force_colors,
146+
)
128147

129-
click.get_current_context().exit(1)
148+
# Make sure we process them in order. This sort works because deltas are numbered
149+
# and delta files are also numbered in order.
150+
changed_delta_files.sort()
151+
152+
# Now check that we're not trying to create or drop indices. If we want to
153+
# do that they should be in background updates. The exception is when we
154+
# create indices on tables we've just created.
155+
created_tables = set()
156+
for delta_file in changed_delta_files:
157+
with open(delta_file) as fd:
158+
delta_lines = fd.readlines()
159+
160+
for line in delta_lines:
161+
# Strip SQL comments
162+
line = line.split("--", maxsplit=1)[0]
163+
164+
# Check and track any tables we create
165+
match = TABLE_CREATION_REGEX.search(line)
166+
if match:
167+
table_name = match.group(1)
168+
created_tables.add(table_name)
169+
170+
# Check for dropping indices, these are always banned
171+
match = INDEX_DELETION_REGEX.search(line)
172+
if match:
173+
clause = match.group()
174+
175+
click.secho(
176+
f"Found delta with index deletion: '{clause}' in {delta_file}\nThese should be in background updates.",
177+
fg="red",
178+
bold=True,
179+
color=force_colors,
180+
)
181+
return_code = 1
182+
183+
# Check for index creation, which is only allowed for tables we've
184+
# created.
185+
match = INDEX_CREATION_REGEX.search(line)
186+
if match:
187+
clause = match.group()
188+
table_name = match.group(1)
189+
if table_name not in created_tables:
190+
click.secho(
191+
f"Found delta with index creation: '{clause}' in {delta_file}\nThese should be in background updates.",
192+
fg="red",
193+
bold=True,
194+
color=force_colors,
195+
)
196+
return_code = 1
197+
198+
click.get_current_context().exit(return_code)
130199

131200

132201
if __name__ == "__main__":

0 commit comments

Comments
 (0)