Skip to content

Commit 5b874ed

Browse files
committed
Compute attestable metadata before starting a database transaction
1 parent 881a3ee commit 5b874ed

File tree

2 files changed

+41
-26
lines changed

2 files changed

+41
-26
lines changed

atr/attestable.py

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -131,18 +131,30 @@ def migrate_to_paths_files() -> int:
131131
return count
132132

133133

134+
async def paths_to_hashes_and_sizes(directory: pathlib.Path) -> tuple[dict[str, str], dict[str, int]]:
    """Collect a content hash and a byte size for every path under ``directory``.

    Each item produced by ``util.paths_recursive(directory)`` is joined onto
    ``directory`` to locate the file, and its string form is used as the key in
    both returned mappings (presumably a path relative to ``directory`` --
    confirm against ``util.paths_recursive``).

    Returns:
        A ``(hashes, sizes)`` pair: the first maps each path key to the value
        returned by ``compute_file_hash``, the second maps the same key to the
        ``st_size`` reported by ``aiofiles.os.stat``.

    Raises:
        ValueError: If any path key contains a backslash.
    """
    hashes: dict[str, str] = {}
    sizes: dict[str, int] = {}

    async for relative in util.paths_recursive(directory):
        absolute = directory / relative
        key = str(relative)

        # TODO: We should centralise this, and forbid some other characters too
        if "\\" in key:
            raise ValueError(f"Backslash in path is forbidden: {key}")

        # Hash first, then stat, one file at a time.
        digest = await compute_file_hash(absolute)
        stat_result = await aiofiles.os.stat(absolute)

        hashes[key] = digest
        sizes[key] = stat_result.st_size

    return hashes, sizes
146+
147+
134148
async def write(
135-
release_directory: pathlib.Path,
136149
project_name: str,
137150
version_name: str,
138151
revision_number: str,
139152
uploader_uid: str,
140-
parent_revision_number: str | None,
153+
previous: models.AttestableV1 | None,
154+
path_to_hash: dict[str, str],
155+
path_to_size: dict[str, int],
141156
) -> None:
142-
previous: models.AttestableV1 | None = None
143-
if parent_revision_number is not None:
144-
previous = await load(project_name, version_name, parent_revision_number)
145-
result = await _generate(release_directory, revision_number, uploader_uid, previous)
157+
result = _generate(path_to_hash, path_to_size, revision_number, uploader_uid, previous)
146158
file_path = attestable_path(project_name, version_name, revision_number)
147159
await util.atomic_write_file(file_path, result.model_dump_json(indent=2))
148160
paths_result = models.AttestablePathsV1(paths=result.paths)
@@ -185,34 +197,22 @@ def _compute_hashes_with_attribution(
185197
return new_hashes
186198

187199

188-
async def _generate(
189-
directory: pathlib.Path,
200+
def _generate(
201+
path_to_hash: dict[str, str],
202+
path_to_size: dict[str, int],
190203
revision_number: str,
191204
uploader_uid: str,
192205
previous: models.AttestableV1 | None,
193206
) -> models.AttestableV1:
194-
current_path_to_hash: dict[str, str] = {}
195207
current_hash_to_paths: dict[str, set[str]] = {}
196-
path_to_size: dict[str, int] = {}
197-
198-
async for rel_path in util.paths_recursive(directory):
199-
full_path = directory / rel_path
200-
path_key = str(rel_path)
201-
if "\\" in path_key:
202-
# TODO: We should centralise this, and forbid some other characters too
203-
raise ValueError(f"Backslash in path is forbidden: {path_key}")
204-
hash_ref = await compute_file_hash(full_path)
205-
file_size = (await aiofiles.os.stat(full_path)).st_size
206-
207-
current_path_to_hash[path_key] = hash_ref
208-
path_to_size[path_key] = file_size
208+
for path_key, hash_ref in path_to_hash.items():
209209
current_hash_to_paths.setdefault(hash_ref, set()).add(path_key)
210210

211211
new_hashes = _compute_hashes_with_attribution(
212212
current_hash_to_paths, path_to_size, previous, uploader_uid, revision_number
213213
)
214214

215215
return models.AttestableV1(
216-
paths=dict(current_path_to_hash),
216+
paths=dict(path_to_hash),
217217
hashes=dict(new_hashes),
218218
)

atr/storage/writers/revision.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def __init__(
105105
self.__committee_name = committee_name
106106

107107
@contextlib.asynccontextmanager
108-
async def create_and_manage(
108+
async def create_and_manage( # noqa: C901
109109
self,
110110
project_name: str,
111111
version_name: str,
@@ -165,6 +165,16 @@ async def create_and_manage(
165165
await aioshutil.rmtree(temp_dir)
166166
raise
167167

168+
try:
169+
path_to_hash, path_to_size = await attestable.paths_to_hashes_and_sizes(temp_dir_path)
170+
parent_revision_number = old_revision.number if old_revision else None
171+
previous_attestable = None
172+
if parent_revision_number is not None:
173+
previous_attestable = await attestable.load(project_name, version_name, parent_revision_number)
174+
except Exception:
175+
await aioshutil.rmtree(temp_dir)
176+
raise
177+
168178
async with SafeSession(temp_dir) as data:
169179
try:
170180
# This is the only place where models.Revision is constructed
@@ -206,9 +216,14 @@ async def create_and_manage(
206216
await aioshutil.rmtree(temp_dir)
207217
raise
208218

209-
parent_revision_number = old_revision.number if old_revision else None
210219
await attestable.write(
211-
new_revision_dir, project_name, version_name, new_revision.number, asf_uid, parent_revision_number
220+
project_name,
221+
version_name,
222+
new_revision.number,
223+
asf_uid,
224+
previous_attestable,
225+
path_to_hash,
226+
path_to_size,
212227
)
213228

214229
# Commit to end the transaction started by data.begin_immediate

0 commit comments

Comments
 (0)