Skip to content

Commit 47c4c43

Browse files
author
Thomas Desveaux
committed
base_commands: improve download_fileset._find_matching_artifact for git artifacts
1 parent 0a0c5b8 commit 47c4c43

File tree

3 files changed

+229
-54
lines changed

3 files changed

+229
-54
lines changed

nimp/artifacts.py

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424

2525
from __future__ import annotations
2626

27-
import copy
2827
import datetime
2928
import hashlib
3029
import json
@@ -66,15 +65,18 @@
6665

6766
class Artifact(TypedDict):
6867
revision: str
69-
sortable_revision: str
7068
uri: str
7169

7270

7371
def _is_http_url(string: str) -> bool:
    '''Tell whether the string starts with an http:// or https:// scheme'''
    url_match = re.match(r'^http[s]?:\/\/.*$', string)
    if url_match is None:
        return False
    return True
7573

7674

77-
def list_artifacts(artifact_pattern: str, format_arguments: Mapping[str, Any], api_context) -> list[Artifact]:
75+
def list_artifacts(
76+
artifact_pattern: str,
77+
format_arguments: Mapping[str, Any],
78+
api_context_: nimp.utils.git.GitApiContext | None,
79+
) -> list[Artifact]:
7880
'''List all artifacts and their revision using the provided pattern after formatting'''
7981

8082
artifact_pattern = artifact_pattern.format_map(
@@ -103,17 +105,12 @@ def list_artifacts(artifact_pattern: str, format_arguments: Mapping[str, Any], a
103105
continue
104106

105107
group_revision = artifact_match.group('revision')
106-
sortable_revision = copy.deepcopy(group_revision)
107-
if api_context:
108-
sortable_revision = nimp.utils.git.get_gitea_commit_timestamp(api_context, group_revision)
109-
if sortable_revision is not None:
110-
all_artifacts.append(
111-
{
112-
'revision': group_revision,
113-
'sortable_revision': sortable_revision,
114-
'uri': file_uri,
115-
}
116-
)
108+
all_artifacts.append(
109+
{
110+
'revision': group_revision,
111+
'uri': file_uri,
112+
}
113+
)
117114
return all_artifacts
118115

119116

nimp/base_commands/download_fileset.py

Lines changed: 154 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,24 @@
2222

2323
'''Downloads a previously uploaded fileset to the local workspace'''
2424

25+
from __future__ import annotations
26+
2527
import copy
28+
import io
2629
import logging
2730
import os
2831
import shutil
32+
import subprocess
33+
import tempfile
34+
from pathlib import Path
2935
from pathlib import PurePosixPath
36+
from typing import Iterator
3037

3138
import nimp.artifacts
3239
import nimp.command
3340
import nimp.system
41+
from nimp.environment import Environment
42+
from nimp.utils import git
3443

3544

3645
class DownloadFileset(nimp.command.Command):
@@ -59,31 +68,35 @@ def configure_arguments(self, env, parser):
5968
def is_available(self, env):
    '''Always answer ``(True, '')``, whatever the environment is'''
    availability = (True, '')
    return availability
6170

62-
def run(self, env):
63-
api_context = nimp.utils.git.initialize_gitea_api_context(env)
71+
def run(self, env: Environment) -> bool:
72+
api_context = git.initialize_gitea_api_context(env)
6473

65-
artifacts_source = env.artifact_repository_source
74+
artifacts_source: str = env.artifact_repository_source
6675
if env.prefer_http:
6776
artifacts_http_source = getattr(env, 'artifact_http_repository_source', None)
6877
if artifacts_http_source:
6978
artifacts_source = artifacts_http_source
7079
else:
7180
logging.warning('prefer-http provided but no artifact_http_repository_source in configuration')
7281

73-
artifact_uri_pattern = artifacts_source.rstrip('/') + '/' + env.artifact_collection[env.fileset]
82+
artifact_uri_pattern: str = artifacts_source.rstrip('/') + '/' + str(env.artifact_collection[env.fileset])
7483

7584
install_directory = env.root_dir
7685
if env.destination:
7786
install_directory = str(PurePosixPath(install_directory) / env.format(env.destination))
7887

7988
format_arguments = copy.deepcopy(vars(env))
80-
format_arguments['revision'] = '*'
81-
logging.info('Searching %s', artifact_uri_pattern.format(**format_arguments))
89+
logging.info('Searching %s', artifact_uri_pattern.format_map({**format_arguments, 'revision': '*'}))
8290
all_artifacts = nimp.system.try_execute(
83-
lambda: nimp.artifacts.list_artifacts(artifact_uri_pattern, format_arguments, api_context), OSError
91+
lambda: nimp.artifacts.list_artifacts(artifact_uri_pattern, format_arguments, api_context),
92+
OSError,
8493
)
8594
artifact_to_download = DownloadFileset._find_matching_artifact(
86-
all_artifacts, env.revision, env.min_revision, env.max_revision, api_context
95+
all_artifacts,
96+
env.revision,
97+
env.min_revision,
98+
env.max_revision,
99+
api_context,
87100
)
88101

89102
logging.info('Downloading %s%s', artifact_to_download['uri'], ' (simulation)' if env.dry_run else '')
@@ -123,39 +136,140 @@ def run(self, env):
123136

124137
return True
125138

126-
# TODO: Handle revision comparison when identified by a hash
127139
@staticmethod
128-
def _find_matching_artifact(all_artifacts, exact_revision, minimum_revision, maximum_revision, api_context):
129-
all_artifacts = sorted(all_artifacts, key=lambda artifact: int(artifact['sortable_revision'], 16), reverse=True)
130-
has_revision_input = exact_revision or minimum_revision or maximum_revision
131-
132-
if api_context:
133-
exact_revision = nimp.utils.git.get_gitea_commit_timestamp(api_context, exact_revision)
134-
minimum_revision = nimp.utils.git.get_gitea_commit_timestamp(api_context, minimum_revision)
135-
maximum_revision = nimp.utils.git.get_gitea_commit_timestamp(api_context, maximum_revision)
136-
revision_not_found = not exact_revision and not minimum_revision and not maximum_revision
137-
if has_revision_input and revision_not_found:
138-
raise ValueError('Searched commit not found on gitea repo')
139-
140-
if not api_context and (has_revision_input is not None and not has_revision_input.isdigit()):
141-
raise ValueError(
142-
'Revision seems to be a git commit hash but missing gitea api information. Please check project_branches in project configuration.'
143-
)
140+
def _find_matching_artifact(
    all_artifacts: list[nimp.artifacts.Artifact],
    exact_revision: str | None,
    minimum_revision: str | None,
    maximum_revision: str | None,
    api_context: git.GitApiContext | None,
) -> nimp.artifacts.Artifact:
    '''Pick the artifact to download among the listed ones

    Selection steps, in order:
      1. `exact_revision`, when given, must equal the revision of an artifact.
      2. An artifact whose revision equals `maximum_revision` is returned as is.
      3. When an artifact revision or a bound may be a git revision, the
         choice is delegated to `_get_newest_revision` (which receives
         `api_context`); a None answer falls through to the next step.
      4. When every artifact revision is made of digits only, the highest
         number inside the inclusive bounds is selected.

    Raises ValueError when none of these steps yields an artifact.
    '''

    def artifact_at(wanted: str) -> nimp.artifacts.Artifact | None:
        # first artifact carrying exactly this revision string, if any
        for candidate in all_artifacts:
            if candidate['revision'] == wanted:
                return candidate
        return None

    # An exact revision either matches verbatim or the search fails
    if exact_revision is not None:
        exact_match = artifact_at(exact_revision)
        if exact_match is None:
            raise ValueError('Matching artifact not found')
        return exact_match

    # An artifact sitting right on the upper bound needs no history lookup
    if maximum_revision is not None:
        upper_match = artifact_at(maximum_revision)
        if upper_match is not None:
            return upper_match

    git_like = any(git.maybe_git_revision(a['revision']) for a in all_artifacts) or any(
        bound is not None and git.maybe_git_revision(bound) for bound in (minimum_revision, maximum_revision)
    )
    if git_like:
        newest_rev = DownloadFileset._get_newest_revision(
            revisions=[a['revision'] for a in all_artifacts],
            minimum_revision=minimum_revision,
            maximum_revision=maximum_revision,
            api_context=api_context,
        )
        if newest_rev is not None:
            return next(a for a in all_artifacts if a['revision'] == newest_rev)

    # Purely numeric revisions can be ordered as integers
    if all(a['revision'].isdigit() for a in all_artifacts):
        lower_bound = int(minimum_revision) if minimum_revision else None
        upper_bound = int(maximum_revision) if maximum_revision else None

        in_range = [
            number
            for number in (int(a['revision']) for a in all_artifacts)
            if (lower_bound is None or number >= lower_bound) and (upper_bound is None or number <= upper_bound)
        ]
        if in_range:
            newest_number = str(max(in_range))
            return next(a for a in all_artifacts if a['revision'] == newest_number)

    raise ValueError('Matching artifact not found')
189+
190+
@staticmethod
def _get_newest_revision(
    revisions: list[str],
    minimum_revision: str | None,
    maximum_revision: str | None,
    api_context: git.GitApiContext | None,
) -> str | None:
    '''Find the newest of `revisions` within the bounds, using git history

    When `api_context` carries an API client, the lookup runs in a throwaway
    bare repository that borrows the objects of the repository found in the
    current working directory (when there is one and it is not shallow).
    Without an API client, the repository of the current working directory is
    queried directly.

    Returns what `_find_newest_revision` returns, or None when no git
    repository is available to answer.
    '''
    remote: str | None = None
    if api_context is not None:
        api_client = api_context['instance'].api_client
        if api_client is not None:
            remote = f"{api_client.configuration.host}/{api_context['repo_owner']}/{api_context['repo_name']}"

    cwd_git_dir = git.get_git_dir()

    if remote is not None:
        # NOTE(review): `remote` only selects this branch; nothing here registers it
        # with the temporary repository - confirm fetching from it is handled elsewhere.
        # TemporaryDirectory already creates the directory, no mkdir needed.
        with tempfile.TemporaryDirectory(prefix="nimp_git_") as tmp_git_dir:
            subprocess.check_call(['git', 'init', '--bare'], cwd=tmp_git_dir)

            # if current workdir contains a git repo, use it as alternate to prevent unnecessary burden on remote
            if cwd_git_dir is not None and git.is_shallow_repository(cwd_git_dir) is False:
                # An alternates entry must name an object store, not the git dir itself,
                # and must be absolute: relative entries are read relative to the
                # borrowing repository's own object directory.
                local_objects = Path(cwd_git_dir, 'objects').resolve()
                if local_objects.is_dir():
                    git.add_alternates(str(local_objects), cwd=tmp_git_dir)

            return DownloadFileset._find_newest_revision(
                tmp_git_dir,
                revisions=revisions,
                minimum_revision=minimum_revision,
                maximum_revision=maximum_revision,
            )

    if cwd_git_dir is not None:
        # no remote, fallback to current git
        return DownloadFileset._find_newest_revision(
            cwd_git_dir,
            revisions=revisions,
            minimum_revision=minimum_revision,
            maximum_revision=maximum_revision,
        )

    # no current git either: nothing can order the revisions
    return None
230+
231+
@staticmethod
def _find_newest_revision(
    git_dir: str, revisions: list[str], minimum_revision: str | None, maximum_revision: str | None
) -> str | None:
    '''Return the first entry of `revisions` met while walking `git rev-list` output

    Every remote reported for `git_dir` is asked for the revisions and the
    bounds, then `git rev-list` is run on all of them. Its output is scanned
    in order: commits listed before `maximum_revision` are skipped, and the
    scan stops once `minimum_revision` has been examined.

    Returns None when `revisions` is empty, when no listed commit matches or
    when `git rev-list` fails.

    NOTE(review): matching is an exact string comparison against the lines
    printed by `git rev-list`; this assumes revisions and bounds are spelled
    the same way (full object names) - confirm with the artifact naming.
    '''
    if not revisions:
        # nothing can match, and rev-list refuses to run without a revision
        return None

    wanted = set(revisions)
    if maximum_revision is not None and maximum_revision in wanted:
        # an artifact sitting right on the upper bound is the answer
        return maximum_revision

    to_fetch = [*revisions]
    if minimum_revision is not None:
        to_fetch.append(minimum_revision)
    if maximum_revision is not None:
        to_fetch.append(maximum_revision)

    for remote in git.get_remotes(git_dir):
        fetch_cmd = ['git', 'fetch', '--no-recurse-submodules', remote]
        if subprocess.call([*fetch_cmd, *to_fetch], cwd=git_dir) != 0:
            # might have failed due to one (or more) unknown ref,
            # try one-by-one and ignore failures
            for rev in to_fetch:
                subprocess.call([*fetch_cmd, rev], cwd=git_dir)

    rev_list_cmd = ['git', 'rev-list', '--ignore-missing', *revisions]
    if maximum_revision is not None:
        rev_list_cmd.append(maximum_revision)
    if minimum_revision is not None:
        rev_list_cmd.append(minimum_revision)

    process = subprocess.run(rev_list_cmd, text=True, stdout=subprocess.PIPE, cwd=git_dir)
    if process.returncode != 0:
        # explicit handling instead of an assert, which disappears under `python -O`
        logging.warning('git rev-list failed with exit code %s in %s', process.returncode, git_dir)
        return None

    # read line by line, no need to split all
    with io.StringIO(process.stdout) as buffer:
        # lines come with their trailing newline: strip it or nothing ever compares equal
        commits = (line.strip() for line in buffer)

        if maximum_revision is not None:
            # skip everything listed before the upper bound
            for commit in commits:
                if commit == maximum_revision:
                    break

        # same generator: the scan resumes right after the upper bound
        for commit in commits:
            if commit in wanted:
                return commit
            if commit == minimum_revision:
                break

    return None

nimp/utils/git.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,12 @@
2525
from __future__ import annotations
2626

2727
import logging
28+
import os
29+
import subprocess
2830
import time
2931
from datetime import datetime
3032
from datetime import timezone
33+
from pathlib import Path
3134
from typing import TypedDict
3235

3336
import giteapy
@@ -86,6 +89,10 @@ def get_commit_version(commit_hash):
8689
return output
8790

8891

92+
def maybe_git_revision(candidate: str) -> bool:
    '''cheap logic to guess whether a string may be a git revision

    True for alphanumeric strings holding at least one letter and no
    uppercase one; strings made of digits only are rejected.
    '''
    if not candidate.isalnum():
        return False
    return candidate.islower()
94+
95+
8996
def is_full_sha1(string):
9097
'''cheap logic to check if we have full git commit sha1 string'''
9198
if len(string) != 40:
@@ -144,3 +151,60 @@ def get_gitea_commit_timestamp(gitea_context: GitApiContext, commit_sha):
144151
reason = str(e.reason).lower() if hasattr(e, 'reason') else ''
145152
logging.debug(f'[GITEA API] {gitea_context["repo_owner"]}@{gitea_context["repo_name"]}@{commit_sha} {reason}')
146153
return api_commit_timestamp
154+
155+
156+
def get_git_dir(cwd: os.PathLike[str] | str | None = None) -> str | None:
    '''Ask git for the git directory seen from `cwd`

    Returns the stripped output of `git rev-parse --git-dir`, or None when
    the command exits with a non-zero status.
    '''
    result = subprocess.run(
        ["git", "rev-parse", "--git-dir"],
        cwd=cwd,
        stdout=subprocess.PIPE,
        text=True,
    )
    if result.returncode != 0:
        return None
    return result.stdout.strip()
167+
168+
169+
def is_shallow_repository(cwd: os.PathLike[str] | str | None = None) -> bool | None:
    '''Ask git whether the repository seen from `cwd` is shallow

    Returns True or False according to `git rev-parse --is-shallow-repository`,
    or None when the command fails or prints anything else.
    '''
    result = subprocess.run(
        ["git", "rev-parse", "--is-shallow-repository"],
        cwd=cwd,
        stdout=subprocess.PIPE,
        text=True,
    )
    if result.returncode != 0:
        return None

    answer = result.stdout.strip().lower()
    if answer == "true":
        return True
    if answer == "false":
        return False
    return None
180+
181+
182+
def add_alternates(*alternates: str, cwd: os.PathLike[str] | str | None = None) -> None:
    '''Append entries to the `objects/info/alternates` file of the repository at `cwd`

    Each entry is written verbatim, one per line; entries already listed in
    the file are not repeated and the order of the arguments is kept.
    Does nothing when `cwd` is not inside a git repository or when the git
    directory is not a plain directory.
    '''
    git_dir = get_git_dir(cwd)
    if git_dir is None:
        return

    # git may answer with a path relative to `cwd` (e.g. ".git" or "."):
    # anchor it there rather than on the process working directory.
    # An absolute answer overrides the anchor.
    git_dir_path = Path(cwd if cwd is not None else '.', git_dir)
    if not git_dir_path.is_dir():
        # git-dir might be a file pointing to the real git-dir
        # Ignore this case for now
        return

    alternates_file = git_dir_path / "objects" / "info" / "alternates"
    alternates_file.parent.mkdir(parents=True, exist_ok=True)

    try:
        current_alternates = alternates_file.read_text().splitlines()
    except FileNotFoundError:
        # a freshly initialized repository has no alternates file yet
        current_alternates = []

    known = set(current_alternates)
    new_alternates = []
    for alternate in alternates:
        if alternate not in known:
            known.add(alternate)
            new_alternates.append(alternate)

    if not new_alternates:
        return

    # one pathname per line, each line newline-terminated
    alternates_file.write_text(''.join(f'{entry}\n' for entry in [*current_alternates, *new_alternates]))
202+
203+
204+
def get_remotes(cwd: os.PathLike[str] | str) -> list[str]:
    '''List the lines printed by `git remote` when run from `cwd`

    The exit status is not checked: whatever was printed on stdout is returned.
    '''
    result = subprocess.run(
        ['git', 'remote'],
        cwd=cwd,
        stdout=subprocess.PIPE,
        text=True,
    )
    return result.stdout.splitlines()

0 commit comments

Comments
 (0)