Skip to content

Commit b6649c2

Browse files
committed
fixup(FindSources): switch to new approach
* replaced all calls to get_matching_tag with calls to get_matching_source_url (3 occurrences) * rewrote get_github_info() and check_for_github_error() to raise a NotImplementedError. This will help consumers of these methods notice the change in capycli. In capycli, these methods are not used outside of FindSources. * refactored the unit test test_find_golang_url_github, for historical reasons: due to a bug in the unit test, it was the first test to break when I integrated the new approach, and so it became my workhorse for integrating the new approach.
1 parent 669e0b9 commit b6649c2

File tree

2 files changed

+68
-49
lines changed

2 files changed

+68
-49
lines changed

capycli/bom/findsources.py

Lines changed: 16 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -200,27 +200,14 @@ def get_repo_name(github_url: str) -> str:
200200
@staticmethod
201201
def get_github_info(repository_url: str, username: str = "",
202202
token: str = "") -> get_github_info_type:
203+
"""This method used to iterate through all resource pages of
204+
GitHub's /tags API, collect the results, then return a huge
205+
list with all results.
206+
Removed because this approach does not scale well and we did
207+
encounter projects with tens of thousands of tags.
203208
"""
204-
Query tag infos from GitHub.
205-
206-
In the good case a list of tags entries (= dictionaries) is returned.
207-
In the bad case a JSON error message is returned.
208-
"""
209-
length_per_page = 100
210-
page = 1
211-
tags: List[Dict[str, Any]] = []
212-
tag_url = "https://api.github.com/repos/" + repository_url + "/tags"
213-
query = "?per_page=%s&page=%s" % (length_per_page, page)
214-
tmp = FindSources.github_request(tag_url + query, username, token)
215-
if not isinstance(tmp, list):
216-
return tags
217-
tags.extend(tmp)
218-
while len(tmp) == length_per_page:
219-
page += 1
220-
query = "?per_page=%s&page=%s" % (length_per_page, page)
221-
tmp = FindSources.github_request(tag_url + query, username, token)
222-
tags.extend(tmp)
223-
return tags
209+
raise NotImplementedError(
210+
"Removed with introduction of get_matchting_source_tag!")
224211

225212
def _get_github_repo(self, github_ref: str) -> Dict[str, Any]:
226213
"""Fetch GitHub API object identified by @github_ref.
@@ -378,7 +365,7 @@ def find_github_url(self, component: Component, use_language: bool = True) -> st
378365
if len(name_match):
379366
for match in name_match:
380367
tag_info = self.github_request(match["tags_url"], self.github_name, self.github_token)
381-
source_url = self.get_matching_tag(tag_info, component.version or "", match["html_url"])
368+
source_url = self.get_matching_source_url(component.version, match["tags_url"])
382369
if len(name_match) == 1:
383370
return source_url
384371
elif source_url:
@@ -445,10 +432,7 @@ def find_golang_url(self, component: Component) -> str:
445432

446433
if repository_name.startswith("https://github.com/"):
447434
repository_name = repository_name[len("https://github.com/"):]
448-
tag_info = self.get_github_info(repository_name, self.github_name, self.github_token)
449-
tag_info_checked = self.check_for_github_error(tag_info)
450-
source_url = self.get_matching_tag(tag_info_checked, component_version,
451-
repository_name, version_prefix or "")
435+
source_url = self.get_matching_source_url(component_version, repository_name, version_prefix)
452436

453437
# component["RepositoryUrl"] = repository_name
454438
return source_url
@@ -468,26 +452,15 @@ def get_github_source_url(self, github_url: str, version: str) -> str:
468452

469453
if self.verbose:
470454
print_text(" repo_name:", repo_name)
471-
472-
tag_info = self.get_github_info(repo_name, self.github_name, self.github_token)
473-
tag_info_checked = self.check_for_github_error(tag_info)
474-
return self.get_matching_tag(tag_info_checked, version, github_url)
455+
return self.get_matching_source_url(version, repo_name)
475456

476457
def check_for_github_error(self, tag_info: get_github_info_type) -> List[Dict[str, Any]]:
477-
if isinstance(tag_info, list):
478-
# assume valid answer
479-
return tag_info
480-
481-
# check for 'rate limit exceeded' message
482-
if "message" in tag_info:
483-
if tag_info["message"].startswith("API rate limit exceeded"):
484-
print_red("GitHub API rate limit exceeded - aborting!")
485-
sys.exit(ResultCode.RESULT_ERROR_ACCESSING_SERVICE)
486-
if tag_info["message"].startswith("Bad credentials"):
487-
print_red("Invalid GitHub credential provided - aborting!")
488-
sys.exit(ResultCode.RESULT_ERROR_ACCESSING_SERVICE)
489-
490-
return []
458+
"""This method was introduced to check the output of
459+
get_github_info() for errors.
460+
Removed, because get_github_info was removed.
461+
"""
462+
raise NotImplementedError(
463+
"Removed with introduction of get_matchting_source_tag!")
491464

492465
def get_matching_tag(self, tag_info: List[Dict[str, Any]], version: str, github_url: str,
493466
version_prefix: str = "") -> str:

tests/test_find_sources.py

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -417,19 +417,65 @@ def test_get_pkg_go_repo_url_error(self, mock_requests_get: Any) -> None:
417417
repo_url = find_sources.get_pkg_go_repo_url('some/package')
418418
self.assertEqual(repo_url, 'https://pkg.go.dev/some/package')
419419

420-
@patch('capycli.bom.findsources.FindSources.get_github_info')
420+
@patch('capycli.bom.findsources.FindSources.get_matching_source_url')
421+
@patch('capycli.bom.findsources.FindSources.get_pkg_go_repo_url')
421422
@patch('capycli.bom.findsources.FindSources.get_matching_tag')
422-
def test_find_golang_url_github(self, mock_get_github_info: Any, mock_get_matching_tag: Any) -> None:
423+
@patch('capycli.bom.findsources.FindSources.get_github_info')
424+
def test_find_golang_url_github(self,
425+
mock_get_github_info: Any,
426+
mock_get_matching_tag: Any,
427+
mock_get_pkg_go_repo_url: Any,
428+
mock_get_matching_source_url: Any,
429+
) -> None:
423430
# Mocking a GitHub scenario
424-
mock_get_github_info.return_value = 'https://pkg.go.dev/github.com/opencontainers/runc'
425-
mock_get_matching_tag.return_value = 'https://github.com/opencontainers/runc/archive/refs/tags/v1.0.1.zip'
431+
runc = { # real data as of 2024-11-18
432+
'html_url': 'https://github.com/opencontainers/runc',
433+
'zipball_url': 'https://github.com/opencontainers/runc/archive/refs/tags/v1.0.1.zip',
434+
}
435+
mock_get_github_info.return_value = []
436+
mock_get_matching_tag.return_value = runc['zipball_url']
437+
mock_get_pkg_go_repo_url.return_value = runc['html_url']
438+
mock_get_matching_source_url.return_value = runc['zipball_url']
426439
find_sources = FindSources()
427440
component = MagicMock()
428441
component.name = 'github.com/opencontainers/runc'
429442
component.version = 'v1.0.1'
430-
source_url = find_sources.find_golang_url(component)
431443

432-
self.assertEqual(source_url, 'https://pkg.go.dev/github.com/opencontainers/runc')
444+
# semantic versioning, sunshine and rainbows
445+
source_url = find_sources.find_golang_url(component)
446+
self.assertEqual(source_url, runc['zipball_url'])
447+
448+
# version with +incompatible
449+
with patch.object(component, 'version', new='v1.0.1+incompatible'):
450+
source_url = find_sources.find_golang_url(component)
451+
self.assertEqual(source_url, runc['zipball_url'])
452+
453+
# '-'-separated version with commit id
454+
with patch.object(component, 'version', new='foo-bar-ThisIsACommitId'):
455+
source_url = find_sources.find_golang_url(component)
456+
self.assertEqual(source_url,
457+
runc['html_url'] + '/archive/ThisIsACommitId.zip')
458+
459+
# component name w/o github.com
460+
# with patch.object(component, 'name', new='opencontainers/runc'):
461+
# no point in testing because in this case get_pkg_go_repo_url
462+
# would return an empty string and we would never reach the
463+
# corresponding test in find_golang_url()
464+
# There is test_find_golang_url_non_github() ...
465+
466+
# missing data for remaining tests:
467+
# - opencontainers/runc/<version_prefix>
468+
# - component.name.startswith('gopkg.in')
469+
# - component.name.startswith('https://github.com')
470+
# I think these would also fail at find_golang_url()
471+
472+
# WARNING it would seem when patch()-ing a MagicMock the change
473+
# is not (always) reversed properly. Therefore this test
474+
# goes last!
475+
# not published on pkg.go.dev
476+
with patch.object(mock_get_pkg_go_repo_url, 'return_value', new=''):
477+
source_url = find_sources.find_golang_url(component)
478+
self.assertEqual(source_url, '')
433479

434480
def test_find_golang_url_non_github(self) -> None:
435481
# Mocking a non-GitHub scenario

0 commit comments

Comments
 (0)