Skip to content

Commit a9ad33c

Browse files
authored
Merge pull request #119 from aboutcode-org/116-cocoapods-pypi-support
Add cocoapods support to package.py
2 parents f0dc808 + 3b38ed8 commit a9ad33c

30 files changed

+53729
-388
lines changed

Diff for: src/fetchcode/package.py

+107-53
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,19 @@
3232
from fetchcode.package_util import GitHubSource
3333
from fetchcode.package_util import MiniupnpPackagesGitHubSource
3434
from fetchcode.package_util import OpenSSLGitHubSource
35+
from fetchcode.package_util import construct_cocoapods_package
36+
from fetchcode.package_util import get_cocoapod_tags
3537
from fetchcode.packagedcode_models import Package
38+
from fetchcode.utils import get_hashed_path
3639
from fetchcode.utils import get_response
3740

3841
router = Router()
3942

4043

4144
def info(url):
4245
"""
43-
Return data according to the `url` string
44-
`url` string can be purl too
46+
Return package metadata for a URL or PURL.
47+
Return None if there is no URL, or the URL or PURL is not supported.
4548
"""
4649
if url:
4750
try:
@@ -83,13 +86,7 @@ def get_cargo_data_from_purl(purl):
8386
crate = response.get("crate") or {}
8487
homepage_url = crate.get("homepage")
8588
code_view_url = crate.get("repository")
86-
yield Package(
87-
homepage_url=homepage_url,
88-
api_url=api_url,
89-
code_view_url=code_view_url,
90-
download_url=download_url,
91-
**purl.to_dict(),
92-
)
89+
9390
versions = response.get("versions", [])
9491
for version in versions:
9592
version_purl = PackageURL(type=purl.type, name=name, version=version.get("num"))
@@ -100,6 +97,9 @@ def get_cargo_data_from_purl(purl):
10097
download_url = None
10198
declared_license = version.get("license")
10299

100+
if purl.version and version_purl.version != purl.version:
101+
continue
102+
103103
yield Package(
104104
homepage_url=homepage_url,
105105
api_url=api_url,
@@ -109,6 +109,9 @@ def get_cargo_data_from_purl(purl):
109109
**version_purl.to_dict(),
110110
)
111111

112+
if purl.version:
113+
break
114+
112115

113116
@router.route("pkg:npm/.*")
114117
def get_npm_data_from_purl(purl):
@@ -120,39 +123,30 @@ def get_npm_data_from_purl(purl):
120123
name = purl.name
121124
version = purl.version
122125
api_url = f"{base_path}/{name}"
126+
123127
response = get_response(api_url)
124128
vcs_data = response.get("repository") or {}
125129
bugs = response.get("bugs") or {}
126-
127130
download_url = f"{base_path}/{name}/-/{name}-{version}.tgz" if version else None
128131
vcs_url = vcs_data.get("url")
129132
bug_tracking_url = bugs.get("url")
130133
license = response.get("license")
131134
homepage_url = response.get("homepage")
132135

133-
yield Package(
134-
homepage_url=homepage_url,
135-
api_url=api_url,
136-
vcs_url=vcs_url,
137-
bug_tracking_url=bug_tracking_url,
138-
download_url=download_url,
139-
declared_license=license,
140-
**purl.to_dict(),
141-
)
142-
143136
versions = response.get("versions", [])
144-
tags = []
145137
for num in versions:
146138
version = versions[num]
147139
version_purl = PackageURL(type=purl.type, name=name, version=version.get("version"))
148140
repository = version.get("repository") or {}
149141
bugs = response.get("bugs") or {}
150142
dist = version.get("dist") or {}
151-
licenses = version.get("licenses") or [{}]
152143
vcs_url = repository.get("url")
153144
download_url = dist.get("tarball")
154145
bug_tracking_url = bugs.get("url")
155-
declared_license = licenses[0].get("type")
146+
declared_license = license
147+
148+
if purl.version and version_purl.version != purl.version:
149+
continue
156150

157151
yield Package(
158152
homepage_url=homepage_url,
@@ -164,6 +158,9 @@ def get_npm_data_from_purl(purl):
164158
**version_purl.to_dict(),
165159
)
166160

161+
if purl.version:
162+
break
163+
167164

168165
@router.route("pkg:pypi/.*")
169166
def get_pypi_data_from_purl(purl):
@@ -172,6 +169,7 @@ def get_pypi_data_from_purl(purl):
172169
"""
173170
purl = PackageURL.from_string(purl)
174171
name = purl.name
172+
175173
base_path = "https://pypi.org/pypi"
176174
api_url = f"{base_path}/{name}/json"
177175
response = get_response(api_url)
@@ -182,19 +180,14 @@ def get_pypi_data_from_purl(purl):
182180
project_urls = info.get("project_urls") or {}
183181
code_view_url = get_pypi_codeview_url(project_urls)
184182
bug_tracking_url = get_pypi_bugtracker_url(project_urls)
185-
yield Package(
186-
homepage_url=homepage_url,
187-
api_url=api_url,
188-
bug_tracking_url=bug_tracking_url,
189-
code_view_url=code_view_url,
190-
declared_license=license,
191-
**purl.to_dict(),
192-
)
183+
193184
for num in releases:
194185
version_purl = PackageURL(type=purl.type, name=name, version=num)
195186
release = releases.get(num) or [{}]
196187
release = release[0]
197188
download_url = release.get("url")
189+
if purl.version and version_purl.version != purl.version:
190+
continue
198191
yield Package(
199192
homepage_url=homepage_url,
200193
api_url=api_url,
@@ -205,6 +198,9 @@ def get_pypi_data_from_purl(purl):
205198
**version_purl.to_dict(),
206199
)
207200

201+
if purl.version:
202+
break
203+
208204

209205
@router.route("pkg:github/.*")
210206
def get_github_data_from_purl(purl):
@@ -291,24 +287,24 @@ def get_bitbucket_data_from_purl(purl):
291287
bitbucket_url = "https://bitbucket.org"
292288
bug_tracking_url = f"{bitbucket_url}/{namespace}/{name}/issues"
293289
code_view_url = f"{bitbucket_url}/{namespace}/{name}"
294-
yield Package(
295-
api_url=api_url,
296-
bug_tracking_url=bug_tracking_url,
297-
code_view_url=code_view_url,
298-
**purl.to_dict(),
299-
)
290+
300291
links = response.get("links") or {}
301292
tags_url = links.get("tags") or {}
302293
tags_url = tags_url.get("href")
303294
if not tags_url:
304295
return []
305296
tags_data = get_response(tags_url)
306297
tags = tags_data.get("values") or {}
298+
307299
for tag in tags:
308300
version = tag.get("name") or ""
309301
version_purl = PackageURL(type=purl.type, namespace=namespace, name=name, version=version)
310302
download_url = f"{base_path}/{namespace}/{name}/downloads/{name}-{version}.tar.gz"
311303
code_view_url = f"{bitbucket_url}/{namespace}/{name}/src/{version}"
304+
305+
if purl.version and version_purl.version != purl.version:
306+
continue
307+
312308
yield Package(
313309
api_url=api_url,
314310
bug_tracking_url=bug_tracking_url,
@@ -317,6 +313,9 @@ def get_bitbucket_data_from_purl(purl):
317313
**version_purl.to_dict(),
318314
)
319315

316+
if purl.version:
317+
break
318+
320319

321320
@router.route("pkg:rubygems/.*")
322321
def get_rubygems_data_from_purl(purl):
@@ -325,22 +324,38 @@ def get_rubygems_data_from_purl(purl):
325324
"""
326325
purl = PackageURL.from_string(purl)
327326
name = purl.name
328-
api_url = f"https://rubygems.org/api/v1/gems/{name}.json"
329-
response = get_response(api_url)
330-
declared_license = response.get("licenses") or None
331-
homepage_url = response.get("homepage_uri")
332-
code_view_url = response.get("source_code_uri")
333-
bug_tracking_url = response.get("bug_tracker_uri")
334-
download_url = response.get("gem_uri")
335-
yield Package(
336-
homepage_url=homepage_url,
337-
api_url=api_url,
338-
bug_tracking_url=bug_tracking_url,
339-
code_view_url=code_view_url,
340-
declared_license=declared_license,
341-
download_url=download_url,
342-
**purl.to_dict(),
343-
)
327+
all_versions_url = f"https://rubygems.org/api/v1/versions/{name}.json"
328+
all_versions = get_response(all_versions_url)
329+
330+
for vers in all_versions:
331+
version_purl = PackageURL(type=purl.type, name=name, version=vers.get("number"))
332+
333+
if purl.version and version_purl.version != purl.version:
334+
continue
335+
336+
number = vers.get("number")
337+
version_api = f"https://rubygems.org/api/v2/rubygems/{name}/versions/{number}.json"
338+
version_api_response = get_response(version_api)
339+
declared_license = version_api_response.get("licenses") or None
340+
homepage_url = version_api_response.get("homepage_uri")
341+
code_view_url = version_api_response.get("source_code_uri")
342+
bug_tracking_url = version_api_response.get("bug_tracker_uri")
343+
download_url = version_api_response.get("gem_uri")
344+
repository_homepage_url = version_api_response.get("project_uri")
345+
346+
yield Package(
347+
homepage_url=homepage_url,
348+
api_url=version_api,
349+
bug_tracking_url=bug_tracking_url,
350+
code_view_url=code_view_url,
351+
declared_license=declared_license,
352+
download_url=download_url,
353+
repository_homepage_url=repository_homepage_url,
354+
**version_purl.to_dict(),
355+
)
356+
357+
if purl.version:
358+
break
344359

345360

346361
@router.route("pkg:gnu/.*")
@@ -354,6 +369,45 @@ def get_gnu_data_from_purl(purl):
354369
yield from extract_packages_from_listing(purl, source_archive_url, version_regex, [])
355370

356371

372+
@router.route("pkg:cocoapods/.*")
def get_cocoapods_data_from_purl(purl):
    """
    Yield Package objects for the cocoapods `purl` string.

    If `purl` has a version, yield at most one Package for that version;
    otherwise yield one Package for each version listed for the pod.
    Yield nothing if no version listing can be obtained for the pod.
    """
    purl = PackageURL.from_string(purl)
    name = purl.name
    cocoapods_org_url = f"https://cocoapods.org/pods/{name}"
    api = "https://cdn.cocoapods.org"
    # NOTE(review): get_hashed_path() presumably returns the "a/b/c" shard
    # directory of the pod in the CocoaPods Specs repo -- confirm in utils.
    hashed_path = get_hashed_path(name)
    hashed_path_underscore = hashed_path.replace("/", "_")
    file_prefix = "all_pods_versions_"
    spec = f"{api}/{file_prefix}{hashed_path_underscore}.txt"
    # get_cocoapod_tags() returns None when the listing cannot be fetched or
    # does not mention this pod: treat that as "no known versions" rather
    # than failing with a TypeError when iterating.
    data_list = get_cocoapod_tags(spec, name) or []

    github_prefix = "https://github.com/"

    for tag in data_list:
        version_purl = PackageURL(type=purl.type, name=name, version=tag)
        if purl.version and version_purl.version != purl.version:
            continue

        gh_repo_owner = None
        gh_repo_name = name
        podspec_api_url = f"https://raw.githubusercontent.com/CocoaPods/Specs/master/Specs/{hashed_path}/{name}/{tag}/{name}.podspec.json"
        podspec_api_response = get_response(podspec_api_url)
        # A podspec without a homepage must not crash the str check below.
        podspec_homepage = podspec_api_response.get("homepage") or ""

        # Derive the GitHub owner and repo from a github.com homepage: the
        # owner is the first path segment and the repo is the last one.
        if podspec_homepage.startswith(github_prefix):
            github_path = podspec_homepage.replace(github_prefix, "")
            github_path_segments = github_path.split("/")
            gh_repo_owner = github_path_segments[0]
            gh_repo_name = github_path_segments[-1]

        tag_pkg = construct_cocoapods_package(
            version_purl, name, hashed_path, cocoapods_org_url, gh_repo_owner, gh_repo_name, tag
        )

        yield tag_pkg

        # A versioned purl matches a single listed version: stop once found.
        if purl.version:
            break
409+
410+
357411
@dataclasses.dataclass
358412
class DirectoryListedSource:
359413
source_url: str = dataclasses.field(

Diff for: src/fetchcode/package_util.py

+98
Original file line numberDiff line numberDiff line change
@@ -289,3 +289,101 @@ def get_package_info(cls, gh_purl, package_name):
289289
# Since there will be no new releases of ipkg, it's better to
290290
# store them in a dictionary rather than fetching them every time.
291291
IPKG_RELEASES = json.loads((DATA / "ipkg_releases.json").read_text(encoding="UTF-8"))
292+
293+
294+
def get_cocoapod_tags(spec, name):
    """
    Return a list of version strings for the pod `name` found in the text
    listing at the `spec` URL, or None.

    Each line of the listing is split on "/": the first field is a pod name
    and the remaining fields are returned as its versions. Only the first
    line whose pod name is exactly `name` is used.

    Return None if no line matches or if fetching or parsing the listing
    fails for any reason (this is a best-effort lookup).
    """
    try:
        response = utils.get_text_response(spec)
        for line in response.strip().splitlines():
            fields = line.strip().split("/")
            # Exact match on the first field so that a pod whose name is only
            # a prefix of another pod name is never confused with it.
            if fields[0] == name:
                return fields[1:]
        return None
    except Exception:
        # Deliberately best-effort, but do not swallow KeyboardInterrupt or
        # SystemExit as a bare "except:" would.
        return None
308+
309+
310+
def construct_cocoapods_package(
    purl, name, hashed_path, cocoapods_org_url, gh_repo_owner, gh_repo_name, tag
):
    """
    Return a Package for the `tag` version of the pod `name`.

    The data comes from the pod's podspec JSON in the CocoaPods/Specs
    repository (located with `hashed_path`, `name` and `tag`) and, when both
    `gh_repo_owner` and `gh_repo_name` are provided, from the GitHub
    repository API.

    `purl` provides the package identity fields and `cocoapods_org_url` is
    stored as the repository homepage URL. The Package version is set to
    `tag`, or to the podspec source tag when that tag starts with "v" and
    the source is a GitHub ".git" URL without an "http" archive.
    """
    vcs_url = None
    bug_tracking_url = None
    code_view_url = None
    declared_license = None
    primary_language = None

    if gh_repo_owner and gh_repo_name:
        api_url = f"https://api.github.com/repos/{gh_repo_owner}/{gh_repo_name}"
        # NOTE(review): the repo data is fetched before the HEAD status is
        # known; the call order is kept as-is on purpose. Confirm whether
        # get_github_rest() can fail on a missing repo before reordering.
        gh_repo_api_response = utils.get_github_rest(api_url)
        gh_repo_api_head_request = utils.make_head_request(api_url)

        if gh_repo_api_head_request.status_code == 200:
            vcs_url = gh_repo_api_response.get("git_url")
            license_data = gh_repo_api_response.get("license") or {}
            declared_license = license_data.get("spdx_id")
            primary_language = gh_repo_api_response.get("language")

        github_repo_url = f"https://github.com/{gh_repo_owner}/{gh_repo_name}"
        bug_tracking_url = f"{github_repo_url}/issues"
        code_view_url = github_repo_url

    podspec_api_url = f"https://raw.githubusercontent.com/CocoaPods/Specs/master/Specs/{hashed_path}/{name}/{tag}/{name}.podspec.json"
    podspec_api_response = utils.get_response(podspec_api_url)
    # The podspec homepage is always the one reported, even when a GitHub
    # repository was found.
    homepage_url = podspec_api_response.get("homepage")

    # Use the podspec license (a string or a mapping, stored unchanged) only
    # when GitHub did not provide an SPDX id.
    if not declared_license:
        declared_license = podspec_api_response.get("license")

    source = podspec_api_response.get("source")
    download_url = None
    if isinstance(source, dict):
        git_url = source.get("git", "")
        http_url = source.get("http", "")
        if http_url:
            # A direct archive URL wins; vcs_url is left untouched.
            download_url = http_url
        elif git_url:
            if git_url.endswith(".git") and git_url.startswith("https://github.com/"):
                # Build the GitHub tag archive URL from the repo URL without
                # its ".git" suffix.
                gh_path = git_url[:-4]
                github_tag = source.get("tag")
                if github_tag and github_tag.startswith("v"):
                    tag = github_tag
                download_url = f"{gh_path}/archive/refs/tags/{tag}.tar.gz"
            vcs_url = git_url
    elif isinstance(source, str):
        if not vcs_url:
            vcs_url = source

    purl_pkg = Package(
        homepage_url=homepage_url,
        api_url=podspec_api_url,
        bug_tracking_url=bug_tracking_url,
        code_view_url=code_view_url,
        download_url=download_url,
        declared_license=declared_license,
        primary_language=primary_language,
        repository_homepage_url=cocoapods_org_url,
        vcs_url=vcs_url,
        **purl.to_dict(),
    )
    # `tag` may have been replaced above by the "v"-prefixed source tag.
    purl_pkg.version = tag

    return purl_pkg

0 commit comments

Comments
 (0)