Skip to content

Commit b9b3391

Browse files
committed
Refactor swift
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent f6c9d06 commit b9b3391

File tree

4 files changed

+86
-106
lines changed

4 files changed

+86
-106
lines changed

minecode_pipelines/miners/swift.py

Lines changed: 0 additions & 99 deletions
This file was deleted.

minecode_pipelines/pipelines/mine_swift.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ def packages_count(self):
7979

8080
def mine_packageurls(self):
8181
self.swift_packages_urls = load_swift_package_urls(swift_index_repo=self.swift_index_repo)
82+
self.log(f"Total Swift packages to process: {len(self.swift_packages_urls)}")
8283
return mine_swift_packageurls(
8384
packages_urls=self.swift_packages_urls,
8485
start_index=self.start_index,

minecode_pipelines/pipes/swift.py

Lines changed: 84 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@
2424
from pathlib import Path
2525
from packageurl import PackageURL
2626

27-
from minecode_pipelines.miners.swift import fetch_git_tags_raw
28-
from minecode_pipelines.miners.swift import get_tags_and_commits_from_git_output
29-
from minecode_pipelines.miners.swift import split_org_repo
3027
from minecode_pipelines.utils import cycle_from_index, grouper
28+
import shutil
29+
import subprocess
30+
from urllib.parse import urlparse
3131

3232
PACKAGE_BATCH_SIZE = 100
3333

@@ -39,10 +39,10 @@ def mine_swift_packageurls(packages_urls, start_index, logger):
3939
for batch_index, package_batch in enumerate(
4040
grouper(n=PACKAGE_BATCH_SIZE, iterable=packages_iter)
4141
):
42-
for item in package_batch:
43-
if not item:
42+
for package_repo_url in package_batch:
43+
if not package_repo_url:
4444
continue
45-
package_repo_url = item
45+
logger(f"Processing package repo URL: {package_repo_url}")
4646
git_ls_remote = fetch_git_tags_raw(package_repo_url, 60, logger)
4747
if not git_ls_remote:
4848
continue
@@ -83,3 +83,81 @@ def generate_package_urls(package_repo_url, tags_and_commits):
8383
updated_purls.append(purl)
8484

8585
return base_purl, updated_purls
86+
87+
88+
def is_safe_repo_url(repo_url: str) -> bool:
    """
    Return True if ``repo_url`` is an HTTPS URL hosted on github.com whose
    path ends with ".git".

    Any other scheme, any other host (including a host carrying credentials
    or a port) or a path without the ".git" suffix is rejected.
    """
    url = urlparse(repo_url)
    if url.scheme != "https":
        return False
    if url.netloc != "github.com":
        return False
    return url.path.endswith(".git")
94+
95+
96+
def fetch_git_tags_raw(repo_url: str, timeout: int = 60, logger=None) -> str | None:
    """
    Run `git ls-remote` on a GitHub repo and return its raw, stripped output.

    ``repo_url`` must pass ``is_safe_repo_url``. ``timeout`` is the maximum
    number of seconds to wait for the git command. ``logger`` is an optional
    callable accepting a single message string; when it is None, messages
    are discarded.

    Return None when the git executable is not found in PATH, when the git
    command exits with a non-zero status, or when it times out.
    Raise ValueError when ``repo_url`` is not considered safe.
    """
    # The signature advertises ``logger=None``: fall back to a no-op so that
    # the error paths below do not fail with a TypeError when no logger is given.
    log = logger if logger is not None else (lambda message: None)

    git_executable = shutil.which("git")
    if git_executable is None:
        log("Git executable not found in PATH")
        return None

    if not is_safe_repo_url(repo_url):
        raise ValueError(f"Unsafe repo URL: {repo_url}")

    try:
        # Arguments are passed as a list (no shell) and the URL was validated above.
        result = subprocess.run(  # NOQA
            [git_executable, "ls-remote", repo_url],
            capture_output=True,
            text=True,
            check=True,
            timeout=timeout,
        )
    except subprocess.CalledProcessError as e:
        log(f"Failed to fetch tags for {repo_url}: {e}")
        return None
    except subprocess.TimeoutExpired:
        log(f"Timeout fetching tags for {repo_url}")
        return None

    return result.stdout.strip()
120+
121+
122+
# FIXME: this is duplicated with the GitHub miners code
123+
def split_org_repo(url_like):
    """
    Return an (org, name) tuple extracted from ``url_like``, which is either
    a URL-like string pointing to a GitHub repo or a plain "org/name" string.

    The org and name are the last two non-empty "/"-separated segments; a
    trailing ".git" is removed from the name. Raise ValueError when fewer
    than two segments are present.

    For example:
    >>> split_org_repo('foo/bar')
    ('foo', 'bar')
    >>> split_org_repo('https://api.github.com/repos/foo/bar/')
    ('foo', 'bar')
    >>> split_org_repo('github.com/foo/bar/')
    ('foo', 'bar')
    >>> split_org_repo('git://github.com/foo/bar.git')
    ('foo', 'bar')
    """
    # Collect the non-blank path segments, ignoring surrounding whitespace.
    parts = []
    for chunk in url_like.split("/"):
        chunk = chunk.strip()
        if chunk:
            parts.append(chunk)

    if len(parts) < 2:
        raise ValueError(f"Not a GitHub-like URL: {url_like}")

    org, name = parts[-2:]

    git_suffix = ".git"
    if name.endswith(git_suffix):
        name = name[: -len(git_suffix)]
    return org, name
146+
147+
148+
def get_tags_and_commits_from_git_output(git_ls_remote):
    """
    Return a list of (tag, commit) tuples, given a `git ls-remote` output
    string.

    Only "HEAD" and "refs/tags/..." refs are kept; the "refs/tags/" prefix is
    removed from the tag. Entries are returned in the order their tag first
    appears in the output.

    Annotated tags are listed twice by `git ls-remote`: once as
    "refs/tags/<tag>" pointing to the tag object and once as
    "refs/tags/<tag>^{}" pointing to the tagged commit. These are merged in a
    single (tag, commit) entry that uses the dereferenced commit, so that no
    tag named "<tag>^{}" is ever returned.
    """
    tag_prefix = "refs/tags/"
    peeled_suffix = "^{}"

    # Maps tag -> commit; dict insertion order preserves the output order.
    commits_by_tag = {}
    for line in git_ls_remote.split("\n"):
        # line: kjwfgeklngelkfjofjeo123	refs/tags/1.2.3
        line_segments = line.split("\t")
        # segments: ["kjwfgeklngelkfjofjeo123", "refs/tags/1.2.3"]
        if len(line_segments) < 2:
            continue
        commit, ref = line_segments[0], line_segments[1]

        if ref == "HEAD":
            commits_by_tag.setdefault("HEAD", commit)
            continue

        if not ref.startswith(tag_prefix):
            continue

        # Strip only the leading prefix, not every occurrence in the ref.
        tag = ref[len(tag_prefix):]
        if tag.endswith(peeled_suffix):
            # Peeled entry of an annotated tag: this is the actual commit,
            # so it takes precedence over the tag object id.
            commits_by_tag[tag[: -len(peeled_suffix)]] = commit
        else:
            commits_by_tag.setdefault(tag, commit)

    return list(commits_by_tag.items())

pyproject-minecode_pipelines.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "flot.buildapi"
44

55
[project]
66
name = "minecode_pipelines"
7-
version = "0.0.1b44"
7+
version = "0.0.1b45"
88
description = "A library for mining packageURLs and package metadata from ecosystem repositories."
99
readme = "minecode_pipelines/README.rst"
1010
license = { text = "Apache-2.0" }

0 commit comments

Comments
 (0)