Skip to content

Commit 33a3e82

Browse files
committed
Store cran_db in pipeline working_path
Fix mine_cran_packageurls return type. Update minecode-pipelines version to 0.0.1b57. Signed-off-by: ziad hany <[email protected]>
1 parent 15b4359 commit 33a3e82

File tree

4 files changed

+10
-8
lines changed

4 files changed

+10
-8
lines changed

minecode_pipelines/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@
88
#
99

1010

11-
VERSION = "0.0.1b56"
11+
VERSION = "0.0.1b57"

minecode_pipelines/pipelines/mine_cran.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def download_cran_db(self):
4444
"""
4545
Download the full CRAN package database
4646
"""
47-
self.db_path = fetch_cran_db(logger=self.log)
47+
self.db_path = fetch_cran_db(working_path=self.working_path, logger=self.log)
4848

4949
def packages_count(self):
5050
"""

minecode_pipelines/pipes/cran.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,21 @@
2222

2323
import json
2424
from pathlib import Path
25+
from typing import Iterable
26+
from typing import Tuple
27+
from typing import List
28+
2529
import requests
2630
from packageurl import PackageURL
2731
from aboutcode.hashid import get_core_purl
28-
import tempfile
2932

3033

31-
def fetch_cran_db(logger) -> Path:
34+
def fetch_cran_db(working_path, logger) -> Path:
3235
"""
3336
Download the CRAN package database (~250MB JSON) in a memory-efficient way.
3437
Saves it to a file instead of loading everything into memory.
3538
"""
36-
temp_dir = Path(tempfile.mkdtemp())
37-
output_path = temp_dir / "cran_db.json"
39+
output_path = working_path / "cran_db.json"
3840
logger(f"Target download path: {output_path}")
3941

4042
url = "https://crandb.r-pkg.org/-/all"
@@ -47,7 +49,7 @@ def fetch_cran_db(logger) -> Path:
4749
return output_path
4850

4951

50-
def mine_cran_packageurls(db_path: Path) -> list:
52+
def mine_cran_packageurls(db_path: Path) -> Iterable[Tuple[str, List[str]]]:
5153
"""
5254
Extract package names and their versions from a CRAN DB JSON file.
5355
Yields a tuple: (base_purl, list_of_purls)

pyproject-minecode_pipelines.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "flot.buildapi"
44

55
[project]
66
name = "minecode_pipelines"
7-
version = "0.0.1b56"
7+
version = "0.0.1b57"
88
description = "A library for mining packageURLs and package metadata from ecosystem repositories."
99
readme = "minecode_pipelines/README.rst"
1010
license = { text = "Apache-2.0" }

0 commit comments

Comments
 (0)