Skip to content

Commit 235fb85

Browse files
Update npm pipeline to MineCodeBasePipeline
Reference: #798
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent 6cba2e4 commit 235fb85

File tree

7 files changed

+128
-114
lines changed

7 files changed

+128
-114
lines changed

minecode_pipelines/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@
88
#
99

1010

11-
VERSION = "0.0.1b60"
11+
VERSION = "0.0.1b61"

minecode_pipelines/miners/npm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343

4444
NPM_REPLICATE_REPO = "https://replicate.npmjs.com/"
4545
NPM_REGISTRY_REPO = "https://registry.npmjs.org/"
46-
NPM_TYPE = "NPM"
46+
NPM_TYPE = "npm"
4747
NPM_REPLICATE_BATCH_SIZE = 10000
4848

4949

minecode_pipelines/pipelines/mine_npm.py

Lines changed: 62 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,47 +20,88 @@
2020
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
2121
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
2222

23-
from scanpipe.pipelines import Pipeline
24-
from scanpipe.pipes import federatedcode
25-
2623
from minecode_pipelines.pipes import npm
27-
from minecode_pipelines import pipes
24+
from minecode_pipelines.pipelines import MineCodeBasePipeline
25+
from minecode_pipelines.pipelines import _mine_and_publish_packageurls
2826

2927

30-
class MineNPM(Pipeline):
28+
class MineNPM(MineCodeBasePipeline):
3129
"""
3230
Mine all packageURLs from an npm index and publish them to
3331
a FederatedCode repo.
3432
"""
3533

34+
package_batch_size = 70
35+
3636
@classmethod
3737
def steps(cls):
3838
return (
3939
cls.check_federatedcode_eligibility,
40+
cls.create_federatedcode_working_dir,
4041
cls.mine_npm_packages,
41-
cls.mine_and_publish_npm_packageurls,
42-
cls.delete_cloned_repos,
42+
cls.get_npm_packages_to_sync,
43+
cls.fetch_federation_config,
44+
cls.mine_and_publish_packageurls,
45+
cls.update_state_and_checkpoints,
46+
cls.delete_working_dir,
4347
)
4448

45-
def check_federatedcode_eligibility(self):
46-
"""
47-
Check if the project fulfills the following criteria for
48-
pushing the project result to FederatedCode.
49-
"""
50-
federatedcode.check_federatedcode_configured_and_available(logger=self.log)
51-
5249
def mine_npm_packages(self):
5350
"""Mine npm package names from npm indexes or checkpoint."""
54-
self.npm_packages, self.state, self.last_seq = npm.mine_npm_packages(logger=self.log)
51+
(
52+
self.npm_packages, self.state, self.last_seq, self.config_repo
53+
) = npm.mine_npm_packages(logger=self.log)
5554

56-
def mine_and_publish_npm_packageurls(self):
57-
"""Get npm packageURLs for all mined npm package names."""
58-
self.repos = npm.mine_and_publish_npm_packageurls(
55+
def get_npm_packages_to_sync(self):
56+
"""Get npm packages which needs to be synced using checkpoint."""
57+
self.packages, self.synced_packages = npm.get_npm_packages_to_sync(
5958
packages_file=self.npm_packages,
6059
state=self.state,
61-
last_seq=self.last_seq,
6260
logger=self.log,
6361
)
6462

65-
def delete_cloned_repos(self):
66-
pipes.delete_cloned_repos(repos=self.repos, logger=self.log)
63+
def packages_count(self):
64+
return len(self.packages)
65+
66+
def mine_packageurls(self):
67+
"""Yield npm packageURLs for all mined npm package names."""
68+
self.packages_mined = []
69+
yield from npm.mine_and_publish_npm_packageurls(
70+
packages_to_sync=self.packages,
71+
packages_mined=self.packages_mined,
72+
logger=self.log,
73+
)
74+
75+
def save_check_point(self):
76+
npm.save_mined_packages_in_checkpoint(
77+
packages_mined=self.packages_mined,
78+
synced_packages=self.synced_packages,
79+
config_repo=self.config_repo,
80+
logger=self.log,
81+
)
82+
self.packages_mined = []
83+
84+
def mine_and_publish_packageurls(self):
85+
"""Mine and publish PackageURLs."""
86+
87+
_mine_and_publish_packageurls(
88+
packageurls=self.mine_packageurls(),
89+
total_package_count=self.packages_count(),
90+
data_cluster=self.data_cluster,
91+
checked_out_repos=self.checked_out_repos,
92+
working_path=self.working_path,
93+
append_purls=self.append_purls,
94+
commit_msg_func=self.commit_message,
95+
logger=self.log,
96+
checkpoint_func=self.save_check_point,
97+
checkpoint_on_commit=True,
98+
batch_size=self.package_batch_size,
99+
)
100+
101+
def update_state_and_checkpoints(self):
102+
npm.update_state_and_checkpoints(
103+
state=self.state,
104+
last_seq=self.last_seq,
105+
config_repo=self.config_repo,
106+
logger=self.log,
107+
)

minecode_pipelines/pipelines/mine_pypi.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from minecode_pipelines.pipelines import MineCodeBasePipeline
2525
from minecode_pipelines.pipelines import _mine_and_publish_packageurls
2626

27+
2728
class MinePypi(MineCodeBasePipeline):
2829
"""
2930
Mine all packageURLs from a pypi index and publish them to

0 commit comments

Comments
 (0)