|
20 | 20 | # ScanCode.io is a free software code scanning tool from nexB Inc. and others. |
21 | 21 | # Visit https://github.com/aboutcode-org/scancode.io for support and download. |
22 | 22 |
|
23 | | -from scanpipe.pipelines import Pipeline |
24 | | -from scanpipe.pipes import federatedcode |
25 | | - |
26 | 23 | from minecode_pipelines.pipes import npm |
27 | | -from minecode_pipelines import pipes |
| 24 | +from minecode_pipelines.pipelines import MineCodeBasePipeline |
| 25 | +from minecode_pipelines.pipelines import _mine_and_publish_packageurls |
28 | 26 |
|
29 | 27 |
|
class MineNPM(MineCodeBasePipeline):
    """
    Mine all packageURLs from an npm index and publish them to
    a FederatedCode repo.
    """

    # Forwarded as ``batch_size`` to ``_mine_and_publish_packageurls``.
    package_batch_size = 70

    @classmethod
    def steps(cls):
        """Return the pipeline steps, in execution order."""
        # Steps not defined in this class are looked up on ``cls``; presumably
        # they come from MineCodeBasePipeline -- TODO confirm.
        return (
            cls.check_federatedcode_eligibility,
            cls.create_federatedcode_working_dir,
            cls.mine_npm_packages,
            cls.get_npm_packages_to_sync,
            cls.fetch_federation_config,
            cls.mine_and_publish_packageurls,
            cls.update_state_and_checkpoints,
            cls.delete_working_dir,
        )

    def mine_npm_packages(self):
        """Mine npm package names from npm indexes or checkpoint."""
        mined = npm.mine_npm_packages(logger=self.log)
        # The helper's result is unpacked into exactly four attributes.
        self.npm_packages, self.state, self.last_seq, self.config_repo = mined

    def get_npm_packages_to_sync(self):
        """Get the npm packages that still need to be synced, using the checkpoint."""
        to_sync, already_synced = npm.get_npm_packages_to_sync(
            packages_file=self.npm_packages,
            state=self.state,
            logger=self.log,
        )
        self.packages = to_sync
        self.synced_packages = already_synced

    def packages_count(self):
        """Return the number of entries in ``self.packages``."""
        return len(self.packages)

    def mine_packageurls(self):
        """Yield npm packageURLs for all mined npm package names."""
        # One list object is both stored on the instance and handed to the
        # helper, so the two names refer to the same list from here on.
        mined_so_far = []
        self.packages_mined = mined_so_far
        yield from npm.mine_and_publish_npm_packageurls(
            packages_to_sync=self.packages,
            packages_mined=mined_so_far,
            logger=self.log,
        )

    def save_check_point(self):
        """
        Pass the current ``packages_mined`` list to the npm checkpoint helper,
        then reset ``self.packages_mined`` to a new empty list.
        """
        npm.save_mined_packages_in_checkpoint(
            packages_mined=self.packages_mined,
            synced_packages=self.synced_packages,
            config_repo=self.config_repo,
            logger=self.log,
        )
        # NOTE(review): this rebinds the attribute to a new list instead of
        # emptying the existing one in place. The list that was passed to
        # ``npm.mine_and_publish_npm_packageurls`` in ``mine_packageurls`` is
        # the previous object -- confirm that helper does not keep appending
        # to it after a checkpoint.
        self.packages_mined = []

    def mine_and_publish_packageurls(self):
        """Mine and publish PackageURLs."""
        # Calling the generator function only creates the generator; nothing
        # is mined until the helper starts iterating it.
        purl_stream = self.mine_packageurls()
        package_total = self.packages_count()

        _mine_and_publish_packageurls(
            packageurls=purl_stream,
            total_package_count=package_total,
            data_cluster=self.data_cluster,
            checked_out_repos=self.checked_out_repos,
            working_path=self.working_path,
            append_purls=self.append_purls,
            commit_msg_func=self.commit_message,
            logger=self.log,
            checkpoint_func=self.save_check_point,
            checkpoint_on_commit=True,
            batch_size=self.package_batch_size,
        )

    def update_state_and_checkpoints(self):
        """
        Pass ``state``, ``last_seq`` and ``config_repo`` to
        ``npm.update_state_and_checkpoints``.
        """
        npm.update_state_and_checkpoints(
            state=self.state,
            last_seq=self.last_seq,
            config_repo=self.config_repo,
            logger=self.log,
        )
0 commit comments