2020# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
2121# Visit https://github.com/aboutcode-org/scancode.io for support and download.
2222
23- from minecode_pipelines import VERSION
24- from minecode_pipelines .pipes import write_packageurls_to_file
25-
2623from minecode_pipelines .miners .cpan import get_cpan_packages
2724from minecode_pipelines .miners .cpan import get_cpan_packageurls
2825from minecode_pipelines .miners .cpan import CPAN_REPO
3229
3330from aboutcode .hashid import get_package_base_dir
3431from packageurl import PackageURL
35- from scanpipe .pipes .federatedcode import clone_repository
36-
37- from scanpipe .pipes .federatedcode import commit_changes
38- from scanpipe .pipes .federatedcode import push_changes
39-
4032
4133# If True, show full details on fetching packageURL for
4234# a package name present in the index
4537PACKAGE_BATCH_SIZE = 500
4638
4739
48- # We are testing and storing mined packageURLs in one single repo per ecosystem for now
49- MINECODE_DATA_CPAN_REPO = "https://github.com/aboutcode-data/minecode-data-cpan-test"
50-
51-
5240def mine_cpan_packages (logger = None ):
5341 if logger :
5442 logger ("Getting packages from cpan index" )
@@ -66,21 +54,15 @@ def mine_and_publish_cpan_packageurls(package_path_by_name, logger=None):
6654 if not package_path_by_name :
6755 return
6856
69- # clone repo
70- cloned_data_repo = clone_repository (repo_url = MINECODE_DATA_CPAN_REPO )
71- if logger :
72- logger (f"{ MINECODE_DATA_CPAN_REPO } repo cloned at: { cloned_data_repo .working_dir } " )
73-
57+ packageurls_by_base_purl = {}
7458 for package_batch in grouper (n = PACKAGE_BATCH_SIZE , iterable = package_path_by_name .keys ()):
7559 packages_mined = []
76- purls = []
77- purl_files = []
7860
7961 if logger and LOG_PACKAGEURL_DETAILS :
8062 logger ("Starting package mining for a batch of packages" )
8163
8264 for package_name in package_batch :
83- if not package_name :
65+ if not package_name or package_name in packages_mined :
8466 continue
8567
8668 # fetch packageURLs for package
@@ -106,41 +88,12 @@ def mine_and_publish_cpan_packageurls(package_path_by_name, logger=None):
10688
10789 # get repo and path for package
10890 base_purl = PackageURL (type = CPAN_TYPE , name = package_name ).to_string ()
109- package_base_dir = get_package_base_dir (purl = base_purl )
110-
11191 if logger and LOG_PACKAGEURL_DETAILS :
112- logger (f"writing packageURLs for package: { base_purl } at: { package_base_dir } " )
92+ logger (f"fetched packageURLs for package: { base_purl } " )
11393 purls_string = " " .join (packageurls )
11494 logger (f"packageURLs: { purls_string } " )
11595
116- # write packageURLs to file
117- purl_file = write_packageurls_to_file (
118- repo = cloned_data_repo ,
119- base_dir = package_base_dir ,
120- packageurls = packageurls ,
121- )
122- purl_files .append (purl_file )
123- purls .append (base_purl )
124-
12596 packages_mined .append (package_name )
126-
127- if logger :
128- purls_string = " " .join (purls )
129- logger ("Committing and pushing changes for a batch of packages: " )
130- logger (f"{ purls_string } " )
131-
132- # commit changes
133- commit_changes (
134- repo = cloned_data_repo ,
135- files_to_commit = purl_files ,
136- purls = purls ,
137- mine_type = "packageURL" ,
138- tool_name = "pkg:cpan/minecode-pipelines" ,
139- tool_version = VERSION ,
140- )
141-
142- # Push changes to remote repository
143- push_changes (repo = cloned_data_repo )
144-
145- repos_to_clean = [cloned_data_repo ]
146- return repos_to_clean
97+ packageurls_by_base_purl [base_purl ] = packageurls
98+
99+ return packageurls_by_base_purl
0 commit comments