1010from bs4 import BeautifulSoup as bs
1111from defusedxml .ElementTree import parse
1212import re
13+ from pathlib import Path
1314import fosslight_util .constant as constant
1415import fosslight_dependency .constant as const
1516from fosslight_dependency ._package_manager import PackageManager
@@ -66,6 +67,7 @@ def run_plugin(self):
6667 def add_plugin_in_pom (self , pom_backup ):
6768 ret = False
6869 xml = 'xml'
70+ f_content = None
6971
7072 manifest_file = const .SUPPORT_PACKAE .get (self .package_manager_name )
7173 if os .path .isfile (manifest_file ) != 1 :
@@ -94,25 +96,32 @@ def add_plugin_in_pom(self, pom_backup):
9496
9597 tmp_plugin = bs (license_maven_plugin , xml )
9698
97- license_maven_plugins = f"<plugins>{ license_maven_plugin } <plugins>"
99+ license_maven_plugins = f"<plugins>{ license_maven_plugin } </ plugins>"
98100 tmp_plugins = bs (license_maven_plugins , xml )
99101
100102 with open (pom_backup , 'r' , encoding = 'utf8' ) as f :
101103 f_xml = f .read ()
102104 f_content = bs (f_xml , xml )
103105
104- build = f_content .find ('build' )
105- if build is not None :
106- plugins = build .find ('plugins' )
107- if plugins is not None :
108- plugins .append (tmp_plugin .plugin )
109- ret = True
110- else :
111- build .append (tmp_plugins .plugins )
112- ret = True
106+ build = f_content .find ('build' )
107+ if build is not None :
108+ plugins = build .find ('plugins' )
109+ if plugins is not None :
110+ plugins .append (tmp_plugin .plugin )
111+ ret = True
112+ else :
113+ build .append (tmp_plugins .plugins )
114+ ret = True
115+ else :
116+ project = f_content .find ('project' )
117+ if project is not None :
118+ build_with_plugins = f"<build>{ license_maven_plugins } </build>"
119+ tmp_build = bs (build_with_plugins , xml )
120+ project .append (tmp_build .build )
121+ ret = True
113122 except Exception as e :
114123 ret = False
115- logger .error (f"Failed to add plugin in pom : { e } " )
124+ logger .warning (f"Failed to add plugin in pom : { e } " )
116125
117126 if ret :
118127 with open (manifest_file , "w" , encoding = 'utf8' ) as f_w :
@@ -196,14 +205,88 @@ def collect_source_download_urls(self, include_groups=None, include_artifacts=No
196205 self ._parse_downloaded_from_lines_mvn (proc .stdout )
197206 else :
198207 logger .debug (f"dependency:resolve failed (rc={ proc .returncode } )" )
208+ if not self .download_url_map and (include_groups and include_artifacts ):
209+ logger .debug ("No download URLs found, attempting to reconstruct from local repository" )
210+ self ._collect_urls_from_local_repository (include_groups , include_artifacts )
199211 except Exception as e :
200212 logger .debug (f"Error occurred while collecting source download URLs: { e } " )
201213 finally :
202214 if current_mode :
203215 change_file_mode (cmd_mvn , current_mode )
204216
def _collect_urls_from_local_repository(self, include_groups=None, include_artifacts=None):
    """Rebuild source download URLs from the local Maven repository cache.

    Looks under ``~/.m2/repository`` for every combination of the given
    group ids and artifact ids. Each artifact directory that exists is
    handed to ``_scan_artifact_versions`` together with the repository
    id -> URL map returned by ``_parse_pom_repositories``.

    :param include_groups: iterable of dot-separated Maven group ids;
        nothing is scanned when it is empty or None.
    :param include_artifacts: iterable of Maven artifact ids; nothing is
        scanned when it is empty or None.
    :return: None. Any exception is logged at debug level and swallowed.
    """
    try:
        local_repo = Path.home().joinpath(".m2", "repository")
        if not local_repo.exists():
            return

        repositories = self._parse_pom_repositories()
        if not (include_groups and include_artifacts):
            return

        for group in include_groups:
            # Group ids map onto nested directories: org.foo -> org/foo.
            group_dir = local_repo / group.replace('.', '/')
            for artifact in include_artifacts:
                candidate = group_dir / artifact
                if candidate.exists():
                    self._scan_artifact_versions(candidate, group, artifact, repositories)
    except Exception as e:
        logger.debug(f"Failed to collect URLs from local repository: {e}")
232+
def _parse_pom_repositories(self):
    """Map repository ids to base URLs declared in the project's pom.xml.

    Reads ``pom.xml`` from ``self.input_dir`` and collects every
    ``<repository>`` element that has both an ``<id>`` and a ``<url>``.
    URLs are stored without surrounding whitespace or a trailing slash.
    Entries whose id or URL is blank are skipped, because a blank URL
    would later produce a download URL with no host part.

    :return: dict of repository id -> base URL. Empty when pom.xml does
        not exist or cannot be read/parsed (the error is logged at debug
        level and swallowed).
    """
    repo_map = {}
    try:
        pom_file = os.path.join(self.input_dir, 'pom.xml')
        if not os.path.exists(pom_file):
            return repo_map

        with open(pom_file, 'r', encoding='utf8') as f:
            soup = bs(f.read(), 'xml')

        for repo in soup.find_all('repository'):
            id_tag = repo.find('id')
            url_tag = repo.find('url')
            # Test presence explicitly instead of relying on Tag truthiness.
            if id_tag is None or url_tag is None:
                continue
            repo_id = id_tag.text.strip()
            repo_url = url_tag.text.strip().rstrip('/')
            if repo_id and repo_url:
                repo_map[repo_id] = repo_url
    except Exception as e:
        logger.debug(f"Failed to parse pom repositories: {e}")
    return repo_map
250+
def _scan_artifact_versions(self, artifact_path, group_id, artifact_id, repo_map):
    """Record a sources-jar URL for each locally cached version of an artifact.

    Every sub-directory of ``artifact_path`` is treated as a version. Its
    ``_remote.repositories`` file is read line by line; lines of the form
    ``<filename>><repo_id>=`` name the repository a file came from. For
    the first ``*-sources.jar`` entry whose repository id is known, the
    URL ``<repo_url>/<group path>/<artifact_id>/<version>/<filename>`` is
    stored in ``self.download_url_map`` under ``group:artifact:version``.

    :param artifact_path: ``pathlib.Path`` of the artifact directory.
    :param group_id: dot-separated Maven group id.
    :param artifact_id: Maven artifact id.
    :param repo_map: dict of repository id -> base URL; the id
        ``central`` falls back to the Maven Central URL when absent.
    :return: None. Any exception is logged at debug level and swallowed.
    """
    central_url = 'https://repo.maven.apache.org/maven2'
    try:
        # Invariant for all versions, so compute it once.
        group_path = group_id.replace('.', '/')
        for version_dir in artifact_path.iterdir():
            if not version_dir.is_dir():
                continue
            remote_repos_file = version_dir / "_remote.repositories"
            if not remote_repos_file.exists():
                continue
            version = version_dir.name

            # Explicit encoding, matching the other open() calls in this
            # file, so the result does not depend on the platform default.
            with open(remote_repos_file, 'r', encoding='utf8', errors='replace') as f:
                for line in f:
                    if line.startswith('#') or '>' not in line:
                        continue
                    parts = line.strip().split('>')
                    if len(parts) != 2:
                        continue
                    filename = parts[0]
                    repo_id = parts[1].rstrip('=')

                    # Only a real sources jar qualifies; a name that merely
                    # contains the marker (e.g. a checksum file) does not.
                    if not filename.endswith('-sources.jar'):
                        continue

                    if repo_id in repo_map:
                        repo_url = repo_map[repo_id]
                    elif repo_id == 'central':
                        repo_url = central_url
                    else:
                        continue

                    url = f"{repo_url}/{group_path}/{artifact_id}/{version}/{filename}"
                    key = f"{group_id}:{artifact_id}:{version}"
                    self.download_url_map[key] = url
                    logger.debug(f"Reconstructed URL from local repo: {key} -> {url}")
                    # One sources jar per version is enough.
                    break
    except Exception as e:
        logger.debug(f"Failed to scan artifact versions: {e}")
285+
205286 def _parse_downloaded_from_lines_mvn (self , stdout_text : str ):
206287 current_gav = None
288+ tld_roots = {'com' , 'org' , 'io' , 'net' , 'edu' , 'gov' , 'mil' , 'co' , 'de' , 'fr' , 'uk' , 'kr' , 'jp' , 'cn' }
289+
207290 for raw in stdout_text .splitlines ():
208291 line = raw .strip ()
209292 try :
@@ -217,6 +300,26 @@ def _parse_downloaded_from_lines_mvn(self, stdout_text: str):
217300 if not m :
218301 continue
219302 url = m .group (1 )
303+
304+ if not current_gav :
305+ parts = url .split ('/' )
306+ if len (parts ) >= 6 and parts [0 ].startswith ('http' ):
307+ filename = parts [- 1 ]
308+ version = parts [- 2 ]
309+ artifactid = parts [- 3 ]
310+
311+ if filename .startswith (f"{ artifactid } -{ version } " ):
312+ artifact_idx = len (parts ) - 3
313+ group_start_idx = - 1
314+ for i in range (artifact_idx - 1 , 2 , - 1 ):
315+ if parts [i ] in tld_roots :
316+ group_start_idx = i
317+ break
318+ if group_start_idx > 0 :
319+ group_parts = parts [group_start_idx :artifact_idx ]
320+ groupid = '.' .join (group_parts )
321+ current_gav = (groupid , artifactid , version )
322+
220323 if not current_gav :
221324 continue
222325 groupid , artifactid , version = current_gav
@@ -228,6 +331,8 @@ def _parse_downloaded_from_lines_mvn(self, stdout_text: str):
228331 prev = self .download_url_map .get (key )
229332 if (prev is None ) or (('-sources.' in url ) and ('-sources.' not in (prev or '' ))):
230333 self .download_url_map [key ] = url
334+
335+ current_gav = None
231336 except Exception as e :
232337 logger .debug (f"Failed to parse mvn line: { line } ({ e } )" )
233338
0 commit comments