3535from http import HTTPStatus
3636from threading import Lock
3737from requests import HTTPError , RequestException
38- import Levenshtein
3938
4039import sonar .logging as log
4140import sonar .platform as pf
@@ -845,9 +844,7 @@ def count_third_party_issues(self, filters: Optional[dict[str, str]] = None) ->
845844 return super ().count_third_party_issues (filters )
846845 log .debug ("Getting 3rd party issues on branches/PR" )
847846 issue_counts = {}
848- for comp in branches_or_prs .values ():
849- if not comp :
850- continue
847+ for comp in [c for c in branches_or_prs .values () if c ]:
851848 log .debug ("Getting 3rd party issues for %s" , str (comp ))
852849 for k , total in comp .count_third_party_issues (filters ).items ():
853850 if k not in issue_counts :
@@ -1398,28 +1395,19 @@ def get_list(endpoint: pf.Platform, key_list: types.KeyList = None, threads: int
13981395
13991396
def get_matching_list(endpoint: pf.Platform, pattern: str, threads: int = 8) -> dict[str, Project]:
    """Returns the projects whose keys match a regular expression

    The pattern must match the entire project key.
    An empty (or None) pattern falls back to ".+".

    :param Platform endpoint: Reference to the SonarQube platform
    :param str pattern: Regular expression to match project keys
    :param int threads: Number of threads, passed through to get_list()
    :return: the projects matching the pattern, indexed by project key
    """
    pattern = pattern or ".+"
    log.info("Listing projects matching regexp '%s'", pattern)
    # Compile once (loop invariant) and use fullmatch(): wrapping the pattern as ^pattern$ would
    # bind the anchors to only the first/last branch of a top-level alternation such as "foo|bar"
    key_regexp = re.compile(pattern)
    matches = {k: v for k, v in get_list(endpoint, threads=threads).items() if key_regexp.fullmatch(k)}
    log.info("%d project key matching regexp '%s'", len(matches), pattern)
    return matches
14131409
14141410
1415- def __similar_keys (key1 : str , key2 : str , max_distance : int = 5 ) -> bool :
1416- """Returns whether 2 project keys are similar"""
1417- if key1 == key2 :
1418- return False
1419- max_distance = min (len (key1 ) // 2 , len (key2 ) // 2 , max_distance )
1420- return len (key2 ) >= 7 and (re .match (key2 , key1 )) or Levenshtein .distance (key1 , key2 , score_cutoff = 6 ) <= max_distance
1421-
1422-
14231411def __audit_duplicates (projects_list : dict [str , Project ], audit_settings : types .ConfigSettings ) -> list [Problem ]:
14241412 """Audits for suspected duplicate projects"""
14251413 if audit_settings .get (c .AUDIT_MODE_PARAM , "" ) == "housekeeper" :
@@ -1433,7 +1421,7 @@ def __audit_duplicates(projects_list: dict[str, Project], audit_settings: types.
14331421 for key1 , p in projects_list .items ():
14341422 for key2 in projects_list :
14351423 pair = " " .join (sorted ([key1 , key2 ]))
1436- if __similar_keys (key1 , key2 , audit_settings .get ("audit.projects.duplicates.maxDifferences" , 4 )) and pair not in pair_set :
1424+ if util . similar_strings (key1 , key2 , audit_settings .get ("audit.projects.duplicates.maxDifferences" , 4 )) and pair not in pair_set :
14371425 duplicates .append (Problem (get_rule (RuleId .PROJ_DUPLICATE ), p , str (p ), key2 ))
14381426 pair_set .add (pair )
14391427 return duplicates
0 commit comments