Skip to content

Commit c994bfb

Browse files
committed
Move Levenshtein distance calc in utilities
1 parent dee7f26 commit c994bfb

File tree

2 files changed

+15
-17
lines changed

2 files changed

+15
-17
lines changed

sonar/projects.py

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
from http import HTTPStatus
3636
from threading import Lock
3737
from requests import HTTPError, RequestException
38-
import Levenshtein
3938

4039
import sonar.logging as log
4140
import sonar.platform as pf
@@ -845,9 +844,7 @@ def count_third_party_issues(self, filters: Optional[dict[str, str]] = None) ->
845844
return super().count_third_party_issues(filters)
846845
log.debug("Getting 3rd party issues on branches/PR")
847846
issue_counts = {}
848-
for comp in branches_or_prs.values():
849-
if not comp:
850-
continue
847+
for comp in [c for c in branches_or_prs.values() if c]:
851848
log.debug("Getting 3rd party issues for %s", str(comp))
852849
for k, total in comp.count_third_party_issues(filters).items():
853850
if k not in issue_counts:
@@ -1398,28 +1395,19 @@ def get_list(endpoint: pf.Platform, key_list: types.KeyList = None, threads: int
13981395

13991396

14001397
def get_matching_list(endpoint: pf.Platform, pattern: str, threads: int = 8) -> dict[str, Project]:
1401-
"""
1398+
"""Returns the list of projects whose keys are matching the pattern
1399+
14021400
:param Platform endpoint: Reference to the SonarQube platform
14031401
:param str pattern: Regular expression to match project keys
14041402
:return: the list of all projects matching the pattern
1405-
:rtype: dict{key: Project}
14061403
"""
1407-
if not pattern or pattern == ".*":
1408-
return get_list(endpoint, threads=threads)
1404+
pattern = pattern or ".+"
14091405
log.info("Listing projects matching regexp '%s'", pattern)
14101406
matches = {k: v for k, v in get_list(endpoint, threads=threads).items() if re.match(rf"^{pattern}$", k)}
14111407
log.info("%d project key matching regexp '%s'", len(matches), pattern)
14121408
return matches
14131409

14141410

1415-
def __similar_keys(key1: str, key2: str, max_distance: int = 5) -> bool:
1416-
"""Returns whether 2 project keys are similar"""
1417-
if key1 == key2:
1418-
return False
1419-
max_distance = min(len(key1) // 2, len(key2) // 2, max_distance)
1420-
return len(key2) >= 7 and (re.match(key2, key1)) or Levenshtein.distance(key1, key2, score_cutoff=6) <= max_distance
1421-
1422-
14231411
def __audit_duplicates(projects_list: dict[str, Project], audit_settings: types.ConfigSettings) -> list[Problem]:
14241412
"""Audits for suspected duplicate projects"""
14251413
if audit_settings.get(c.AUDIT_MODE_PARAM, "") == "housekeeper":
@@ -1433,7 +1421,7 @@ def __audit_duplicates(projects_list: dict[str, Project], audit_settings: types.
14331421
for key1, p in projects_list.items():
14341422
for key2 in projects_list:
14351423
pair = " ".join(sorted([key1, key2]))
1436-
if __similar_keys(key1, key2, audit_settings.get("audit.projects.duplicates.maxDifferences", 4)) and pair not in pair_set:
1424+
if util.similar_strings(key1, key2, audit_settings.get("audit.projects.duplicates.maxDifferences", 4)) and pair not in pair_set:
14371425
duplicates.append(Problem(get_rule(RuleId.PROJ_DUPLICATE), p, str(p), key2))
14381426
pair_set.add(pair)
14391427
return duplicates

sonar/utilities.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636
from copy import deepcopy
3737
import requests
3838

39+
import Levenshtein
40+
3941
import sonar.logging as log
4042
from sonar import version, errcodes
4143
from sonar.util import types, cache_helper
@@ -813,3 +815,11 @@ def flatten(original_dict: dict[str, any]) -> dict[str, any]:
813815
else:
814816
flat_dict[k] = v
815817
return flat_dict
818+
819+
820+
def similar_strings(key1: str, key2: str, max_distance: int = 5) -> bool:
    """Returns whether 2 strings (typically project keys) are similar, but not equal

    Two different strings are considered similar when either:
    - key2 is at least 7 characters long and key1 starts with key2, or
    - their Levenshtein distance does not exceed the allowed maximum

    :param str key1: First string to compare
    :param str key2: Second string to compare
    :param int max_distance: Maximum Levenshtein distance for the strings to be considered
        similar, capped to half the length of the shortest of the 2 strings
    :return: Whether the 2 strings are similar but different
    """
    if key1 == key2:
        return False
    # Literal prefix test: keys may contain regexp metacharacters ('.', '+', '(' ...)
    # so key2 must not be interpreted as a regular expression
    if len(key2) >= 7 and key1.startswith(key2):
        return True
    max_distance = min(len(key1) // 2, len(key2) // 2, max_distance)
    # The cutoff must follow max_distance: beyond the cutoff the library returns cutoff + 1,
    # so a fixed cutoff lower than max_distance would flag any pair of strings as similar
    return Levenshtein.distance(key1, key2, score_cutoff=max_distance) <= max_distance

0 commit comments

Comments
 (0)