Skip to content

Commit 458eff9

Browse files
authored
refactor(dependency_getter): extract requirements files extraction (#886)
* refactor(dependency_getter): extract requirements file extraction
* refactor(dependency_getter): extract requirements files extraction
* test(utils): remove obsolete `FUTURE_DEPRECATED_OBSOLETE_ARGUMENT`
* test(dependency_getter): add unsupported spec case
1 parent 3fc6405 commit 458eff9

File tree

3 files changed: 100 additions (+100) and 86 deletions (-86)

python/deptry/dependency_getter/requirements_files.py

Lines changed: 96 additions & 82 deletions
Original file line number | Diff line number | Diff line change
@@ -6,11 +6,15 @@
66
import re
77
from dataclasses import dataclass
88
from pathlib import Path
9+
from typing import TYPE_CHECKING
910
from urllib.parse import urlparse
1011

1112
from deptry.dependency import Dependency
1213
from deptry.dependency_getter.base import DependenciesExtract, DependencyGetter
1314

15+
if TYPE_CHECKING:
16+
from collections.abc import Mapping, Sequence
17+
1418

1519
@dataclass
1620
class RequirementsTxtDependencyGetter(DependencyGetter):
@@ -20,23 +24,13 @@ class RequirementsTxtDependencyGetter(DependencyGetter):
2024
requirements_files_dev: tuple[str, ...] = ("dev-requirements.txt", "requirements-dev.txt")
2125

2226
def get(self) -> DependenciesExtract:
23-
dependencies = list(
24-
itertools.chain(
25-
*(self._get_dependencies_from_requirements_files(file_name) for file_name in self.requirements_files)
26-
)
27-
)
28-
29-
dev_dependencies = list(
30-
itertools.chain(
31-
*(
32-
self._get_dependencies_from_requirements_files(file_name)
33-
for file_name in self._scan_for_dev_requirements_files()
34-
)
35-
)
27+
return DependenciesExtract(
28+
get_dependencies_from_requirements_files(self.requirements_files, self.package_module_name_map),
29+
get_dependencies_from_requirements_files(
30+
self._scan_for_dev_requirements_files(), self.package_module_name_map
31+
),
3632
)
3733

38-
return DependenciesExtract(dependencies, dev_dependencies)
39-
4034
def _scan_for_dev_requirements_files(self) -> list[str]:
4135
"""
4236
Check if any of the files passed as requirements_files_dev exist, and if so; return them.
@@ -46,83 +40,103 @@ def _scan_for_dev_requirements_files(self) -> list[str]:
4640
logging.debug("Found files with development requirements! %s", dev_requirements_files)
4741
return dev_requirements_files
4842

49-
def _get_dependencies_from_requirements_files(self, file_name: str, is_dev: bool = False) -> list[Dependency]:
50-
logging.debug("Scanning %s for %s", file_name, "dev dependencies" if is_dev else "dependencies")
51-
dependencies = []
5243

53-
file_path = Path(file_name)
44+
def get_dependencies_from_requirements_files(
45+
file_names: Sequence[str], package_module_name_map: Mapping[str, Sequence[str]], is_dev: bool = False
46+
) -> list[Dependency]:
47+
return list(
48+
itertools.chain(
49+
*(
50+
get_dependencies_from_requirements_file(file_name, package_module_name_map, is_dev)
51+
for file_name in file_names
52+
)
53+
)
54+
)
55+
56+
57+
def get_dependencies_from_requirements_file(
58+
file_name: str, package_module_name_map: Mapping[str, Sequence[str]], is_dev: bool = False
59+
) -> list[Dependency]:
60+
logging.debug("Scanning %s for %s", file_name, "dev dependencies" if is_dev else "dependencies")
61+
dependencies = []
62+
63+
file_path = Path(file_name)
64+
65+
with file_path.open() as f:
66+
data = f.readlines()
67+
68+
for line in data:
69+
dependency = _extract_dependency_from_line(line, file_path, package_module_name_map)
70+
if dependency:
71+
dependencies.append(dependency)
72+
73+
return dependencies
74+
75+
76+
def _extract_dependency_from_line(
77+
line: str, file_path: Path, package_module_name_map: Mapping[str, Sequence[str]]
78+
) -> Dependency | None:
79+
"""
80+
Extract a dependency from a single line of a requirements.txt file.
81+
"""
82+
line = _remove_comments_from(line)
83+
line = _remove_newlines_from(line)
84+
name = _find_dependency_name_in(line)
85+
if name:
86+
return Dependency(
87+
name=name,
88+
definition_file=file_path,
89+
module_names=package_module_name_map.get(name),
90+
)
91+
else:
92+
return None
5493

55-
with file_path.open() as f:
56-
data = f.readlines()
5794

58-
for line in data:
59-
dependency = self._extract_dependency_from_line(line, file_path)
60-
if dependency:
61-
dependencies.append(dependency)
95+
def _find_dependency_name_in(line: str) -> str | None:
96+
"""
97+
Find the dependency name of a dependency specified according to the pip-standards for requirement.txt
98+
"""
99+
if _line_is_url(line):
100+
return _extract_name_from_url(line)
101+
else:
102+
match = re.search("^[^-][a-zA-Z0-9-_]+", line)
103+
if match:
104+
return match.group(0)
105+
return None
62106

63-
return dependencies
64107

65-
def _extract_dependency_from_line(self, line: str, file_path: Path) -> Dependency | None:
66-
"""
67-
Extract a dependency from a single line of a requirements.txt file.
68-
"""
69-
line = self._remove_comments_from(line)
70-
line = self._remove_newlines_from(line)
71-
name = self._find_dependency_name_in(line)
72-
if name:
73-
return Dependency(
74-
name=name,
75-
definition_file=file_path,
76-
module_names=self.package_module_name_map.get(name),
77-
)
78-
else:
79-
return None
108+
def _remove_comments_from(line: str) -> str:
109+
"""
110+
Removes comments from a line. A comment is defined as any text
111+
following a '#' that is either at the start of the line or preceded by a space.
112+
This ensures that fragments like '#egg=' in URLs are not mistakenly removed.
113+
"""
114+
return re.sub(r"(?<!\S)#.*", "", line).strip()
80115

81-
def _find_dependency_name_in(self, line: str) -> str | None:
82-
"""
83-
Find the dependency name of a dependency specified according to the pip-standards for requirement.txt
84-
"""
85-
if self._line_is_url(line):
86-
return self._extract_name_from_url(line)
87-
else:
88-
match = re.search("^[^-][a-zA-Z0-9-_]+", line)
89-
if match:
90-
return match.group(0)
91-
return None
92116

93-
@staticmethod
94-
def _remove_comments_from(line: str) -> str:
95-
"""
96-
Removes comments from a line. A comment is defined as any text
97-
following a '#' that is either at the start of the line or preceded by a space.
98-
This ensures that fragments like '#egg=' in URLs are not mistakenly removed.
99-
"""
100-
return re.sub(r"(?<!\S)#.*", "", line).strip()
117+
def _remove_newlines_from(line: str) -> str:
118+
return line.replace("\n", "")
101119

102-
@staticmethod
103-
def _remove_newlines_from(line: str) -> str:
104-
return line.replace("\n", "")
105120

106-
@staticmethod
107-
def _line_is_url(line: str) -> bool:
108-
return urlparse(line).scheme != ""
121+
def _line_is_url(line: str) -> bool:
122+
return urlparse(line).scheme != ""
109123

110-
@staticmethod
111-
def _extract_name_from_url(line: str) -> str | None:
112-
# Try to find egg, for url like git+https://github.com/xxxxx/package@xxxxx#egg=package
113-
match = re.search("egg=([a-zA-Z0-9-_]*)", line)
114-
if match:
115-
return match.group(1)
116124

117-
# for url like git+https://github.com/name/python-module.git@0d6dc38d58
118-
match = re.search(r"\/((?:(?!\/).)*?)\.git", line)
119-
if match:
120-
return match.group(1)
125+
def _extract_name_from_url(line: str) -> str | None:
126+
# Try to find egg, for url like git+https://github.com/xxxxx/package@xxxxx#egg=package
127+
match = re.search("egg=([a-zA-Z0-9-_]*)", line)
128+
if match:
129+
return match.group(1)
121130

122-
# for url like https://github.com/urllib3/urllib3/archive/refs/tags/1.26.8.zip
123-
match = re.search(r"\/((?:(?!\/).)*?)\/archive\/", line)
124-
if match:
125-
return match.group(1)
131+
# for url like git+https://github.com/name/python-module.git@0d6dc38d58
132+
match = re.search(r"\/((?:(?!\/).)*?)\.git", line)
133+
if match:
134+
return match.group(1)
126135

127-
logging.warning("Could not parse dependency name from url %s", line)
128-
return None
136+
# for url like https://github.com/urllib3/urllib3/archive/refs/tags/1.26.8.zip
137+
match = re.search(r"\/((?:(?!\/).)*?)\/archive\/", line)
138+
if match:
139+
return match.group(1)
140+
141+
logging.warning("Could not parse dependency name from url %s", line)
142+
return None

tests/functional/utils.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
class Project(str, Enum):
99
EXAMPLE = "example_project"
1010
PEP_621 = "pep_621_project"
11-
FUTURE_DEPRECATED_OBSOLETE_ARGUMENT = "project_with_future_deprecated_obsolete_argument"
1211
GITIGNORE = "project_with_gitignore"
1312
MULTIPLE_SOURCE_DIRECTORIES = "project_with_multiple_source_directories"
1413
NAMESPACE = "project_using_namespace"

tests/unit/dependency_getter/test_requirements_txt.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
import pytest
66

7-
from deptry.dependency_getter.requirements_files import RequirementsTxtDependencyGetter
7+
from deptry.dependency_getter.requirements_files import RequirementsTxtDependencyGetter, _line_is_url
88
from tests.utils import run_within_dir
99

1010

@@ -63,7 +63,8 @@ def test_parse_requirements_files_urls(tmp_path: Path) -> None:
6363
https://github.com/urllib3/urllib3/archive/refs/tags/1.26.8.zip
6464
git+https://github.com/baz/foo-bar.git@asd#egg=foo-bar
6565
git+https://github.com/baz/foo-bar.git@asd
66-
git+https://github.com/abc123/bar-foo@xyz789#egg=bar-fooo"""
66+
git+https://github.com/abc123/bar-foo@xyz789#egg=bar-fooo
67+
https://unsupported-specification.com"""
6768

6869
with run_within_dir(tmp_path):
6970
with Path("requirements.txt").open("w") as f:
@@ -200,4 +201,4 @@ def test_dev_multiple_with_arguments(tmp_path: Path) -> None:
200201
],
201202
)
202203
def test__line_is_url(line: str, expected: bool) -> None:
203-
assert RequirementsTxtDependencyGetter._line_is_url(line) is expected
204+
assert _line_is_url(line) is expected

0 commit comments

Comments
 (0)