Skip to content

Commit be5b76e

Browse files
committed
add support for pip
1 parent b6946e4 commit be5b76e

File tree

1 file changed

+63
-4
lines changed

1 file changed

+63
-4
lines changed

scripts/generate-valid-project-descriptor.py

Lines changed: 63 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
- OWASP Dependency Check: https://owasp.org/www-project-dependency-check/
77
- NPM audit: https://docs.npmjs.com/cli/v9/commands/npm-audit
88
- dotnet package list: https://learn.microsoft.com/en-us/dotnet/core/tools/dotnet-package-list
9+
- PIP audit: https://pypi.org/project/pip-audit/
910
1011
For Maven:
1112
This script analyses all the "pom.xml" descriptor files present recursively in a folder and
@@ -31,7 +32,14 @@
3132
Once generated, use the command "dotnet restore --verbosity quiet" into the folder where the "project.csproj" file was created
3233
to gather all dependencies.
3334
34-
It leverage the data provided by the Google site "https://deps.dev/".
35+
For Pip:
36+
The management of dependencies via the file "pyproject.toml" is used as reference as it is the modern way to manage dependencies in a project (See https://peps.python.org/pep-0621/).
37+
This script analyses all the "pyproject.toml" descriptor files present recursively in a folder and creates a single requirements file named "requirements-consolidated.txt" with all dependencies resolvable via the online official registry.
38+
Extract dependencies from "[project] > dependencies" only.
39+
The generated file can be provided to "pip-audit" as the source of dependencies to analyze.
40+
41+
42+
It leverages the data provided by the Google site "https://deps.dev/" to identify whether a dependency exists in the "online official registry".
3543
3644
Other types of projects will be added over time, based on the cases I meet :)
3745
@@ -42,9 +50,14 @@
4250
import argparse
4351
import pathlib
4452
import json
53+
import re
54+
import tomllib
4555
import xml.etree.ElementTree as ET
4656
from termcolor import colored
4757

58+
# KEY is the package manager official name and VALUE is the corresponding name used by deps.dev
59+
DEPSDEV_PACKAGE_MANAGER_MAPPING = {"pip": "pypi"}
60+
4861
MAVEN_PROJECT_DESCRIPTOR_TPL = """<?xml version="1.0" encoding="UTF-8"?>
4962
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5063
<modelVersion>4.0.0</modelVersion>
@@ -241,6 +254,21 @@ def list_packages_nuget(xml_content):
241254
return dependencies
242255

243256

257+
def list_packages_pip(content):
258+
data = tomllib.loads(content)
259+
project_data = data.get("project", {})
260+
dependencies_data = project_data.get("dependencies", [])
261+
dependencies = []
262+
for dependency_data in dependencies_data:
263+
name = re.findall(r'([a-z0-9_\.\-\[\]]+)[<>=~!^]', dependency_data, re.IGNORECASE)[0]
264+
name = name.strip("<>=~!^")
265+
version = dependency_data.replace(name, "")
266+
pkg_name = f"{name}:{version}"
267+
if pkg_name not in dependencies:
268+
dependencies.append(pkg_name)
269+
return dependencies
270+
271+
244272
def encode_package_name(pkg_name):
    """Percent-encode the "/", "@" and ":" characters of a package name.

    :param pkg_name: Raw package name.
    :return: The name with "/" replaced by "%2F", "@" by "%40" and ":" by "%3A".
    """
    encoded = pkg_name
    for raw_char, escaped in (("/", "%2F"), ("@", "%40"), (":", "%3A")):
        encoded = encoded.replace(raw_char, escaped)
    return encoded
@@ -266,13 +294,25 @@ def find_package_present_into_registry(pkg_names_list, project_type):
266294
pkg_name_only = parts[0]
267295
version_only = parts[1].split(" ")[0].strip("<>=^|")
268296
name_encoded = encode_package_name(pkg_name_only)
297+
if project_type == "pip":
298+
parts = pkg_name.split(":")
299+
pkg_name_only = parts[0]
300+
version_only = parts[1].split(",")[0].strip("<>=~!^")
301+
name_encoded = encode_package_name(pkg_name_only)
269302
if project_type == "nuget":
270303
parts = pkg_name.split(":")
271304
pkg_name_only = parts[0]
272305
version_only = parts[1]
273306
name_encoded = encode_package_name(pkg_name_only)
274307
if name_encoded is not None and version_only is not None:
275-
u = f"https://deps.dev/_/s/{project_type}/p/{name_encoded}/v/{version_only}"
308+
project_type_target = project_type
309+
if project_type in DEPSDEV_PACKAGE_MANAGER_MAPPING:
310+
project_type_target = DEPSDEV_PACKAGE_MANAGER_MAPPING[project_type]
311+
u = f"https://deps.dev/_/s/{project_type_target}/p/{name_encoded}/v/{version_only}"
312+
# With Python (pip), a package version is sometimes specified as "3.10" instead of "3.10.0".
313+
# So I need to perform a check without the version, just to verify the presence of the package.
314+
if project_type == "pip":
315+
u = f"https://deps.dev/_/s/{project_type_target}/p/{name_encoded}"
276316
response = session.get(url=u)
277317
if response.status_code == 200 and "version" in response.json():
278318
present.append(pkg_name)
@@ -295,7 +335,7 @@ def generate_project_descriptor(project_descriptor_file_name, pkg_names_list, gl
295335
if project_type == "npm":
296336
# Handle the prevention of adding several times the same package at this level as it is the final
297337
# step where all dependencies were identified.
298-
# It is more important in NPM (as compared to MAVEN) as a range of versions is specified for a package.
338+
# It is more important in NPM (as compared to MAVEN) as a range of versions can be specified for a package.
299339
packages_already_added = []
300340
for pkg_name in pkg_names_list:
301341
parts = pkg_name.split(":")
@@ -304,6 +344,19 @@ def generate_project_descriptor(project_descriptor_file_name, pkg_names_list, gl
304344
dependencies.append(NPM_DEPENDENCY_TPL % (parts[0], parts[1]))
305345
packages_already_added.append(pkg_name_only)
306346
project_descriptor_content = NPM_PROJECT_DESCRIPTOR_TPL % (",".join(dependencies))
347+
if project_type == "pip":
348+
# Handle the prevention of adding several times the same package at this level as it is the final
349+
# step where all dependencies were identified.
350+
# It is more important in PIP (as compared to MAVEN) as a range of versions can be specified for a package.
351+
packages_already_added = []
352+
for pkg_name in pkg_names_list:
353+
parts = pkg_name.split(":")
354+
pkg_name_only = parts[0].lower()
355+
if pkg_name_only not in packages_already_added:
356+
dependencies.append(f"{parts[0]}{parts[1]}")
357+
packages_already_added.append(pkg_name_only)
358+
project_descriptor_content = "\n".join(dependencies)
359+
project_descriptor_target_file_name = "requirements-consolidated.txt"
307360
if project_type == "nuget":
308361
for pkg_name in pkg_names_list:
309362
parts = pkg_name.split(":")
@@ -324,7 +377,7 @@ def is_npm_package_json_file(file_content):
324377
parser = argparse.ArgumentParser(description="Generate a valid project descriptor with all dependencies resolvable via online official registry.")
325378
required_params = parser.add_argument_group("required named arguments")
326379
required_params.add_argument("-f", action="store", dest="base_folder", help="Path to folder containing the project code base.", required=True)
327-
required_params.add_argument("-t", action="store", dest="project_type", choices=["maven", "npm", "nuget"], help="System managing the project external dependencies.", required=True)
380+
required_params.add_argument("-t", action="store", dest="project_type", choices=["maven", "npm", "nuget", "pip"], help="System managing the project external dependencies.", required=True)
328381
parser.add_argument("-e", action="store", dest="gids_to_ignore", help="[MAVEN ONLY] List of artefacts GroupID, separated by a comma, to ignores (excludes) from the final POM file.", required=False, default="")
329382
args = parser.parse_args()
330383
project_descriptor_file_name = None
@@ -338,6 +391,9 @@ def is_npm_package_json_file(file_content):
338391
if args.project_type == "nuget":
339392
project_descriptor_file_name = "*.csproj"
340393
gids_to_ignore = None
394+
if args.project_type == "pip":
395+
project_descriptor_file_name = "pyproject.toml"
396+
gids_to_ignore = None
341397
print(colored(f"[+] Extract all dependencies from all '{project_descriptor_file_name}' files...", "yellow"))
342398
global_dependencies = []
343399
global_properties = {}
@@ -357,6 +413,9 @@ def is_npm_package_json_file(file_content):
357413
if args.project_type == "nuget":
358414
dependencies = list_packages_nuget(content)
359415
global_dependencies.extend(dependencies)
416+
if args.project_type == "pip":
417+
dependencies = list_packages_pip(content)
418+
global_dependencies.extend(dependencies)
360419
global_dependencies = list(set(global_dependencies))
361420
print(f"\rDependencies identified ({project_descriptor_file_name_file_count} files read): {len(global_dependencies):<80}")
362421
print(colored(f"[+] Identify all resolvable dependencies...", "yellow"))

0 commit comments

Comments
 (0)