66- OWASP Dependency Check: https://owasp.org/www-project-dependency-check/
77- NPM audit: https://docs.npmjs.com/cli/v9/commands/npm-audit
88- dotnet package list: https://learn.microsoft.com/en-us/dotnet/core/tools/dotnet-package-list
9+ - PIP audit: https://pypi.org/project/pip-audit/
910
1011For Maven:
1112 This script analyses all the "pom.xml" descriptor files present recursively in a folder and
3132 Once generated, use the command "dotnet restore --verbosity quiet" into the folder where the "project.csproj" file was created
3233 to gather all dependencies.
3334
34- It leverage the data provided by the Google site "https://deps.dev/".
35+ For Pip:
36+ The management of dependencies via the file "pyproject.toml" is used as reference as it is the modern way to manage dependencies in a project (See https://peps.python.org/pep-0621/).
37+ This script analyses all the "pyproject.toml" descriptor files present recursively in a folder and creates a single requirements file named "requirements-consolidated.txt" with all dependencies resolvable via the online official registry.
38+ Dependencies are extracted from "[project] > dependencies" only.
39+ The generated file can be provided to "pip-audit" as the source of dependencies to analyze.
40+
41+
42+ It leverages the data provided by the Google site "https://deps.dev/" to identify whether a dependency exists in the "online official registry".
3543
3644Other types of projects will be added over time, based on the cases I meet :)
3745
4250import argparse
4351import pathlib
4452import json
53+ import re
54+ import tomllib
4555import xml .etree .ElementTree as ET
4656from termcolor import colored
4757
58+ # KEY is the package manager official name and VALUE is the corresponding name used by deps.dev
59+ DEPSDEV_PACKAGE_MANAGER_MAPPING = {"pip" : "pypi" }
60+
4861MAVEN_PROJECT_DESCRIPTOR_TPL = """<?xml version="1.0" encoding="UTF-8"?>
4962<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5063 <modelVersion>4.0.0</modelVersion>
@@ -241,6 +254,21 @@ def list_packages_nuget(xml_content):
241254 return dependencies
242255
243256
257+ def list_packages_pip (content ):
258+ data = tomllib .loads (content )
259+ project_data = data .get ("project" , {})
260+ dependencies_data = project_data .get ("dependencies" , [])
261+ dependencies = []
262+ for dependency_data in dependencies_data :
263+ name = re .findall (r'([a-z0-9_\.\-\[\]]+)[<>=~!^]' , dependency_data , re .IGNORECASE )[0 ]
264+ name = name .strip ("<>=~!^" )
265+ version = dependency_data .replace (name , "" )
266+ pkg_name = f"{ name } :{ version } "
267+ if pkg_name not in dependencies :
268+ dependencies .append (pkg_name )
269+ return dependencies
270+
271+
def encode_package_name(pkg_name):
    """Percent-encode the characters "/", "@" and ":" of a package name.

    :param pkg_name: Package name to encode.
    :return: The package name in which "/", "@" and ":" are replaced by "%2F", "%40" and "%3A".
    """
    # Single pass translation table covering the three characters to escape.
    escape_table = str.maketrans({"/": "%2F", "@": "%40", ":": "%3A"})
    return pkg_name.translate(escape_table)
@@ -266,13 +294,25 @@ def find_package_present_into_registry(pkg_names_list, project_type):
266294 pkg_name_only = parts [0 ]
267295 version_only = parts [1 ].split (" " )[0 ].strip ("<>=^|" )
268296 name_encoded = encode_package_name (pkg_name_only )
297+ if project_type == "pip" :
298+ parts = pkg_name .split (":" )
299+ pkg_name_only = parts [0 ]
300+ version_only = parts [1 ].split ("," )[0 ].strip ("<>=~!^" )
301+ name_encoded = encode_package_name (pkg_name_only )
269302 if project_type == "nuget" :
270303 parts = pkg_name .split (":" )
271304 pkg_name_only = parts [0 ]
272305 version_only = parts [1 ]
273306 name_encoded = encode_package_name (pkg_name_only )
274307 if name_encoded is not None and version_only is not None :
275- u = f"https://deps.dev/_/s/{ project_type } /p/{ name_encoded } /v/{ version_only } "
308+ project_type_target = project_type
309+ if project_type in DEPSDEV_PACKAGE_MANAGER_MAPPING :
310+ project_type_target = DEPSDEV_PACKAGE_MANAGER_MAPPING [project_type ]
311+ u = f"https://deps.dev/_/s/{ project_type_target } /p/{ name_encoded } /v/{ version_only } "
312+ # With Python (pip), a package version is sometimes specified as "3.10" instead of "3.10.0".
313+ # So the check is performed without the version, to only verify the presence of the package.
314+ if project_type == "pip" :
315+ u = f"https://deps.dev/_/s/{ project_type_target } /p/{ name_encoded } "
276316 response = session .get (url = u )
277317 if response .status_code == 200 and "version" in response .json ():
278318 present .append (pkg_name )
@@ -295,7 +335,7 @@ def generate_project_descriptor(project_descriptor_file_name, pkg_names_list, gl
295335 if project_type == "npm" :
296336 # Handle the prevention of adding several times the same package at this level as it is the final
297337 # step where all dependencies were identified.
298- # It is more important in NPM (as compared to MAVEN) as a range of versions is specified for a package.
338+ # It is more important in NPM (as compared to MAVEN) as a range of versions can be specified for a package.
299339 packages_already_added = []
300340 for pkg_name in pkg_names_list :
301341 parts = pkg_name .split (":" )
@@ -304,6 +344,19 @@ def generate_project_descriptor(project_descriptor_file_name, pkg_names_list, gl
304344 dependencies .append (NPM_DEPENDENCY_TPL % (parts [0 ], parts [1 ]))
305345 packages_already_added .append (pkg_name_only )
306346 project_descriptor_content = NPM_PROJECT_DESCRIPTOR_TPL % ("," .join (dependencies ))
347+ if project_type == "pip" :
348+ # Handle the prevention of adding several times the same package at this level as it is the final
349+ # step where all dependencies were identified.
350+ # It is more important in PIP (as compared to MAVEN) as a range of versions can be specified for a package.
351+ packages_already_added = []
352+ for pkg_name in pkg_names_list :
353+ parts = pkg_name .split (":" )
354+ pkg_name_only = parts [0 ].lower ()
355+ if pkg_name_only not in packages_already_added :
356+ dependencies .append (f"{ parts [0 ]} { parts [1 ]} " )
357+ packages_already_added .append (pkg_name_only )
358+ project_descriptor_content = "\n " .join (dependencies )
359+ project_descriptor_target_file_name = "requirements-consolidated.txt"
307360 if project_type == "nuget" :
308361 for pkg_name in pkg_names_list :
309362 parts = pkg_name .split (":" )
@@ -324,7 +377,7 @@ def is_npm_package_json_file(file_content):
324377 parser = argparse .ArgumentParser (description = "Generate a valid project descriptor with all dependencies resolvable via online official registry." )
325378 required_params = parser .add_argument_group ("required named arguments" )
326379 required_params .add_argument ("-f" , action = "store" , dest = "base_folder" , help = "Path to folder containing the project code base." , required = True )
327- required_params .add_argument ("-t" , action = "store" , dest = "project_type" , choices = ["maven" , "npm" , "nuget" ], help = "System managing the project external dependencies." , required = True )
380+ required_params .add_argument ("-t" , action = "store" , dest = "project_type" , choices = ["maven" , "npm" , "nuget" , "pip" ], help = "System managing the project external dependencies." , required = True )
328381 parser .add_argument ("-e" , action = "store" , dest = "gids_to_ignore" , help = "[MAVEN ONLY] List of artefacts GroupID, separated by a comma, to ignores (excludes) from the final POM file." , required = False , default = "" )
329382 args = parser .parse_args ()
330383 project_descriptor_file_name = None
@@ -338,6 +391,9 @@ def is_npm_package_json_file(file_content):
338391 if args .project_type == "nuget" :
339392 project_descriptor_file_name = "*.csproj"
340393 gids_to_ignore = None
394+ if args .project_type == "pip" :
395+ project_descriptor_file_name = "pyproject.toml"
396+ gids_to_ignore = None
341397 print (colored (f"[+] Extract all dependencies from all '{ project_descriptor_file_name } ' files..." , "yellow" ))
342398 global_dependencies = []
343399 global_properties = {}
@@ -357,6 +413,9 @@ def is_npm_package_json_file(file_content):
357413 if args .project_type == "nuget" :
358414 dependencies = list_packages_nuget (content )
359415 global_dependencies .extend (dependencies )
416+ if args .project_type == "pip" :
417+ dependencies = list_packages_pip (content )
418+ global_dependencies .extend (dependencies )
360419 global_dependencies = list (set (global_dependencies ))
361420 print (f"\r Dependencies identified ({ project_descriptor_file_name_file_count } files read): { len (global_dependencies ):<80} " )
362421 print (colored (f"[+] Identify all resolvable dependencies..." , "yellow" ))
0 commit comments