22#
33# /// script
44# requires-python = ">=3.10"
5- # dependencies = ["openpyxl", "pydantic"]
5+ # dependencies = ["openpyxl", "pydantic", "packaging" ]
66# ///
77
88import argparse
1212from typing import Generator , Literal
1313
1414import openpyxl
15+ from packaging import version
1516from pydantic import AliasPath , BaseModel , Field , computed_field
1617
1718logger = logging .getLogger (__name__ )
@@ -36,7 +37,7 @@ class SPDXPackage(BaseModel):
3637 # I haven't yet seen a case where it is missing and it should be included in the human-readable SBOM
3738 versionInfo : str
3839 supplier : str = "Open-source software"
39- externalRefs : list [SPDXRef ] = []
40+ externalRefs : list [SPDXRef ] = Field ( default_factory = list )
4041
4142 @computed_field
4243 def purl (self ) -> str | None :
@@ -109,20 +110,31 @@ def to_fda_records(self, author: str | None) -> Generator["FDARecord"]:
109110
110111
class FDARecord(BaseModel):
    """FDA required fields.

    One entry of the combined SBOM. ``merge_sboms`` treats two records with
    the same ``(component, supplier)`` pair as the same package and keeps
    only one of them.
    """

    # Author of the SBOM entry.
    author: str
    # Timestamp kept as a plain string.
    # NOTE(review): the expected format is not visible here - confirm with the producers.
    timestamp: str
    # Defaults to a generic label when no explicit supplier is given.
    supplier: str = "Open-source software"
    # Component (package) name; part of the merge key together with `supplier`.
    component: str
    # Version string; used to decide which record wins when two SBOMs are merged.
    version: str
    # Identifier of the component.
    # NOTE(review): presumably a purl or similar - verify against the code that builds records.
    unique_identifier: str
    # Only one relationship value is allowed, and it is also the default.
    relationship: Literal["Is contained by"] = "Is contained by"
122+
123+
def newer(p1: FDARecord, p2: FDARecord) -> FDARecord:
    """Return whichever record carries the newer version.

    Versions are compared with ``packaging.version`` (PEP 440 ordering, so
    ``1.10`` is newer than ``1.9``). If either version string cannot be
    parsed, a warning is logged and the raw strings are compared
    lexicographically instead.

    Args:
        p1: First candidate record.
        p2: Second candidate record.

    Returns:
        ``p1`` when its version is strictly newer than that of ``p2``;
        otherwise ``p2`` (this includes the case of equal versions).
    """
    if p1.version == p2.version:
        return p2  # Arbitrary choice if versions are equal
    try:
        # Keep the try body limited to the calls that can actually fail.
        v1 = version.parse(p1.version)
        v2 = version.parse(p2.version)
    except version.InvalidVersion as e:
        # Fallback to string comparison if version parsing fails
        logger.warning(
            f"Failed to parse versions '{p1.version}' or '{p2.version}': {e}"
        )
        return p1 if p1.version > p2.version else p2
    return p1 if v1 > v2 else p2
126138
127139
128140def merge_sboms (sbom1 : list [FDARecord ], sbom2 : list [FDARecord ]) -> list [FDARecord ]:
@@ -131,7 +143,7 @@ def merge_sboms(sbom1: list[FDARecord], sbom2: list[FDARecord]) -> list[FDARecor
131143 for r in sbom2 :
132144 key = (r .component , r .supplier )
133145 if key in records :
134- records [key ] = newer2 (records [key ], r )
146+ records [key ] = newer (records [key ], r )
135147 else :
136148 records [key ] = r
137149 return list (records .values ())
@@ -156,7 +168,7 @@ def gen_sbom(
156168):
157169 """Generate a combined SBOM from multiple SPDX and CycloneDX SBOMs in the input directory."""
158170 bom_parsers : list [type [BaseSBOM ]] = [SPDX2_3 , Cyclone1_6 ]
159- boms = []
171+ boms : list [ list [ FDARecord ]] = []
160172
161173 for bom_file in input_directory_path .glob ("*.json" ):
162174 for bom_parser in bom_parsers :
@@ -173,9 +185,9 @@ def gen_sbom(
173185 logger .error (f"Failed to parse { bom_file } with all known parsers" )
174186 raise ValueError (f"Unknown BOM format in { bom_file } " )
175187
176- merged_bom = reduce (merge_sboms , boms , [])
188+ merged_bom : list [ FDARecord ] = reduce (merge_sboms , boms , [])
177189 save_as_xlsx (merged_bom , output_file_path )
178- # Check for duplicates
190+ # Check for duplicates (side effect: log warnings)
179191 deduplicate (merged_bom )
180192
181193
@@ -192,7 +204,6 @@ def save_as_xlsx(bom: list[FDARecord], output_file_path: Path | str):
192204 ]
193205 wb = openpyxl .Workbook ()
194206 ws = wb .active
195- assert ws
196207 ws .append (excel_header )
197208 for r in bom :
198209 ws .append (
0 commit comments