Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 18 additions & 11 deletions modules/fundamental/src/license/service/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,22 +297,29 @@ impl LicenseService {
.distinct()
.column_as(expanded_license::Column::ExpandedText, LICENSE_TEXT);

// Build query for non-expanded licenses: includes both
// Build query for licenses not yet linked to any SBOM: includes both
// (a) pre-loaded SPDX dictionary entries with no SBOM connection yet, AND
// (b) CycloneDX licenses that exist in sbom_package_license but were never expanded.
// A LEFT JOIN on sbom_package_license (instead of INNER JOIN) ensures pre-loaded licenses
// with no SBOM attachment are included. Then filtering for sbom_license_expanded IS NULL
// removes SPDX licenses that have already been expanded (they appear in spdx_query instead).
// (b) licenses from older SBOMs ingested before license expansion was implemented.
// Use NOT EXISTS instead of LEFT JOIN + IS NULL to find licenses without SBOMs.
// On large tables, LEFT JOIN + IS NULL scans all rows, while NOT EXISTS
// can be planned as a Nested Loop Anti Join with an index-only scan.
let exists_subquery = sea_query::Query::select()
.expr(Expr::val(1))
.from(sbom_license_expanded::Entity)
.and_where(
Expr::col((
sbom_license_expanded::Entity,
sbom_license_expanded::Column::LicenseId,
))
.equals((license::Entity, license::Column::Id)),
)
.to_owned();

let mut non_sbom_query = license::Entity::find()
.select_only()
.distinct()
.column_as(license::Column::Text, LICENSE_TEXT)
.join(JoinType::LeftJoin, license::Relation::PackageLicense.def())
.join(
JoinType::LeftJoin,
sbom_license_expanded::Relation::License.def().rev(),
)
.filter(sbom_license_expanded::Column::LicenseId.is_null());
.filter(Expr::exists(exists_subquery).not());

// Apply filtering to both queries (without sorting - that's applied to the UNION result)
let filter_only = Query {
Expand Down
12 changes: 12 additions & 0 deletions modules/ingestor/src/graph/sbom/common/expanded_license.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,18 @@ use uuid::Uuid;
///
/// While SeaORM could express this via custom expressions, it would be significantly
/// more verbose and harder to maintain than the raw SQL.
///
/// # Differences from Migration Backfill
///
/// The migration in `m0002120_normalize_expanded_license/up.sql` performs a similar
/// operation but with key differences:
/// - Migration: Pre-deduplicates by `(text, sbom_id)` and uses `WHERE NOT EXISTS` to skip
///   already-backfilled SBOMs. Optimized for one-time bulk processing.
/// - Ingestion: Filters by the specific `sbom_id` parameter for single-SBOM processing.
///   Uses `ON CONFLICT` for idempotent re-ingestion of the same SBOM.
///
/// Both use the same core logic (`expand_license_expression_with_mappings` + `md5` hash
/// matching) but optimize for their different use cases.
pub async fn populate_expanded_license(
sbom_id: Uuid,
db: &impl ConnectionTrait,
Expand Down
4 changes: 2 additions & 2 deletions modules/ingestor/src/graph/sbom/cyclonedx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,11 @@ impl<'a> From<Information<'a>> for SbomInformation {

impl SbomContext {
#[instrument(skip(connection, sbom, warnings), err(level=tracing::Level::INFO))]
pub async fn ingest_cyclonedx<C: ConnectionTrait>(
pub async fn ingest_cyclonedx(
&self,
mut sbom: Box<CycloneDx>,
warnings: &dyn ReportSink,
connection: &C,
connection: &impl ConnectionTrait,
) -> Result<(), Error> {
// pre-flight checks

Expand Down
4 changes: 2 additions & 2 deletions modules/ingestor/src/graph/sbom/spdx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,11 @@ impl<'a> From<Information<'a>> for SbomInformation {

impl SbomContext {
#[instrument(skip(db, sbom_data, warnings), ret(level=tracing::Level::DEBUG))]
pub async fn ingest_spdx<C: ConnectionTrait>(
pub async fn ingest_spdx(
&self,
sbom_data: SPDX,
warnings: &dyn ReportSink,
db: &C,
db: &impl ConnectionTrait,
) -> Result<(), Error> {
// pre-flight checks

Expand Down
Loading