Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 39 additions & 4 deletions lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@ import {
generateMavenFingerprints,
getMavenRepositoryPath,
} from './fingerprint';
import { buildM2HashLabelsMap } from './parse/m2-hash-labels';
import { readM2HashLabels } from './parse/m2-hash-labels';
import {
fetchRepositoryUrlMap,
readRemoteRepositoryLabel,
} from './parse/m2-remote-repositories';
import { collectM2Nodes, buildLabelMap } from './parse/m2-batch';
import {
SnykHttpClient,
HashAlgorithm,
Expand Down Expand Up @@ -173,14 +178,43 @@ export async function inspect(

// Read install-time-recorded companion-file hashes (e.g. `.jar.sha1`) from
// the local Maven repository and surface them as `hash:<algorithm>` labels.
// Gated behind its own `--include-component-metadata` option for now.
// Also read distribution source info from _remote.repositories files.
// Both require access to the local Maven repository, so we hoist that
// path resolution here to run once.
let repositoryPath: string | undefined;
let remoteRepositoriesMap = new Map<string, Record<string, string>>();
let hashLabelsMap = new Map<string, Record<string, string>>();

if (options.includeComponentMetadata) {
const repositoryPath = await getMavenRepositoryPath(
repositoryPath = await getMavenRepositoryPath(
mavenContext.command,
options.mavenRepository,
);
hashLabelsMap = await buildM2HashLabelsMap(mavenGraphs, repositoryPath);
}

if (options.includeComponentMetadata && repositoryPath) {
// Resolve the node set and artifact paths once; both label passes reuse it.
const m2Nodes = collectM2Nodes(mavenGraphs, repositoryPath);

// The hash-label reads only touch the local repository, so kick them off
// concurrently with the dependency:list-repositories subprocess instead
// of waiting for it.
const hashLabelsPromise = buildLabelMap(m2Nodes, (node) =>
readM2HashLabels(node.artifactPath),
);

const repoUrlMap = await fetchRepositoryUrlMap(
mavenContext,
!!options.mavenAggregateProject,
);
const remoteRepositoriesPromise = buildLabelMap(m2Nodes, (node) =>
readRemoteRepositoryLabel(node, repositoryPath, repoUrlMap),
);

[hashLabelsMap, remoteRepositoriesMap] = await Promise.all([
hashLabelsPromise,
remoteRepositoriesPromise,
]);
}

// Build scanned projects
Expand All @@ -192,6 +226,7 @@ export async function inspect(
!!fingerprintOptions?.enabled,
!!options.showMavenBuildScope,
hashLabelsMap,
remoteRepositoriesMap,
);

return {
Expand Down
9 changes: 9 additions & 0 deletions lib/parse/dep-graph.ts
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,15 @@ function createNodeInfo(
Object.assign(labels, hashLabels);
}

// Merge distribution-source labels (read from `.m2/.../repository/*/_remote.repositories`
// and resolved to full artifact URLs via Maven's dependency:list-repositories).
// These are consumed downstream to populate CycloneDX `component.ExternalReferences`
// with type="distribution".
const repoLabels = context.remoteRepositoriesMap?.get(depInfo.id);
if (repoLabels) {
Object.assign(labels, repoLabels);
}

if (Object.keys(labels).length === 0) {
return;
}
Expand Down
65 changes: 65 additions & 0 deletions lib/parse/m2-batch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import type { MavenGraph } from './types';
import { dependencyIdToArtifactPath } from '../fingerprint';

/**
 * Number of artifacts (graph nodes) processed per batch by `buildLabelMap`:
 * each batch is a slice of this many nodes whose reads are awaited together
 * before the next batch starts.
 *
 * We batch to avoid spawning too many concurrent file reads. Note that this
 * bounds nodes, not reads: a single node may itself fan out into several
 * reads (e.g. one companion file per hash algorithm), so the in-flight read
 * count can be a small multiple of this.
 */
export const CONCURRENCY = 5;

/**
 * A Maven graph node paired with the path to its artifact in the local Maven
 * repository. Resolved once via {@link collectM2Nodes} so the hash-label and
 * distribution-url passes can share it rather than each recomputing it.
 */
export interface M2Node {
/** Node ID, taken from the keys of a Maven graph's `nodes` object. */
nodeId: string;
/** Result of `dependencyIdToArtifactPath(nodeId, repositoryPath)` for this node. */
artifactPath: string;
}

/**
 * Build the de-duplicated list of nodes found in `mavenGraphs`, each paired
 * with its artifact path inside the local Maven repository.
 *
 * Node IDs are gathered from every graph's `nodes` keys; an ID that appears
 * in several graphs is emitted once, in first-seen order. Resolving the paths
 * here lets each per-node pass (hashes, distribution URLs) share one node set
 * instead of recomputing it.
 *
 * @param mavenGraphs - Graphs whose node IDs should be collected.
 * @param repositoryPath - Root of the local Maven repository.
 * @returns One {@link M2Node} per unique node ID.
 */
export function collectM2Nodes(
  mavenGraphs: MavenGraph[],
  repositoryPath: string,
): M2Node[] {
  const seenIds = new Set<string>();
  for (const { nodes } of mavenGraphs) {
    for (const id of Object.keys(nodes)) {
      seenIds.add(id);
    }
  }

  // A Set iterates in insertion order, so the output keeps first-seen order.
  return Array.from(seenIds, (nodeId) => {
    const artifactPath = dependencyIdToArtifactPath(nodeId, repositoryPath);
    return { nodeId, artifactPath };
  });
}

/**
 * Run `readLabels` for every node in bounded-concurrency batches and return a
 * Map<nodeId, labels> containing only the nodes that produced a non-empty
 * label set. The shared scaffold for any per-node label-reading pass.
 *
 * Nodes are processed in consecutive slices of `concurrency` entries; all
 * reads of a slice are awaited together before the next slice starts, so at
 * most `concurrency` calls to `readLabels` are pending at any time. If two
 * nodes share a `nodeId`, the later one's labels overwrite the earlier one's.
 *
 * @param nodes - Nodes to read labels for.
 * @param readLabels - Async reader invoked with exactly one argument, the
 *   node. Returning an empty object omits that node from the result.
 * @param concurrency - Number of nodes per batch. Defaults to
 *   {@link CONCURRENCY}; must be a positive integer.
 * @returns Map from node ID to its non-empty label record.
 * @throws RangeError (as a rejection) if `concurrency` is not a positive
 *   integer. Any rejection from `readLabels` also rejects the returned
 *   promise.
 */
export async function buildLabelMap(
  nodes: M2Node[],
  readLabels: (node: M2Node) => Promise<Record<string, string>>,
  concurrency: number = CONCURRENCY,
): Promise<Map<string, Record<string, string>>> {
  // A step of 0, a negative step or NaN would keep the loop below from ever
  // advancing, so reject such values up front instead of hanging.
  if (!Number.isInteger(concurrency) || concurrency < 1) {
    throw new RangeError(
      `concurrency must be a positive integer, got ${concurrency}`,
    );
  }

  const result = new Map<string, Record<string, string>>();

  for (let i = 0; i < nodes.length; i += concurrency) {
    const batch = nodes.slice(i, i + concurrency);
    // Wrap the callback so Array.prototype.map's extra (index, array)
    // arguments are not forwarded to the reader.
    const batchResults = await Promise.all(
      batch.map((node) => readLabels(node)),
    );
    batch.forEach((node, j) => {
      const labels = batchResults[j];
      if (Object.keys(labels).length > 0) {
        result.set(node.nodeId, labels);
      }
    });
  }

  return result;
}
53 changes: 4 additions & 49 deletions lib/parse/m2-hash-labels.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import * as fs from 'fs';
import type { MavenGraph } from './types';
import { dependencyIdToArtifactPath } from '../fingerprint';
import { debug } from '../index';

/**
Expand Down Expand Up @@ -37,11 +35,6 @@ const M2_COMPANION_FILES: {
{ ext: 'sha512', algorithm: 'sha-512', digestPattern: /^[0-9a-f]{128}$/ },
];

// Number of artifacts processed per batch. Each artifact reads its companion
// files concurrently (up to one per entry in M2_COMPANION_FILES), so up to
// CONCURRENCY * M2_COMPANION_FILES.length companion reads are in flight at once.
const CONCURRENCY = 5;

// Upper bound on the bytes read from a companion file. We only keep the first
// whitespace-delimited token, and the longest valid digest is sha-512 at 128
// hex chars; 256 bytes leaves slack for a leading BOM/whitespace while still
Expand Down Expand Up @@ -93,19 +86,17 @@ async function readCompanionFile(
}

/**
* Given a Maven dependency ID (e.g. "com.google.guava:guava:jar:32.1.3-jre"),
* read whichever companion checksum files exist for it in the local Maven
* repository and return them as `hash:<algorithm>` -> hex labels.
* Given the path to an artifact in the local Maven repository, read whichever
* companion checksum files exist for it and return them as
* `hash:<algorithm>` -> hex labels.
*
* Empty result if none are present (artifact not in .m2 yet, or no companion
* files published).
*/
export async function readM2HashLabels(
dependencyId: string,
repositoryPath: string,
artifactPath: string,
): Promise<Record<string, string>> {
const labels: Record<string, string> = {};
const artifactPath = dependencyIdToArtifactPath(dependencyId, repositoryPath);

const digests = await Promise.all(
M2_COMPANION_FILES.map(({ ext, digestPattern }) =>
Expand All @@ -122,39 +113,3 @@ export async function readM2HashLabels(

return labels;
}

/**
 * Compute, ahead of time, the hash labels for every node appearing in the
 * given Maven graphs, keyed by node ID.
 *
 * Follows the same approach as `generateFingerprints`: all file I/O happens
 * here so the depgraph builder can attach labels during its BFS/DFS traversal
 * without touching the disk. Nodes are read asynchronously in slices of
 * `CONCURRENCY`, and nodes that yield no labels are left out of the map.
 *
 * @param mavenGraphs - Graphs whose nodes should be labelled.
 * @param repositoryPath - Root of the local Maven repository.
 * @returns Map from node ID to its non-empty `hash:<algorithm>` labels.
 */
export async function buildM2HashLabelsMap(
  mavenGraphs: MavenGraph[],
  repositoryPath: string,
): Promise<Map<string, Record<string, string>>> {
  // De-duplicate node IDs across graphs, keeping first-seen order.
  const uniqueIds = new Set<string>();
  for (const { nodes } of mavenGraphs) {
    for (const id of Object.keys(nodes)) {
      uniqueIds.add(id);
    }
  }
  const pendingIds = Array.from(uniqueIds);

  const labelsByNode = new Map<string, Record<string, string>>();

  // Walk the IDs one bounded slice at a time; each slice is read in parallel.
  let offset = 0;
  while (offset < pendingIds.length) {
    const chunk = pendingIds.slice(offset, offset + CONCURRENCY);
    const chunkLabels = await Promise.all(
      chunk.map((nodeId) => readM2HashLabels(nodeId, repositoryPath)),
    );

    for (let k = 0; k < chunk.length; k++) {
      const labels = chunkLabels[k];
      if (Object.keys(labels).length > 0) {
        labelsByNode.set(chunk[k], labels);
      }
    }

    offset += CONCURRENCY;
  }

  return labelsByNode;
}
Loading