From 8d82b6309aaf4abf51da1b52f5f3ec58264c4a38 Mon Sep 17 00:00:00 2001 From: Wolfgang Klenk Date: Tue, 20 May 2025 12:20:48 +0200 Subject: [PATCH 1/2] feat(scanner): Add submodule fetch strategy for nested repositories Introduce `submoduleFetchStrategy` config to control how the Scanner fetches Git submodules. When set to `TOP_LEVEL_ONLY`, only top-level submodules are fetched, avoiding timeouts on deeply nested repos. This mirrors the behavior already available in the Analyzer and allows resolving nested provenances even in such repositories with a vast number of nested submodules. Signed-off-by: Wolfgang Klenk --- .../mapping/src/commonMain/kotlin/Mappings.kt | 6 +++-- .../commonMain/kotlin/JobConfigurations.kt | 9 +++++++- .../commonMain/kotlin/JobConfigurations.kt | 9 +++++++- .../src/main/kotlin/scanner/ScannerRunner.kt | 23 ++++++++++++++++++- 4 files changed, 42 insertions(+), 5 deletions(-) diff --git a/api/v1/mapping/src/commonMain/kotlin/Mappings.kt b/api/v1/mapping/src/commonMain/kotlin/Mappings.kt index 6e4c3dd5e7..326bf6f8b1 100644 --- a/api/v1/mapping/src/commonMain/kotlin/Mappings.kt +++ b/api/v1/mapping/src/commonMain/kotlin/Mappings.kt @@ -578,7 +578,8 @@ fun ScannerJobConfiguration.mapToApi() = ApiScannerJobConfiguration( skipExcluded, sourceCodeOrigins?.map { it.mapToApi() }, config?.mapValues { it.value.mapToApi() }, - keepAliveWorker + keepAliveWorker, + submoduleFetchStrategy.mapToApi() ) fun ApiScannerJobConfiguration.mapToModel() = ScannerJobConfiguration( @@ -590,7 +591,8 @@ fun ApiScannerJobConfiguration.mapToModel() = ScannerJobConfiguration( skipExcluded, sourceCodeOrigins?.map { it.mapToModel() }, config?.mapValues { it.value.mapToModel() }, - keepAliveWorker + keepAliveWorker, + submoduleFetchStrategy.mapToModel() ) fun Secret.mapToApi() = ApiSecret(name, description) diff --git a/api/v1/model/src/commonMain/kotlin/JobConfigurations.kt b/api/v1/model/src/commonMain/kotlin/JobConfigurations.kt index d65adb1004..d98381923c 
100644 --- a/api/v1/model/src/commonMain/kotlin/JobConfigurations.kt +++ b/api/v1/model/src/commonMain/kotlin/JobConfigurations.kt @@ -187,7 +187,14 @@ data class ScannerJobConfiguration( * Keep the worker alive after it has finished. This is useful for manual problem analysis directly * within the pod's execution environment. */ - val keepAliveWorker: Boolean = false + val keepAliveWorker: Boolean = false, + + /** + * Specifies how submodules are fetched when resolving provenances. Currently supported only for Git repositories. + * If set to [SubmoduleFetchStrategy.FULLY_RECURSIVE] (default), all submodules are fetched recursively. If set + * to [SubmoduleFetchStrategy.TOP_LEVEL_ONLY], only the top-level submodules are fetched. + */ + val submoduleFetchStrategy: SubmoduleFetchStrategy = SubmoduleFetchStrategy.FULLY_RECURSIVE ) /** diff --git a/model/src/commonMain/kotlin/JobConfigurations.kt b/model/src/commonMain/kotlin/JobConfigurations.kt index 63398f6ae5..bd30ce08be 100644 --- a/model/src/commonMain/kotlin/JobConfigurations.kt +++ b/model/src/commonMain/kotlin/JobConfigurations.kt @@ -196,7 +196,14 @@ data class ScannerJobConfiguration( * Keep the worker alive after it has finished. This is useful for manual problem analysis directly * within the pod's execution environment. */ - val keepAliveWorker: Boolean = false + val keepAliveWorker: Boolean = false, + + /** + * Specifies how submodules are fetched when resolving provenances. Currently supported only for Git repositories. + * If set to [SubmoduleFetchStrategy.FULLY_RECURSIVE] (default), all submodules are fetched recursively. If set + * to [SubmoduleFetchStrategy.TOP_LEVEL_ONLY], only the top-level submodules are fetched. 
+ */ + val submoduleFetchStrategy: SubmoduleFetchStrategy = SubmoduleFetchStrategy.FULLY_RECURSIVE ) /** diff --git a/workers/scanner/src/main/kotlin/scanner/ScannerRunner.kt b/workers/scanner/src/main/kotlin/scanner/ScannerRunner.kt index 060aa1f522..d4d3eadc6c 100644 --- a/workers/scanner/src/main/kotlin/scanner/ScannerRunner.kt +++ b/workers/scanner/src/main/kotlin/scanner/ScannerRunner.kt @@ -20,6 +20,7 @@ package org.eclipse.apoapsis.ortserver.workers.scanner import org.eclipse.apoapsis.ortserver.model.ScannerJobConfiguration +import org.eclipse.apoapsis.ortserver.model.SubmoduleFetchStrategy import org.eclipse.apoapsis.ortserver.workers.common.OrtServerFileListStorage import org.eclipse.apoapsis.ortserver.workers.common.context.WorkerContext import org.eclipse.apoapsis.ortserver.workers.common.mapToOrt @@ -33,6 +34,7 @@ import org.ossreviewtoolkit.model.PackageType import org.ossreviewtoolkit.model.Provenance import org.ossreviewtoolkit.model.ScannerRun import org.ossreviewtoolkit.model.SourceCodeOrigin +import org.ossreviewtoolkit.model.VcsType import org.ossreviewtoolkit.model.config.DownloaderConfiguration import org.ossreviewtoolkit.model.config.ScannerConfiguration import org.ossreviewtoolkit.model.utils.FileArchiver @@ -58,6 +60,24 @@ class ScannerRunner( ): OrtScannerResult { val pluginConfigs = context.resolvePluginConfigSecrets(config.config) + // If the submodule fetch strategy is set to TOP_LEVEL_ONLY, use a Git plugin config that prevents nested + // submodules from being fetched recursively. + val vcsPluginConfigs = if (config.submoduleFetchStrategy == SubmoduleFetchStrategy.TOP_LEVEL_ONLY) { + mapOf( + VcsType.GIT.toString() to PluginConfig( + options = mapOf("updateNestedSubmodules" to "false") + ) + ) + } else { + emptyMap() + } + + if (config.submoduleFetchStrategy == SubmoduleFetchStrategy.DISABLED) { + throw ScannerException( + "Scanner job configuration option SubmoduleFetchStrategy.DISABLED is not supported." 
+ ) + } + val packageProvenanceCache = PackageProvenanceCache() val packageProvenanceStorage = OrtServerPackageProvenanceStorage(db, scannerRunId, packageProvenanceCache) val nestedProvenanceStorage = OrtServerNestedProvenanceStorage(db, packageProvenanceCache) @@ -85,7 +105,8 @@ class ScannerRunner( ?: listOf(SourceCodeOrigin.ARTIFACT, SourceCodeOrigin.VCS) ) - val workingTreeCache = DefaultWorkingTreeCache() + val workingTreeCache = DefaultWorkingTreeCache().addVcsPluginConfigs(vcsPluginConfigs) + val provenanceDownloader = DefaultProvenanceDownloader(downloaderConfig, workingTreeCache) val packageProvenanceResolver = DefaultPackageProvenanceResolver( scanStorages.packageProvenanceStorage, From 33f456aa4484367b634837e04c39f4ba6d418811 Mon Sep 17 00:00:00 2001 From: Wolfgang Klenk Date: Tue, 20 May 2025 12:22:24 +0200 Subject: [PATCH 2/2] feat(scanner): Store VCS plugin config in NestedProvenanceStorage If particular VCS plugin configurations are active during a scan (like `submoduleFetchStrategy=TOP_LEVEL_ONLY`), ensure that VCS plugin configurations are stored alongside nested provenance data. This prevents reuse of cache entries across scans with differing VCS plugin settings, ensuring correctness and reliability of scan results. However, there remains a small risk that other dependencies are not fully resolved when the WorkingTreeCache is limited to the first level of submodules. This risk is rather small, because other open source dependencies typically don't use nested submodules at all. 
Signed-off-by: Wolfgang Klenk --- .../kotlin/tables/NestedProvenancesTable.kt | 6 +++ ...105__addNestedProvenancesConfiguration.sql | 2 + .../OrtServerNestedProvenanceStorage.kt | 10 ++++- .../src/main/kotlin/scanner/ScannerRunner.kt | 19 +++++++++- .../OrtServerNestedProvenanceStorageTest.kt | 12 +++++- .../src/test/kotlin/ScannerRunnerTest.kt | 38 +++++++++++++++++++ 6 files changed, 82 insertions(+), 5 deletions(-) create mode 100644 dao/src/main/resources/db/migration/V105__addNestedProvenancesConfiguration.sql diff --git a/dao/src/main/kotlin/tables/NestedProvenancesTable.kt b/dao/src/main/kotlin/tables/NestedProvenancesTable.kt index 296cee249d..d5bf8b85cc 100644 --- a/dao/src/main/kotlin/tables/NestedProvenancesTable.kt +++ b/dao/src/main/kotlin/tables/NestedProvenancesTable.kt @@ -35,6 +35,11 @@ object NestedProvenancesTable : LongIdTable("nested_provenances") { val rootResolvedRevision = text("root_resolved_revision") val hasOnlyFixedRevisions = bool("has_only_fixed_revisions") + + // If specific VCS plugin configurations are used, store a canonical string representation of these configuration + // options in this column. This ensures that results are only reused for scans with identical VCS plugin + // configurations. 
+ val vcsPluginConfigs = text("vcs_plugin_configs").nullable() } class NestedProvenanceDao(id: EntityID) : LongEntity(id) { @@ -44,6 +49,7 @@ class NestedProvenanceDao(id: EntityID) : LongEntity(id) { var rootResolvedRevision by NestedProvenancesTable.rootResolvedRevision var hasOnlyFixedRevisions by NestedProvenancesTable.hasOnlyFixedRevisions + var vcsPluginConfigs by NestedProvenancesTable.vcsPluginConfigs val packageProvenances by PackageProvenanceDao optionalReferrersOn PackageProvenancesTable.nestedProvenanceId val subRepositories by NestedProvenanceSubRepositoryDao referrersOn diff --git a/dao/src/main/resources/db/migration/V105__addNestedProvenancesConfiguration.sql b/dao/src/main/resources/db/migration/V105__addNestedProvenancesConfiguration.sql new file mode 100644 index 0000000000..95f8a1587d --- /dev/null +++ b/dao/src/main/resources/db/migration/V105__addNestedProvenancesConfiguration.sql @@ -0,0 +1,2 @@ +ALTER TABLE nested_provenances + ADD COLUMN vcs_plugin_configs text DEFAULT NULL; diff --git a/workers/scanner/src/main/kotlin/scanner/OrtServerNestedProvenanceStorage.kt b/workers/scanner/src/main/kotlin/scanner/OrtServerNestedProvenanceStorage.kt index 6fb0cd54e3..4e4b237ce0 100644 --- a/workers/scanner/src/main/kotlin/scanner/OrtServerNestedProvenanceStorage.kt +++ b/workers/scanner/src/main/kotlin/scanner/OrtServerNestedProvenanceStorage.kt @@ -42,7 +42,8 @@ import org.ossreviewtoolkit.utils.ort.runBlocking class OrtServerNestedProvenanceStorage( private val db: Database, - private val packageProvenanceCache: PackageProvenanceCache + private val packageProvenanceCache: PackageProvenanceCache, + private val vcsPluginConfigs: String? 
) : NestedProvenanceStorage { override fun writeNestedProvenance( root: RepositoryProvenance, @@ -64,6 +65,7 @@ class OrtServerNestedProvenanceStorage( rootVcs = vcsDao rootResolvedRevision = root.resolvedRevision hasOnlyFixedRevisions = result.hasOnlyFixedRevisions + vcsPluginConfigs = this@OrtServerNestedProvenanceStorage.vcsPluginConfigs } result.nestedProvenance.subRepositories.forEach { (path, repositoryProvenance) -> @@ -89,7 +91,11 @@ class OrtServerNestedProvenanceStorage( .where { VcsInfoTable.type eq resolvedVcs.type.name and (VcsInfoTable.url eq resolvedVcs.url) and - (VcsInfoTable.revision eq resolvedVcs.revision) + (VcsInfoTable.revision eq resolvedVcs.revision) and + ( + NestedProvenancesTable.vcsPluginConfigs eq + this@OrtServerNestedProvenanceStorage.vcsPluginConfigs + ) }.orderBy(NestedProvenancesTable.id to SortOrder.DESC) .limit(1) .singleOrNull() diff --git a/workers/scanner/src/main/kotlin/scanner/ScannerRunner.kt b/workers/scanner/src/main/kotlin/scanner/ScannerRunner.kt index d4d3eadc6c..45e50bc2c5 100644 --- a/workers/scanner/src/main/kotlin/scanner/ScannerRunner.kt +++ b/workers/scanner/src/main/kotlin/scanner/ScannerRunner.kt @@ -52,6 +52,18 @@ class ScannerRunner( private val fileArchiver: FileArchiver, private val fileListStorage: OrtServerFileListStorage ) { + companion object { + /** + * Convert the VCS plugin configurations to a canonical string representation. If there are no VCS plugin + * configurations, return null. 
+ */ + fun createCanonicalVcsPluginConfigs(vcsPluginConfigs: Map) = + vcsPluginConfigs.keys.sorted().joinToString(separator = "&") { vcs -> + vcsPluginConfigs[vcs]?.options.orEmpty() + .toSortedMap().entries.joinToString(separator = "&") { (key, value) -> "$vcs/$key/$value" } + }.ifEmpty { null } + } + suspend fun run( context: WorkerContext, ortResult: OrtResult, @@ -78,9 +90,14 @@ class ScannerRunner( ) } + val canonicalVcsPluginConfigs = createCanonicalVcsPluginConfigs(vcsPluginConfigs) val packageProvenanceCache = PackageProvenanceCache() val packageProvenanceStorage = OrtServerPackageProvenanceStorage(db, scannerRunId, packageProvenanceCache) - val nestedProvenanceStorage = OrtServerNestedProvenanceStorage(db, packageProvenanceCache) + val nestedProvenanceStorage = OrtServerNestedProvenanceStorage( + db, + packageProvenanceCache, + canonicalVcsPluginConfigs + ) val scanResultStorage = OrtServerScanResultStorage(db, scannerRunId) val scanStorages = ScanStorages( diff --git a/workers/scanner/src/test/kotlin/OrtServerNestedProvenanceStorageTest.kt b/workers/scanner/src/test/kotlin/OrtServerNestedProvenanceStorageTest.kt index 7564c3577f..91b6b4442b 100644 --- a/workers/scanner/src/test/kotlin/OrtServerNestedProvenanceStorageTest.kt +++ b/workers/scanner/src/test/kotlin/OrtServerNestedProvenanceStorageTest.kt @@ -64,7 +64,11 @@ class OrtServerNestedProvenanceStorageTest : WordSpec() { packageProvenanceCache = PackageProvenanceCache() packageProvenanceStorage = OrtServerPackageProvenanceStorage(dbExtension.db, scannerRun.id, packageProvenanceCache) - nestedProvenanceStorage = OrtServerNestedProvenanceStorage(dbExtension.db, packageProvenanceCache) + nestedProvenanceStorage = OrtServerNestedProvenanceStorage( + dbExtension.db, + packageProvenanceCache, + "" + ) packageProvenanceStorage.writeProvenance(id, vcsInfo, packageProvenance) } @@ -209,7 +213,11 @@ class OrtServerNestedProvenanceStorageTest : WordSpec() { packageProvenanceCache = PackageProvenanceCache() 
packageProvenanceStorage = OrtServerPackageProvenanceStorage(dbExtension.db, scannerRun.id, packageProvenanceCache) - nestedProvenanceStorage = OrtServerNestedProvenanceStorage(dbExtension.db, packageProvenanceCache) + nestedProvenanceStorage = OrtServerNestedProvenanceStorage( + dbExtension.db, + packageProvenanceCache, + "" + ) val subInfo1 = vcsInfo.copy(path = "sub1") val subProvenance1 = RepositoryProvenance(subInfo1, subInfo1.revision) diff --git a/workers/scanner/src/test/kotlin/ScannerRunnerTest.kt b/workers/scanner/src/test/kotlin/ScannerRunnerTest.kt index 93a6fafe8d..e6402dc0c7 100644 --- a/workers/scanner/src/test/kotlin/ScannerRunnerTest.kt +++ b/workers/scanner/src/test/kotlin/ScannerRunnerTest.kt @@ -44,6 +44,7 @@ import org.ossreviewtoolkit.model.OrtResult import org.ossreviewtoolkit.model.Provenance import org.ossreviewtoolkit.model.RepositoryProvenance import org.ossreviewtoolkit.model.config.ScannerConfiguration +import org.ossreviewtoolkit.plugins.api.PluginConfig as OrtPluginConfig import org.ossreviewtoolkit.scanner.LocalPathScannerWrapper import org.ossreviewtoolkit.scanner.Scanner import org.ossreviewtoolkit.scanner.ScannerWrapperFactory @@ -184,6 +185,43 @@ class ScannerRunnerTest : WordSpec({ result.issues shouldBe issuesMap } } + + "createCanonicalVcsPluginConfigs" should { + "return null if no VCS config plugins are used at all." { + val vcsPluginConfigs = emptyMap() + + val result = ScannerRunner.createCanonicalVcsPluginConfigs(vcsPluginConfigs) + + result shouldBe null + } + + "return a canonical string of VCS plugin configs." 
{ + val vcsPluginConfigs = mapOf( + "VCS-Z" to OrtPluginConfig( + options = mapOf( + "option-z" to "1", + "option-a" to "2" + ), + secrets = mapOf( + "some-secret" to "my-secret" + ) + ), + "VCS-A" to OrtPluginConfig( + options = mapOf( + "option-x" to "3", + "option-b" to "4" + ), + secrets = mapOf( + "some-secret" to "my-secret" + ) + ) + ) + + val result = ScannerRunner.createCanonicalVcsPluginConfigs(vcsPluginConfigs) + + result shouldBe "VCS-A/option-b/4&VCS-A/option-x/3&VCS-Z/option-a/2&VCS-Z/option-z/1" + } + } }) private fun mockScannerWrapperFactory(scannerName: String) =