Skip to content

Commit f068fce

Browse files
committed
feat(scanner): Add submodule fetch strategy for nested repositories
Introduce `submoduleFetchStrategy` config to control how the Scanner fetches Git submodules. When set to `TOP_LEVEL_ONLY`, only top-level submodules are fetched, avoiding timeouts on deeply nested repos. This mirrors the behavior already available in the Analyzer and makes it possible to resolve nested provenances even in repositories with a vast number of nested submodules. Signed-off-by: Wolfgang Klenk <[email protected]>
1 parent 05e5b2b commit f068fce

File tree

4 files changed

+36
-5
lines changed

4 files changed

+36
-5
lines changed

api/v1/mapping/src/commonMain/kotlin/Mappings.kt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -578,7 +578,8 @@ fun ScannerJobConfiguration.mapToApi() = ApiScannerJobConfiguration(
578578
skipExcluded,
579579
sourceCodeOrigins?.map { it.mapToApi() },
580580
config?.mapValues { it.value.mapToApi() },
581-
keepAliveWorker
581+
keepAliveWorker,
582+
submoduleFetchStrategy.mapToApi()
582583
)
583584

584585
fun ApiScannerJobConfiguration.mapToModel() = ScannerJobConfiguration(
@@ -590,7 +591,8 @@ fun ApiScannerJobConfiguration.mapToModel() = ScannerJobConfiguration(
590591
skipExcluded,
591592
sourceCodeOrigins?.map { it.mapToModel() },
592593
config?.mapValues { it.value.mapToModel() },
593-
keepAliveWorker
594+
keepAliveWorker,
595+
submoduleFetchStrategy.mapToModel()
594596
)
595597

596598
/**
 * Map this [Secret] to an [ApiSecret], passing on its name and description.
 */
fun Secret.mapToApi(): ApiSecret {
    return ApiSecret(name, description)
}

api/v1/model/src/commonMain/kotlin/JobConfigurations.kt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,14 @@ data class ScannerJobConfiguration(
187187
* Keep the worker alive after it has finished. This is useful for manual problem analysis directly
188188
* within the pod's execution environment.
189189
*/
190-
val keepAliveWorker: Boolean = false
190+
val keepAliveWorker: Boolean = false,
191+
192+
/**
193+
* Specifies how submodules are fetched when resolving provenances. Currently supported only for Git repositories.
194+
* If set to [SubmoduleFetchStrategy.FULLY_RECURSIVE] (default), all submodules are fetched recursively. If set
195+
* to [SubmoduleFetchStrategy.TOP_LEVEL_ONLY], only the top-level submodules are fetched.
196+
*/
197+
val submoduleFetchStrategy: SubmoduleFetchStrategy = SubmoduleFetchStrategy.FULLY_RECURSIVE
191198
)
192199

193200
/**

model/src/commonMain/kotlin/JobConfigurations.kt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,14 @@ data class ScannerJobConfiguration(
196196
* Keep the worker alive after it has finished. This is useful for manual problem analysis directly
197197
* within the pod's execution environment.
198198
*/
199-
val keepAliveWorker: Boolean = false
199+
val keepAliveWorker: Boolean = false,
200+
201+
/**
202+
* Specifies how submodules are fetched when resolving provenances. Currently supported only for Git repositories.
203+
* If set to [SubmoduleFetchStrategy.FULLY_RECURSIVE] (default), all submodules are fetched recursively. If set
204+
* to [SubmoduleFetchStrategy.TOP_LEVEL_ONLY], only the top-level submodules are fetched.
205+
*/
206+
val submoduleFetchStrategy: SubmoduleFetchStrategy = SubmoduleFetchStrategy.FULLY_RECURSIVE
200207
)
201208

202209
/**

workers/scanner/src/main/kotlin/scanner/ScannerRunner.kt

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
package org.eclipse.apoapsis.ortserver.workers.scanner
2121

2222
import org.eclipse.apoapsis.ortserver.model.ScannerJobConfiguration
23+
import org.eclipse.apoapsis.ortserver.model.SubmoduleFetchStrategy
2324
import org.eclipse.apoapsis.ortserver.workers.common.OrtServerFileListStorage
2425
import org.eclipse.apoapsis.ortserver.workers.common.context.WorkerContext
2526
import org.eclipse.apoapsis.ortserver.workers.common.mapToOrt
@@ -33,6 +34,7 @@ import org.ossreviewtoolkit.model.PackageType
3334
import org.ossreviewtoolkit.model.Provenance
3435
import org.ossreviewtoolkit.model.ScannerRun
3536
import org.ossreviewtoolkit.model.SourceCodeOrigin
37+
import org.ossreviewtoolkit.model.VcsType
3638
import org.ossreviewtoolkit.model.config.DownloaderConfiguration
3739
import org.ossreviewtoolkit.model.config.ScannerConfiguration
3840
import org.ossreviewtoolkit.model.utils.FileArchiver
@@ -85,7 +87,20 @@ class ScannerRunner(
8587
?: listOf(SourceCodeOrigin.ARTIFACT, SourceCodeOrigin.VCS)
8688
)
8789

88-
val workingTreeCache = DefaultWorkingTreeCache()
90+
// If the submodule fetch strategy is set to TOP_LEVEL_ONLY, use a Git plugin config that prevents submodules
91+
// from being fetched recursively.
92+
val vcsPluginConfigs = if (config.submoduleFetchStrategy == SubmoduleFetchStrategy.TOP_LEVEL_ONLY) {
93+
mapOf(
94+
VcsType.GIT.toString() to PluginConfig(
95+
options = mapOf("updateNestedSubmodules" to "false")
96+
)
97+
)
98+
} else {
99+
emptyMap()
100+
}
101+
102+
val workingTreeCache = DefaultWorkingTreeCache().addVcsPluginConfigs(vcsPluginConfigs)
103+
89104
val provenanceDownloader = DefaultProvenanceDownloader(downloaderConfig, workingTreeCache)
90105
val packageProvenanceResolver = DefaultPackageProvenanceResolver(
91106
scanStorages.packageProvenanceStorage,

0 commit comments

Comments
 (0)