Skip to content

Commit e40ada3

Browse files
committed
feat(downloader): Add a sanity check on the MIME type for files that cannot be unpacked
Signed-off-by: Sebastian Schuberth <sebastian@doubleopen.org>
1 parent 5e6ea82 commit e40ada3

2 files changed

Lines changed: 9 additions & 0 deletions

File tree

downloader/build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ dependencies {
2929
implementation(projects.utils.ortUtils)
3030

3131
implementation(libs.kotlinx.coroutines)
32+
implementation(libs.tika)
3233

3334
funTestImplementation(platform(projects.plugins.versionControlSystems))
3435

downloader/src/main/kotlin/Downloader.kt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import java.net.URI
2727
import kotlin.time.TimeSource
2828

2929
import org.apache.logging.log4j.kotlin.logger
30+
import org.apache.tika.Tika
3031

3132
import org.ossreviewtoolkit.model.ArtifactProvenance
3233
import org.ossreviewtoolkit.model.HashAlgorithm
@@ -306,6 +307,7 @@ class Downloader(private val config: DownloaderConfiguration) {
306307
* happens but the source code is only checked to be available. An [ArtifactProvenance] is returned on success or a
307308
* [DownloadException] is thrown in case of failure.
308309
*/
310+
@Suppress("ThrowsCount")
309311
fun downloadSourceArtifact(
310312
sourceArtifact: RemoteArtifact,
311313
outputDirectory: File,
@@ -385,6 +387,12 @@ class Downloader(private val config: DownloaderConfiguration) {
385387
"Could not unpack source artifact '${sourceArchive.absolutePath}': ${e.collectMessages()}"
386388
}
387389

390+
val mimeType = Tika().detect(sourceArchive)
391+
val isSourceCodeFile = (mimeType.startsWith("text/") && !mimeType.endsWith("html"))
392+
|| (mimeType.startsWith("application/") && mimeType.endsWith("script"))
393+
394+
if (!isSourceCodeFile) throw DownloadException("The artifact does not seem to be a source code file", e)
395+
388396
logger.info {
389397
"Treating '${sourceArchive.absolutePath}' as a source code file."
390398
}

0 commit comments

Comments
 (0)