nf-core
diff --git a/‎CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎conf/igenomes.config‎
Lines changed: 38 additions & 0 deletions b/‎conf/igenomes.config‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎docs/usage.md‎
Lines changed: 22 additions & 0 deletions b/‎docs/usage.md‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎modules/local/star_genomeparams_upgrade/environment.yml‎
Lines changed: 7 additions & 0 deletions b/‎modules/local/star_genomeparams_upgrade/environment.yml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎modules/local/star_genomeparams_upgrade/main.nf‎
Lines changed: 58 additions & 0 deletions b/‎modules/local/star_genomeparams_upgrade/main.nf‎
Lines changed: 58 additions & 0 deletions
diff --git a/‎modules/local/star_genomeparams_upgrade/meta.yml‎
Lines changed: 80 additions & 0 deletions b/‎modules/local/star_genomeparams_upgrade/meta.yml‎
Lines changed: 80 additions & 0 deletions
diff --git a/‎modules/local/star_genomeparams_upgrade/tests/main.nf.test‎
Lines changed: 145 additions & 0 deletions b/‎modules/local/star_genomeparams_upgrade/tests/main.nf.test‎
Lines changed: 145 additions & 0 deletions
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Address [#512](https://github.com/nf-core/scrnaseq/issues/512), adding early validation of the cellranger multi barcode sheet ([#513](https://github.com/nf-core/scrnaseq/pull/513))
 - Update `nf-core/cellranger` modules to Cell Ranger `10.0.0`, including output channel handling for multiplexed experiments ([#508](https://github.com/nf-core/scrnaseq/pull/508))
 - Replace **alevinqc** with **qcatch** for simpleaf QC; add `--skip_qcatch` parameter ([#520](https://github.com/nf-core/scrnaseq/pull/520))
+- Add legacy STAR iGenomes index compatibility (rnaseq-style `star_legacy` handling and `STAR_GENOMEPARAMS_UPGRADE` to rewrite STAR 2.6.x `genomeParameters.txt` metadata for modern STAR) ([#552](https://github.com/nf-core/scrnaseq/pull/552))
 
 ### Chore
 
 
@@ -378,6 +378,28 @@ sample,multiplexed_sample_id,probe_barcode_ids,cmo_ids,ocm_ids,description
 
 > You must provide the barcodes CSV with the `--cellranger_multi_barcodes` parameter.
 
+## Reference genome options
+
+The pipeline can resolve reference files from `conf/igenomes.config` when you provide `--genome`, for example `--genome GRCh38`. These entries may include pre-built aligner indices such as STAR indices, depending on the selected genome.
+
+Some AWS iGenomes STAR indices were generated with older STAR versions and contain legacy metadata. nf-core/scrnaseq includes a compatibility step for these configured iGenomes entries so that legacy STAR indices can run with the STAR version shipped by the pipeline. This support is intended to keep existing iGenomes usage working, not to make legacy indices the preferred reference for new analyses.
+
+> [!WARNING]
+> For production runs, we recommend building fresh indices from current reference files instead of relying on legacy AWS iGenomes indices. The nf-core [reference genome documentation](https://nf-co.re/docs/running/reference-genomes) warns that AWS iGenomes annotations are significantly outdated, for example human annotations from Ensembl release 75, and that GRCh38 iGenomes uses the NCBI assembly rather than the masked Ensembl assembly.
+
+To generate and keep a STAR index for future runs, provide current FASTA and GTF files and set `--save_reference`:
+
+```bash
+nextflow run nf-core/scrnaseq \
+    --input samplesheet.csv \
+    --outdir results \
+    --aligner star \
+    --fasta reference.fa.gz \
+    --gtf annotation.gtf.gz \
+    --save_reference \
+    -profile docker
+```
+
 ## Running the pipeline
 
 The minimum typical command for running the pipeline is as follows:
 
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::gawk=5.3.1
@@ -0,0 +1,58 @@
+/*
+ * STAR_GENOMEPARAMS_UPGRADE
+ *
+ * Builds a `star/` directory that mirrors a pre-built STAR index: every entry of the
+ * staged input directory is symlinked, except `genomeParameters.txt`, which is
+ * rewritten by awk. If that file declares `versionGenome 20201`, the value becomes
+ * `2.7.4a` and whichever of `genomeType`, `genomeTransformType` and
+ * `genomeTransformVCF` are absent get appended with the values `Full`, `None` and `-`.
+ * Files with any other `versionGenome` value are written out line by line unchanged.
+ *
+ * Input:  tuple (meta, index)  - index directory, staged as `input_index`
+ * Output: index                - tuple (meta, `star` directory)
+ *         versions_gawk        - process name, 'gawk' and the awk version string,
+ *                                also published on the `versions` topic
+ */
+process STAR_GENOMEPARAMS_UPGRADE {
+    tag "${meta.id ?: index.name}"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a1/a125c778baf3865331101a104b60d249ee15fe1dca13bdafd888926cc5490a34/data' :
+        'community.wave.seqera.io/library/gawk:5.3.1--e09efb5dfc4b8156' }"
+
+    input:
+    // Staged under a fixed name so the script can always refer to `input_index`.
+    tuple val(meta), path(index, stageAs: 'input_index')
+
+    output:
+    tuple val(meta), path('star'), emit: index
+    tuple val("${task.process}"), val('gawk'), eval("awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//'"), topic: versions, emit: versions_gawk
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    # Fail early with a clear message rather than a bare awk "cannot open file" error
+    # after the symlinks have already been created.
+    if [ ! -f input_index/genomeParameters.txt ]; then
+        echo "ERROR: input_index/genomeParameters.txt not found; the staged directory is not a STAR index" >&2
+        exit 1
+    fi
+
+    # Link every entry of the input index except genomeParameters.txt, which is
+    # regenerated below. Links point at the resolved target, not at the staged link.
+    mkdir -p star
+    for f in input_index/*; do
+        name=\$(basename "\$f")
+        if [ "\$name" = "genomeParameters.txt" ]; then
+            continue
+        fi
+        ln -s "\$(readlink -f "\$f")" "star/\$name"
+    done
+
+    # Rewrite the metadata: replace the legacy versionGenome value and, only when that
+    # replacement happened, append whichever of the three newer keys are missing.
+    awk -F'\\t' -v OFS='\\t' '
+        \$1 == "versionGenome" && \$2 == "20201" {
+            print "versionGenome", "2.7.4a"
+            seen_upgraded = 1
+            next
+        }
+        \$1 == "genomeType"          { seen_genomeType = 1 }
+        \$1 == "genomeTransformType" { seen_transformType = 1 }
+        \$1 == "genomeTransformVCF"  { seen_transformVCF = 1 }
+        { print }
+        END {
+            if (seen_upgraded) {
+                if (!seen_genomeType)    print "genomeType", "Full"
+                if (!seen_transformType) print "genomeTransformType", "None"
+                if (!seen_transformVCF)  print "genomeTransformVCF", "-"
+            }
+        }
+    ' "input_index/genomeParameters.txt" > star/genomeParameters.txt
+    """
+
+    stub:
+    """
+    # Stub mode links every entry, genomeParameters.txt included, without rewriting it.
+    mkdir -p star
+    for f in input_index/*; do
+        # An unmatched glob stays literal; skip it instead of creating a dangling link.
+        [ -e "\$f" ] || [ -L "\$f" ] || continue
+        ln -s "\$(readlink -f "\$f")" "star/\$(basename "\$f")"
+    done
+    """
+}
@@ -0,0 +1,80 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "star_genomeparams_upgrade"
+description: |
+  Upgrade STAR 2.6.x `genomeParameters.txt` metadata into the 2.7.4a-compatible schema. The
+  on-disk binary index files (Genome, SA, SAindex, etc.) did not change between
+  STAR 2.6.x and 2.7.4a; only the metadata schema in `genomeParameters.txt` differs. This
+  process symlinks every other file from the input index unchanged and writes a patched copy
+  of `genomeParameters.txt` next to them, so the upgraded index can be loaded by modern STAR.
+
+  Idempotent: indices already at `versionGenome 2.7.4a` (or any tag other than `20201`) stream
+  through unmodified. Targets the AWS iGenomes STAR indices in particular, which all carry
+  `versionGenome 20201` and lack the `genomeType` / `genomeTransformType` / `genomeTransformVCF`
+  fields written by STAR 2.7.4a+.
+keywords:
+  - star
+  - igenomes
+  - scrnaseq
+  - index
+  - compatibility
+tools:
+  - "gawk":
+      description: "GNU awk - used to rewrite genomeParameters.txt metadata"
+      homepage: "https://www.gnu.org/software/gawk/"
+      documentation: "https://www.gnu.org/software/gawk/manual/"
+      licence:
+        - "GPL v3"
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing index information
+          e.g. [ id:'star_index' ]
+    - index:
+        type: directory
+        description: |
+          Pre-built STAR index directory (typically a STAR 2.6.x-built iGenomes index). Must
+          contain a `genomeParameters.txt` file alongside the binary index files.
+        pattern: "*"
+        ontologies: []
+output:
+  index:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing index information
+            e.g. [ id:'star_index' ]
+      - star:
+          type: directory
+          description: |
+            Upgraded STAR index directory. Contains symlinks to every file from the input
+            index other than `genomeParameters.txt`, plus a patched `genomeParameters.txt`
+            rewritten into the 2.7.4a schema.
+          pattern: "star"
+          ontologies: []
+  versions_gawk:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - gawk:
+          type: string
+          description: The name of the tool
+      - awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//':
+          type: eval
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - gawk:
+          type: string
+          description: The name of the tool
+      - awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//':
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@pinin4fjords"
+maintainers:
+  - "@pinin4fjords"
@@ -0,0 +1,145 @@
+nextflow_process {
+
+    name "Test Process STAR_GENOMEPARAMS_UPGRADE"
+    script "../main.nf"
+    process "STAR_GENOMEPARAMS_UPGRADE"
+
+    // Builds an index with STAR_GENOMEGENERATE under the legacy-index config, runs the
+    // upgrade process on it, and checks the rewritten genomeParameters.txt.
+    test("upgrade legacy 2.6.1d-style index") {
+
+        // NOTE(review): presumably pins STAR_GENOMEGENERATE to a legacy STAR build; the
+        // config file is not visible here - confirm.
+        config "./nextflow.legacy_index.config"
+
+        setup {
+            run("STAR_GENOMEGENERATE") {
+                script "../../../../modules/nf-core/star/genomegenerate/main.nf"
+                process {
+                    """
+                    input[0] = channel.of([
+                        [ id:'test_fasta' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
+                    ])
+                    input[1] = channel.of([
+                        [ id:'test_gtf' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                    ])
+                    """
+                }
+            }
+        }
+
+        when {
+            process {
+                """
+                input[0] = STAR_GENOMEGENERATE.out.index.map { _meta, idx -> [ [ id:'star_index' ], idx ] }
+                """
+            }
+        }
+
+        then {
+            // Assert success before touching the outputs: if the process failed, indexing
+            // into `process.out.index` would throw and hide the real failure.
+            assert process.success
+            def patched = path("${process.out.index[0][1]}/genomeParameters.txt").text
+            assertAll(
+                { assert patched.contains('versionGenome\t2.7.4a') },
+                { assert patched.contains('genomeType\tFull') },
+                { assert patched.contains('genomeTransformType\tNone') },
+                { assert patched.contains('genomeTransformVCF\t-') },
+                { assert !patched.contains('versionGenome\t20201') },
+                // Snapshot only the sorted file names of the output directory.
+                { assert snapshot(
+                    process.out.index.collect { meta, idx -> [ meta, file(idx).list().sort() ] }
+                ).match() }
+            )
+        }
+
+        cleanup {
+            new File("${launchDir}").deleteDir()
+        }
+    }
+
+    test("idempotent on modern 2.7.x index") {
+
+        // STAR_GENOMEGENERATE here uses the default modern container (no legacy config),
+        // so the produced index is already at versionGenome 2.7.4a; the upgrade process
+        // should pass it through unchanged.
+
+        setup {
+            run("STAR_GENOMEGENERATE") {
+                script "../../../../modules/nf-core/star/genomegenerate/main.nf"
+                process {
+                    """
+                    input[0] = channel.of([
+                        [ id:'test_fasta' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
+                    ])
+                    input[1] = channel.of([
+                        [ id:'test_gtf' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                    ])
+                    """
+                }
+            }
+        }
+
+        when {
+            process {
+                """
+                input[0] = STAR_GENOMEGENERATE.out.index.map { _meta, idx -> [ [ id:'star_index' ], idx ] }
+                """
+            }
+        }
+
+        then {
+            // Assert success before touching the outputs: if the process failed, indexing
+            // into `process.out.index` would throw and hide the real failure.
+            assert process.success
+            def patched = path("${process.out.index[0][1]}/genomeParameters.txt").text
+            assertAll(
+                { assert patched.contains('versionGenome\t2.7.4a') },
+                { assert !patched.contains('versionGenome\t20201') }
+            )
+        }
+
+        cleanup {
+            new File("${launchDir}").deleteDir()
+        }
+    }
+
+    // Stub-mode run of the same pipeline as the legacy-index test: both the setup
+    // process and the process under test execute with `-stub`, and the complete
+    // process output is compared against the stored snapshot.
+    test("upgrade legacy index - stub") {
+
+        options "-stub"
+        // NOTE(review): config file is not visible here; presumably selects the legacy
+        // STAR build for STAR_GENOMEGENERATE - confirm.
+        config "./nextflow.legacy_index.config"
+
+        setup {
+            // Produces the index directory that is fed to the process under test.
+            run("STAR_GENOMEGENERATE") {
+                script "../../../../modules/nf-core/star/genomegenerate/main.nf"
+                process {
+                    """
+                    input[0] = channel.of([
+                        [ id:'test_fasta' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
+                    ])
+                    input[1] = channel.of([
+                        [ id:'test_gtf' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                    ])
+                    """
+                }
+            }
+        }
+
+        when {
+            process {
+                // Replace the upstream meta map with a fixed id before passing the index on.
+                """
+                input[0] = STAR_GENOMEGENERATE.out.index.map { _meta, idx -> [ [ id:'star_index' ], idx ] }
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+        cleanup {
+            // Removes the test launch directory once the test has finished.
+            new File("${launchDir}").deleteDir()
+        }
+    }
+}