nextflow-io · bentsherman · Apr 18, 2025 · Apr 18, 2025 · Apr 18, 2025
diff --git a/bin/fastqc.sh b/bin/fastqc.sh
@@ -1,6 +1,5 @@
 #!/usr/bin/env bash
-sample_id="$1"
-reads="$2"
+reads="$1"  # the FASTQC process passes both FASTQ paths as one space-separated argument
 
-mkdir fastqc_${sample_id}_logs
-fastqc -o fastqc_${sample_id}_logs -f fastq -q ${reads}
+mkdir fastqc  # fixed directory name; the FASTQC process collects it as path('fastqc')
+fastqc -o fastqc -f fastq -q ${reads}  # left unquoted so the shell splits $reads into separate file arguments
diff --git a/data/allreads.csv b/data/allreads.csv
@@ -0,0 +1,4 @@
+gut,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_gut_1.fq,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_gut_2.fq
+liver,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_liver_1.fq,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_liver_2.fq
+lung,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_lung_1.fq,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_lung_2.fq
+spleen,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_spleen_1.fq,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_spleen_2.fq
diff --git a/data/gut.csv b/data/gut.csv
@@ -0,0 +1 @@
+gut,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_gut_1.fq,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_gut_2.fq
diff --git a/main.nf b/main.nf
@@ -4,36 +4,85 @@
  * Proof of concept of a RNAseq pipeline implemented with Nextflow
  */
 
+nextflow.preview.output = true
+nextflow.preview.params = true
 
 /*
- * Default pipeline parameters. They can be overriden on the command line eg.
- * given `params.foo` specify on the run command line `--foo some_value`.
+ * import modules
  */
+include { RNASEQ } from './modules/rnaseq'
+include { MULTIQC } from './modules/multiqc'
 
-params.reads = "$baseDir/data/ggal/ggal_gut_{1,2}.fq"
-params.transcriptome = "$baseDir/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"
-params.outdir = "results"
-params.multiqc = "$baseDir/multiqc"
+/*
+ * Pipeline parameters can be overridden on the command line,
+ * e.g. `--reads myreads.csv --transcriptome myref.fa`.
+ */
+params {
+  /**
+   * CSV file of FASTQ pairs to analyze.
+   */
+  reads
 
+  /**
+   * FASTA file for the reference transcriptome.
+   */
+  transcriptome
 
-// import modules
-include { RNASEQ } from './modules/rnaseq'
-include { MULTIQC } from './modules/multiqc'
+  /**
+   * Directory containing the configuration for MultiQC.
+   */
+  multiqc = "$projectDir/multiqc"
+}
 
 /* 
  * main script flow
  */
 workflow {
+  main:
+  log.info """\
+      R N A S E Q - N F   P I P E L I N E
+      ===================================
+      transcriptome: ${params.transcriptome}
+      reads        : ${params.reads}
+      outdir       : ${workflow.outputDir}
+    """.stripIndent()
+  // Samplesheet rows are `id,fastq_1,fastq_2`; abort early if the CSV or any FASTQ is missing.
+  inputs_ch = channel.fromPath(params.reads, checkIfExists: true)
+    .splitCsv()
+    .map { id, fastq_1, fastq_2 ->
+      tuple(id, file(fastq_1, checkIfExists: true), file(fastq_2, checkIfExists: true))
+    }
+
+  RNASEQ( params.transcriptome, inputs_ch )
+  // Pair each sample's quant and fastqc results by id for the `samples` output.
+  rnaseq_ch = RNASEQ.out.quant
+    .join(RNASEQ.out.fastqc)
+    .map { id, quant, fastqc ->
+      [id: id, quant: quant, fastqc: fastqc]
+    }
+  // MultiQC takes every quant and fastqc result at once, without the sample ids.
+  multiqc_ch = RNASEQ.out.quant
+    .concat(RNASEQ.out.fastqc)
+    .map { _id, file -> file }
+    .collect()
+  MULTIQC( multiqc_ch, params.multiqc )
+
+  publish:
+  samples = rnaseq_ch
+  summary = MULTIQC.out
+}
+
+output {
+  samples {
+    path { sample ->
+      sample.quant >> "${sample.id}/"
+      sample.fastqc >> "${sample.id}/"
+    }
+    index {
+      path 'samples.json'
+    }
+  }
 
-log.info """\
-  R N A S E Q - N F   P I P E L I N E
-  ===================================
-  transcriptome: ${params.transcriptome}
-  reads        : ${params.reads}
-  outdir       : ${params.outdir}
-  """
-
-  read_pairs_ch = channel.fromFilePairs( params.reads, checkIfExists: true ) 
-  RNASEQ( params.transcriptome, read_pairs_ch )
-  MULTIQC( RNASEQ.out, params.multiqc )
+  summary {
+  }
 }
diff --git a/modules/fastqc/main.nf b/modules/fastqc/main.nf
@@ -1,18 +1,16 @@
-params.outdir = 'results'
 
 process FASTQC {
-    tag "FASTQC on $sample_id"
+    tag "$id"
     conda 'bioconda::fastqc=0.12.1'
-    publishDir params.outdir, mode:'copy'
 
     input:
-    tuple val(sample_id), path(reads)
+    tuple val(id), path(fastq_1), path(fastq_2) // sample id plus its two FASTQ files
 
     output:
-    path "fastqc_${sample_id}_logs", emit: logs
+    tuple val(id), path('fastqc') // the 'fastqc' directory created by fastqc.sh, keyed by sample id
 
     script:
     """
-    fastqc.sh "$sample_id" "$reads"
+    fastqc.sh "$fastq_1 $fastq_2"
     """
 }
diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf
@@ -1,8 +1,6 @@
-params.outdir = 'results'
 
 process MULTIQC {
     conda 'bioconda::multiqc=1.27.1'
-    publishDir params.outdir, mode:'copy'
 
     input:
     path '*'

diff --git a/modules/quant/main.nf b/modules/quant/main.nf
@@ -1,17 +1,17 @@
 
 process QUANT {
-    tag "$pair_id"
+    tag "$id"
     conda 'bioconda::salmon=1.10.3'
 
     input:
-    path index 
-    tuple val(pair_id), path(reads) 
+    path index // salmon index, passed to `-i` in the script
+    tuple val(id), path(fastq_1), path(fastq_2) // sample id plus its two FASTQ files (`-1` / `-2`)
 
     output:
-    path pair_id 
+    tuple val(id), path('quant') // the 'quant' directory written via `-o quant`, keyed by sample id
 
     script:
     """
-    salmon quant --threads $task.cpus --libType=U -i $index -1 ${reads[0]} -2 ${reads[1]} -o $pair_id
+    salmon quant --threads $task.cpus --libType=U -i $index -1 ${fastq_1} -2 ${fastq_2} -o quant
     """
 }
diff --git a/modules/rnaseq.nf b/modules/rnaseq.nf
@@ -1,19 +1,19 @@
-params.outdir = 'results'
 
 include { INDEX } from './index'
 include { QUANT } from './quant'
 include { FASTQC } from './fastqc'
 
 workflow RNASEQ {
-  take:
+    take:
     transcriptome
-    read_pairs_ch
- 
-  main: 
+    samples_ch // tuples of (id, fastq_1, fastq_2), forwarded unchanged to FASTQC and QUANT
+
+    main:
     INDEX(transcriptome)
-    FASTQC(read_pairs_ch)
-    QUANT(INDEX.out, read_pairs_ch)
+    FASTQC(samples_ch)
+    QUANT(INDEX.out, samples_ch)
 
-  emit: 
-     QUANT.out | concat(FASTQC.out) | collect
-}
+    emit:
+    quant = QUANT.out // tuples of (id, 'quant' directory)
+    fastqc = FASTQC.out // tuples of (id, 'fastqc' directory)
+}
diff --git a/nextflow.config b/nextflow.config
@@ -13,20 +13,18 @@
 manifest {
   description = 'Proof of concept of a RNA-seq pipeline implemented with Nextflow'
   author = 'Paolo Di Tommaso'
-  nextflowVersion = '>=23.10.0'
+  nextflowVersion = '>=25.04.0'
 }
 
 /*
- * default params
+ * params for default test data
  */
 
-params.outdir = "results"
-params.reads = "${projectDir}/data/ggal/ggal_gut_{1,2}.fq"
-params.transcriptome = "${projectDir}/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"
-params.multiqc = "${projectDir}/multiqc"
+params.reads = "${projectDir}/data/gut.csv"
+params.transcriptome = "https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"
 
 /*
- * defines execution profiles for different environments
+ * execution profiles for different environments
  */
 
 profiles {
@@ -35,7 +33,7 @@ profiles {
   }
 
   'all-reads' {
-    params.reads = "${projectDir}/data/ggal/ggal_*_{1,2}.fq"
+    params.reads = "${projectDir}/data/allreads.csv"
   }
 
   'arm64' {
@@ -86,8 +84,6 @@ profiles {
   }
 
   'batch' {
-    params.reads = 's3://rnaseq-nf/data/ggal/lung_{1,2}.fq'
-    params.transcriptome = 's3://rnaseq-nf/data/ggal/transcript.fa'
     process.container = 'docker.io/nextflow/rnaseq-nf:v1.3.1'
     process.executor = 'awsbatch'
     process.queue = 'nextflow-ci'
@@ -96,28 +92,14 @@ profiles {
     aws.batch.cliPath = '/home/ec2-user/miniconda/bin/aws'
   }
 
-  's3-data' {
-    process.container = 'docker.io/nextflow/rnaseq-nf:v1.3.1'
-    params.reads = 's3://rnaseq-nf/data/ggal/lung_{1,2}.fq'
-    params.transcriptome = 's3://rnaseq-nf/data/ggal/transcript.fa'
-  }
-
   'google-batch' {
-      params.transcriptome = 'gs://rnaseq-nf/data/ggal/transcript.fa'
-      params.reads = 'gs://rnaseq-nf/data/ggal/gut_{1,2}.fq'
       params.multiqc = 'gs://rnaseq-nf/multiqc'
       process.executor = 'google-batch'
       process.container = 'docker.io/nextflow/rnaseq-nf:v1.3.1'
       workDir = 'gs://rnaseq-nf/scratch' // <- replace with your own bucket!
       google.region  = 'europe-west2'
   }
 
-  'gs-data' {
-      process.container = 'docker.io/nextflow/rnaseq-nf:v1.3.1'
-      params.transcriptome = 'gs://rnaseq-nf/data/ggal/transcript.fa'
-      params.reads = 'gs://rnaseq-nf/data/ggal/gut_{1,2}.fq'
-  }
-
   'azure-batch' {
     process.container = 'docker.io/nextflow/rnaseq-nf:v1.3.1'
     workDir = 'az://nf-scratch/work'
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		gut,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_gut_1.fq,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_gut_2.fq