nf-core
diff --git a/‎.github/workflows/awsfulltest.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/awsfulltest.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/nf-test.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/nf-test.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.nf-core.yml‎
Lines changed: 1 addition & 1 deletion b/‎.nf-core.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎CHANGELOG.md‎
Lines changed: 10 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎assets/multiqc_config.yml‎
Lines changed: 1 addition & 4 deletions b/‎assets/multiqc_config.yml‎
Lines changed: 1 addition & 4 deletions
diff --git a/‎assets/schema_input.json‎
Lines changed: 4 additions & 0 deletions b/‎assets/schema_input.json‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎bin/vcf_collect.py‎
Lines changed: 8 additions & 12 deletions b/‎bin/vcf_collect.py‎
Lines changed: 8 additions & 12 deletions
diff --git a/‎docs/usage.md‎
Lines changed: 2 additions & 2 deletions b/‎docs/usage.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎modules.json‎
Lines changed: 5 additions & 0 deletions b/‎modules.json‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎modules/local/vcf_collect/tests/main.nf.test.snap‎
Lines changed: 3 additions & 3 deletions b/‎modules/local/vcf_collect/tests/main.nf.test.snap‎
Lines changed: 3 additions & 3 deletions
@@ -33,7 +33,7 @@ jobs:
           parameters: |
             {
               "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}",
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ steps.revision.outputs.revision }}"
+              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ steps.revision.outputs.revision }}",
               "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/references",
               "tools": "all",
               "no_cosmic": true
 
@@ -64,7 +64,7 @@ jobs:
     runs-on: # use self-hosted runners
       - runs-on=${{ github.run_id }}-nf-test
       - runner=4cpu-linux-x64
-      - disk=large
+      - volume=80gb
     strategy:
       fail-fast: false
       matrix:
 
@@ -16,4 +16,4 @@ template:
   name: rnafusion
   org: nf-core
   outdir: .
-  version: 4.0.0
+  version: 4.1.0
@@ -3,6 +3,16 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## v4.1.0 - [2026-02-09]
+
+### Added
+
+- Added a BAM/CRAM to FASTQ conversion step to run tools that depend on FASTQ files. It is still advised to supply FASTQ files where possible to reduce the runtime of the pipeline. [#783](https://github.com/nf-core/rnafusion/pull/783)
+
+### Fixed
+
+- Fix issue with vcf_collect when no fusions are found by fusioninspector [#786](https://github.com/nf-core/rnafusion/pull/786)
+
 ## v4.0.0 - [2025-09-10]
 
 ### Added
 
@@ -1,8 +1,5 @@
 report_comment: >
-  This report has been generated by the <a href="https://github.com/nf-core/rnafusion/releases/tag/4.0.0"
-  target="_blank">nf-core/rnafusion</a> analysis pipeline. For information about how
-  to interpret these results, please see the <a href="https://nf-co.re/rnafusion/4.0.0/docs/output"
-  target="_blank">documentation</a>.
+  This report has been generated by the <a href="https://github.com/nf-core/rnafusion/releases/tag/4.1.0" target="_blank">nf-core/rnafusion</a> analysis pipeline. For information about how to interpret these results, please see the <a href="https://nf-co.re/rnafusion/4.1.0/docs/output" target="_blank">documentation</a>.
 
 report_section_order:
   nf-core-rnafusion-methods-description:
 
@@ -32,6 +32,7 @@
                 "format": "file-path",
                 "exists": true,
                 "pattern": "^\\S+\\.bam$",
+                "description": "BAM file created with STAR. This file should be created from paired-end reads and junctions should be written out separately.",
                 "errorMessage": "BAM file cannot contain spaces, has to exist and must have extension '.bam'"
             },
             "bai": {
@@ -46,6 +47,7 @@
                 "format": "file-path",
                 "exists": true,
                 "pattern": "^\\S+\\.cram$",
+                "description": "CRAM file created with STAR. This file should be created from paired-end reads and junctions should be written out separately.",
                 "errorMessage": "CRAM file cannot contain spaces, has to exist and must have extension '.cram'"
             },
             "crai": {
@@ -60,13 +62,15 @@
                 "format": "file-path",
                 "exists": true,
                 "pattern": "^\\S+\\.junction$",
+                "description": "Junctions file created with STAR. This file should be created from paired-end reads.",
                 "errorMessage": "Junctions file cannot contain spaces, has to exist and must have extension '.junction'"
             },
             "splice_junctions": {
                 "type": "string",
                 "format": "file-path",
                 "exists": true,
                 "pattern": "^\\S+\\.SJ.out.tab$",
+                "description": "Splice junctions file created with STAR. This file should be created from paired-end reads.",
                 "errorMessage": "Split junctions file cannot contain spaces, has to exist and must have extension '.SJ.out.tab'"
             },
             "strandedness": {
 
@@ -84,16 +84,14 @@ def vcf_collect(
     all_df = df.merge(
         gtf_df, how="left", left_on="CDS_LEFT_ID", right_on="Transcript_id"
     )
-    all_df[["PosA", "orig_start", "orig_end"]] = (
-        all_df[["PosA", "orig_start", "orig_end"]].fillna(0).astype(int)
-    )
 
     all_df = all_df[
         (
             (all_df["PosA"] >= all_df["orig_start"])
             & (all_df["PosA"] <= all_df["orig_end"])
         )
-        | ((all_df["orig_start"] == 0) & (all_df["orig_end"] == 0))
+        | ((all_df["orig_start"].isna()) & (all_df["orig_end"].isna()))
+        | (all_df["PosA"].isna())
     ]
 
     all_df["Left_transcript_version"] = all_df["CDS_LEFT_ID"].astype(str).str.split(".").str[-1]
@@ -145,21 +143,16 @@ def vcf_collect(
     all_df = all_df.merge(
         gtf_df, how="left", left_on="CDS_RIGHT_ID", right_on="Transcript_id"
     )
-    all_df[["PosB", "orig_start", "orig_end"]] = all_df[
-        ["PosB", "orig_start", "orig_end"]
-    ].fillna(0)
-    all_df[["PosB", "orig_start", "orig_end"]] = all_df[
-        ["PosB", "orig_start", "orig_end"]
-    ].astype(int)
+
     all_df = all_df[
         (
             (all_df["PosB"] >= all_df["orig_start"])
             & (all_df["PosB"] <= all_df["orig_end"])
         )
-        | ((all_df["orig_start"] == 0) & (all_df["orig_end"] == 0))
+        | ((all_df["orig_start"].isna()) & (all_df["orig_end"].isna()))
+        | (all_df["PosB"].isna())
     ]
 
-    all_df[["PosA", "PosB"]] = all_df[["PosA", "PosB"]].replace(0, np.nan)
     all_df = all_df.replace("", np.nan)
 
     all_df["Right_transcript_version"] = all_df["CDS_RIGHT_ID"].astype(str).str.split(".").str[-1]
@@ -213,6 +206,9 @@ def vcf_collect(
 
     all_df = all_df.combine_first(read_fusionreport_csv(fusionreport_csv))
 
+    # Filter out invalid entries with missing positional values
+    all_df = all_df[all_df["PosA"].notna() & all_df["PosB"].notna() & all_df["ChromosomeA"].notna() & all_df["ChromosomeB"].notna()]
+
     return write_vcf(column_manipulation(all_df), header_def(sample), out_file)
 
 
 
@@ -140,8 +140,8 @@ TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,,forward
 | ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- |
 | `sample`           | Custom sample name. This value needs to be unique across all entries in the samplesheet and cannot contain spaces                                                                                                                                                                                                                                                                                                                             | ✅       |
 | `strandedness`     | Strandedness: forward or reverse.                                                                                                                                                                                                                                                                                                                                                                                                             | ✅       |
-| `fastq_1`          | Full path to FastQ file for Illumina short reads 1. File must exist, has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". It's recommended to always provide the FASTQ files because the pipeline will be able to create any missing files from these. The FASTQ files are required to run `salmon`, `fusioninspector` and `fusioncatcher`.                                                                                      | ❓       |
-| `fastq_2`          | Full path to FastQ file for Illumina short reads 2. File must exist, has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". It's recommended to always provide the FASTQ files because the pipeline will be able to create any missing files from these. The FASTQ files are required to run `salmon`, `fusioninspector` and `fusioncatcher`".                                                                                     | ❓       |
+| `fastq_1`          | Full path to FastQ file for Illumina short reads 1. File must exist, has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". It's recommended to always provide the FASTQ files because the pipeline will be able to create any missing files from these. This should be supplied to let the pipeline run faster, but they can be automatically created from the available BAM or CRAM files.                                       | ❓       |
+| `fastq_2`          | Full path to FastQ file for Illumina short reads 2. File must exist, has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". It's recommended to always provide the FASTQ files because the pipeline will be able to create any missing files from these. This should be supplied to let the pipeline run faster, but they can be automatically created from the available BAM or CRAM files.                                       | ❓       |
 | `bam`              | Full path to the BAM file created with STAR. File has to exist and must have the extension ".bam". It's the responsibility of the pipeline user to make sure this file has been correctly created, see the [prepare chapter](#preparing-bamcramjunctionssplice_junctions) for more information. The BAM file is required to run `ctatsplicing`, `stringtie`, `fusioninspector` and `arriba` when the `fastq_1` and `cram` fields are empty.   | ❓       |
 | `bai`              | Full path to the index of the BAM file. File has to exist and must have the extension ".bai".                                                                                                                                                                                                                                                                                                                                                 | ❌       |
 | `cram`             | Full path to the CRAM file created with STAR. File has to exist and must have the extension ".cram". It's the responsibility of the pipeline user to make sure this file has been correctly created, see the [prepare chapter](#preparing-bamcramjunctionssplice_junctions) for more information. The CRAM file is required to run `ctatsplicing`, `stringtie`, `fusioninspector` and `arriba` when the `fastq_1` and `bam` fields are empty. | ❓       |
 
@@ -133,6 +133,11 @@
                         "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
                         "installed_by": ["modules"]
                     },
+                    "samtools/collatefastq": {
+                        "branch": "master",
+                        "git_sha": "c8be52dba1166c678e74cda9c3a3c221635c8bb1",
+                        "installed_by": ["modules"]
+                    },
                     "samtools/convert": {
                         "branch": "master",
                         "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
 
@@ -61,8 +61,8 @@
         ],
         "meta": {
             "nf-test": "0.9.2",
-            "nextflow": "25.04.6"
+            "nextflow": "25.10.2"
         },
-        "timestamp": "2025-08-12T20:45:25.283350308"
+        "timestamp": "2026-01-08T17:24:54.112204263"
     }
-}
+}
Original file line number	Diff line number	Diff line change
`@@ -33,7 +33,7 @@ jobs:`
`33`	`33`	`parameters: \|`
`34`	`34`	`{`
`35`	`35`	`"hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}",`
`36`		`- "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ steps.revision.outputs.revision }}"`
	`36`	`+ "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ steps.revision.outputs.revision }}",`
`37`	`37`	`"genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/references",`
`38`	`38`	`"tools": "all",`
`39`	`39`	`"no_cosmic": true`
Original file line number	Diff line number	Diff line change
`@@ -84,16 +84,14 @@ def vcf_collect(`
`84`	`84`	`all_df = df.merge(`
`85`	`85`	`gtf_df, how="left", left_on="CDS_LEFT_ID", right_on="Transcript_id"`
`86`	`86`	`)`
`87`		`- all_df[["PosA", "orig_start", "orig_end"]] = (`
`88`		`- all_df[["PosA", "orig_start", "orig_end"]].fillna(0).astype(int)`
`89`		`- )`
`90`	`87`
`91`	`88`	`all_df = all_df[`
`92`	`89`	`(`
`93`	`90`	`(all_df["PosA"] >= all_df["orig_start"])`
`94`	`91`	`& (all_df["PosA"] <= all_df["orig_end"])`
`95`	`92`	`)`
`96`		`- \| ((all_df["orig_start"] == 0) & (all_df["orig_end"] == 0))`
	`93`	`+ \| ((all_df["orig_start"].isna()) & (all_df["orig_end"].isna()))`
	`94`	`+ \| (all_df["PosA"].isna())`
`97`	`95`	`]`
`98`	`96`
`99`	`97`	`all_df["Left_transcript_version"] = all_df["CDS_LEFT_ID"].astype(str).str.split(".").str[-1]`
`@@ -145,21 +143,16 @@ def vcf_collect(`
`145`	`143`	`all_df = all_df.merge(`
`146`	`144`	`gtf_df, how="left", left_on="CDS_RIGHT_ID", right_on="Transcript_id"`
`147`	`145`	`)`
`148`		`- all_df[["PosB", "orig_start", "orig_end"]] = all_df[`
`149`		`- ["PosB", "orig_start", "orig_end"]`
`150`		`- ].fillna(0)`
`151`		`- all_df[["PosB", "orig_start", "orig_end"]] = all_df[`
`152`		`- ["PosB", "orig_start", "orig_end"]`
`153`		`- ].astype(int)`
	`146`	`+`
`154`	`147`	`all_df = all_df[`
`155`	`148`	`(`
`156`	`149`	`(all_df["PosB"] >= all_df["orig_start"])`
`157`	`150`	`& (all_df["PosB"] <= all_df["orig_end"])`
`158`	`151`	`)`
`159`		`- \| ((all_df["orig_start"] == 0) & (all_df["orig_end"] == 0))`
	`152`	`+ \| ((all_df["orig_start"].isna()) & (all_df["orig_end"].isna()))`
	`153`	`+ \| (all_df["PosB"].isna())`
`160`	`154`	`]`
`161`	`155`
`162`		`- all_df[["PosA", "PosB"]] = all_df[["PosA", "PosB"]].replace(0, np.nan)`
`163`	`156`	`all_df = all_df.replace("", np.nan)`
`164`	`157`
`165`	`158`	`all_df["Right_transcript_version"] = all_df["CDS_RIGHT_ID"].astype(str).str.split(".").str[-1]`
`@@ -213,6 +206,9 @@ def vcf_collect(`
`213`	`206`
`214`	`207`	`all_df = all_df.combine_first(read_fusionreport_csv(fusionreport_csv))`
`215`	`208`
	`209`	`+ # Filter out invalid entries with missing positional values`
	`210`	`+ all_df = all_df[all_df["PosA"].notna() & all_df["PosB"].notna() & all_df["ChromosomeA"].notna() & all_df["ChromosomeB"].notna()]`
	`211`	`+`
`216`	`212`	`return write_vcf(column_manipulation(all_df), header_def(sample), out_file)`
`217`	`213`
`218`	`214`