Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
**What problems does this PR solve?**
Provide a short description or reference to the relevant issue, explaining what problems this PR solves.

**An outline of the validation procedure for this feature**
In addition to automatic tests, has any manual testing been carried out?

**Risk analysis - Reasons for careful code review**
If any of the boxes below are checked, extra careful code review should be initiated.

- [ ] This PR contains code that could remove data
7 changes: 4 additions & 3 deletions .github/workflows/run_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
env:
CAPSULE_LOG: none
run: |
curl -s https://get.nextflow.io | bash
# Download the pinned Nextflow launcher to ./nextflow. With -o the payload goes to the
# file rather than stdout, so there is nothing to pipe into bash.
curl -fsSL https://github.com/nextflow-io/nextflow/releases/download/v24.10.4/nextflow -o nextflow
sudo mv nextflow /usr/local/bin/

- name: Make Nextflow binary executable
Expand All @@ -35,11 +35,12 @@ jobs:
- name: Set up python
uses: actions/setup-python@v2
with:
python-version: 3.9
python-version: 3.11
architecture: x64

- name: Install test requirements
run: pip install -r requirements-dev.txt
run: |
pip install -r requirements-dev.txt

- name: Run tests
run: pytest tests
Expand Down
47 changes: 8 additions & 39 deletions bin/get_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import xmltodict
from collections import OrderedDict
import re
import glob
import csv
import argparse
import os
import json
Expand All @@ -12,11 +14,10 @@


class RunfolderInfo:
def __init__(self, runfolder, bcl2fastq_outdir):
def __init__(self, runfolder):
self.runfolder = runfolder
self.run_info = self.read_run_info()
self.run_parameters = self.read_run_parameters()
self.stats_json = self.read_stats_json(bcl2fastq_outdir)
self.description_and_identifier = OrderedDict()
self.run_parameters_tags = {
"RunId": "Run ID",
Expand Down Expand Up @@ -79,27 +80,11 @@ def find_flowcell_type_novaseqx(self):
return None
return {"Flowcell type": flowcell_type}

def read_stats_json(self, bcl2fastq_outdir):
stats_json_path = os.path.join(
self.runfolder, bcl2fastq_outdir, "Stats/Stats.json"
)
if os.path.exists(stats_json_path):
with open(stats_json_path) as f:
return json.load(f)
else:
return None

def get_bcl2fastq_version(self, runfolder):
with open(os.path.join(runfolder, "bcl2fastq_version")) as f:
bcl2fastq_str = f.read()
return bcl2fastq_str.split("v")[1].strip()

def get_software_version(self, runfolder):
with open(
Path(runfolder)
/ "pipeline_info"
/ "nf_core_pipeline_software_mqc_versions.yml"
) as f:
pipeline_dir = Path(runfolder) / "pipeline_info"
pipeline_info_filename = next(pipeline_dir.glob("*_software_mqc_versions.yml"))

with open(pipeline_info_filename) as f:
return {
software: version
for software_dict in yaml.safe_load(f).values()
Expand Down Expand Up @@ -151,15 +136,6 @@ def get_info(self):
return results

def get_demultiplexing_info(self):
try:
return {
"Demultiplexing": {
"bcl2fastq": self.get_bcl2fastq_version(self.runfolder)
}
}
except FileNotFoundError:
pass

try:
return {"Demultiplexing": self.get_software_version(self.runfolder)}
except FileNotFoundError:
Expand All @@ -173,18 +149,11 @@ def get_demultiplexing_info(self):
parser.add_argument(
"--runfolder", type=str, required=True, help="Path to runfolder"
)
parser.add_argument(
"--bcl2fastq-outdir",
type=str,
default="Data/Intensities/BaseCalls",
help="Path to bcl2fastq output folder relative to the runfolder",
)

args = parser.parse_args()
runfolder = args.runfolder
bcl2fastq_outdir = args.bcl2fastq_outdir

runfolder_info = RunfolderInfo(runfolder, bcl2fastq_outdir)
runfolder_info = RunfolderInfo(runfolder)
info = runfolder_info.get_info()

print(
Expand Down
14 changes: 7 additions & 7 deletions config/nextflow_config/singularity.config
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,25 @@ singularity {

process {
withName: 'FASTQC' {
container = 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--hdfd78af_1'
container = 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0'
}
withName: 'FASTQ_SCREEN' {
container = 'https://depot.galaxyproject.org/singularity/fastq-screen:0.14.0--pl5262hdfd78af_1'
container = 'https://depot.galaxyproject.org/singularity/fastq-screen:0.16.0--pl5321hdfd78af_0'
}
withName: 'GET_QC_THRESHOLDS' {
container = 'https://depot.galaxyproject.org/singularity/checkqc:3.6.6--pyhdfd78af_0'
container = 'https://depot.galaxyproject.org/singularity/checkqc:4.0.7--pyhdfd78af_0'
}
withName: 'GET_METADATA' {
container = 'https://depot.galaxyproject.org/singularity/checkqc:3.6.6--pyhdfd78af_0'
container = 'https://depot.galaxyproject.org/singularity/checkqc:4.0.7--pyhdfd78af_0'
}
withName: 'INTEROP_SUMMARY' {
container = 'https://depot.galaxyproject.org/singularity/illumina-interop:1.2.4--hdbdd923_2'
container = 'https://depot.galaxyproject.org/singularity/illumina-interop:1.5.0--h503566f_0'
}
withName: 'MULTIQC_PER_FLOWCELL' {
container = 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0'
container = 'https://depot.galaxyproject.org/singularity/multiqc:1.32--pyhdfd78af_1'
}
withName: 'MULTIQC_PER_PROJECT' {
container = 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0'
container = 'https://depot.galaxyproject.org/singularity/multiqc:1.32--pyhdfd78af_1'
}
}

19 changes: 19 additions & 0 deletions config/nextflow_config/test_bclconvert.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*
========================================================================================
Nextflow config file for running minimal tests
========================================================================================
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run main.nf -profile dev,test_bclconvert,singularity


This config takes inspiration from https://github.com/nf-core/rnaseq
----------------------------------------------------------------------------------------
*/

params {
run_folder = "$baseDir/test_data/230825_M04034_0043_000000000-L6NVV"
fastqscreen_databases = "$baseDir/test_data/Test_FastQ_Screen_Genomes"
checkqc_config = "$baseDir/test_data/checkqc_config.yaml"
config_dir = "$baseDir/test_data/test_config"
}
1 change: 1 addition & 0 deletions config/tool_config/multiqc_flowcell_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ run_modules:
- fastqc
- fastq_screen
- bcl2fastq
- bclconvert
- interop
- custom_content

Expand Down
43 changes: 28 additions & 15 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ params.run_folder = "/path/to/run_folder"
params.result_dir = "results"
fastqscreen_default_databases = "FastQ_Screen_Genomes"
params.fastqscreen_databases = fastqscreen_default_databases
params.bcl2fastq_outdir = "Unaligned"
params.demultiplexer = "bcl2fastq"
params.demultiplexer_outdir = "Unaligned"

params.checkqc_config = "" // See: https://github.com/Molmed/checkQC
params.assets_dir = "$baseDir/assets"
params.config_dir = "$baseDir/config/tool_config"
Expand Down Expand Up @@ -47,11 +49,12 @@ def helpMessage() {

Optional parameters:
--result_dir Path to write results (default: results)
--bcl2fastq_outdir Folder name to check for fastq.gz files and demultiplexing stats (default: Unaligned)
--demultiplexer_outdir Folder name to check for fastq.gz files and demultiplexing stats (default: Unaligned)
--checkqc_config Configuration file for CheckQC
--assets_dir Location of project assests (default: "\$baseDir/assets").
--config_dir Location of tool configuration files (default: "\$baseDir/config/tool_config").
--script_dir Location of project scripts (default: "\$baseDir/bin")
--demultiplexer Name of demultiplexer used e.g 'bcl2fastq' or 'bclconvert'

--help Print this help message.

Expand All @@ -61,7 +64,7 @@ def helpMessage() {
"""
}

if (params.help || !params.run_folder){
if (params.help || !params.run_folder || !params.demultiplexer){
helpMessage()
exit 0
}
Expand All @@ -72,7 +75,8 @@ workflow {
Channel.fromPath(params.run_folder,checkIfExists:true)
.ifEmpty { "Error: No run folder (--run_folder) given."; exit 1 }
.set {run_folder}
CHECK_RUN_QUALITY(run_folder)
Channel.value(params.demultiplexer).set {demultiplexer}
CHECK_RUN_QUALITY(run_folder, demultiplexer)

}

Expand All @@ -83,13 +87,13 @@ workflow.onComplete {
def get_project_and_reads(run_folder) {

Channel
.fromPath("${run_folder}/${params.bcl2fastq_outdir}/**.fastq.gz" )
.fromPath("${run_folder}/${params.demultiplexer_outdir}/**.fastq.gz" )
.filter( ~/.*_[^I]\d_001\.fastq\.gz$/ )
.ifEmpty { "Error: No fastq files found under ${run_folder}/ !\n"; exit 1 }
.map {
it.toString().indexOf('Undetermined') > 0 ?
['NoProject', it] :
[(it.toString() =~ /^.*\/${params.bcl2fastq_outdir}\/([^\/]+)\/.*\.fastq\.gz$/)[0][1],it]
[(it.toString() =~ /^.*\/${params.demultiplexer_outdir}\/([^\/]+)\/.*\.fastq\.gz$/)[0][1],it]
}

}
Expand Down Expand Up @@ -121,8 +125,21 @@ workflow CHECK_RUN_QUALITY {

take:
run_folder
demultiplexer

main:
if (params.demultiplexer == 'bclconvert') {
Channel.fromPath([
"${params.run_folder}/${params.demultiplexer_outdir}/Reports/*.csv",
"${params.run_folder}/RunInfo.xml"])
.collect().ifEmpty([])
.set { demux_stats }
} else {
Channel.fromPath("${params.run_folder}/${params.demultiplexer_outdir}/Stats/Stats.json")
.collect().ifEmpty([])
.set { demux_stats }
}

INTEROP_SUMMARY(run_folder)
GET_QC_THRESHOLDS(run_folder)
GET_METADATA(run_folder)
Expand All @@ -132,14 +149,15 @@ workflow CHECK_RUN_QUALITY {
FASTQ_SCREEN(project_and_reads,
params.config_dir,
params.fastqscreen_databases)
MULTIQC_PER_FLOWCELL( params.run_folder,
MULTIQC_PER_FLOWCELL(
params.run_folder,
FASTQC.out.map{ it[1] }.collect(),
FASTQ_SCREEN.out.results.map{ it[1] }.collect(),
FASTQ_SCREEN.out.tsv.map{ it[1] }.collectFile(keepHeader:true,skip:1,sort:true),
INTEROP_SUMMARY.out.collect(),
GET_QC_THRESHOLDS.out.collect().ifEmpty([]),
GET_METADATA.out.collect(),
Channel.fromPath("${params.run_folder}/${params.bcl2fastq_outdir}/Stats/Stats.json").collect().ifEmpty([]),
demux_stats,
params.assets_dir,
params.config_dir)
MULTIQC_PER_PROJECT( params.run_folder,
Expand Down Expand Up @@ -239,14 +257,9 @@ process GET_METADATA {
path 'sequencing_metadata_mqc.yaml'

script:
if ( params.bcl2fastq_outdir ){
bcl2fastq_outdir_section = "--bcl2fastq-outdir ${params.bcl2fastq_outdir}"
} else {
bcl2fastq_outdir_section = ""
}
"""
python ${params.script_dir}/get_metadata.py --runfolder $runfolder \\
$bcl2fastq_outdir_section &> sequencing_metadata_mqc.yaml
&> sequencing_metadata_mqc.yaml
"""
}

Expand Down Expand Up @@ -277,7 +290,7 @@ process MULTIQC_PER_FLOWCELL {
path ('Interop_summary/*') // Interop log
path qc_thresholds // Quality check thresholds (optional)
path sequencing_metadata // Sequencing meta data ( custom content data )
path bcl2fastq_stats // Bcl2Fastq logs
path demux_stats // demux logs
path assets // Staged copy of assets folder
path config_dir // Staged copy of config folder

Expand Down
4 changes: 4 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,9 @@ profiles {
test {
includeConfig 'config/nextflow_config/test.config'
}

test_bclconvert {
includeConfig 'config/nextflow_config/test_bclconvert.config'
}

}
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
pytest==6.2.5
black==24.3.0
beautifulsoup4==4.10.0
checkqc==3.6.6
checkqc==4.1.1rc1
lxml==4.9.2
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
15 changes: 15 additions & 0 deletions test_data/230825_M04034_0043_000000000-L6NVV/RunInfo.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0"?>
<RunInfo xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" Version="2">
<Run Id="230825_M04034_0043_000000000-L6NVV" Number="43">
<Flowcell>000000000-L6NVV</Flowcell>
<Instrument>M04034</Instrument>
<Date>230825</Date>
<Reads>
<Read NumCycles="151" Number="1" IsIndexedRead="N" />
<Read NumCycles="8" Number="2" IsIndexedRead="Y" />
<Read NumCycles="8" Number="3" IsIndexedRead="Y" />
<Read NumCycles="151" Number="4" IsIndexedRead="N" />
</Reads>
<FlowcellLayout LaneCount="1" SurfaceCount="2" SwathCount="1" TileCount="14" />
</Run>
</RunInfo>
Loading