Skip to content

Commit b75b3d9

Browse files
authored
Add BCLConvert support (#67)
* Add BCLConvert support * Corrected bclconvert outputdir and added test * Corrected bclconvert report outputs and added tests * Added pull_request template * Refactored code * Updated GHA python and nextflow versions * Updated singularity containers * Refactored code * Refactored code * Update checkqc version
1 parent f49a23e commit b75b3d9

31 files changed

+345
-98
lines changed

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
**What problems does this PR solve?**
2+
Provide a short description or reference to the relevant issue, explaining what problems this PR solves.
3+
4+
**An outline of the validation procedure for this feature**
5+
In addition to automatic tests, has any manual testing been carried out?
6+
7+
**Risk analysis - Reasons for careful code review**
8+
If any of the boxes below are checked, extra careful code review should be initiated.
9+
10+
- [ ] This PR contains code that could remove data

.github/workflows/run_tests.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
env:
2727
CAPSULE_LOG: none
2828
run: |
29-
curl -s https://get.nextflow.io | bash
29+
curl -fsSL https://github.com/nextflow-io/nextflow/releases/download/v24.10.4/nextflow -o nextflow | bash
3030
sudo mv nextflow /usr/local/bin/
3131
3232
- name: Make Nextflow binary executable
@@ -35,11 +35,12 @@ jobs:
3535
- name: Set up python
3636
uses: actions/setup-python@v2
3737
with:
38-
python-version: 3.9
38+
python-version: 3.11
3939
architecture: x64
4040

4141
- name: Install test requirements
42-
run: pip install -r requirements-dev.txt
42+
run: |
43+
pip install -r requirements-dev.txt
4344
4445
- name: Run tests
4546
run: pytest tests

bin/get_metadata.py

Lines changed: 8 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import xmltodict
44
from collections import OrderedDict
55
import re
6+
import glob
7+
import csv
68
import argparse
79
import os
810
import json
@@ -12,11 +14,10 @@
1214

1315

1416
class RunfolderInfo:
15-
def __init__(self, runfolder, bcl2fastq_outdir):
17+
def __init__(self, runfolder):
1618
self.runfolder = runfolder
1719
self.run_info = self.read_run_info()
1820
self.run_parameters = self.read_run_parameters()
19-
self.stats_json = self.read_stats_json(bcl2fastq_outdir)
2021
self.description_and_identifier = OrderedDict()
2122
self.run_parameters_tags = {
2223
"RunId": "Run ID",
@@ -79,27 +80,11 @@ def find_flowcell_type_novaseqx(self):
7980
return None
8081
return {"Flowcell type": flowcell_type}
8182

82-
def read_stats_json(self, bcl2fastq_outdir):
83-
stats_json_path = os.path.join(
84-
self.runfolder, bcl2fastq_outdir, "Stats/Stats.json"
85-
)
86-
if os.path.exists(stats_json_path):
87-
with open(stats_json_path) as f:
88-
return json.load(f)
89-
else:
90-
return None
91-
92-
def get_bcl2fastq_version(self, runfolder):
93-
with open(os.path.join(runfolder, "bcl2fastq_version")) as f:
94-
bcl2fastq_str = f.read()
95-
return bcl2fastq_str.split("v")[1].strip()
96-
9783
def get_software_version(self, runfolder):
98-
with open(
99-
Path(runfolder)
100-
/ "pipeline_info"
101-
/ "nf_core_pipeline_software_mqc_versions.yml"
102-
) as f:
84+
pipeline_dir = Path(runfolder) / "pipeline_info"
85+
pipeline_info_filename = next(pipeline_dir.glob("*_software_mqc_versions.yml"))
86+
87+
with open(pipeline_info_filename) as f:
10388
return {
10489
software: version
10590
for software_dict in yaml.safe_load(f).values()
@@ -151,15 +136,6 @@ def get_info(self):
151136
return results
152137

153138
def get_demultiplexing_info(self):
154-
try:
155-
return {
156-
"Demultiplexing": {
157-
"bcl2fastq": self.get_bcl2fastq_version(self.runfolder)
158-
}
159-
}
160-
except FileNotFoundError:
161-
pass
162-
163139
try:
164140
return {"Demultiplexing": self.get_software_version(self.runfolder)}
165141
except FileNotFoundError:
@@ -173,18 +149,11 @@ def get_demultiplexing_info(self):
173149
parser.add_argument(
174150
"--runfolder", type=str, required=True, help="Path to runfolder"
175151
)
176-
parser.add_argument(
177-
"--bcl2fastq-outdir",
178-
type=str,
179-
default="Data/Intensities/BaseCalls",
180-
help="Path to bcl2fastq output folder relative to the runfolder",
181-
)
182152

183153
args = parser.parse_args()
184154
runfolder = args.runfolder
185-
bcl2fastq_outdir = args.bcl2fastq_outdir
186155

187-
runfolder_info = RunfolderInfo(runfolder, bcl2fastq_outdir)
156+
runfolder_info = RunfolderInfo(runfolder)
188157
info = runfolder_info.get_info()
189158

190159
print(

config/nextflow_config/singularity.config

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,25 +5,25 @@ singularity {
55

66
process {
77
withName: 'FASTQC' {
8-
container = 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--hdfd78af_1'
8+
container = 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0'
99
}
1010
withName: 'FASTQ_SCREEN' {
11-
container = 'https://depot.galaxyproject.org/singularity/fastq-screen:0.14.0--pl5262hdfd78af_1'
11+
container = 'https://depot.galaxyproject.org/singularity/fastq-screen:0.16.0--pl5321hdfd78af_0'
1212
}
1313
withName: 'GET_QC_THRESHOLDS' {
14-
container = 'https://depot.galaxyproject.org/singularity/checkqc:3.6.6--pyhdfd78af_0'
14+
container = 'https://depot.galaxyproject.org/singularity/checkqc:4.0.7--pyhdfd78af_0'
1515
}
1616
withName: 'GET_METADATA' {
17-
container = 'https://depot.galaxyproject.org/singularity/checkqc:3.6.6--pyhdfd78af_0'
17+
container = 'https://depot.galaxyproject.org/singularity/checkqc:4.0.7--pyhdfd78af_0'
1818
}
1919
withName: 'INTEROP_SUMMARY' {
20-
container = 'https://depot.galaxyproject.org/singularity/illumina-interop:1.2.4--hdbdd923_2'
20+
container = 'https://depot.galaxyproject.org/singularity/illumina-interop:1.5.0--h503566f_0'
2121
}
2222
withName: 'MULTIQC_PER_FLOWCELL' {
23-
container = 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0'
23+
container = 'https://depot.galaxyproject.org/singularity/multiqc:1.32--pyhdfd78af_1'
2424
}
2525
withName: 'MULTIQC_PER_PROJECT' {
26-
container = 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0'
26+
container = 'https://depot.galaxyproject.org/singularity/multiqc:1.32--pyhdfd78af_1'
2727
}
2828
}
2929

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/*
2+
========================================================================================
3+
Nextflow config file for running minimal tests
4+
========================================================================================
5+
Defines input files and everything required to run a fast and simple pipeline test.
6+
Use as follows:
7+
nextflow run main.nf -profile dev,test_bclconvert,singularity
8+
9+
10+
This config takes inspiration from https://github.com/nf-core/rnaseq
11+
----------------------------------------------------------------------------------------
12+
*/
13+
14+
params {
15+
run_folder = "$baseDir/test_data/230825_M04034_0043_000000000-L6NVV"
16+
fastqscreen_databases = "$baseDir/test_data/Test_FastQ_Screen_Genomes"
17+
checkqc_config = "$baseDir/test_data/checkqc_config.yaml"
18+
config_dir = "$baseDir/test_data/test_config"
19+
}

config/tool_config/multiqc_flowcell_config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ run_modules:
44
- fastqc
55
- fastq_screen
66
- bcl2fastq
7+
- bclconvert
78
- interop
89
- custom_content
910

main.nf

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ params.run_folder = "/path/to/run_folder"
1212
params.result_dir = "results"
1313
fastqscreen_default_databases = "FastQ_Screen_Genomes"
1414
params.fastqscreen_databases = fastqscreen_default_databases
15-
params.bcl2fastq_outdir = "Unaligned"
15+
params.demultiplexer = "bcl2fastq"
16+
params.demultiplexer_outdir = "Unaligned"
17+
1618
params.checkqc_config = "" // See: https://github.com/Molmed/checkQC
1719
params.assets_dir = "$baseDir/assets"
1820
params.config_dir = "$baseDir/config/tool_config"
@@ -47,11 +49,12 @@ def helpMessage() {
4749
4850
Optional parameters:
4951
--result_dir Path to write results (default: results)
50-
--bcl2fastq_outdir Folder name to check for fastq.gz files and demultiplexing stats (default: Unaligned)
52+
--demultiplexer_outdir Folder name to check for fastq.gz files and demultiplexing stats (default: Unaligned)
5153
--checkqc_config Configuration file for CheckQC
5254
--assets_dir Location of project assests (default: "\$baseDir/assets").
5355
--config_dir Location of tool configuration files (default: "\$baseDir/config/tool_config").
5456
--script_dir Location of project scripts (default: "\$baseDir/bin")
57+
--demultiplexer Name of demultiplexer used e.g 'bcl2fastq' or 'bclconvert'
5558
5659
--help Print this help message.
5760
@@ -61,7 +64,7 @@ def helpMessage() {
6164
"""
6265
}
6366

64-
if (params.help || !params.run_folder){
67+
if (params.help || !params.run_folder || !params.demultiplexer){
6568
helpMessage()
6669
exit 0
6770
}
@@ -72,7 +75,8 @@ workflow {
7275
Channel.fromPath(params.run_folder,checkIfExists:true)
7376
.ifEmpty { "Error: No run folder (--run_folder) given."; exit 1 }
7477
.set {run_folder}
75-
CHECK_RUN_QUALITY(run_folder)
78+
Channel.value(params.demultiplexer).set {demultiplexer}
79+
CHECK_RUN_QUALITY(run_folder, demultiplexer)
7680

7781
}
7882

@@ -83,13 +87,13 @@ workflow.onComplete {
8387
def get_project_and_reads(run_folder) {
8488

8589
Channel
86-
.fromPath("${run_folder}/${params.bcl2fastq_outdir}/**.fastq.gz" )
90+
.fromPath("${run_folder}/${params.demultiplexer_outdir}/**.fastq.gz" )
8791
.filter( ~/.*_[^I]\d_001\.fastq\.gz$/ )
8892
.ifEmpty { "Error: No fastq files found under ${run_folder}/ !\n"; exit 1 }
8993
.map {
9094
it.toString().indexOf('Undetermined') > 0 ?
9195
['NoProject', it] :
92-
[(it.toString() =~ /^.*\/${params.bcl2fastq_outdir}\/([^\/]+)\/.*\.fastq\.gz$/)[0][1],it]
96+
[(it.toString() =~ /^.*\/${params.demultiplexer_outdir}\/([^\/]+)\/.*\.fastq\.gz$/)[0][1],it]
9397
}
9498

9599
}
@@ -121,8 +125,21 @@ workflow CHECK_RUN_QUALITY {
121125

122126
take:
123127
run_folder
128+
demultiplexer
124129

125130
main:
131+
if (params.demultiplexer == 'bclconvert') {
132+
Channel.fromPath([
133+
"${params.run_folder}/${params.demultiplexer_outdir}/Reports/*.csv",
134+
"${params.run_folder}/RunInfo.xml"])
135+
.collect().ifEmpty([])
136+
.set { demux_stats }
137+
} else {
138+
Channel.fromPath("${params.run_folder}/${params.demultiplexer_outdir}/Stats/Stats.json")
139+
.collect().ifEmpty([])
140+
.set { demux_stats }
141+
}
142+
126143
INTEROP_SUMMARY(run_folder)
127144
GET_QC_THRESHOLDS(run_folder)
128145
GET_METADATA(run_folder)
@@ -132,14 +149,15 @@ workflow CHECK_RUN_QUALITY {
132149
FASTQ_SCREEN(project_and_reads,
133150
params.config_dir,
134151
params.fastqscreen_databases)
135-
MULTIQC_PER_FLOWCELL( params.run_folder,
152+
MULTIQC_PER_FLOWCELL(
153+
params.run_folder,
136154
FASTQC.out.map{ it[1] }.collect(),
137155
FASTQ_SCREEN.out.results.map{ it[1] }.collect(),
138156
FASTQ_SCREEN.out.tsv.map{ it[1] }.collectFile(keepHeader:true,skip:1,sort:true),
139157
INTEROP_SUMMARY.out.collect(),
140158
GET_QC_THRESHOLDS.out.collect().ifEmpty([]),
141159
GET_METADATA.out.collect(),
142-
Channel.fromPath("${params.run_folder}/${params.bcl2fastq_outdir}/Stats/Stats.json").collect().ifEmpty([]),
160+
demux_stats,
143161
params.assets_dir,
144162
params.config_dir)
145163
MULTIQC_PER_PROJECT( params.run_folder,
@@ -239,14 +257,9 @@ process GET_METADATA {
239257
path 'sequencing_metadata_mqc.yaml'
240258

241259
script:
242-
if ( params.bcl2fastq_outdir ){
243-
bcl2fastq_outdir_section = "--bcl2fastq-outdir ${params.bcl2fastq_outdir}"
244-
} else {
245-
bcl2fastq_outdir_section = ""
246-
}
247260
"""
248261
python ${params.script_dir}/get_metadata.py --runfolder $runfolder \\
249-
$bcl2fastq_outdir_section &> sequencing_metadata_mqc.yaml
262+
&> sequencing_metadata_mqc.yaml
250263
"""
251264
}
252265

@@ -277,7 +290,7 @@ process MULTIQC_PER_FLOWCELL {
277290
path ('Interop_summary/*') // Interop log
278291
path qc_thresholds // Quality check thresholds (optional)
279292
path sequencing_metadata // Sequencing meta data ( custom content data )
280-
path bcl2fastq_stats // Bcl2Fastq logs
293+
path demux_stats // demux logs
281294
path assets // Staged copy of assets folder
282295
path config_dir // Staged copy of config folder
283296

nextflow.config

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,5 +62,9 @@ profiles {
6262
test {
6363
includeConfig 'config/nextflow_config/test.config'
6464
}
65+
66+
test_bclconvert {
67+
includeConfig 'config/nextflow_config/test_bclconvert.config'
68+
}
6569

6670
}

requirements-dev.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
pytest==6.2.5
22
black==24.3.0
33
beautifulsoup4==4.10.0
4-
checkqc==3.6.6
4+
checkqc==4.1.1rc1
55
lxml==4.9.2
Binary file not shown.

0 commit comments

Comments
 (0)