Skip to content

Commit 6654bd6

Browse files
committed
Refactored code
1 parent 6b44f8e commit 6654bd6

File tree

11 files changed

+65
-128
lines changed

11 files changed

+65
-128
lines changed

bin/get_metadata.py

Lines changed: 7 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,10 @@
1414

1515

1616
class RunfolderInfo:
17-
def __init__(self, runfolder, demultiplexer_outdir, demultiplexer):
17+
def __init__(self, runfolder, demultiplexer_outdir):
1818
self.runfolder = runfolder
19-
self.demultiplexer = demultiplexer
2019
self.run_info = self.read_run_info()
2120
self.run_parameters = self.read_run_parameters()
22-
self.stats_json = self.read_stats_json(demultiplexer_outdir, demultiplexer)
2321
self.description_and_identifier = OrderedDict()
2422
self.run_parameters_tags = {
2523
"RunId": "Run ID",
@@ -82,38 +80,11 @@ def find_flowcell_type_novaseqx(self):
8280
return None
8381
return {"Flowcell type": flowcell_type}
8482

85-
def read_stats_json(self, demultiplexer_outdir, demultiplexer):
86-
stats_path = "Reports" if demultiplexer == "bclconvert" else "Stats/Stats.json"
87-
stats_json_path = os.path.join(self.runfolder, demultiplexer_outdir, stats_path)
88-
if os.path.exists(stats_json_path):
89-
if demultiplexer == "bclconvert":
90-
# Bclconvert produces multiple statistical output files
91-
files = glob.glob(stats_json_path + "/*.csv")
92-
bclconvert_data = {}
93-
for file in files:
94-
with open(file) as csvfile:
95-
reader = csv.reader(csvfile)
96-
file_name = re.sub(r".*/|\.csv", "", file)
97-
bclconvert_data[file_name] = [row for row in reader]
98-
return bclconvert_data
99-
else:
100-
with open(stats_json_path) as f:
101-
return json.load(f)
102-
else:
103-
return None
104-
105-
def get_demultiplexer_version(self, runfolder):
106-
with open(os.path.join(runfolder, f"{self.demultiplexer}_version")) as f:
107-
demultiplexer_str = f.read()
108-
return demultiplexer_str.split("v")[1].strip()
109-
11083
def get_software_version(self, runfolder):
111-
pipeline_info_filename = (
112-
"nf_core_pipeline_software_mqc_versions.yml"
113-
if self.demultiplexer == "bcl2fastq"
114-
else "nf_core_demultiplex_software_mqc_versions.yml"
115-
)
116-
with open(Path(runfolder) / "pipeline_info" / pipeline_info_filename) as f:
84+
pipeline_dir = Path(runfolder) / "pipeline_info"
85+
pipeline_info_filename = next(pipeline_dir.glob("*_software_mqc_versions.yml"))
86+
87+
with open(pipeline_info_filename) as f:
11788
return {
11889
software: version
11990
for software_dict in yaml.safe_load(f).values()
@@ -165,15 +136,6 @@ def get_info(self):
165136
return results
166137

167138
def get_demultiplexing_info(self):
168-
try:
169-
return {
170-
"Demultiplexing": {
171-
self.demultiplexer: self.get_demultiplexer_version(self.runfolder)
172-
}
173-
}
174-
except FileNotFoundError:
175-
pass
176-
177139
try:
178140
return {"Demultiplexing": self.get_software_version(self.runfolder)}
179141
except FileNotFoundError:
@@ -187,25 +149,18 @@ def get_demultiplexing_info(self):
187149
parser.add_argument(
188150
"--runfolder", type=str, required=True, help="Path to runfolder"
189151
)
190-
parser.add_argument(
191-
"--demultiplexer",
192-
type=str,
193-
default="bcl2fastq",
194-
help="Name of demultiplexer used",
195-
)
196152
parser.add_argument(
197153
"--demultiplexer-outdir",
198154
type=str,
199-
default="Unaligned",
155+
default="Data/Intensities/BaseCalls",
200156
help="Path to demultiplexer output folder relative to the runfolder",
201157
)
202158

203159
args = parser.parse_args()
204160
runfolder = args.runfolder
205-
demultiplexer = args.demultiplexer
206161
demultiplexer_outdir = args.demultiplexer_outdir
207162

208-
runfolder_info = RunfolderInfo(runfolder, demultiplexer_outdir, demultiplexer)
163+
runfolder_info = RunfolderInfo(runfolder, demultiplexer_outdir)
209164
info = runfolder_info.get_info()
210165

211166
print(
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/*
2+
========================================================================================
3+
Nextflow config file for running minimal tests
4+
========================================================================================
5+
Defines input files and everything required to run a fast and simple pipeline test.
6+
Use as follows:
7+
nextflow run main.nf -profile dev,test_bclconvert,singularity
8+
9+
10+
This config takes inspiration from https://github.com/nf-core/rnaseq
11+
----------------------------------------------------------------------------------------
12+
*/
13+
14+
params {
15+
run_folder = "$baseDir/test_data/230825_M04034_0043_000000000-L6NVV"
16+
fastqscreen_databases = "$baseDir/test_data/Test_FastQ_Screen_Genomes"
17+
checkqc_config = "$baseDir/test_data/checkqc_config.yaml"
18+
config_dir = "$baseDir/test_data/test_config"
19+
}

config/tool_config/bclconvert/multiqc_flowcell_config.yaml

Lines changed: 0 additions & 20 deletions
This file was deleted.

config/tool_config/bcl2fastq/multiqc_flowcell_config.yaml renamed to config/tool_config/multiqc_flowcell_config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ run_modules:
44
- fastqc
55
- fastq_screen
66
- bcl2fastq
7+
- bclconvert
78
- interop
89
- custom_content
910

main.nf

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def helpMessage() {
4949
5050
Optional parameters:
5151
--result_dir Path to write results (default: results)
52-
--demultiplexer_outdir Folder name to check for fastq.gz files and demultiplexing stats (default: Unaligned)
52+
--demultiplexer_outdir Folder name to check for fastq.gz files and demultiplexing stats (default: Data/Intensities/BaseCalls)
5353
--checkqc_config Configuration file for CheckQC
5454
--assets_dir Location of project assets (default: "\$baseDir/assets").
5555
--config_dir Location of tool configuration files (default: "\$baseDir/config/tool_config").
@@ -64,11 +64,7 @@ def helpMessage() {
6464
"""
6565
}
6666

67-
if (params.help || !params.run_folder){
68-
helpMessage()
69-
exit 0
70-
}
71-
if (params.help || !params.demultiplexer){
67+
if (params.help || !params.run_folder || !params.demultiplexer){
7268
helpMessage()
7369
exit 0
7470
}
@@ -146,7 +142,7 @@ workflow CHECK_RUN_QUALITY {
146142

147143
INTEROP_SUMMARY(run_folder)
148144
GET_QC_THRESHOLDS(run_folder)
149-
GET_METADATA(run_folder, demultiplexer)
145+
GET_METADATA(run_folder)
150146
project_and_reads = get_project_and_reads(params.run_folder)
151147
FASTQC(project_and_reads,
152148
params.config_dir)
@@ -163,8 +159,7 @@ workflow CHECK_RUN_QUALITY {
163159
GET_METADATA.out.collect(),
164160
demux_stats,
165161
params.assets_dir,
166-
params.config_dir,
167-
demultiplexer)
162+
params.config_dir)
168163
MULTIQC_PER_PROJECT( params.run_folder,
169164
combine_results_by_project(
170165
FASTQC.out.groupTuple(),
@@ -257,7 +252,6 @@ process GET_METADATA {
257252

258253
input:
259254
path runfolder
260-
val demultiplexer
261255

262256
output:
263257
path 'sequencing_metadata_mqc.yaml'
@@ -270,7 +264,6 @@ process GET_METADATA {
270264
}
271265
"""
272266
python ${params.script_dir}/get_metadata.py --runfolder $runfolder \\
273-
--demultiplexer $demultiplexer \\
274267
$demultiplexer_outdir_section &> sequencing_metadata_mqc.yaml
275268
"""
276269
}
@@ -305,13 +298,7 @@ process MULTIQC_PER_FLOWCELL {
305298
path demux_stats // demux logs
306299
path assets // Staged copy of assets folder
307300
path config_dir // Staged copy of config folder
308-
val demultiplexer // Demultiplexer name
309301

310-
script:
311-
// """
312-
// echo $demux_stats
313-
// echo demultiplexer: $demultiplexer
314-
// """
315302
output:
316303
tuple path("*multiqc_report.html"), path("*_data.zip")
317304

@@ -326,7 +313,7 @@ process MULTIQC_PER_FLOWCELL {
326313
--title "Flowcell report for \${RUNFOLDER}" \\
327314
--filename \${RUNFOLDER}_multiqc_report.html -z \\
328315
-c ${config_dir}/multiqc_main_config.yaml \\
329-
-c ${config_dir}/${demultiplexer}/multiqc_flowcell_config.yaml \\
316+
-c ${config_dir}/multiqc_flowcell_config.yaml \\
330317
${threshold_parameter} \\
331318
.
332319
"""

nextflow.config

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,5 +62,9 @@ profiles {
6262
test {
6363
includeConfig 'config/nextflow_config/test.config'
6464
}
65+
66+
test_bclconvert {
67+
includeConfig 'config/nextflow_config/test_bclconvert.config'
68+
}
6569

6670
}

test_data/test_config/bcl2fastq/multiqc_flowcell_config.yaml

Lines changed: 0 additions & 1 deletion
This file was deleted.

test_data/test_config/bclconvert/multiqc_flowcell_config.yaml

Lines changed: 0 additions & 1 deletion
This file was deleted.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../config/tool_config/multiqc_flowcell_config.yaml

tests/integration_tests/test_validate_output.py

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,30 +9,30 @@
99
# Run the pipeline in test mode; this is done once per test session
1010
@pytest.fixture(scope="session", autouse=True)
1111
def result_dir(request, tmpdir_factory):
12-
demultiplexer = "bcl2fastq"
13-
if hasattr(request, "param"):
14-
demultiplexer = request.param
12+
demultiplexer = request.param
1513

1614
result_dir = tmpdir_factory.mktemp("results")
17-
cmd = [
18-
"nextflow",
19-
"run",
20-
"main.nf",
21-
"-profile",
22-
"dev,test,singularity",
23-
"--demultiplexer",
24-
demultiplexer,
25-
"--result_dir",
26-
result_dir,
27-
]
28-
if demultiplexer == "bclconvert":
29-
cmd = [*cmd, *["--run_folder", "test_data/230825_M04034_0043_000000000-L6NVV"]]
30-
31-
subprocess.run(cmd, check=True)
15+
extra_profile = "test_bclconvert" if demultiplexer == "bclconvert" else "test"
16+
17+
subprocess.run(
18+
[
19+
"nextflow",
20+
"run",
21+
"main.nf",
22+
"-profile",
23+
f"dev,{extra_profile},singularity",
24+
"--demultiplexer",
25+
demultiplexer,
26+
"--result_dir",
27+
result_dir,
28+
],
29+
check=True,
30+
)
3231

3332
yield result_dir
3433

3534

35+
@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True)
3636
def test_results_dirs_exist(result_dir):
3737
flowcell_dir = os.path.join(result_dir, "flowcell_report")
3838
projects_dir = os.path.join(result_dir, "projects")
@@ -41,6 +41,7 @@ def test_results_dirs_exist(result_dir):
4141
assert os.path.isdir(projects_dir)
4242

4343

44+
@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True)
4445
def test_project_dirs_exist(result_dir):
4546
projects_dir = os.path.join(result_dir, "projects")
4647
projects = ["Zymo", "Qiagen", "NoProject"]
@@ -49,6 +50,7 @@ def test_project_dirs_exist(result_dir):
4950
assert os.path.isdir(os.path.join(projects_dir, project))
5051

5152

53+
@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True)
5254
def test_flowcell_report_exist(result_dir):
5355
flowcell_dir = os.path.join(result_dir, "flowcell_report")
5456
report_path = os.path.join(
@@ -58,6 +60,7 @@ def test_flowcell_report_exist(result_dir):
5860
assert os.path.isfile(report_path)
5961

6062

63+
@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True)
6164
def test_project_reports_exist(result_dir):
6265
projects_dir = os.path.join(result_dir, "projects")
6366
projects = ["Zymo", "Qiagen", "NoProject"]
@@ -71,6 +74,7 @@ def test_project_reports_exist(result_dir):
7174
assert os.path.isfile(report_path)
7275

7376

77+
@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True)
7478
def check_sections_in_report(report_path, sections):
7579
with open(report_path, "r") as html_file:
7680
parser = BeautifulSoup(html_file.read(), "lxml")
@@ -79,6 +83,7 @@ def check_sections_in_report(report_path, sections):
7983
assert len(hits) > 0
8084

8185

86+
@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True)
8287
def test_all_sections_included_in_flowcell_report(result_dir):
8388
flowcell_dir = os.path.join(result_dir, "flowcell_report")
8489
report_path = os.path.join(
@@ -116,6 +121,7 @@ def test_all_sections_included_in_bclcovert_flowcell_report(result_dir):
116121
check_sections_in_report(report_path, sections)
117122

118123

124+
@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True)
119125
def test_all_sections_included_in_project_reports(result_dir):
120126
projects_dir = os.path.join(result_dir, "projects")
121127
projects = ["Zymo", "Qiagen", "NoProject"]

0 commit comments

Comments
 (0)