Skip to content

Commit 635e238

Browse files
committed
2025-02-27
1 parent 262debe commit 635e238

File tree

7 files changed

+62
-64
lines changed

7 files changed

+62
-64
lines changed

bin/R/create_pairwise_analysis_tuple.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,11 @@ colnames(sub_lineages) <- "selected_sub_lineage"
1414
run_ids <- readr::read_delim("run_sample_ids.txt", col_names = FALSE, delim = ",")
1515
colnames(run_ids) <- "SampleID"
1616

17-
meta <- readr::read_delim("pairwise_analysis.list.csv", col_names = TRUE, delim = ",") |>
18-
distinct() |>
19-
filter(!is.na(main_lineage) & !str_detect(main_lineage, ",")) |>
20-
filter(!str_detect(sample, "CN-")) # This line filters out any sample that contains 'CN-'
17+
meta <- readr::read_delim("pairwise_analysis.list.csv", col_names = FALSE, delim = ",") |>
18+
distinct()
2119
colnames(meta) <- c("SampleID", "main_lineage", "sub_lineage")
20+
meta <- meta |> filter(!is.na(main_lineage) & !str_detect(main_lineage, ";")) |>
21+
filter(!str_detect(SampleID, "CN-")) # This line filters out any sample that contains 'CN-'
2222

2323
# Filter out the lineages at sub_lineage level
2424
filtered_meta <- meta %>%

bin/shell/concatenate-variable-pylogeny-ancestors.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ mkdir -p Phylogeny/
1919
/^>/ {next} # Skip header lines
2020
{
2121
# Process sequence lines
22-
for (i = 1; i <= length(\$0); i++) {
22+
for (i = 1; i <= length($0); i++) {
2323
position++
24-
print position "\t" substr(\$0, i, 1) >> "'"${lineage}.tmp.fasta_positions.tab"'"
24+
print position "\t" substr($0, i, 1) >> "'"${lineage}.tmp.fasta_positions.tab"'"
2525
}
2626
}' "${lineage}.tmp.fasta"
2727

@@ -30,22 +30,22 @@ mkdir -p Phylogeny/
3030

3131
# 3. obtain the reference positions (H37Rv) for the cluster positions
3232
for i in `cat ${lineage}.tmp.fasta_positions`; do
33-
sed -n \$((i+2))'p' ${tab} | cut -f3
33+
sed -n $((i+2))'p' ${tab} | cut -f3
3434
done > ${lineage}.tmp_refseq
3535

3636
# 4. convert column into fasta
3737
paste -s -d "" ${lineage}.tmp_refseq | sed '1i >H37Rv' > Phylogeny/${lineage}.ref-H37Rv.fasta
3838

3939
# 5. get the genomic positions of the SNPs
4040
while read -r position; do
41-
sed -n \$((position+2))'p' ${tab} | cut -f 1;
41+
sed -n $((position+2))'p' ${tab} | cut -f 1;
4242
done < ${lineage}.tmp.fasta_positions > Phylogeny/${lineage}_genomic_positions.tab
4343

4444
cp ${mtbc_ancestor_path} ${lineage}.tmp.MTB_anc.pos.gz; gunzip ${lineage}.tmp.MTB_anc.pos.gz
4545

4646
# 6. Get the same SNPs for the 'ancestor' genomes
4747
for i in `cat Phylogeny/${lineage}_genomic_positions.tab`; do
48-
sed -n \${i}'p' ${lineage}.tmp.MTB_anc.pos | cut -f3 # doesnt need to +2 as the tsv file has no header
48+
sed -n ${i}'p' ${lineage}.tmp.MTB_anc.pos | cut -f3 # no +2 offset needed here because the TSV file has no header
4949
done > ${lineage}.tmp.MTB_anc
5050

5151
# 7. convert the column in fasta

main.nf

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,8 @@ workflow {
267267
who_out_ch,
268268
sampleID_list
269269
)
270-
270+
271+
PAIRWISE_WF.out.pairwise_clusters.view()
271272
/*
272273
······································································································
273274
SUMMARY WORKFLOW (SUMMARY_WF):
@@ -277,15 +278,15 @@ workflow {
277278
- Generate MJN files for visualisation in PopArt
278279
······································································································
279280
*/
280-
281+
/*
281282
SUMMARY_WF( params.runID,
282283
PAIRWISE_WF.out.pairwise_clusters,
283284
PAIRWISE_WF.out.analysis_summary,
284285
PAIRWISE_WF.out.who_resistance,
285286
PAIRWISE_WF.out.tbdb_resistance,
286287
PAIRWISE_WF.out.phylogeny_plotting_ch
287288
)
288-
289+
*/
289290
/*
290291
······································································································
291292
BARCODING ANALYSIS (BARCODING_WF)

modules/local/filtering/prepare_pairwise_channels/main.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ process PREPARE_PAIRWISE_CHANNELS {
2626
# Run the script to generate pairwise analysis tuples
2727
Rscript ${params.r_script_dir}/create_pairwise_analysis_tuple.R \\
2828
1>>.command.out \\
29-
2>>.command.err || true # i think this helps
29+
2>>.command.err || true # '|| true' forces a zero exit status, so a failing Rscript does not stop this script
3030
3131
# remove headers
3232
sed '/^lineage,SampleID/d' final.lineage_samples_tuple.csv | sort > tmp.final.lineage_samples_tuple.csv
modules/local/phylogeny/concatenated_snp_phylogeny/main.nf

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
process CONCATENATED_VARIABLE_REGION_PHYLOGENY {
2+
3+
tag "${runID}: ${lineage}"
4+
5+
conda params.phylogeny_env
6+
7+
publishDir "${params.outdir}/bbdd/mtbseq/pairwise/${lineage}/", mode: 'copy'
8+
9+
input:
10+
val(runID)
11+
tuple val(lineage),
12+
path(fasta),
13+
path(tab)
14+
15+
output:
16+
path("Phylogeny/*")
17+
18+
tuple val(lineage), path("Phylogeny/${lineage}_ML.contree"),
19+
path("Phylogeny/${lineage}.ref-H37Rv_MTBc-anc.aln.fasta"), emit: phylogeny_plotting_ch
20+
21+
script:
22+
23+
def additional_args = task.ext.additional_args ?: '' // defined in the nextflow.config file
24+
25+
"""
26+
# Create the fasta files for the phylogeny
27+
bash ${params.script_dir}/shell/concatenate-variable-pylogeny-ancestors.sh \\
28+
${fasta} ${lineage} \\
29+
${tab} ${params.mtbc_ancestor_path}
30+
31+
# Perform alignment of sequences
32+
mafft --auto --thread ${params.cpus} \\
33+
Phylogeny/${lineage}.ref-H37Rv_MTBc-anc.fasta \\
34+
> Phylogeny/${lineage}.ref-H37Rv_MTBc-anc.aln.fasta
35+
36+
# Perform phylogeny
37+
iqtree -s Phylogeny/${lineage}.ref-H37Rv_MTBc-anc.aln.fasta \\
38+
-m ${params.iqtree_model} \\
39+
-T AUTO \\
40+
-ntmax ${params.cpus} \\
41+
-B ${params.iqtree_bootstraps} \\
42+
--prefix ${lineage}_ML
43+
44+
mv ${lineage}_ML.* Phylogeny/
45+
"""
46+
47+
}

modules/local/phylogeny/concatenated_snp_phylogeny-nf.nf renamed to modules/local/phylogeny/concatenated_snp_phylogeny/shell

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,3 @@
1-
process CONCATENATED_VARIABLE_REGION_PHYLOGENY {
2-
3-
tag "${runID}: ${lineage}"
4-
5-
conda params.phylogeny_env
6-
7-
publishDir "${params.outdir}/bbdd/mtbseq/pairwise/${lineage}/", mode: 'copy'
8-
9-
input:
10-
val(runID)
11-
tuple val(lineage),
12-
path(fasta),
13-
path(tab)
14-
15-
output:
16-
path("Phylogeny/*")
17-
18-
tuple val(lineage), path("Phylogeny/${lineage}_ML.contree"),
19-
path("Phylogeny/${lineage}.ref-H37Rv_MTBc-anc.aln.fasta"), emit: phylogeny_plotting_ch
20-
21-
script:
22-
23-
def additional_args = task.ext.additional_args ?: '' // defined in the nextflow.config file
24-
25-
"""
26-
# Create the fasta files for the phylogeny
27-
bash ${params.script_dir}/shell/concatenate-variable-pylogeny-ancestors.sh \\
28-
${fasta} \\
29-
${lineage} \\
30-
${tab} \\
31-
${params.mtbc_ancestor_path}
32-
33-
# Perform alignment of sequences
34-
mafft --auto --thread ${params.cpus} \\
35-
Phylogeny/${lineage}.ref-H37Rv_MTBc-anc.fasta \\
36-
> Phylogeny/${lineage}.ref-H37Rv_MTBc-anc.aln.fasta
37-
38-
# Perform phylogeny
39-
iqtree -s Phylogeny/${lineage}.ref-H37Rv_MTBc-anc.aln.fasta \\
40-
-m ${params.iqtree_model} \\
41-
-T AUTO \\
42-
-ntmax ${params.cpus} \\
43-
-B ${params.iqtree_bootstraps} \\
44-
--prefix ${lineage}_ML
45-
46-
mv ${lineage}_ML.* Phylogeny/
47-
"""
48-
49-
}
50-
511
/*
522
mkdir -p Phylogeny/
533

workflows/pairwise_wf.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ include { COMPILE_SEQUENCING_STATS } from '../modules/local/filte
55
include { PREPARE_PAIRWISE_CHANNELS } from '../modules/local/filtering/prepare_pairwise_channels/main.nf'
66
include { MTBSEQ_LINEAGE_JOINT_AMEND } from '../modules/local/mtbseq/lineage_joint-amend/main.nf'
77
include { MTBSEQ_LINEAGE_GROUP } from '../modules/local/mtbseq/lineage_group/main.nf'
8-
include { CONCATENATED_VARIABLE_REGION_PHYLOGENY } from '../modules/local/phylogeny/concatenated_snp_phylogeny-nf'
8+
include { CONCATENATED_VARIABLE_REGION_PHYLOGENY } from '../modules/local/phylogeny/concatenated_snp_phylogeny/main.nf'
99
include { CONCATENATE_CLUSTERS } from '../modules/local/pairwise/concatenate-cluster-file/main.nf'
1010

1111
workflow PAIRWISE_WF {

0 commit comments

Comments
 (0)