Skip to content

Commit 3a4dd16

Browse files
Merge pull request #4 from phesketh-igtp/development
Development
2 parents a565a95 + 883a068 commit 3a4dd16

File tree

19 files changed

+545
-116
lines changed

19 files changed

+545
-116
lines changed

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,5 @@ qsub -S /bin/bash -cwd -V -N nf-main \
131131
/path/to/RutiSeq-nf/main.nf \
132132
--samplesheet /path/to/RutiSeq-nf/test/samples.hpc.csv \
133133
--outdir /path/to/RutiSeq-nf/RutiSeq-test \
134-
-profile igtp,conda_on
135-
# this specifies that the job should be submitted to the IGTP HPC using conda
134+
-profile hpc_sungrid_engine,conda_on
136135
```

bin/R/create_pairwise_analysis_tuple.R

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ filtered_meta <- meta %>%
4343
filtered_lineages_forward <- filtered_meta |>
4444
filter(SampleID %in% run_ids$SampleID) |>
4545
count(lineage) |>
46-
filter(n > 4) |> # rm lineage with less than 4 genome (min for MTBSeq)
4746
select(lineage) |>
4847
distinct() # Use distinct() instead of unique() for dplyr consistency
4948

envs/conda/r-phylogeny_env.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ channels:
33
- conda-forge
44
- bioconda
55
dependencies:
6+
- conda-forge::r-base=4.4.2
67
- bioconda::bioconductor-ggtree=3.14.0
78
- bioconda::bioconductor-treeio=1.30.0
89
- conda-forge::r-ape=5.8_1

main.nf

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
nextflow.enable.dsl = 2
44

55
include { FILE_CHECK } from './modules/local/file-checks/main.nf'
6-
include { TBPROFILER_DB_UPDATE } from './modules/local/tbprofiler/db-update/main.nf'
76
include { TAXONKIT_DB_UPDATE } from './modules/local/taxonkit/db-update/main.nf'
87
//include { NEGATIVE_CTRL_WF } from './workflows/negative_ctrl_wf.nf'
98
include { SINGLE_WF } from './workflows/single_wf.nf'
@@ -168,12 +167,11 @@ workflow {
168167

169168
/*
170169
······································································································
171-
UPDATING THE DATABASE (TBPROFILER_DB_UPDATE)
170+
UPDATING THE DATABASE
172171
- The TaxonKit database is updated with the latest version of the database
173172
······································································································
174173
*/
175174

176-
TBPROFILER_DB_UPDATE( params.runID )
177175
TAXONKIT_DB_UPDATE( params.runID )
178176

179177
/*
@@ -188,7 +186,6 @@ workflow {
188186
/*
189187
NEGATIVE_CTRL_WF( params.runID,
190188
controls_ch,
191-
TBPROFILER_DB_UPDATE.out.tbprofiler_update_db,
192189
TAXONKIT_DB_UPDATE.out.taxonkit_update_db
193190
)
194191
*/
@@ -249,7 +246,7 @@ workflow {
249246
······································································································
250247
*/
251248

252-
SINGLE_WF( params.runID, comp_samples_ch, TBPROFILER_DB_UPDATE.out.tbprofiler_update_db )
249+
SINGLE_WF( params.runID, comp_samples_ch )
253250

254251
// DEBUG: Demonstrate the content of the channel
255252
/// SINGLE_WF.out.single_updated_samples_ch.view { sample -> "Sample: $sampleID" }
@@ -288,10 +285,12 @@ workflow {
288285
tbdb_out_ch = tbdb_out_files.collect()
289286
who_out_ch = who_out_files.collect()
290287

291-
PAIRWISE_WF( params.runID, mtbseq_stats_ch,
292-
mtbseq_class_ch, tbdb_out_ch,
293-
who_out_ch, sampleID_list,
294-
TBPROFILER_DB_UPDATE.out.tbprofiler_update_db
288+
PAIRWISE_WF( params.runID,
289+
mtbseq_stats_ch,
290+
mtbseq_class_ch,
291+
tbdb_out_ch,
292+
who_out_ch,
293+
sampleID_list
295294
)
296295

297296
/*

modules/local/pairwise/concatenate-cluster-file/main.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ process CONCATENATE_CLUSTERS {
2020
path(clusters)
2121

2222
output:
23-
path("unprocessed_clusters.tsv"), emit: bbdd_clusters
23+
path("unprocessed_clusters.tsv"), emit: pairwise_clusters
2424

2525
script:
2626

@@ -29,7 +29,7 @@ process CONCATENATE_CLUSTERS {
2929
echo "lineage\tdistance\tgenomes\tgroup" > unprocessed_clusters.tsv
3030
3131
# Concatenate all files
32-
for file in ${params.outDir}/bbdd/mtbseq/pairwise/*/Groups/*clusters.tsv; do
32+
for file in ${params.outDir}/bbdd/mtbseq/pairwise/*/Groups/*_d*.clusters.tsv; do
3333
cat \$file >> unprocessed_clusters.tsv
3434
done
3535

modules/local/summary/generate-nexus/main.nf

Lines changed: 1 addition & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -59,72 +59,4 @@ process GENERATE_NEXUS {
5959
6060
"""
6161

62-
}
63-
64-
/*
65-
# create the output and temporary directories
66-
mkdir -p nexus/ fasta/ positions/
67-
68-
# create the list of genomes within the cluster
69-
grep "${clusterID}" ${pairwise_clusters} \\
70-
| cut -f1 > ${clusterID}.genomes.list
71-
72-
#·················································································#
73-
74-
# create cluster directory and split up fasta file in cluster fastas
75-
while IFS=";" read -r genome; do
76-
seqkit grep -w 0 -n -p \${genome} ${snp_fasta} >> ${clusterID}.fasta
77-
done < ${clusterID}.genomes.list
78-
79-
# run snp-sites on the fastas
80-
snp-sites ${clusterID}.fasta > ${clusterID}.snpsites.fasta
81-
snp-sites ${clusterID}.fasta -v | cut -f2 \\
82-
| sed '1,4d' > positions/${clusterID}_positions.tab
83-
84-
#·················································································#
85-
86-
# H37Rv variance positions
87-
for i in `cat positions/${clusterID}_positions.tab`; do
88-
sed -n \$((i+2))'p' ${snp_tab} | cut -f3
89-
done > ${clusterID}_tmp_refseq
90-
91-
# convert column into fasta
92-
paste -s -d "" ${clusterID}_tmp_refseq \\
93-
| sed '1i >H37Rv' > ${clusterID}_H37Rv.fasta
94-
95-
#·················································································#
96-
97-
# Get genomic positions
98-
while read -r position; do
99-
sed -n \$((position+2))'p' ${snp_tab} | cut -f 1;
100-
done < positions/${clusterID}_positions.tab > positions/${clusterID}_genomic_positions.tab
101-
102-
#·················································································#
103-
104-
# Valencian ancestor (MTB_anc) variance positions
105-
cp ${params.mtbc_ancestor_path} ${lineage}.tmp.MTB_anc.pos.gz
106-
gunzip ${lineage}.tmp.MTB_anc.pos.gz
107-
108-
for i in `cat positions/${clusterID}_genomic_positions.tab`; do
109-
sed -n \${i}'p' ${lineage}.tmp.MTB_anc.pos | cut -f3
110-
done > ${clusterID}_tmp_MTB_anc
111-
112-
# convert the column in fasta
113-
paste -s -d "" ${clusterID}_tmp_MTB_anc \\
114-
| sed '1i >MTB_anc' > ${clusterID}_MTB_anc.fasta
115-
116-
# remove the large tab file
117-
rm -rf ${lineage}.tmp.MTB_anc.pos.gz
118-
119-
#·················································································#
120-
121-
# Create final FASTA file
122-
cat ${clusterID}.snpsites.fasta \\
123-
${clusterID}_H37Rv.fasta \\
124-
${clusterID}_MTB_anc.fasta \\
125-
> fasta/${clusterID}_refseq.fasta
126-
127-
# convert to nexus for visualisation
128-
seqret -osformat2 nexus -sequence fasta/${clusterID}_refseq.fasta \\
129-
-outseq nexus/${clusterID}_refseq.nex
130-
*/
62+
}

modules/local/summary/process-clusters/main.nf

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ process PROCESS_CLUSTERS {
1212
*/
1313

1414
conda params.r_stats_env
15-
15+
1616
publishDir "${params.outDir}/results/${runID}/clusters/", mode: 'copy'
1717

1818
input:
@@ -25,12 +25,18 @@ process PROCESS_CLUSTERS {
2525
path("${runID}_processed_clusters.tsv")
2626

2727
script:
28-
"""
29-
Rscript ${params.r_script_dir}/process_clusters.R #\
30-
#--clusters ${pairwise_clusters} \
31-
#--summary ${analysis_summary}
28+
"""
29+
30+
${pairwise_clusters}
31+
32+
Rscript ${params.r_script_dir}/process_clusters.R #\
33+
#--clusters ${pairwise_clusters} \
34+
#--summary ${analysis_summary}
35+
36+
cp processed_clusters.tsv ${runID}_processed_clusters.tsv
3237
33-
cp processed_clusters.tsv ${runID}_processed_clusters.tsv
34-
"""
38+
cp processed_clusters.tsv ${params.outDir}/results/
39+
cp ${pairwise_clusters} ${params.outDir}/results/
40+
"""
3541

3642
}

modules/local/tbprofiler/compile.tbdb/main.nf

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ process TBPROFILER_COMPILE_TBDB {
3333
input:
3434
val(runID)
3535
path(tbprofiler_results)
36-
path(tbprofiler_update_db)
3736

3837
output:
3938
path("tbdb-tbprofiler.txt"), emit: tbdb_results

modules/local/tbprofiler/compile.who/main.nf

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ process TBPROFILER_COMPILE_WHO {
2929
input:
3030
val runID
3131
path (tbprofiler_who_results)
32-
path(tbprofiler_update_db)
3332

3433
output:
3534
path("who-tbprofiler.txt"), emit: who_results

modules/local/tbprofiler/profile.tbdb/main.nf

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ process TBPROFILER_PROFILE_TBDB {
2929
path(mtbc_forward), path(mtbc_reverse), path(mtbseq_class),
3030
path(mtbseq_stats), path(mtbseq_pos), path(mtbseq_vars),
3131
path(tbdb_out), path(who_out), path(mtbseq_vcf)
32-
path(tbprofiler_update_handover)
3332

3433
output:
3534
path("bam/tbdb-${sampleID}.bam")

0 commit comments

Comments
 (0)