Skip to content

Commit 84e09ba

Browse files
authored
Release v0.1.2 (#13)
1 parent cec784a commit 84e09ba

7 files changed

Lines changed: 392 additions & 301 deletions

File tree

README.md

Lines changed: 69 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -4,31 +4,33 @@
44

55
Pipefaceee.
66

7-
Nextflow pipeline to align, variant call (SNP's, indels's, SV's) and phase long read [ONT](https://nanoporetech.com/) and/or [pacbio](https://www.pacb.com/) HiFi data.
7+
Nextflow pipeline to align, variant call (SNPs, indels, SVs), phase and optionally annotate (SNPs, indels) long read [ONT](https://nanoporetech.com/) and/or [pacbio](https://www.pacb.com/) HiFi data.
8+
9+
There currently exist tools and workflows that undertake comparable analyses, but pipeface serves as a central workflow to process long read data (both ONT and pacbio HiFi data). Pipeface's future holds STR, CNV and tandem repeat calling, as well as the analysis of cohorts.
810

911
<p align="center">
1012
<img src="./images/pipeface.png">
1113

1214
## Workflow
1315

14-
### Overview
15-
1616
```mermaid
1717
%%{init: {'theme':'dark'}}%%
1818
flowchart LR
1919
20-
input_data("Input data: \n\n ONT fastq.gz \n and/or \n ONT fastq \n and/or \n ONT uBAM \n and/or \n pacbio HiFi uBAM")
20+
input_data("Input data: <br><br> ONT fastq.gz <br> and/or <br> ONT fastq <br> and/or <br> ONT uBAM <br> and/or <br> pacbio HiFi uBAM")
2121
merging{{"Merge runs (if needed)"}}
2222
alignment{{"bam to fastq conversion (if needed), alignment, sorting"}}
2323
depth{{"Calculate alignment depth"}}
2424
snp_indel_calling{{"SNP/indel variant calling"}}
2525
snp_indel_phasing{{"SNP/indel phasing"}}
26+
snp_indel_annotation{{"SNP/indel annotation (optional - hg38 only)"}}
2627
haplotagging{{"Haplotagging bams"}}
2728
sv_calling{{"Structural variant calling"}}
2829
2930
input_data-.->merging-.->alignment-.->snp_indel_calling-.->snp_indel_phasing-.->haplotagging-.->sv_calling
3031
alignment-.->depth
3132
alignment-.->haplotagging
33+
snp_indel_phasing-.->snp_indel_annotation
3234
3335
```
3436

@@ -38,45 +40,50 @@ alignment-.->haplotagging
3840
%%{init: {'theme':'dark'}}%%
3941
flowchart LR
4042
41-
ont_data_f1("Sample 1 \n\n Input data: \n\n ONT fastq.gz")
42-
ont_data_f2("Sample 1 \n\n Input data: \n\n ONT fastq.gz")
43-
pacbio_data_f3("Sample 2 \n\n Input data: \n\n Pacbio HiFi uBAM")
44-
pacbio_data_f4("Sample 2 \n\n Input data: \n\n Pacbio HiFi uBAM")
45-
ont_data_f5("Sample 3 \n\n Input data: \n\n ONT fastq")
46-
ont_data_f6("Sample 4 \n\n Input data: \n\n ONT uBAM")
47-
48-
merging_m1{{"Description: merge runs \n\n Main tools: GNU coreutils \n\n Commands: cat"}}
49-
merging_m2{{"Description: merge runs \n\n Main tools: Samtools \n\n Commands: samtools merge"}}
50-
51-
alignment_s1{{"Description: alignment, sorting \n\n Main tools: Minimap2 and Samtools \n\n Commands: minimap2 and samtools sort"}}
52-
alignment_s2{{"Description: alignment, sorting \n\n Main tools: Minimap2 and Samtools \n\n Commands: minimap2 and samtools sort"}}
53-
alignment_s3{{"Description: bam to fastq conversion, alignment, sorting \n\n Main tools: Minimap2 and Samtools \n\n Commands: minimap2 and samtools sort"}}
54-
alignment_s4{{"Description: bam to fastq conversion, alignment, sorting \n\n Main tools: Minimap2 and Samtools \n\n Commands: minimap2 and samtools sort"}}
55-
56-
depth_s1{{"Description: calculate alignment depth \n\n Main tools: Samtools \n\n Commands: samtools depth"}}
57-
depth_s2{{"Description: calculate alignment depth \n\n Main tools: Samtools \n\n Commands: samtools depth"}}
58-
depth_s3{{"Description: calculate alignment depth \n\n Main tools: Samtools \n\n Commands: samtools depth"}}
59-
depth_s4{{"Description: calculate alignment depth \n\n Main tools: Samtools \n\n Commands: samtools depth"}}
60-
61-
snp_indel_calling_s1{{"Description: SNP/indel variant calling \n\n Main tools: Clair3 or DeepVariant (NVIDIA Parabricks) \n\n Commands: run_clair3.sh or pbrun deepvariant"}}
62-
snp_indel_calling_s2{{"Description: SNP/indel variant calling \n\n Main tools: Clair3 or DeepVariant (NVIDIA Parabricks) \n\n Commands: run_clair3.sh or pbrun deepvariant"}}
63-
snp_indel_calling_s3{{"Description: SNP/indel variant calling \n\n Main tools: Clair3 or DeepVariant (NVIDIA Parabricks) \n\n Commands: run_clair3.sh or pbrun deepvariant"}}
64-
snp_indel_calling_s4{{"Description: SNP/indel variant calling \n\n Main tools: Clair3 or DeepVariant (NVIDIA Parabricks) \n\n Commands: run_clair3.sh or pbrun deepvariant"}}
65-
66-
snp_indel_phasing_s1{{"Description: SNP/indel phasing \n\n Main tools: WhatsHap \n\n Commands: whatshap phase"}}
67-
snp_indel_phasing_s2{{"Description: SNP/indel phasing \n\n Main tools: WhatsHap \n\n Commands: whatshap phase"}}
68-
snp_indel_phasing_s3{{"Description: SNP/indel phasing \n\n Main tools: WhatsHap \n\n Commands: whatshap phase"}}
69-
snp_indel_phasing_s4{{"Description: SNP/indel phasing \n\n Main tools: WhatsHap \n\n Commands: whatshap phase"}}
70-
71-
haplotagging_s1{{"Description: haplotagging bams \n\n Main tools: WhatsHap \n\n Commands: whatshap haplotag"}}
72-
haplotagging_s2{{"Description: haplotagging bams \n\n Main tools: WhatsHap \n\n Commands: whatshap haplotag"}}
73-
haplotagging_s3{{"Description: haplotagging bams \n\n Main tools: WhatsHap \n\n Commands: whatshap haplotag"}}
74-
haplotagging_s4{{"Description: haplotagging bams \n\n Main tools: WhatsHap \n\n Commands: whatshap haplotag"}}
75-
76-
sv_calling_s1{{"Description: structural variant calling \n\n Main tools: Sniffles2 and/or cuteSV \n\n Commands: sniffles and/or cuteSV"}}
77-
sv_calling_s2{{"Description: structural variant calling \n\n Main tools: Sniffles2 and/or cuteSV \n\n Commands: sniffles and/or cuteSV"}}
78-
sv_calling_s3{{"Description: structural variant calling \n\n Main tools: Sniffles2 and/or cuteSV \n\n Commands: sniffles and/or cuteSV"}}
79-
sv_calling_s4{{"Description: structural variant calling \n\n Main tools: Sniffles2 and/or cuteSV \n\n Commands: sniffles and/or cuteSV"}}
43+
ont_data_f1("Sample 1 <br><br> Input data: <br><br> ONT fastq.gz")
44+
ont_data_f2("Sample 1 <br><br> Input data: <br><br> ONT fastq.gz")
45+
pacbio_data_f3("Sample 2 <br><br> Input data: <br><br> Pacbio HiFi uBAM")
46+
pacbio_data_f4("Sample 2 <br><br> Input data: <br><br> Pacbio HiFi uBAM")
47+
ont_data_f5("Sample 3 <br><br> Input data: <br><br> ONT fastq")
48+
ont_data_f6("Sample 4 <br><br> Input data: <br><br> ONT uBAM")
49+
50+
merging_m1{{"Description: merge runs <br><br> Main tools: GNU coreutils <br><br> Commands: cat"}}
51+
merging_m2{{"Description: merge runs <br><br> Main tools: Samtools <br><br> Commands: samtools merge"}}
52+
53+
alignment_s1{{"Description: alignment, sorting <br><br> Main tools: Minimap2 and Samtools <br><br> Commands: minimap2 and samtools sort"}}
54+
alignment_s2{{"Description: alignment, sorting <br><br> Main tools: Minimap2 and Samtools <br><br> Commands: minimap2 and samtools sort"}}
55+
alignment_s3{{"Description: bam to fastq conversion, alignment, sorting <br><br> Main tools: Minimap2 and Samtools <br><br> Commands: minimap2 and samtools sort"}}
56+
alignment_s4{{"Description: bam to fastq conversion, alignment, sorting <br><br> Main tools: Minimap2 and Samtools <br><br> Commands: minimap2 and samtools sort"}}
57+
58+
depth_s1{{"Description: calculate alignment depth <br><br> Main tools: Samtools <br><br> Commands: samtools depth"}}
59+
depth_s2{{"Description: calculate alignment depth <br><br> Main tools: Samtools <br><br> Commands: samtools depth"}}
60+
depth_s3{{"Description: calculate alignment depth <br><br> Main tools: Samtools <br><br> Commands: samtools depth"}}
61+
depth_s4{{"Description: calculate alignment depth <br><br> Main tools: Samtools <br><br> Commands: samtools depth"}}
62+
63+
snp_indel_calling_s1{{"Description: SNP/indel variant calling <br><br> Main tools: Clair3 or DeepVariant <br><br> Commands: run_clair3.sh or run_deepvariant"}}
64+
snp_indel_calling_s2{{"Description: SNP/indel variant calling <br><br> Main tools: Clair3 or DeepVariant <br><br> Commands: run_clair3.sh or run_deepvariant"}}
65+
snp_indel_calling_s3{{"Description: SNP/indel variant calling <br><br> Main tools: Clair3 or DeepVariant <br><br> Commands: run_clair3.sh or run_deepvariant"}}
66+
snp_indel_calling_s4{{"Description: SNP/indel variant calling <br><br> Main tools: Clair3 or DeepVariant <br><br> Commands: run_clair3.sh or run_deepvariant"}}
67+
68+
snp_indel_phasing_s1{{"Description: SNP/indel phasing <br><br> Main tools: WhatsHap <br><br> Commands: whatshap phase"}}
69+
snp_indel_phasing_s2{{"Description: SNP/indel phasing <br><br> Main tools: WhatsHap <br><br> Commands: whatshap phase"}}
70+
snp_indel_phasing_s3{{"Description: SNP/indel phasing <br><br> Main tools: WhatsHap <br><br> Commands: whatshap phase"}}
71+
snp_indel_phasing_s4{{"Description: SNP/indel phasing <br><br> Main tools: WhatsHap <br><br> Commands: whatshap phase"}}
72+
73+
snp_indel_annotation_s1{{"Description: SNP/indel annotation (optional - hg38 only) <br><br> Main tools: ensembl-vep <br><br> Commands: vep"}}
74+
snp_indel_annotation_s2{{"Description: SNP/indel annotation (optional - hg38 only) <br><br> Main tools: ensembl-vep <br><br> Commands: vep"}}
75+
snp_indel_annotation_s3{{"Description: SNP/indel annotation (optional - hg38 only) <br><br> Main tools: ensembl-vep <br><br> Commands: vep"}}
76+
snp_indel_annotation_s4{{"Description: SNP/indel annotation (optional - hg38 only) <br><br> Main tools: ensembl-vep <br><br> Commands: vep"}}
77+
78+
haplotagging_s1{{"Description: haplotagging bams <br><br> Main tools: WhatsHap <br><br> Commands: whatshap haplotag"}}
79+
haplotagging_s2{{"Description: haplotagging bams <br><br> Main tools: WhatsHap <br><br> Commands: whatshap haplotag"}}
80+
haplotagging_s3{{"Description: haplotagging bams <br><br> Main tools: WhatsHap <br><br> Commands: whatshap haplotag"}}
81+
haplotagging_s4{{"Description: haplotagging bams <br><br> Main tools: WhatsHap <br><br> Commands: whatshap haplotag"}}
82+
83+
sv_calling_s1{{"Description: structural variant calling <br><br> Main tools: Sniffles2 and/or cuteSV <br><br> Commands: sniffles and/or cuteSV"}}
84+
sv_calling_s2{{"Description: structural variant calling <br><br> Main tools: Sniffles2 and/or cuteSV <br><br> Commands: sniffles and/or cuteSV"}}
85+
sv_calling_s3{{"Description: structural variant calling <br><br> Main tools: Sniffles2 and/or cuteSV <br><br> Commands: sniffles and/or cuteSV"}}
86+
sv_calling_s4{{"Description: structural variant calling <br><br> Main tools: Sniffles2 and/or cuteSV <br><br> Commands: sniffles and/or cuteSV"}}
8087
8188
ont_data_f1-.->merging_m1-.->alignment_s1-.->snp_indel_calling_s1-.->snp_indel_phasing_s1-.->haplotagging_s1-.->sv_calling_s1
8289
ont_data_f2-.->merging_m1
@@ -96,6 +103,11 @@ alignment_s2-.->haplotagging_s2
96103
alignment_s3-.->haplotagging_s3
97104
alignment_s4-.->haplotagging_s4
98105
106+
snp_indel_phasing_s1-.->snp_indel_annotation_s1
107+
snp_indel_phasing_s2-.->snp_indel_annotation_s2
108+
snp_indel_phasing_s3-.->snp_indel_annotation_s3
109+
snp_indel_phasing_s4-.->snp_indel_annotation_s4
110+
99111
```
100112

101113
## Main analyses
@@ -107,10 +119,11 @@ alignment_s4-.->haplotagging_s4
107119
## Main tools
108120

109121
- [Minimap2](https://github.com/lh3/minimap2)
110-
- [Clair3](https://github.com/HKU-BAL/Clair3) OR [DeepVariant](https://github.com/google/deepvariant) (wrapped in [NVIDIA Parabricks](https://docs.nvidia.com/clara/parabricks/latest/))
122+
- [Clair3](https://github.com/HKU-BAL/Clair3) or [DeepVariant](https://github.com/google/deepvariant)
111123
- [WhatsHap](https://github.com/whatshap/whatshap)
112-
- [Sniffles2](https://github.com/fritzsedlazeck/Sniffles) AND/OR [cuteSV](https://github.com/tjiangHIT/cuteSV)
124+
- [Sniffles2](https://github.com/fritzsedlazeck/Sniffles) and/or [cuteSV](https://github.com/tjiangHIT/cuteSV)
113125
- [Samtools](https://github.com/samtools/samtools)
126+
- [ensembl-vep](https://github.com/Ensembl/ensembl-vep)
114127

115128
## Main input files
116129

@@ -119,6 +132,7 @@ alignment_s4-.->haplotagging_s4
119132
- ONT/pacbio HiFi FASTQ (gzipped or uncompressed) or unaligned BAM
120133
- Indexed reference genome
121134
- Clair3 models (if running Clair3)
135+
- [DeepVariant GPU 1.6.1 docker container](https://hub.docker.com/layers/google/deepvariant/1.6.1-gpu/images/sha256-7929c55106d3739daa18d52802913c43af4ca2879db29656056f59005d1d46cb?context=explore) pulled via singularity (if running DeepVariant)
122136

123137
### Optional
124138

@@ -129,18 +143,26 @@ alignment_s4-.->haplotagging_s4
129143

130144
- Aligned, sorted and haplotagged bam
131145
- Clair3 or DeepVariant phased SNP/indel VCF file
132-
- Clair3 or DeepVariant SNP/indel gVCF file
146+
- Clair3 or DeepVariant phased and annotated SNP/indel VCF file (optional - hg38 only)
133147
- Phased Sniffles2 and/or un-phased cuteSV SV VCF file
134148

135149
## Assumptions
136150

137151
- Running pipeline on Australia's [National Computational Infrastructure (NCI)](https://nci.org.au/)
138152
- Access to if89 project on [National Computational Infrastructure (NCI)](https://nci.org.au/)
153+
- Access to xy86 project on [National Computational Infrastructure (NCI)](https://nci.org.au/) (if running variant annotation)
139154
- Access to pipeline dependencies:
140155
- [Nextflow and its java dependency](https://nf-co.re/docs/usage/installation). Validated to run on:
141156
- Nextflow 24.04.1
142-
- Java version 17.0.2
157+
- Java 17.0.2
158+
159+
*[See the list of software and their versions used by this version of pipeface](./docs/software_versions.txt) as well as the [list of variant databases and their versions](./docs/database_versions.txt) if variant annotation is carried out (assuming the default [nextflow_pipeface.config](./config/nextflow_pipeface.config) file is used).*
143160

144161
## Run it!
145162

146163
See a walkthrough for how to [run pipeface on NCI](./docs/run_on_nci.md).
164+
165+
## Credit
166+
167+
This is a highly collaborative project, with many contributions from the [Genomic Technologies Lab](https://www.garvan.org.au/research/labs-groups/genomic-technologies-lab). Notably, Dr Andre Reis and Dr Ira Deveson are closely involved in the development of this pipeline. The installation and hosting of software used in this pipeline has been and continues to be supported by the [Australian BioCommons Tools and Workflows project (if89)](https://australianbiocommons.github.io/ables/if89/).
168+

config/nextflow_pipeface.config

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,21 @@
11

2+
params.vep_db = '/g/data/if89/datalib/vep/112/grch38/'
3+
params.revel_db = '/g/data/xy86/revel/1.3/grch38/new_tabbed_revel_grch38.tsv.gz'
4+
params.gnomad_db = '/g/data/xy86/gnomad/genomes/v4.1.0/gnomad.joint.v4.1.sites.chrall.vcf.gz'
5+
params.clinvar_db = '/g/data/xy86/clinvar/2024-08-25/grch38/clinvar_20240825.vcf.gz'
6+
params.cadd_snv_db = '/g/data/xy86/cadd/1.7/grch38/whole_genome_SNVs.tsv.gz'
7+
params.cadd_indel_db = '/g/data/xy86/cadd/1.7/grch38/gnomad.genomes.r4.0.indel.tsv.gz'
8+
params.spliceai_snv_db = '/g/data/xy86/spliceai/v1.3/grch38/spliceai_scores.raw.snv.hg38.vcf.gz'
9+
params.spliceai_indel_db = '/g/data/xy86/spliceai/v1.3/grch38/spliceai_scores.raw.indel.hg38.vcf.gz'
10+
params.alphamissense_db = '/g/data/xy86/alphamissense/grch38/AlphaMissense_hg38.tsv.gz'
11+
212
process {
313

414
executor = 'pbspro'
515
project = 'kr68'
6-
storage = 'gdata/if89+scratch/kr68+gdata/kr68+gdata/ox63'
16+
storage = 'gdata/if89+gdata/xy86+scratch/kr68+gdata/kr68+gdata/ox63'
717
// provide proper access to if89 environmental modules
8-
beforeScript = 'module use -a /g/data/if89/apps/modulefiles'
18+
beforeScript = 'module use -a /g/data/if89/apps/modulefiles && module use -a /g/data/if89/shpcroot/modules'
919

1020
withName: scrape_settings {
1121
queue = 'normal'
@@ -33,7 +43,7 @@ process {
3343
withName: minimap2 {
3444
queue = 'normal'
3545
cpus = '16'
36-
time = '10h'
46+
time = '14h'
3747
memory = '64GB'
3848
module = 'minimap2/2.28:samtools/1.19'
3949
}
@@ -49,21 +59,30 @@ process {
4959
withName: clair3 {
5060
queue = 'normal'
5161
cpus = '32'
52-
time = '6h'
62+
time = '9h'
5363
memory = '128GB'
54-
module = 'clair3/v1.0.9:htslib/1.16'
64+
module = 'clair3/v1.0.9'
5565
}
5666

5767
withName: deepvariant {
5868
queue = 'gpuvolta'
5969
cpus = '24'
6070
gpus = '2'
61-
time = '6h'
62-
memory = '180GB'
63-
module = 'parabricks/4.2.1:htslib/1.16'
71+
time = '8h'
72+
memory = '192GB'
73+
disk = '80GB'
74+
module = 'singularity'
75+
}
76+
77+
withName: vep_snv {
78+
queue = 'normal'
79+
cpus = '32'
80+
time = '10h'
81+
memory = '128GB'
82+
module = 'singularity:htslib/1.16:ensemblorg/ensembl-vep/release_112.0'
6483
}
6584

66-
withName: 'whatshap_phase_clair3|whatshap_phase_dv|whatshap_haplotag' {
85+
withName: 'whatshap_phase|whatshap_haplotag' {
6786
queue = 'normal'
6887
cpus = '4'
6988
time = '10h'
@@ -87,7 +106,7 @@ process {
87106
module = 'cuteSV/1.0.13:htslib/1.16'
88107
}
89108

90-
withName: 'publish_settings|publish_bam_header|publish_minimap2|publish_clair3|publish_deepvariant|publish_whatshap_phase_clair3|publish_whatshap_phase_dv|publish_whatshap_haplotag|publish_sniffles|publish_cutesv' {
109+
withName: 'publish_settings|publish_bam_header|publish_depth|publish_whatshap_phase|publish_whatshap_haplotag|publish_sniffles|publish_cutesv' {
91110
queue = 'normal'
92111
cpus = '1'
93112
time = '20m'

config/parameters_pipeface.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
"tandem_repeat": "",
88
"snp_indel_caller": "",
99
"sv_caller": "",
10-
"outdir": ""
10+
"annotate": "",
11+
"outdir": "",
12+
"deepvariant_container": ""
1113

1214
}
13-

docs/database_versions.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
vep-cache: homo_sapiens/112/GRCh38
2+
REVEL database: 1.3
3+
gnomAD database: 4.1.0
4+
ClinVar database: 2024-08-25
5+
CADD SNV database: 1.7/GRCh38
6+
CADD indel database: 1.7/GRCh38
7+
SpliceAI SNV database: 1.3
8+
SpliceAI indel database: 1.3
9+
AlphaMissense database: GRCh38

0 commit comments

Comments
 (0)