Skip to content

Commit 3469fa0

Browse files
Merge pull request #278 from PacificBiosciences/develop-v3
v3.2.0
2 parents 7ac6634 + fa30371 commit 3469fa0

12 files changed

Lines changed: 220 additions & 196 deletions

File tree

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,18 @@ Both workflows are designed to analyze human PacBio whole genome sequencing (WGS
2424

2525
This is an actively developed workflow with multiple versioned releases, and we make use of git submodules for common tasks that are shared by multiple workflows. There are two ways to ensure that you are using a supported release of the workflow and that the submodules are correctly initialized:
2626

27-
1) Download the release zips directly from a [supported release](https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/tag/v3.1.1):
27+
1) Download the release zips directly from a [supported release](https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/tag/v3.2.0):
2828

2929
```bash
30-
wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.1.1/hifi-human-wgs-singleton.zip
31-
wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.1.1/hifi-human-wgs-family.zip
30+
wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.2.0/hifi-human-wgs-singleton.zip
31+
wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.2.0/hifi-human-wgs-family.zip
3232
```
3333

3434
2) Clone the repository and initialize the submodules:
3535

3636
```bash
3737
git clone \
38-
--depth 1 --branch v3.1.1 \
38+
--depth 1 --branch v3.2.0 \
3939
--recursive \
4040
https://github.com/PacificBiosciences/HiFi-human-WGS-WDL.git
4141
```

docs/family.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,9 @@ flowchart TD
3939
filter_fail_reads["filter fail_reads alignments (if fail_reads provided)"]
4040
end
4141
samtools_merge["samtools merge"]
42-
samtools_merge_fail_reads["samtools merge hifi_reads and fail_reads"]
4342
mosdepth["mosdepth"]
4443
paraphase["Paraphase"]
4544
mitorsaw["MitorSaw"]
46-
trgt["TRGT"]
47-
trgt_dropouts["TR coverage dropouts"]
4845
deepvariant["DeepVariant"]
4946
sawfish_discover["Sawfish discover"]
5047
end
@@ -56,6 +53,8 @@ flowchart TD
5653
end
5754
subgraph "`**Phasing and Downstream**`"
5855
hiphase["HiPhase"]
56+
samtools_merge_fail_reads["samtools merge phased hifi_reads and aligned fail_reads"]
57+
trgt["TRGT"]
5958
bam_stats["BAM stats"]
6059
bcftools_roh["bcftools roh"]
6160
bcftools_stats["bcftools stats\n(small variants)"]
@@ -89,13 +88,13 @@ flowchart TD
8988
deepvariant --> sawfish_discover
9089
deepvariant --> glnexus
9190
sawfish_discover --> sawfish_call
92-
trgt --> hiphase
9391
9492
glnexus --> split_glnexus
9593
sawfish_call --> split_sawfish
9694
split_glnexus --> hiphase
9795
split_sawfish --> hiphase
9896
97+
hiphase --> trgt
9998
hiphase --> bam_stats
10099
hiphase --> bcftools_roh
101100
hiphase --> bcftools_stats

docs/singleton.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,18 +37,17 @@ flowchart TD
3737
filter_fail_reads["filter fail_reads alignments (if fail_reads provided)"]
3838
end
3939
samtools_merge["samtools merge"]
40-
samtools_merge_fail_reads["samtools merge hifi_reads and fail_reads"]
4140
mosdepth["mosdepth"]
4241
paraphase["Paraphase"]
4342
mitorsaw["MitorSaw"]
44-
trgt["TRGT"]
45-
trgt_dropouts["TR coverage dropouts"]
4643
deepvariant["DeepVariant"]
4744
sawfish_discover["Sawfish discover"]
4845
sawfish_call["Sawfish call"]
4946
end
5047
subgraph "`**Phasing and Downstream**`"
5148
hiphase["HiPhase"]
49+
samtools_merge_fail_reads["samtools merge phased hifi_reads and aligned fail_reads"]
50+
trgt["TRGT"]
5251
bam_stats["BAM stats"]
5352
bcftools_roh["bcftools roh"]
5453
bcftools_stats["bcftools stats\n(small variants)"]
@@ -77,8 +76,8 @@ flowchart TD
7776
deepvariant --> sawfish_discover
7877
deepvariant --> hiphase
7978
sawfish_discover --> sawfish_call --> hiphase
80-
trgt --> hiphase
8179
80+
hiphase --> trgt
8281
hiphase --> bam_stats
8382
hiphase --> bcftools_roh
8483
hiphase --> bcftools_stats

docs/tools_containers.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont
1414
| pbmm2 | <ul><li>pbmm2 1.17.0</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/9591749da92ca57f7283ca1c2268789c45fa341d/docker/pbmm2) | [pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859](https://quay.io/repository/pacbio/pbmm2/manifest/sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859) |
1515
| mosdepth | <ul><li>mosdepth 0.3.9</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/fa84fbf582738c05c750e667ff43d11552ad4183/docker/mosdepth) | [mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1](https://quay.io/repository/pacbio/mosdepth/manifest/sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1) |
1616
| sawfish | <ul><li>sawfish 2.2.1</li><li>sawshark 0.3.0</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/a9e9414ca16b5b25443b4352603551871d5683f3/docker/sawfish) | [sawfish@sha256:18ba096219fea38d6b32f5706fb794a05cc5d1d6cc16e2a09e3a13d62d8181d4](https://quay.io/repository/pacbio/sawfish/manifest/sha256:18ba096219fea38d6b32f5706fb794a05cc5d1d6cc16e2a09e3a13d62d8181d4) |
17-
| trgt | <ul><li>trgt 4.0.0</li><li>`/opt/scripts/check_trgt_coverage.py` 0.1.0</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/c5614951abe48eab926a5246ffb843b4f7816d47/docker/trgt) | [trgt@sha256:7511072d0f57396b1b99c7e0c08934db417138b6b4ce5d93c4974115faab2a0d](https://quay.io/repository/pacbio/trgt/manifest/sha256:7511072d0f57396b1b99c7e0c08934db417138b6b4ce5d93c4974115faab2a0d) |
17+
| trgt | <ul><li>trgt 5.0.0</li><li>`/opt/scripts/find_trgt_dropouts.py` 0.3.0</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/d9c818f3547e8c33cc6a9f1a65e311ec26db8569/docker/trgt) | [trgt@sha256:be0ed7c173d221bd84e360b2b056e2abbecadd07ed86ffd4883a5cecca7a1e57](https://quay.io/repository/pacbio/trgt/manifest/sha256:be0ed7c173d221bd84e360b2b056e2abbecadd07ed86ffd4883a5cecca7a1e57) |
1818
| hiphase | <ul><li>hiphase 1.5.0</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/69039c010ada793bab4d38a9bd17a30562b9b671/docker/hiphase) | [hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482](https://quay.io/repository/pacbio/hiphase/manifest/sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482) |
1919
| mitorsaw | <ul><li>mitorsaw 0.2.4</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/6f6cf280c8ac0b76dd1d08bd830347b0b8ca9cea/docker/mitorsaw) | [mitorsaw@sha256:d0e47fb84e6e962f01a754d1052a24e550694646c0d4afb056c0e3fd7ace7a0d](https://quay.io/repository/pacbio/mitorsaw/manifest/sha256:d0e47fb84e6e962f01a754d1052a24e550694646c0d4afb056c0e3fd7ace7a0d) |
2020
| paraphase | <ul><li>paraphase 3.4.0</li><li>minimap 2.28</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/f25a5f465af066496e955e642e284cc45f378a76/docker/paraphase) | [paraphase@sha256:7e70bbc6666a33af9253f2df15dbbd57a7a031d40b166a02b58bf003d9932c4c](https://quay.io/repository/pacbio/paraphase/manifest/sha256:7e70bbc6666a33af9253f2df15dbbd57a7a031d40b166a02b58bf003d9932c4c) |

docs/trgt.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,26 @@ chr4 39348424 39348483 ID=CANVAS_RFC1;MOTIFS=AAGGG,ACAGG,AGGGC,
1111
chr9 69037270 69037304 ID=FRDA_FXN;MOTIFS=A,GAA;STRUC=<TR>;INCLUDE_FAIL_READS
1212
chr13 102161574 102161726 ID=SCA27B_FGF14;MOTIFS=GAA,GAAGGA,GAAGAAGAAGAAGCA,AAGGAG;STRUC=<TR>;INCLUDE_FAIL_READS
1313
```
14+
15+
## Detecting coverage dropouts
16+
17+
Dropouts in coverage at TRGT catalog loci may indicate the presence of large expansions that are not fully spanned by HiFi reads. To detect such dropouts, we run a script (`find_trgt_dropouts.py`) after TRGT genotyping that compares the observed coverage at each locus with a fixed threshold (2 reads per expected haplotype) and reports abnormal coverage in `*.trgt.dropouts.txt`. The columns in this file are as follows:
18+
19+
- chrom
20+
- start
21+
- end
22+
- trid
23+
- expected_ploidy
24+
- hap1_count
25+
- hap2_count
26+
- unphased_count
27+
- fail_read_count
28+
- dropout (FullDropout, HaplotypeDropout, PhasingDropout)
29+
30+
The `fail_read_count` column indicates the number of `fail_reads` that aligned to the locus. This can help interpret dropouts at loci where `fail_reads` were included for genotyping.
31+
32+
The dropout column can be interpreted as follows:
33+
34+
- FullDropout: total HiFi read count (hap1 + hap2 + unphased) is less than `expected_ploidy * 2`
35+
- HaplotypeDropout: one expected haplotype has fewer than 2 HiFi reads
36+
- PhasingDropout: the total HiFi read depth is at least `expected_ploidy * 2`, but both haplotypes have fewer than 2 reads

image_manifest.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ quay.io/pacbio/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de7
1212
quay.io/pacbio/sawfish@sha256:18ba096219fea38d6b32f5706fb794a05cc5d1d6cc16e2a09e3a13d62d8181d4
1313
quay.io/pacbio/slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa
1414
quay.io/pacbio/svpack@sha256:628e9851e425ed8044a907d33de04043d1ef02d4d2b2667cf2e9a389bb011eba
15-
quay.io/pacbio/trgt@sha256:7511072d0f57396b1b99c7e0c08934db417138b6b4ce5d93c4974115faab2a0d
15+
quay.io/pacbio/trgt@sha256:be0ed7c173d221bd84e360b2b056e2abbecadd07ed86ffd4883a5cecca7a1e57
1616
quay.io/pacbio/wgs_tertiary@sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136
1717
google/deepvariant:1.9.0
1818
google/deepvariant:1.9.0-gpu

wdl-ci.config.json

Lines changed: 14 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1516,7 +1516,7 @@
15161516
"tasks": {
15171517
"trgt": {
15181518
"key": "trgt",
1519-
"digest": "x2lnix7cnso6qjljqx6ckhbrz3lcpimm",
1519+
"digest": "tt7gtlnx7haowm2ibfhpcwcd2rzxgdiq",
15201520
"tests": [
15211521
{
15221522
"inputs": {
@@ -1526,6 +1526,8 @@
15261526
"ref_fasta": "${ref_fasta}",
15271527
"ref_index": "${ref_index}",
15281528
"trgt_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed",
1529+
"expected_male_bed": "${datasets_file_path}/GRCh38/sawfish/expected_cn.hg38.XY.bed",
1530+
"expected_female_bed": "${datasets_file_path}/GRCh38/sawfish/expected_cn.hg38.XX.bed",
15291531
"out_prefix": "HG002.GRCh38",
15301532
"runtime_attributes": "${default_runtime_attributes}"
15311533
},
@@ -1545,6 +1547,14 @@
15451547
"check_gzip"
15461548
]
15471549
},
1550+
"dropouts": {
1551+
"value": "${resources_file_path}/coverage_dropouts/HG002.GRCh38.trgt.dropouts.txt",
1552+
"test_tasks": [
1553+
"compare_file_basename",
1554+
"check_tab_delimited",
1555+
"count_columns"
1556+
]
1557+
},
15481558
"stat_genotyped_count": {
15491559
"value": "9163",
15501560
"test_tasks": [
@@ -1568,6 +1578,8 @@
15681578
"ref_fasta": "${ref_fasta}",
15691579
"ref_index": "${ref_index}",
15701580
"trgt_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed",
1581+
"expected_male_bed": "${datasets_file_path}/GRCh38/sawfish/expected_cn.hg38.XY.bed",
1582+
"expected_female_bed": "${datasets_file_path}/GRCh38/sawfish/expected_cn.hg38.XX.bed",
15711583
"out_prefix": "HG002.GRCh38",
15721584
"runtime_attributes": "${default_runtime_attributes}"
15731585
},
@@ -1605,7 +1617,7 @@
16051617
},
16061618
"trgt_merge": {
16071619
"key": "trgt_merge",
1608-
"digest": "kfoep64umfynj4jfl7gnfmdoejazpspl",
1620+
"digest": "7kymqh7t5dmhjxjvksyx2xq4hnqwupn2",
16091621
"tests": [
16101622
{
16111623
"inputs": {
@@ -1636,31 +1648,6 @@
16361648
}
16371649
}
16381650
]
1639-
},
1640-
"coverage_dropouts": {
1641-
"key": "coverage_dropouts",
1642-
"digest": "eyhyifvmfx5g5tcvujinmiu64pb6h4wf",
1643-
"tests": [
1644-
{
1645-
"inputs": {
1646-
"aligned_bam": "${resources_file_path}/trgt/input/HG002.vega_10k.hifi_reads.GRCh38.aligned.bam",
1647-
"aligned_bam_index": "${resources_file_path}/trgt/input/HG002.vega_10k.hifi_reads.GRCh38.aligned.bam.bai",
1648-
"trgt_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed",
1649-
"out_prefix": "HG002.GRCh38",
1650-
"runtime_attributes": "${default_runtime_attributes}"
1651-
},
1652-
"output_tests": {
1653-
"dropouts": {
1654-
"value": "${resources_file_path}/coverage_dropouts/HG002.GRCh38.trgt.dropouts.txt",
1655-
"test_tasks": [
1656-
"compare_file_basename",
1657-
"check_tab_delimited",
1658-
"count_columns"
1659-
]
1660-
}
1661-
}
1662-
}
1663-
]
16641651
}
16651652
}
16661653
},

0 commit comments

Comments
 (0)