diff --git a/.dockstore.yml b/.dockstore.yml index 5395b64b..611f5a6a 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -1,20 +1,6 @@ version: 1.2 workflows: - - name: HiFi-human-WGS-WDL - subclass: WDL - primaryDescriptorPath: /workflows/main.wdl - readMePath: /README.md - authors: - - orcid: 0000-0001-5921-2022 # Juniper Lake - - orcid: 0000-0001-7628-5645 # Gregory Concepcion - - orcid: 0000-0003-1183-0432 # Aaron Wenger - - orcid: 0000-0002-7422-1194 # William Rowell - - orcid: 0000-0002-5507-0896 # Heather Ward - - orcid: 0009-0001-0205-4614 # Karen Fang - latestTagAsDefault: False - filters: - tags: [ /v1\..*dockstore/ ] - name: HiFi-human-WGS-WDL-singleton subclass: WDL primaryDescriptorPath: /workflows/singleton.wdl diff --git a/.gitignore b/.gitignore index ec1f41d8..076e2057 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,6 @@ tests miniwdl_singularity_cache miniwdl_download_cache miniwdl_call_cache -miniwdl_test_output \ No newline at end of file +miniwdl_test_output + +.venv \ No newline at end of file diff --git a/GRCh38.ref_map.v2p0p0.template.tsv b/GRCh38.ref_map.v2p0p0.template.tsv deleted file mode 100644 index 3a095aaf..00000000 --- a/GRCh38.ref_map.v2p0p0.template.tsv +++ /dev/null @@ -1,12 +0,0 @@ -name GRCh38 -fasta /hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta -fasta_index /hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai -pbsv_splits /hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json -pbsv_tandem_repeat_bed /hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed -trgt_tandem_repeat_bed /hifi-wdl-resources-v2.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed -hificnv_exclude_bed /hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz -hificnv_exclude_bed_index /hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz.tbi -hificnv_expected_bed_male 
/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed -hificnv_expected_bed_female /hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed -pharmcat_positions_vcf /hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz -pharmcat_positions_vcf_index /hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/GRCh38.ref_map.v3p0p0.template.tsv b/GRCh38.ref_map.v3p0p0.template.tsv new file mode 100644 index 00000000..0551a94a --- /dev/null +++ b/GRCh38.ref_map.v3p0p0.template.tsv @@ -0,0 +1,10 @@ +name GRCh38 +fasta /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta +fasta_index /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai +trgt_tandem_repeat_bed /hifi-wdl-resources-v3.0.0/GRCh38/trgt/adotto_strchive_20250626.hg38.bed.gz +sawfish_exclude_bed /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz +sawfish_exclude_bed_index /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz.tbi +sawfish_expected_bed_male /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XY.bed +sawfish_expected_bed_female /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XX.bed +pharmcat_positions_vcf /hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz +pharmcat_positions_vcf_index /hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/GRCh38.tertiary_map.v2p0p0.template.tsv b/GRCh38.tertiary_map.v2p0p0.template.tsv deleted file mode 100644 index 86b7b3b6..00000000 --- a/GRCh38.tertiary_map.v2p0p0.template.tsv +++ /dev/null @@ -1,12 +0,0 @@ -slivar_js /hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v0.2.8.js -ensembl_gff /hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz -lof_lookup /hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt -clinvar_lookup 
/hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt -slivar_gnotate_files /hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.1.0.deepvariant.glnexus.zip -slivar_gnotate_prefixes gnomad,colors -slivar_max_af 0.03 -slivar_max_nhomalt 4 -slivar_max_ac 4 -slivar_min_gq 5 -svpack_pop_vcfs /hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz -svpack_pop_vcf_indices /hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz.tbi \ No newline at end of file diff --git a/GRCh38.tertiary_map.v3p0p0.template.tsv b/GRCh38.tertiary_map.v3p0p0.template.tsv new file mode 100644 index 00000000..c66c7e4d --- /dev/null +++ b/GRCh38.tertiary_map.v3p0p0.template.tsv @@ -0,0 +1,12 @@ +slivar_js /hifi-wdl-resources-v3.0.0/slivar/slivar-functions.v0.2.8.js +ensembl_gff /hifi-wdl-resources-v3.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz +lof_lookup /hifi-wdl-resources-v3.0.0/slivar/lof.gnomadv4p1.lookup +clinvar_lookup /hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20250618T144412.txt +slivar_gnotate_files /hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip +slivar_gnotate_prefixes gnomad,colors +slivar_max_af 0.03 +slivar_max_nhomalt 4 +slivar_max_ac 4 +slivar_min_gq 5 +svpack_pop_vcfs /hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz +svpack_pop_vcf_indices 
/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi \ No newline at end of file diff --git a/README.md b/README.md index dd7b1fca..0e7ed824 100644 --- a/README.md +++ b/README.md @@ -24,18 +24,18 @@ Both workflows are designed to analyze human PacBio whole genome sequencing (WGS This is an actively developed workflow with multiple versioned releases, and we make use of git submodules for common tasks that are shared by multiple workflows. There are two ways to ensure you are using a supported release of the workflow and ensure that the submodules are correctly initialized: -1) Download the release zips directly from a [supported release](https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/tag/v2.1.2): +1) Download the release zips directly from a [supported release](https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/tag/v3.0.2): ```bash - wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v2.1.2/hifi-human-wgs-singleton.zip - wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v2.1.2/hifi-human-wgs-family.zip + wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.2/hifi-human-wgs-singleton.zip + wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.2/hifi-human-wgs-family.zip ``` 2) Clone the repository and initialize the submodules: ```bash git clone \ - --depth 1 --branch v2.1.2 \ + --depth 1 --branch v3.0.2 \ --recursive \ https://github.com/PacificBiosciences/HiFi-human-WGS-WDL.git ``` @@ -127,7 +127,7 @@ At a high level, we have two types of inputs files: The resource bundle containing the GRCh38 reference and other files used in this workflow can be downloaded from Zenodo: -[10.5281/zenodo.14027047](https://zenodo.org/records/14027047) 
+[10.5281/zenodo.15750792](https://zenodo.org/records/15750792) # Tool versions and Docker images diff --git a/backends/aws-healthomics/GRCh38.ref_map.v2p0p0.aws.tsv b/backends/aws-healthomics/GRCh38.ref_map.v2p0p0.aws.tsv deleted file mode 100644 index d3254267..00000000 --- a/backends/aws-healthomics/GRCh38.ref_map.v2p0p0.aws.tsv +++ /dev/null @@ -1,12 +0,0 @@ -name GRCh38 -fasta s3:///hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta -fasta_index s3:///hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai -pbsv_splits s3:///hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json -pbsv_tandem_repeat_bed s3:///hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed -trgt_tandem_repeat_bed s3:///hifi-wdl-resources-v2.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed -hificnv_exclude_bed s3:///hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz -hificnv_exclude_bed_index s3:///hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz.tbi -hificnv_expected_bed_male s3:///hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed -hificnv_expected_bed_female s3:///hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed -pharmcat_positions_vcf s3:///hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz -pharmcat_positions_vcf_index s3:///hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/backends/aws-healthomics/GRCh38.ref_map.v3p0p0.aws.tsv b/backends/aws-healthomics/GRCh38.ref_map.v3p0p0.aws.tsv new file mode 100644 index 00000000..dcecb60c --- /dev/null +++ b/backends/aws-healthomics/GRCh38.ref_map.v3p0p0.aws.tsv @@ -0,0 +1,10 @@ +name GRCh38 +fasta s3:///hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta +fasta_index s3:///hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai 
+trgt_tandem_repeat_bed s3:///hifi-wdl-resources-v3.0.0/GRCh38/trgt/adotto_strchive_20250626.hg38.bed.gz +sawfish_exclude_bed s3:///hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz +sawfish_exclude_bed_index s3:///hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz.tbi +sawfish_expected_bed_male s3:///hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XY.bed +sawfish_expected_bed_female s3:///hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XX.bed +pharmcat_positions_vcf s3:///hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz +pharmcat_positions_vcf_index s3:///hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/backends/aws-healthomics/GRCh38.tertiary_map.v2p0p0.aws.tsv b/backends/aws-healthomics/GRCh38.tertiary_map.v2p0p0.aws.tsv deleted file mode 100644 index 8bbb9b1d..00000000 --- a/backends/aws-healthomics/GRCh38.tertiary_map.v2p0p0.aws.tsv +++ /dev/null @@ -1,12 +0,0 @@ -slivar_js s3:///hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v0.2.8.js -ensembl_gff s3:///hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz -lof_lookup s3:///hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt -clinvar_lookup s3:///hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt -slivar_gnotate_files s3:///hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,s3:///hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.1.0.deepvariant.glnexus.zip -slivar_gnotate_prefixes gnomad,colors -slivar_max_af 0.03 -slivar_max_nhomalt 4 -slivar_max_ac 4 -slivar_min_gq 5 -svpack_pop_vcfs s3:///hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,s3:///hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz -svpack_pop_vcf_indices 
s3:///hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,s3:///hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz.tbi diff --git a/backends/aws-healthomics/GRCh38.tertiary_map.v3p0p0.aws.tsv b/backends/aws-healthomics/GRCh38.tertiary_map.v3p0p0.aws.tsv new file mode 100644 index 00000000..08f84222 --- /dev/null +++ b/backends/aws-healthomics/GRCh38.tertiary_map.v3p0p0.aws.tsv @@ -0,0 +1,12 @@ +slivar_js s3:///hifi-wdl-resources-v3.0.0/slivar/slivar-functions.v0.2.8.js +ensembl_gff s3:///hifi-wdl-resources-v3.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz +lof_lookup s3:///hifi-wdl-resources-v3.0.0/slivar/lof.gnomadv4p1.lookup +clinvar_lookup s3:///hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20250618T144412.txt +slivar_gnotate_files s3:///hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,s3:///hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip +slivar_gnotate_prefixes gnomad,colors +slivar_max_af 0.03 +slivar_max_nhomalt 4 +slivar_max_ac 4 +slivar_min_gq 5 +svpack_pop_vcfs s3:///hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,s3:///hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz +svpack_pop_vcf_indices s3:///hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,s3:///hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi diff --git a/backends/aws-healthomics/family.healthomics.inputs.json b/backends/aws-healthomics/family.healthomics.inputs.json index 8df91c16..d728bb12 100644 --- a/backends/aws-healthomics/family.healthomics.inputs.json +++ b/backends/aws-healthomics/family.healthomics.inputs.json @@ -15,8 +15,8 @@ ] }, "humanwgs_family.phenotypes": "String? 
(optional)", - "humanwgs_family.ref_map_file": "s3:///GRCh38.ref_map.v2p0p0.aws.tsv", - "humanwgs_family.tertiary_map_file": "s3:///GRCh38.tertiary_map.v2p0p0.aws.tsv", + "humanwgs_family.ref_map_file": "s3:///GRCh38.ref_map.v3p0p0.aws.tsv", + "humanwgs_family.tertiary_map_file": "s3:///GRCh38.tertiary_map.v3p0p0.aws.tsv", "humanwgs_family.backend": "AWS-HealthOmics", "humanwgs_family.container_registry": "String", "humanwgs_family.preemptible": true diff --git a/backends/aws-healthomics/singleton.healthomics.inputs.json b/backends/aws-healthomics/singleton.healthomics.inputs.json index c852db3e..81254e47 100644 --- a/backends/aws-healthomics/singleton.healthomics.inputs.json +++ b/backends/aws-healthomics/singleton.healthomics.inputs.json @@ -5,8 +5,8 @@ "File" ], "humanwgs_singleton.phenotypes": "String? (optional)", - "humanwgs_singleton.ref_map_file": "s3:///GRCh38.ref_map.v2p0p0.aws.tsv", - "humanwgs_singleton.tertiary_map_file": "s3:///GRCh38.tertiary_map.v2p0p0.aws.tsv", + "humanwgs_singleton.ref_map_file": "s3:///GRCh38.ref_map.v3p0p0.aws.tsv", + "humanwgs_singleton.tertiary_map_file": "s3:///GRCh38.tertiary_map.v3p0p0.aws.tsv", "humanwgs_singleton.backend": "AWS-HealthOmics", "humanwgs_singleton.container_registry": "String", "humanwgs_singleton.preemptible": true diff --git a/backends/azure/GRCh38.ref_map.v2p0p0.azure.tsv b/backends/azure/GRCh38.ref_map.v2p0p0.azure.tsv deleted file mode 100644 index 0e10c7d8..00000000 --- a/backends/azure/GRCh38.ref_map.v2p0p0.azure.tsv +++ /dev/null @@ -1,12 +0,0 @@ -name GRCh38 -fasta https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta -fasta_index https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai -pbsv_splits https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json 
-pbsv_tandem_repeat_bed https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed -trgt_tandem_repeat_bed https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed -hificnv_exclude_bed https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz -hificnv_exclude_bed_index https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz.tbi -hificnv_expected_bed_male https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed -hificnv_expected_bed_female https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed -pharmcat_positions_vcf https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz -pharmcat_positions_vcf_index https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/backends/azure/GRCh38.ref_map.v3p0p0.azure.tsv b/backends/azure/GRCh38.ref_map.v3p0p0.azure.tsv new file mode 100644 index 00000000..cf4863a8 --- /dev/null +++ b/backends/azure/GRCh38.ref_map.v3p0p0.azure.tsv @@ -0,0 +1,10 @@ +name GRCh38 +fasta https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta +fasta_index https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai +trgt_tandem_repeat_bed 
https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/trgt/adotto_strchive_20250626.hg38.bed.gz +sawfish_exclude_bed https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz +sawfish_exclude_bed_index https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz.tbi +sawfish_expected_bed_male https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XY.bed +sawfish_expected_bed_female https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XX.bed +pharmcat_positions_vcf https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz +pharmcat_positions_vcf_index https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/backends/azure/GRCh38.tertiary_map.v2p0p0.azure.tsv b/backends/azure/GRCh38.tertiary_map.v2p0p0.azure.tsv deleted file mode 100644 index f3ce0508..00000000 --- a/backends/azure/GRCh38.tertiary_map.v2p0p0.azure.tsv +++ /dev/null @@ -1,12 +0,0 @@ -slivar_js https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v0.2.8.js -ensembl_gff https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz -lof_lookup https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt -clinvar_lookup https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt -slivar_gnotate_files 
https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.1.0.deepvariant.glnexus.zip -slivar_gnotate_prefixes gnomad,colors -slivar_max_af 0.03 -slivar_max_nhomalt 4 -slivar_max_ac 4 -slivar_min_gq 5 -svpack_pop_vcfs https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz -svpack_pop_vcf_indices https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz.tbi diff --git a/backends/azure/GRCh38.tertiary_map.v3p0p0.azure.tsv b/backends/azure/GRCh38.tertiary_map.v3p0p0.azure.tsv new file mode 100644 index 00000000..08558fd8 --- /dev/null +++ b/backends/azure/GRCh38.tertiary_map.v3p0p0.azure.tsv @@ -0,0 +1,12 @@ +slivar_js https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/slivar/slivar-functions.v0.2.8.js +ensembl_gff https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz +lof_lookup https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/slivar/lof.gnomadv4p1.lookup +clinvar_lookup https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20250618T144412.txt +slivar_gnotate_files 
https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip +slivar_gnotate_prefixes gnomad,colors +slivar_max_af 0.03 +slivar_max_nhomalt 4 +slivar_max_ac 4 +slivar_min_gq 5 +svpack_pop_vcfs https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz +svpack_pop_vcf_indices https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi diff --git a/backends/azure/family.azure.inputs.json b/backends/azure/family.azure.inputs.json index d0f4841a..f668d98b 100644 --- a/backends/azure/family.azure.inputs.json +++ b/backends/azure/family.azure.inputs.json @@ -15,8 +15,8 @@ ] }, "humanwgs_family.phenotypes": "String? 
(optional)", - "humanwgs_family.ref_map_file": "https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38.ref_map.v2p0p0.azure.tsv", - "humanwgs_family.tertiary_map_file": "https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38.tertiary_map.v2p0p0.azure.tsv", + "humanwgs_family.ref_map_file": "https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38.ref_map.v3p0p0.azure.tsv", + "humanwgs_family.tertiary_map_file": "https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38.tertiary_map.v3p0p0.azure.tsv", "humanwgs_family.backend": "Azure", "humanwgs_family.preemptible": "Boolean" } \ No newline at end of file diff --git a/backends/azure/singleton.azure.inputs.json b/backends/azure/singleton.azure.inputs.json index 91fa25b9..14e14c2f 100644 --- a/backends/azure/singleton.azure.inputs.json +++ b/backends/azure/singleton.azure.inputs.json @@ -5,8 +5,8 @@ "File" ], "humanwgs_singleton.phenotypes": "String? 
(optional)", - "humanwgs_singleton.ref_map_file": "https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38.ref_map.v2p0p0.azure.tsv", - "humanwgs_singleton.tertiary_map_file": "https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38.tertiary_map.v2p0p0.azure.tsv", + "humanwgs_singleton.ref_map_file": "https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38.ref_map.v3p0p0.azure.tsv", + "humanwgs_singleton.tertiary_map_file": "https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38.tertiary_map.v3p0p0.azure.tsv", "humanwgs_singleton.backend": "Azure", "humanwgs_singleton.preemptible": "Boolean" } \ No newline at end of file diff --git a/backends/gcp/GRCh38.ref_map.v2p0p0.gcp.tsv b/backends/gcp/GRCh38.ref_map.v2p0p0.gcp.tsv deleted file mode 100644 index 2d63bdea..00000000 --- a/backends/gcp/GRCh38.ref_map.v2p0p0.gcp.tsv +++ /dev/null @@ -1,12 +0,0 @@ -name GRCh38 -fasta gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta -fasta_index gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai -pbsv_splits gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json -pbsv_tandem_repeat_bed gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed -trgt_tandem_repeat_bed gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed -hificnv_exclude_bed gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz -hificnv_exclude_bed_index gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz.tbi -hificnv_expected_bed_male gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed -hificnv_expected_bed_female 
gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed -pharmcat_positions_vcf gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz -pharmcat_positions_vcf_index gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/backends/gcp/GRCh38.ref_map.v3p0p0.gcp.tsv b/backends/gcp/GRCh38.ref_map.v3p0p0.gcp.tsv new file mode 100644 index 00000000..d135ca6f --- /dev/null +++ b/backends/gcp/GRCh38.ref_map.v3p0p0.gcp.tsv @@ -0,0 +1,10 @@ +name GRCh38 +fasta gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta +fasta_index gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai +trgt_tandem_repeat_bed gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/trgt/adotto_strchive_20250626.hg38.bed.gz +sawfish_exclude_bed gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz +sawfish_exclude_bed_index gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz.tbi +sawfish_expected_bed_male gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XY.bed +sawfish_expected_bed_female gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XX.bed +pharmcat_positions_vcf gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz +pharmcat_positions_vcf_index gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/backends/gcp/GRCh38.tertiary_map.v2p0p0.gcp.tsv b/backends/gcp/GRCh38.tertiary_map.v2p0p0.gcp.tsv deleted file mode 100644 index bf5d42a9..00000000 --- a/backends/gcp/GRCh38.tertiary_map.v2p0p0.gcp.tsv +++ /dev/null @@ -1,12 +0,0 @@ -slivar_js gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v0.2.8.js -ensembl_gff 
gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz -lof_lookup gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt -clinvar_lookup gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt -slivar_gnotate_files gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.1.0.deepvariant.glnexus.zip -slivar_gnotate_prefixes gnomad,colors -slivar_max_af 0.03 -slivar_max_nhomalt 4 -slivar_max_ac 4 -slivar_min_gq 5 -svpack_pop_vcfs gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz -svpack_pop_vcf_indices gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz.tbi diff --git a/backends/gcp/GRCh38.tertiary_map.v3p0p0.gcp.tsv b/backends/gcp/GRCh38.tertiary_map.v3p0p0.gcp.tsv new file mode 100644 index 00000000..332bd89f --- /dev/null +++ b/backends/gcp/GRCh38.tertiary_map.v3p0p0.gcp.tsv @@ -0,0 +1,12 @@ +slivar_js gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/slivar/slivar-functions.v0.2.8.js +ensembl_gff gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz +lof_lookup gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/slivar/lof.gnomadv4p1.lookup +clinvar_lookup gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20250618T144412.txt +slivar_gnotate_files gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip +slivar_gnotate_prefixes gnomad,colors +slivar_max_af 0.03 +slivar_max_nhomalt 4 
+slivar_max_ac 4 +slivar_min_gq 5 +svpack_pop_vcfs gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz +svpack_pop_vcf_indices gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi diff --git a/backends/gcp/family.gcp.inputs.json b/backends/gcp/family.gcp.inputs.json index 48823812..e26e8fcd 100644 --- a/backends/gcp/family.gcp.inputs.json +++ b/backends/gcp/family.gcp.inputs.json @@ -15,8 +15,8 @@ ] }, "humanwgs_family.phenotypes": "String? (optional)", - "humanwgs_family.ref_map_file": "gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38.ref_map.v2p0p0.gcp.tsv", - "humanwgs_family.tertiary_map_file": "gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38.tertiary_map.v2p0p0.gcp.tsv", + "humanwgs_family.ref_map_file": "gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38.ref_map.v3p0p0.gcp.tsv", + "humanwgs_family.tertiary_map_file": "gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38.tertiary_map.v3p0p0.gcp.tsv", "humanwgs_family.backend": "GCP", "humanwgs_family.zones": "String", "humanwgs_family.preemptible": "Boolean" diff --git a/backends/gcp/singleton.gcp.inputs.json b/backends/gcp/singleton.gcp.inputs.json index fe9b8b6d..7a1de84e 100644 --- a/backends/gcp/singleton.gcp.inputs.json +++ b/backends/gcp/singleton.gcp.inputs.json @@ -5,8 +5,8 @@ "File" ], "humanwgs_singleton.phenotypes": "String? 
(optional)", - "humanwgs_singleton.ref_map_file": "gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38.ref_map.v2p0p0.gcp.tsv", - "humanwgs_singleton.tertiary_map_file": "gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38.tertiary_map.v2p0p0.gcp.tsv", + "humanwgs_singleton.ref_map_file": "gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38.ref_map.v3p0p0.gcp.tsv", + "humanwgs_singleton.tertiary_map_file": "gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38.tertiary_map.v3p0p0.gcp.tsv", "humanwgs_singleton.backend": "GCP", "humanwgs_singleton.zones": "String", "humanwgs_singleton.preemptible": "Boolean" diff --git a/backends/hpc/GRCh38.ref_map.v2p0p0.hpc.tsv b/backends/hpc/GRCh38.ref_map.v2p0p0.hpc.tsv deleted file mode 100644 index 3a095aaf..00000000 --- a/backends/hpc/GRCh38.ref_map.v2p0p0.hpc.tsv +++ /dev/null @@ -1,12 +0,0 @@ -name GRCh38 -fasta /hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta -fasta_index /hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai -pbsv_splits /hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json -pbsv_tandem_repeat_bed /hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed -trgt_tandem_repeat_bed /hifi-wdl-resources-v2.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed -hificnv_exclude_bed /hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz -hificnv_exclude_bed_index /hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz.tbi -hificnv_expected_bed_male /hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed -hificnv_expected_bed_female /hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed -pharmcat_positions_vcf /hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz -pharmcat_positions_vcf_index /hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/backends/hpc/GRCh38.ref_map.v3p0p0.hpc.tsv 
b/backends/hpc/GRCh38.ref_map.v3p0p0.hpc.tsv new file mode 100644 index 00000000..0551a94a --- /dev/null +++ b/backends/hpc/GRCh38.ref_map.v3p0p0.hpc.tsv @@ -0,0 +1,10 @@ +name GRCh38 +fasta /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta +fasta_index /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai +trgt_tandem_repeat_bed /hifi-wdl-resources-v3.0.0/GRCh38/trgt/adotto_strchive_20250626.hg38.bed.gz +sawfish_exclude_bed /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz +sawfish_exclude_bed_index /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz.tbi +sawfish_expected_bed_male /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XY.bed +sawfish_expected_bed_female /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XX.bed +pharmcat_positions_vcf /hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz +pharmcat_positions_vcf_index /hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/backends/hpc/GRCh38.tertiary_map.v2p0p0.hpc.tsv b/backends/hpc/GRCh38.tertiary_map.v2p0p0.hpc.tsv deleted file mode 100644 index f7a2d2f3..00000000 --- a/backends/hpc/GRCh38.tertiary_map.v2p0p0.hpc.tsv +++ /dev/null @@ -1,12 +0,0 @@ -slivar_js /hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v0.2.8.js -ensembl_gff /hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz -lof_lookup /hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt -clinvar_lookup /hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt -slivar_gnotate_files /hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.1.0.deepvariant.glnexus.zip -slivar_gnotate_prefixes gnomad,colors -slivar_max_af 0.03 -slivar_max_nhomalt 4 -slivar_max_ac 4 -slivar_min_gq 5 -svpack_pop_vcfs 
/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz -svpack_pop_vcf_indices /hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz.tbi diff --git a/backends/hpc/GRCh38.tertiary_map.v3p0p0.hpc.tsv b/backends/hpc/GRCh38.tertiary_map.v3p0p0.hpc.tsv new file mode 100644 index 00000000..383428f5 --- /dev/null +++ b/backends/hpc/GRCh38.tertiary_map.v3p0p0.hpc.tsv @@ -0,0 +1,12 @@ +slivar_js /hifi-wdl-resources-v3.0.0/slivar/slivar-functions.v0.2.8.js +ensembl_gff /hifi-wdl-resources-v3.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz +lof_lookup /hifi-wdl-resources-v3.0.0/slivar/lof.gnomadv4p1.lookup +clinvar_lookup /hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20250618T144412.txt +slivar_gnotate_files /hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip +slivar_gnotate_prefixes gnomad,colors +slivar_max_af 0.03 +slivar_max_nhomalt 4 +slivar_max_ac 4 +slivar_min_gq 5 +svpack_pop_vcfs /hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz +svpack_pop_vcf_indices /hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi diff --git a/backends/hpc/family.hpc.inputs.json b/backends/hpc/family.hpc.inputs.json index 44a70568..31ef66b8 100644 --- a/backends/hpc/family.hpc.inputs.json +++ b/backends/hpc/family.hpc.inputs.json @@ -15,8 +15,8 @@ ] }, "humanwgs_family.phenotypes": "String? 
(optional)", - "humanwgs_family.ref_map_file": "/dataset/GRCh38.ref_map.v2p0p0.hpc.tsv", - "humanwgs_family.tertiary_map_file": "/dataset/GRCh38.tertiary_map.v2p0p0.hpc.tsv", + "humanwgs_family.ref_map_file": "/dataset/GRCh38.ref_map.v3p0p0.hpc.tsv", + "humanwgs_family.tertiary_map_file": "/dataset/GRCh38.tertiary_map.v3p0p0.hpc.tsv", "humanwgs_family.backend": "HPC", "humanwgs_family.preemptible": true } \ No newline at end of file diff --git a/backends/hpc/singleton.hpc.inputs.json b/backends/hpc/singleton.hpc.inputs.json index 885accf1..ffabbe87 100644 --- a/backends/hpc/singleton.hpc.inputs.json +++ b/backends/hpc/singleton.hpc.inputs.json @@ -5,8 +5,8 @@ "File" ], "humanwgs_singleton.phenotypes": "String? (optional)", - "humanwgs_singleton.ref_map_file": "/dataset/GRCh38.ref_map.v2p0p0.hpc.tsv", - "humanwgs_singleton.tertiary_map_file": "/dataset/GRCh38.tertiary_map.v2p0p0.hpc.tsv", + "humanwgs_singleton.ref_map_file": "/dataset/GRCh38.ref_map.v3p0p0.hpc.tsv", + "humanwgs_singleton.tertiary_map_file": "/dataset/GRCh38.tertiary_map.v3p0p0.hpc.tsv", "humanwgs_singleton.backend": "HPC", "humanwgs_singleton.preemptible": true } \ No newline at end of file diff --git a/docs/backend-gcp.md b/docs/backend-gcp.md index dca0abbf..692913a2 100644 --- a/docs/backend-gcp.md +++ b/docs/backend-gcp.md @@ -20,6 +20,10 @@ gcloud compute zones list | grep For example, the zones in region `us-central1` are `"us-central1-a us-central1-b us-central1c us-central1f"`. +#### Setting the optional cpuPlatform parameter + +Some GCP regions, for example `me-central1`, lack the n1 nodes used by many tasks in the workflow. As a workaround, you can set the minimum CPU platform used by the workflow to `"Intel Cascade Lake"` with the `cpuPlatform` input. There is no need to specify the `cpuPlatform` input unless you encounter this issue. 
+ ## Running the workflow via Google's genomics Pipelines API [Cromwell's documentation](https://cromwell.readthedocs.io/en/stable/tutorials/PipelinesApi101/) on getting started with Google's genomics Pipelines API can be used as an example for how to run the workflow. diff --git a/docs/backend-hpc.md b/docs/backend-hpc.md index e7c2883e..9f562765 100644 --- a/docs/backend-hpc.md +++ b/docs/backend-hpc.md @@ -48,6 +48,8 @@ See [the inputs section of the singleton README](./singleton.md#inputs) for more miniwdl run workflows/singleton.wdl --input ``` +If your compute nodes cannot contact the internet, you can use the script at [`./scripts/populate_miniwdl_singularity_cache.sh`](../scripts/populate_miniwdl_singularity_cache.sh) with the image manifest at [`./image_manifest.txt`](../image_manifest.txt) to populate the miniwdl singularity cache with the required images from a login node with internet access. + #### Running via Cromwell ```bash @@ -56,14 +58,14 @@ cromwell run workflows/singleton.wdl --input ## Reference data bundle -[10.5281/zenodo.14027047](https://zenodo.org/records/14027047) +[10.5281/zenodo.15750792](https://zenodo.org/records/15750792) -Reference data is hosted on Zenodo at [10.5281/zenodo.14027047](https://zenodo.org/record/14027047). Download the reference data bundle and extract it to a location on your HPC, then update the input template file with the path to the reference data. +Reference data is hosted on Zenodo at [10.5281/zenodo.15750792](https://zenodo.org/record/15750792). Download the reference data bundle and extract it to a location on your HPC, then update the input template file with the path to the reference data. 
```bash ## download the reference data bundle -wget https://zenodo.org/record/14027047/files/hifi-wdl-resources-v2.0.0.tar +wget https://zenodo.org/record/15750792/files/hifi-wdl-resources-v3.0.0.tar ## extract the reference data bundle and rename as dataset -tar -xvf hifi-wdl-resources-v2.0.0.tar +tar -xvf hifi-wdl-resources-v3.0.0.tar ``` diff --git a/docs/backends.md b/docs/backends.md index 1819695a..393f3a36 100644 --- a/docs/backends.md +++ b/docs/backends.md @@ -1,3 +1,3 @@ - [hpc](./backend-hpc.md) - [azure](./backend-azure.md) -- [gcp](./backend-gcp.md) \ No newline at end of file +- [gcp](./backend-gcp.md) diff --git a/docs/family.md b/docs/family.md index a9fa639c..d2e532b4 100644 --- a/docs/family.md +++ b/docs/family.md @@ -9,7 +9,6 @@ - [Alignments, Coverage, and QC](#alignments-coverage-and-qc) - [Small Variants (\<50 bp)](#small-variants-50-bp) - [Structural Variants (≥50 bp)](#structural-variants-50-bp) - - [Copy Number Variants (≥100 kb)](#copy-number-variants-100-kb) - [Tandem Repeat Genotyping](#tandem-repeat-genotyping) - [Variant Phasing](#variant-phasing) - [Variant Calling in Dark Regions](#variant-calling-in-dark-regions) @@ -24,49 +23,82 @@ title: family.wdl --- flowchart TD - subgraph "`**Upstream of Phasing (per-sample)**`" + subgraph "`**Upstream of Phasing\n(per-sample)**`" subgraph "per-movie" - ubam[/"HiFi uBAM"/] --> pbmm2_align["pbmm2 align"] - pbmm2_align --> pbsv_discover["PBSV discover"] + ubam[/"HiFi uBAM"/] + pbmm2_align["pbmm2 align"] end - pbmm2_align --> merge_read_stats["merge read statistics"] - pbmm2_align --> samtools_merge["samtools merge"] - samtools_merge --> mosdepth["mosdepth"] - samtools_merge --> paraphase["Paraphase"] - samtools_merge --> hificnv["HiFiCNV"] - samtools_merge --> trgt["TRGT"] - samtools_merge --> trgt_dropouts["TR coverage dropouts"] - samtools_merge --> deepvariant["DeepVariant"] - samtools_merge --> hiphase["HiPhase"] + samtools_merge["samtools merge"] + mosdepth["mosdepth"] + 
paraphase["Paraphase"] + mitorsaw["MitorSaw"] + trgt["TRGT"] + trgt_dropouts["TR coverage dropouts"] + deepvariant["DeepVariant"] + sawfish_discover["Sawfish discover"] end subgraph "`**Joint Calling**`" - deepvariant --> glnexus["GLnexus (joint-call small variants)"] - pbsv_discover --> pbsv_call["PBSV call"] - glnexus --> split_glnexus["split small variant vcf by sample"] - pbsv_call --> split_pbsv["split SV vcf by sample"] + glnexus["GLnexus (joint-call small variants)"] + sawfish_call["Sawfish call"] + split_glnexus["split small variant vcf by sample"] + split_sawfish["split SV vcf by sample"] end - subgraph "`**Phasing and Downstream (per-sample)**`" - split_glnexus --> hiphase - trgt --> hiphase - split_pbsv --> hiphase - hiphase --> bcftools_roh["bcftools roh"] - hiphase --> bcftools_stats["bcftools stats\n(small variants)"] - hiphase --> sv_stats["SV stats"] - hiphase --> cpg_pileup["5mCpG pileup"] - hiphase --> starphase["StarPhase"] - hiphase --> pharmcat["PharmCat"] - starphase --> pharmcat + subgraph "`**Phasing and Downstream**`" + hiphase["HiPhase"] + bam_stats["BAM stats"] + bcftools_roh["bcftools roh"] + bcftools_stats["bcftools stats\n(small variants)"] + sv_stats["SV stats"] + cpg_pileup["5mCpG pileup"] + starphase["StarPhase"] + pharmcat["PharmCat"] end subgraph " " - hiphase --> merge_small_variants["bcftools merge small variants"] - hiphase --> merge_svs["bcftools merge SV"] - hiphase --> trgt_merge["trgt merge"] + merge_small_variants["bcftools merge small variants"] + merge_svs["bcftools merge SV"] + trgt_merge["trgt merge"] end subgraph "`**Tertiary Analysis**`" - merge_small_variants --> slivar_small_variants["slivar small variants"] - merge_svs --> svpack["svpack filter and annotate"] - svpack --> slivar_svpack["slivar svpack tsv"] + slivar_small_variants["slivar small variants"] + svpack["svpack filter and annotate"] + slivar_svpack["slivar svpack tsv"] end + + ubam --> pbmm2_align --> samtools_merge + samtools_merge --> mosdepth + 
samtools_merge --> paraphase + samtools_merge --> mitorsaw + samtools_merge --> trgt + samtools_merge --> trgt_dropouts + samtools_merge --> deepvariant + samtools_merge --> sawfish_discover + samtools_merge --> hiphase + deepvariant --> sawfish_discover + deepvariant --> glnexus + sawfish_discover --> sawfish_call + trgt --> hiphase + + glnexus --> split_glnexus + sawfish_call --> split_sawfish + split_glnexus --> hiphase + split_sawfish --> hiphase + + hiphase --> bam_stats + hiphase --> bcftools_roh + hiphase --> bcftools_stats + hiphase --> sv_stats + hiphase --> cpg_pileup + hiphase --> starphase + hiphase --> pharmcat + starphase --> pharmcat + + hiphase --> merge_small_variants + hiphase --> merge_svs + hiphase --> trgt_merge + + merge_small_variants --> slivar_small_variants + merge_svs --> svpack + svpack --> slivar_svpack ``` ## Inputs @@ -82,6 +114,7 @@ flowchart TD | Boolean | gpu | Use GPU when possible

Default: `false` | [GPU support](./gpu.md#gpu-support) | | String | backend | Backend where the workflow will be executed

`["GCP", "Azure", "AWS-HealthOmics", "HPC"]` | | | String? | zones | Zones where compute will take place; required if backend is set to 'AWS' or 'GCP'. | [Determining available zones in GCP](./backends.md/gcp#determining-available-zones) | +| String? | cpuPlatform | Minimum CPU platform to use for tasks on GCP | Optional, only necessary in certain zones lacking n1 nodes. | | String? | gpuType | GPU type to use; required if gpu is set to `true` for cloud backends; must match backend | [Available GPU types](./gpu.md#gpu-types) | | String? | container_registry | Container registry where workflow images are hosted.

Default: `"quay.io/pacbio"` | If omitted, [PacBio's public Quay.io registry](https://quay.io/organization/pacbio) will be used.

Custom container_registry must be set if backend is set to 'AWS-HealthOmics'. | | Boolean | preemptible | Where possible, run tasks preemptibly

`[true, false]`

Default: `true` | If set to `true`, run tasks preemptibly where possible. If set to `false`, on-demand VMs will be used for every task. Ignored if backend is set to HPC. | @@ -116,6 +149,8 @@ The `Sample` struct contains sample specific data and metadata. The struct has t | ---- | ---- | ----------- | ----- | | String | workflow_name | Workflow name | | | String | workflow_version | Workflow version | | +| Array\[String\] | msg | Messages from the workflow | | +| File | msg_file | File containing messages from the workflow | | | Array\[String\] | sample_ids | Sample IDs | | | File | stats_file | Table of summary statistics | | | Array\[File\] | bam_stats | BAM stats | Per-read length and read-quality | @@ -168,24 +203,24 @@ The `Sample` struct contains sample specific data and metadata. The struct has t | Array\[String\] | stat_sv_INS_count | Structural variant INS count | (PASS variants) | | Array\[String\] | stat_sv_INV_count | Structural variant INV count | (PASS variants) | | Array\[String\] | stat_sv_BND_count | Structural variant BND count | (PASS variants) | +| Array\[String\] | stat_sv_SWAP_count | Structural variant sequence swap events | (PASS variants) | +| File | sv_supporting_reads | Supporting reads for structural variants | | +| Array\[File\] | sv_copynum_bedgraph | CNV copy number BEDGraph | | +| Array\[File\] | sv_depth_bw | CNV depth BigWig | | +| Array\[File\] | sv_gc_bias_corrected_depth_bw | CNV GC-bias corrected depth BigWig | | +| Array\[File\] | sv_maf_bw | CNV MAF BigWig | | | Array\[File\] | bcftools_roh_out | ROH calling | `bcftools roh` | | Array\[File\] | bcftools_roh_bed | Generated from above, without filtering | | | File? | joint_sv_vcf | Joint-called structural variant VCF | | | File? 
| joint_sv_vcf_index | | | -### Copy Number Variants (≥100 kb) +### Mitochondrial variants and haplotypes | Type | Name | Description | Notes | | ---- | ---- | ----------- | ----- | -| Array\[File\] | cnv_vcf | CNV VCF | | -| Array\[File\] | cnv_vcf_index | Index for CNV VCF | | -| Array\[File\] | cnv_copynum_bedgraph | CNV copy number BEDGraph | | -| Array\[File\] | cnv_depth_bw | CNV depth BigWig | | -| Array\[File\] | cnv_maf_bw | CNV MAF BigWig | | -| Array\[String\] | stat_cnv_DUP_count | Count of DUP events | (for PASS variants) | -| Array\[String\] | stat_cnv_DEL_count | Count of DEL events | (PASS variants) | -| Array\[String\] | stat_cnv_DUP_sum | Sum of DUP bp | (PASS variants) | -| Array\[String\] | stat_cnv_DEL_sum | Sum of DEL bp | (PASS variants) | +| Array\[File\] | mitorsaw_vcf | Mitochondrial variant VCF | | +| Array\[File\] | mitorsaw_vcf_index | Index for mitochondrial variant VCF | | +| Array\[File\] | mitorsaw_hap_stats | Mitochondrial haplotype stats | | ### Tandem Repeat Genotyping @@ -215,9 +250,9 @@ The `Sample` struct contains sample specific data and metadata. 
The struct has t | Type | Name | Description | Notes | | ---- | ---- | ----------- | ----- | -| Array\[File\] | paraphase_output_json | Paraphase output JSON | | -| Array\[File\] | paraphase_realigned_bam | Paraphase realigned BAM | | -| Array\[File\] | paraphase_realigned_bam_index | | | +| Array\[File?\] | paraphase_output_json | Paraphase output JSON | | +| Array\[File?\] | paraphase_realigned_bam | Paraphase realigned BAM | | +| Array\[File?\] | paraphase_realigned_bam_index | | | | Array\[File?\] | paraphase_vcfs | Paraphase VCFs | Compressed as `.tar.gz` | ### 5mCpG Methylation Calling diff --git a/docs/pbmm2.md b/docs/pbmm2.md new file mode 100644 index 00000000..565c1558 --- /dev/null +++ b/docs/pbmm2.md @@ -0,0 +1,17 @@ +# pbmm2 alignment Subworkflow + +```mermaid +flowchart TD + hifi_reads[/"HiFi reads BAM"/] --> is_aligned{"is aligned?"} + is_aligned -- yes --> samtools_reset["samtools reset"] + is_aligned -- no --> has_kinetics{"kinetics?"} + has_kinetics -- yes --> samtools_reset + has_kinetics -- no --> count_records["count records"] + samtools_reset --> count_records + count_records --> compare_counts{"compare counts?"} + compare_counts -- yes --> chunk_bam["chunk BAM"] + compare_counts -- no --> pbmm2_align["pbmm2 align"] + chunk_bam --> pbmm2_align +``` + +This subworkflow checks an input BAM for evidence of alignment or kinetics. If it finds either of these, it strips alignment and kinetics information. Next, it counts the number of records in the BAM, and if chunking is enabled and the number of records is greater than `max_reads_per_chunk`, the BAM is split into chunks no larger than `max_reads_per_chunk`. Finally, chunks are aligned to the reference with pbmm2. 
diff --git a/docs/ref_map.md b/docs/ref_map.md index 0d6c5e93..09e62be5 100644 --- a/docs/ref_map.md +++ b/docs/ref_map.md @@ -8,32 +8,9 @@ | File | pbsv_splits | Regions for pbsv parallelization | [below](#pbsv_splits) | | File | pbsv_tandem_repeat_bed | Tandem Repeat BED used by PBSV to normalize SVs within TRs | [link](https://github.com/PacificBiosciences/pbsv/tree/master/annotations) | | File | trgt_tandem_repeat_bed | Tandem Repeat catalog (BED) for TRGT genotyping | [link](https://github.com/PacificBiosciences/trgt/blob/main/docs/repeat_files.md) | -| File | hificnv_exclude_bed | Regions to be excluded by HIFICNV in gzipped BED format | [link](https://github.com/PacificBiosciences/HiFiCNV/blob/main/docs/aux_data.md) | -| File | hificnv_exclude_bed_index | BED index | [link](https://github.com/PacificBiosciences/HiFiCNV/blob/main/docs/aux_data.md) | -| File | hificnv_expected_bed_male | Expected allosome copy number BED for XY samples | [link](https://github.com/PacificBiosciences/HiFiCNV/blob/main/docs/aux_data.md) | -| File | hificnv_expected_bed_female | Expected allosome copy number BED for XX samples | [link](https://github.com/PacificBiosciences/HiFiCNV/blob/main/docs/aux_data.md) | +| File | sawfish_exclude_bed | Regions to be excluded for Sawfish CNV calls in gzipped BED format | [link](https://github.com/PacificBiosciences/sawfish/blob/main/docs/user_guide.md#cnv-excluded-regions) | +| File | sawfish_exclude_bed_index | BED index | [link](https://github.com/PacificBiosciences/sawfish/blob/main/docs/user_guide.md#cnv-excluded-regions) | +| File | sawfish_expected_bed_male | Expected allosome copy number BED for XY samples | [link](https://github.com/PacificBiosciences/sawfish/blob/main/docs/user_guide.md#expected-copy-number) | +| File | sawfish_expected_bed_female | Expected allosome copy number BED for XX samples | [link](https://github.com/PacificBiosciences/sawfish/blob/main/docs/user_guide.md#expected-copy-number) | | File | 
pharmcat_positions_vcf | PharmCAT positions VCF | | | File | pharmcat_positions_vcf_index | PharmCAT positions VCF index | | - -## pbsv_splits - -The `pbsv_splits` file is a JSON array of arrays of strings. Each inner array contains one or more chromosome names such that each inner array is of roughly equal size in base pairs. The inner arrays are processed in parallel. For example: - -```json -[ - ... - [ - "chr10", - "chr11" - ], - [ - "chr12", - "chr13" - ], - [ - "chr14", - "chr15" - ], - ... -] -``` diff --git a/docs/singleton.md b/docs/singleton.md index f5b9b7e6..b4bcff89 100644 --- a/docs/singleton.md +++ b/docs/singleton.md @@ -7,7 +7,6 @@ - [Alignments, Coverage, and QC](#alignments-coverage-and-qc) - [Small Variants (\<50 bp)](#small-variants-50-bp) - [Structural Variants (≥50 bp)](#structural-variants-50-bp) - - [Copy Number Variants (≥100 kb)](#copy-number-variants-100-kb) - [Tandem Repeat Genotyping](#tandem-repeat-genotyping) - [Variant Phasing](#variant-phasing) - [Variant Calling in Dark Regions](#variant-calling-in-dark-regions) @@ -24,37 +23,61 @@ title: singleton.wdl flowchart TD subgraph "`**Upstream of Phasing**`" subgraph "per-movie" - ubam[/"HiFi uBAM"/] --> pbmm2_align["pbmm2 align"] - pbmm2_align --> pbsv_discover["PBSV discover"] + ubam[/"HiFi uBAM"/] + pbmm2_align["pbmm2 align"] end - pbmm2_align --> merge_read_stats["merge read statistics"] - pbmm2_align --> samtools_merge["samtools merge"] - samtools_merge --> mosdepth["mosdepth"] - samtools_merge --> paraphase["Paraphase"] - samtools_merge --> hificnv["HiFiCNV"] - samtools_merge --> trgt["TRGT"] - samtools_merge --> trgt_dropouts["TR coverage dropouts"] - samtools_merge --> deepvariant["DeepVariant"] - samtools_merge --> hiphase["HiPhase"] - pbsv_discover --> pbsv_call["PBSV call"] + samtools_merge["samtools merge"] + mosdepth["mosdepth"] + paraphase["Paraphase"] + mitorsaw["MitorSaw"] + trgt["TRGT"] + trgt_dropouts["TR coverage dropouts"] + deepvariant["DeepVariant"] + 
sawfish_discover["Sawfish discover"] + sawfish_call["Sawfish call"] end subgraph "`**Phasing and Downstream**`" - deepvariant --> hiphase - trgt --> hiphase - pbsv_call --> hiphase - hiphase --> bcftools_roh["bcftools roh"] - hiphase --> bcftools_stats["bcftools stats\n(small variants)"] - hiphase --> sv_stats["SV stats"] - hiphase --> cpg_pileup["5mCpG pileup"] - hiphase --> starphase["StarPhase"] - hiphase --> pharmcat["PharmCat"] - starphase --> pharmcat + hiphase["HiPhase"] + bam_stats["BAM stats"] + bcftools_roh["bcftools roh"] + bcftools_stats["bcftools stats\n(small variants)"] + sv_stats["SV stats"] + cpg_pileup["5mCpG pileup"] + starphase["StarPhase"] + pharmcat["PharmCat"] end subgraph "`**Tertiary Analysis**`" - hiphase --> slivar_small_variants["slivar small variants"] - hiphase --> svpack["svpack filter and annotate"] - svpack --> slivar_svpack["slivar svpack tsv"] + slivar_small_variants["slivar small variants"] + svpack["svpack filter and annotate"] + slivar_svpack["slivar svpack tsv"] end + + ubam --> pbmm2_align --> samtools_merge + samtools_merge --> mosdepth + samtools_merge --> paraphase + samtools_merge --> mitorsaw + samtools_merge --> trgt + samtools_merge --> trgt_dropouts + samtools_merge --> deepvariant + samtools_merge --> sawfish_discover + samtools_merge --> hiphase + deepvariant --> sawfish_discover + deepvariant --> hiphase + sawfish_discover --> sawfish_call --> hiphase + trgt --> hiphase + + hiphase --> bam_stats + hiphase --> bcftools_roh + hiphase --> bcftools_stats + hiphase --> sv_stats + hiphase --> cpg_pileup + hiphase --> starphase + hiphase --> pharmcat + starphase --> pharmcat + + hiphase --> slivar_small_variants + hiphase --> svpack + svpack --> slivar_svpack ``` ## Inputs @@ -70,6 +93,7 @@ flowchart TD | Boolean | gpu | Use GPU when possible

Default: `false` | [GPU support](./gpu.md#gpu-support) | | String | backend | Backend where the workflow will be executed

`["GCP", "Azure", "AWS-AGC", "AWS-HealthOmics", "HPC"]` | | | String? | zones | Zones where compute will take place; required if backend is set to 'AWS' or 'GCP'. | [Determining available zones in GCP](./backends/gcp.md#determining-available-zones) | +| String? | cpuPlatform | Minimum CPU platform to use for tasks on GCP | Optional, only necessary in certain zones lacking n1 nodes. | | String? | gpuType | GPU type to use; required if gpu is set to `true` for cloud backends; must match backend | [Available GPU types](./gpu.md#gpu-types) | | String? | container_registry | Container registry where workflow images are hosted.

Default: `"quay.io/pacbio"` | If omitted, [PacBio's public Quay.io registry](https://quay.io/organization/pacbio) will be used.

Custom container_registry must be set if backend is set to 'AWS-HealthOmics'. | | Boolean | preemptible | Where possible, run tasks preemptibly

`[true, false]`

Default: `true` | If set to `true`, run tasks preemptibly where possible. If set to `false`, on-demand VMs will be used for every task. Ignored if backend is set to HPC. | @@ -82,6 +106,8 @@ flowchart TD | ---- | ---- | ----------- | ----- | | String | workflow_name | Workflow name | | | String | workflow_version | Workflow version | | +| Array\[String\] | msg | Messages from the workflow | | +| File | msg_file | File containing messages from the workflow | | | File | stats_file | Table of summary statistics | | | File | bam_stats | BAM stats | Per-read length and read-quality | | File | read_length_plot | Read length plot | | @@ -131,22 +157,22 @@ flowchart TD | String | stat_sv_INS_count | Structural variant INS count | (PASS variants) | | String | stat_sv_INV_count | Structural variant INV count | (PASS variants) | | String | stat_sv_BND_count | Structural variant BND count | (PASS variants) | +| String | stat_sv_SWAP_count | Structural variant sequence swap events | (PASS variants) | +| File | sv_supporting_reads | Supporting reads for structural variants | | +| File | sv_copynum_bedgraph | CNV copy number BEDGraph | | +| File | sv_depth_bw | CNV depth BigWig | | +| File | sv_gc_bias_corrected_depth_bw | CNV GC-bias corrected depth BigWig | | +| File | sv_maf_bw | CNV MAF BigWig | | | File | bcftools_roh_out | ROH calling | `bcftools roh` | | File | bcftools_roh_bed | Generated from above, without filtering | | -### Copy Number Variants (≥100 kb) +### Mitochondrial variants and haplotypes | Type | Name | Description | Notes | | ---- | ---- | ----------- | ----- | -| File | cnv_vcf | CNV VCF | | -| File | cnv_vcf_index | Index for CNV VCF | | -| File | cnv_copynum_bedgraph | CNV copy number BEDGraph | | -| File | cnv_depth_bw | CNV depth BigWig | | -| File | cnv_maf_bw | CNV MAF BigWig | | -| String | stat_cnv_DUP_count | Count of DUP events | (for PASS variants) | -| String | stat_cnv_DEL_count | Count of DEL events | (PASS variants) | -| String | 
stat_cnv_DUP_sum | Sum of DUP bp | (PASS variants) | -| String | stat_cnv_DEL_sum | Sum of DEL bp | (PASS variants) | +| File | mitorsaw_vcf | Mitochondrial variant VCF | | +| File | mitorsaw_vcf_index | Index for mitochondrial variant VCF | | +| File | mitorsaw_hap_stats | Mitochondrial haplotype stats | | ### Tandem Repeat Genotyping @@ -174,9 +200,9 @@ flowchart TD | Type | Name | Description | Notes | | ---- | ---- | ----------- | ----- | -| File | paraphase_output_json | Paraphase output JSON | | -| File | paraphase_realigned_bam | Paraphase realigned BAM | | -| File | paraphase_realigned_bam_index | | | +| File? | paraphase_output_json | Paraphase output JSON | | +| File? | paraphase_realigned_bam | Paraphase realigned BAM | | +| File? | paraphase_realigned_bam_index | | | | File? | paraphase_vcfs | Paraphase VCFs | Compressed as `.tar.gz` | ### 5mCpG Methylation Calling diff --git a/docs/tertiary.md b/docs/tertiary.md index ff158789..2442ed69 100644 --- a/docs/tertiary.md +++ b/docs/tertiary.md @@ -8,7 +8,7 @@ This is a simple, opinionated subworkflow for tertiary analysis in rare disease - We generate a pedigree describing sample relationships and phenotype status, based on the input provided to the entrypoint workflow. In the case of a singleton, the pedigree is a single row. - Using the comma-delimited list of HPO terms provided to the entrypoint workflow, we generate a Phenotype Rank (Phrank) lookup table, a simple two column lookup table mapping gene symbols to Phrank score. Phrank scores are positive real numbers (or null) such that higher scores indicate a gene is more likely to be relevant to the phenotypes. The Phrank lookup is used to prioritize variants based on the predicted impact on the gene and the gene's relevance to the phenotype. Phrank scores are not normalized, and providing more phenotypes for a sample will result in a higher maximum Phrank score. - Reference data is provided by the [`ref_map_file`](./ref_map.md) input. 
This workflow is currently only compatible with the GRCh38 human reference. -- Population data, other supplemental data, and allele thresholds are provided by the [`tertiary_map_file`](./tertiary_map.md) input. We provide a version of this file that uses population data from [gnomAD v4.1](https://gnomad.broadinstitute.org/news/2024-05-gnomad-v4-1-updates/) and [CoLoRSdb](https://colorsdb.org) v1.1.0 [10.5281/zenodo.13145123](https://zenodo.org/records/13145123). We provide the ability to tweak the allele thresholds, but the default values are recommended, as increasing these will result in much higher resource usage. +- Population data, other supplemental data, and allele thresholds are provided by the [`tertiary_map_file`](./tertiary_map.md) input. We provide a version of this file that uses population data from [gnomAD v4.1](https://gnomad.broadinstitute.org/news/2024-05-gnomad-v4-1-updates/) and [CoLoRSdb](https://colorsdb.org) v1.2.0 [10.5281/zenodo.14814308](https://zenodo.org/records/14814308). We provide the ability to tweak the allele thresholds, but the default values are recommended, as increasing these will result in much higher resource usage. ## Process diff --git a/docs/tools_containers.md b/docs/tools_containers.md index bd6026d2..e672426d 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -13,17 +13,17 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | pb_wdl_base |
  • htslib 1.20
  • bcftools 1.20
  • samtools 1.20
  • bedtools 2.31.0
  • python3.9
  • numpy 1.24.24
  • pandas 2.0.3
  • matplotlib 3.7.5
  • seaborn 0.13.2
  • pysam 0.22.1
  • vcfpy 0.13.8
  • biopython 1.83
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/6b13cc246dd44e41903d17a660bb5432cdd18dbe/docker/pb_wdl_base) | [sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87](https://quay.io/repository/pacbio/pb_wdl_base/manifest/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87) | | pbmm2 |
  • pbmm2 1.17.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/9591749da92ca57f7283ca1c2268789c45fa341d/docker/pbmm2) | [pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859](https://quay.io/repository/pacbio/pbmm2/manifest/sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859) | | mosdepth |
  • mosdepth 0.3.9
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/fa84fbf582738c05c750e667ff43d11552ad4183/docker/mosdepth) | [mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1](https://quay.io/repository/pacbio/mosdepth/manifest/sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1) | -| pbsv |
  • pbsv 2.11.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/89fe9bce4e056f487fa665f53ba1e9253701124e/docker/pbsv) | [pbsv@sha256:2134be37f71b5b2cb41f364736fe5ea14cf8e70403ba41af5005ce50b64086e4](https://quay.io/repository/pacbio/pbsv/manifest/sha256:2134be37f71b5b2cb41f364736fe5ea14cf8e70403ba41af5005ce50b64086e4) | -| trgt |
  • trgt 1.5.1
  • `/opt/scripts/check_trgt_coverage.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/641ed67d29128381f27daeca9936fbc1e41bf58d/docker/trgt) | [trgt@sha256:be7e6ef589a31f4de5d2ed4725dfb34b4b23cb9a440577b606e8f7bfee06526b](https://quay.io/repository/pacbio/trgt/manifest/sha256:be7e6ef589a31f4de5d2ed4725dfb34b4b23cb9a440577b606e8f7bfee06526b) | -| hiphase |
  • hiphase 1.4.5
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/1051d12818e165a2145526e0b58f0ed0d0dc023a/docker/hiphase) | [hiphase@sha256:47fe7d42aea6b1b2e6d3c7401bc35a184464c3f647473d0525c00f3c968b40ad](https://quay.io/repository/pacbio/hiphase/manifest/sha256:47fe7d42aea6b1b2e6d3c7401bc35a184464c3f647473d0525c00f3c968b40ad) | -| hificnv |
  • hificnv 1.0.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/a58f8b44cf8fd09c39c90e07076dbb418188084d/docker/hificnv) | [hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d](https://quay.io/repository/pacbio/hificnv/manifest/sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d) | -| paraphase |
  • paraphase 3.2.1
  • minimap2 2.28
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/0c8cf2ab0732fd610c9b91a4423a22731314f3f7/docker/paraphase) | [paraphase@sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e](https://quay.io/repository/pacbio/paraphase/manifest/sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e) | -| pbstarphase |
  • pbstarphase 1.1.0
  • Database 20250110
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/c92f009d1214cfac0c636dae8b94cb330767fc53/docker/pbstarphase) | [pbstarphase@sha256:426764fb09eadbc5de8aea2450b5fe55000c1dd7aaa2ea7b0f5438f34ea63e3d](https://quay.io/repository/pacbio/pbstarphase/manifest/sha256:426764fb09eadbc5de8aea2450b5fe55000c1dd7aaa2ea7b0f5438f34ea63e3d) | +| sawfish |
  • sawfish 2.0.3
  • sawshark 0.3.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/124a1d97513ddf6caf2b4605832cccd904def609/docker/sawfish) | [sawfish@sha256:561b6a232dd89a2d186b19d6ad439c74c460078348dbf96ae49ca0ea6eab0281](https://quay.io/repository/pacbio/sawfish/manifest/sha256:561b6a232dd89a2d186b19d6ad439c74c460078348dbf96ae49ca0ea6eab0281) | +| trgt |
  • trgt 3.0.0
  • `/opt/scripts/check_trgt_coverage.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3c5ee05da7043bd03bd80959c3dd025e25468070/docker/trgt) | [trgt@sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284](https://quay.io/repository/pacbio/trgt/manifest/sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284) | +| hiphase |
  • hiphase 1.5.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/69039c010ada793bab4d38a9bd17a30562b9b671/docker/hiphase) | [hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482](https://quay.io/repository/pacbio/hiphase/manifest/sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482) | +| mitorsaw |
  • mitorsaw 0.2.3
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/dd6d0b7c3953211ee0ea074283f42329998aeff7/docker/mitorsaw) | [mitorsaw@sha256:4a1eac52a6ae80b7ccfb0ad3809f5f34a69c7ed859541e097e36d73623e8ad0e](https://quay.io/repository/pacbio/mitorsaw/manifest/sha256:4a1eac52a6ae80b7ccfb0ad3809f5f34a69c7ed859541e097e36d73623e8ad0e) | +| paraphase |
  • paraphase 3.3.2
  • minimap2 2.28
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/28c84c386e28ce0a46587e4f1bf85db824bb4634/docker/paraphase) | [paraphase@sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920](https://quay.io/repository/pacbio/paraphase/manifest/sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920) | +| pbstarphase |
  • pbstarphase 1.4.1
  • Database 20250515
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/c5166b28e43f36a381450ba479e2e34a841bb922/docker/pbstarphase) | [pbstarphase@sha256:7daaad3b617a3b8b5914ab0893ee7cc545fd2025a35619211a5b8e25e4c36ac4](https://quay.io/repository/pacbio/pbstarphase/manifest/sha256:7daaad3b617a3b8b5914ab0893ee7cc545fd2025a35619211a5b8e25e4c36ac4) | | pb-cpg-tools |
  • pb-cpg-tools 3.0.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/330b99b79f32b2d2598e812779f3c64460739e6c/docker/pb-cpg-tools) | [pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c](https://quay.io/repository/pacbio/pb-cpg-tools/manifest/sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c) | | wgs_tertiary |
  • `/opt/scripts/calculate_phrank.py` 2.0.0
  • `/opt/scripts/json2ped.py` 0.5.0
Last built 2021-09-17:
  • ensembl -> HGNC
  • ensembl -> HPO
  • HGNC -> inheritance
  • HPO DAG
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/fd70e2872bd3c6bb705faff5bc68374116d7d62f/docker/wgs_tertiary) | [wgs_tertiary@sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136](https://quay.io/repository/pacbio/wgs_tertiary/manifest/sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136) | | slivar |
  • slivar 0.3.1
  • `/opt/scripts/add_comphet_phase.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/5e1094fd6755203b4971fdac6dcb951bbc098bed/docker/slivar) | [slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa](https://quay.io/repository/pacbio/slivar/manifest/sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa) | | svpack |
  • svpack 54b54db
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/6fc750b0c65b4a5c1eb65791eab9eed89864d858/docker/svpack) | [svpack@sha256:628e9851e425ed8044a907d33de04043d1ef02d4d2b2667cf2e9a389bb011eba](https://quay.io/repository/pacbio/svpack/manifest/sha256:628e9851e425ed8044a907d33de04043d1ef02d4d2b2667cf2e9a389bb011eba) | -| deepvariant |
  • DeepVariant 1.6.1
| | [deepvariant:1.6.1](https://hub.docker.com/layers/google/deepvariant/1.6.1/images/sha256-ccab95548e6c3ec28c75232987f31209ff1392027d67732435ce1ba3d0b55c68) | -| deepvariant-gpu |
  • DeepVariant 1.6.1
| | [deepvariant:1.6.1-gpu](https://hub.docker.com/layers/google/deepvariant/1.6.1-gpu/images/sha256-7929c55106d3739daa18d52802913c43af4ca2879db29656056f59005d1d46cb) | +| deepvariant |
  • DeepVariant 1.9.0
| | [deepvariant:1.9.0](https://hub.docker.com/layers/google/deepvariant/1.9.0/images/sha256-07e95b34e40cc50074d23273d479934a27e80919ac75bd97bf39a731e3c2d6ad) | +| deepvariant-gpu |
  • DeepVariant 1.9.0
| | [deepvariant:1.9.0-gpu](https://hub.docker.com/layers/google/deepvariant/1.9.0-gpu/images/sha256-e0c8734b8700d945e3ee78d609acb90548f829c874596ffca436af8cf379f87a) | | pharmcat |
  • PharmCat 2.15.4
| | [pharmcat:2.15.4](https://hub.docker.com/layers/pgkb/pharmcat/2.15.4/images/sha256-5b58ae959b4cd85986546c2d67e3596f33097dedc40dfe57dd845b6e78781eb6) | | glnexus |
  • GLnexus 1.4.3
| | [glnexus:1.4.3](https://quay.io/repository/pacbio/glnexus/manifest/sha256:ce6fecf59dddc6089a8100b31c29c1e6ed50a0cf123da9f2bc589ee4b0c69c8e) | diff --git a/image_manifest.txt b/image_manifest.txt index 66f4700f..5e199050 100644 --- a/image_manifest.txt +++ b/image_manifest.txt @@ -1,17 +1,18 @@ quay.io/pacbio/glnexus@sha256:ce6fecf59dddc6089a8100b31c29c1e6ed50a0cf123da9f2bc589ee4b0c69c8e -quay.io/pacbio/hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d -quay.io/pacbio/hiphase@sha256:47fe7d42aea6b1b2e6d3c7401bc35a184464c3f647473d0525c00f3c968b40ad +quay.io/pacbio/hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482 +quay.io/pacbio/mitorsaw@sha256:4a1eac52a6ae80b7ccfb0ad3809f5f34a69c7ed859541e097e36d73623e8ad0e quay.io/pacbio/mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1 -quay.io/pacbio/paraphase@sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e +quay.io/pacbio/paraphase@sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920 quay.io/pacbio/pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c quay.io/pacbio/pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859 -quay.io/pacbio/pbstarphase@sha256:426764fb09eadbc5de8aea2450b5fe55000c1dd7aaa2ea7b0f5438f34ea63e3d -quay.io/pacbio/pbsv@sha256:2134be37f71b5b2cb41f364736fe5ea14cf8e70403ba41af5005ce50b64086e4 +quay.io/pacbio/pbstarphase@sha256:7daaad3b617a3b8b5914ab0893ee7cc545fd2025a35619211a5b8e25e4c36ac4 +quay.io/pacbio/pbtk@sha256:67cd438ed9f343f90f058108170ddbff8fb1d9b5c193f4016be42b737ee2e73c quay.io/pacbio/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87 +quay.io/pacbio/sawfish@sha256:561b6a232dd89a2d186b19d6ad439c74c460078348dbf96ae49ca0ea6eab0281 quay.io/pacbio/slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa 
quay.io/pacbio/svpack@sha256:628e9851e425ed8044a907d33de04043d1ef02d4d2b2667cf2e9a389bb011eba -quay.io/pacbio/trgt@sha256:be7e6ef589a31f4de5d2ed4725dfb34b4b23cb9a440577b606e8f7bfee06526b +quay.io/pacbio/trgt@sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284 quay.io/pacbio/wgs_tertiary@sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136 -google/deepvariant:1.6.1 -google/deepvariant:1.6.1-gpu +google/deepvariant:1.9.0 +google/deepvariant:1.9.0-gpu pgkb/pharmcat:2.15.4 diff --git a/scripts/create_image_manifest.sh b/scripts/create_image_manifest.sh index a1704f82..b367e455 100644 --- a/scripts/create_image_manifest.sh +++ b/scripts/create_image_manifest.sh @@ -10,9 +10,9 @@ grep '@sha' -h -r workflows/ \ | sort --unique \ > ./image_manifest.txt -deepvariant_version=$(grep -m1 'String deepvariant_version' workflows/singleton.wdl | tr -s ' ' | cut -f5 -d' ' | sed 's/"//g') +deepvariant_version=1.9.0 echo "google/deepvariant:${deepvariant_version}" >> ./image_manifest.txt echo "google/deepvariant:${deepvariant_version}-gpu" >> ./image_manifest.txt -pharmcat_version=$(grep -m1 'String pharmcat_version' workflows/singleton.wdl | tr -s ' ' | cut -f5 -d' ' | sed 's/"//g') +pharmcat_version=2.15.4 echo "pgkb/pharmcat:${pharmcat_version}" >> ./image_manifest.txt \ No newline at end of file diff --git a/scripts/populate_miniwdl_singularity_cache.sh b/scripts/populate_miniwdl_singularity_cache.sh new file mode 100644 index 00000000..3121c1bb --- /dev/null +++ b/scripts/populate_miniwdl_singularity_cache.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +set -eo pipefail + +USAGE="Given a manifest file with docker images, this script populates the Singularity cache with those images. 
+Usage: $0 <image_manifest_file> <miniwdl_singularity_cache_dir>" + + +# Check if the first argument is -h or --help +if [ "$1" = "-h" ] || [ "$1" = "--help" ]; then + echo -e "${USAGE}" + exit 0 +fi +# Check if at least two arguments are provided +if [ $# -lt 2 ]; then + echo -e "${USAGE}" + exit 1 +fi + +image_manifest_file=$1 +miniwdl_singularity_cache_dir=$2 +# Check if the image manifest file exists and is readable, and if the cache directory exists and is writable +[ -r "${image_manifest_file}" ] || (echo "${image_manifest_file} is not readable." >&2 && exit 1) +if [ ! -d "${miniwdl_singularity_cache_dir}" ]; then + echo "${miniwdl_singularity_cache_dir} does not exist. Creating it now..." + mkdir -p "${miniwdl_singularity_cache_dir}" || (echo "Could not create ${miniwdl_singularity_cache_dir}/." >&2 && exit 1) +fi +[ -w "${miniwdl_singularity_cache_dir}" ] || (echo "${miniwdl_singularity_cache_dir}/ is not writable" >&2 && exit 1) +singularity --version || (echo "singularity is not in path. Please install Singularity to use this script." >&2 && exit 1) + +# manifest file should contain one image per line, with no empty lines +# image lines should be in the format: <image_name>:<tag> or <image_name>@sha256:<digest> +# e.g., google/deepvariant:1.9.0 or quay.io/pacbio/some_image@sha256:abc123... 
+while read -r image; do + if [[ -n "${image}" ]]; then + image_url="docker://${image}" + # miniwdl singularity backend replaces ':' and '/' with '_' in the SIF file name + sif_path="${image_url//:/_}" + sif_path="${sif_path//\//_}" + sif_path="${miniwdl_singularity_cache_dir}/${sif_path}.sif" + if [ -f "${sif_path}" ]; then + echo "Singularity image already exists: ${sif_path}" >&2 + else + echo "Pulling Singularity image: ${image_url}" + singularity pull "${sif_path}" "${image_url}" + fi + fi +done < "${image_manifest_file}" diff --git a/wdl-ci.config.json b/wdl-ci.config.json index de19f1d8..277565e6 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -37,23 +37,48 @@ "tasks": { "slivar_small_variant": { "key": "slivar_small_variant", - "digest": "nz7zrdaatasxka5rziksdmc2oq4tr23z", + "digest": "eq7doe46obnrz37la5oczwie53pzsaf6", "tests": [ { "inputs": { "vcf": "${resources_file_path}/slivar_small_variant/input/HG002-trio.GRCh38.small_variants.vcf.gz", "vcf_index": "${resources_file_path}/slivar_small_variant/input/HG002-trio.GRCh38.small_variants.vcf.gz.tbi", - "pedigree": "${resources_file_path}/slivar_small_variant/input/HG002-trio.ped", + "sample_metadata": [ + [ + "HG002-trio", + "HG002", + "HG003", + "HG004", + "1", + "2" + ], + [ + "HG002-trio", + "HG003", + ".", + ".", + "1", + "1" + ], + [ + "HG002-trio", + "HG004", + ".", + ".", + "2", + "1" + ] + ], "phrank_lookup": "${resources_file_path}/slivar_small_variant/input/HG002-trio_phrank.tsv", "reference": "${ref_fasta}", "reference_index": "${ref_index}", "gff": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz", - "lof_lookup": "${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt", + "lof_lookup": "${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/lof.gnomadv4p1.lookup", "clinvar_lookup": "${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt", "slivar_js": 
"${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v0.2.8.js", "gnotate_files": [ "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip", - "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.1.0.deepvariant.glnexus.zip" + "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip" ], "af_expr": [ "INFO.gnomad_af <= 0.03", @@ -80,7 +105,7 @@ }, "output_tests": { "filtered_vcf": { - "value": "${resources_file_path}/slivar_small_variant/output/HG002-trio.GRCh38.small_variants.norm.slivar.vcf.gz", + "value": "${resources_file_path}/slivar_small_variant/output/new_lof/HG002-trio.GRCh38.small_variants.norm.slivar.vcf.gz", "test_tasks": [ "compare_file_basename", "vcftools_validator", @@ -88,7 +113,7 @@ ] }, "compound_het_vcf": { - "value": "${resources_file_path}/slivar_small_variant/output/HG002-trio.GRCh38.small_variants.norm.slivar.compound_hets.vcf.gz", + "value": "${resources_file_path}/slivar_small_variant/output/new_lof/HG002-trio.GRCh38.small_variants.norm.slivar.compound_hets.vcf.gz", "test_tasks": [ "compare_file_basename", "vcftools_validator", @@ -96,7 +121,7 @@ ] }, "filtered_tsv": { - "value": "${resources_file_path}/slivar_small_variant/output/HG002-trio.GRCh38.small_variants.norm.slivar.tsv", + "value": "${resources_file_path}/slivar_small_variant/output/new_lof/HG002-trio.GRCh38.small_variants.norm.slivar.tsv", "test_tasks": [ "compare_file_basename", "check_tab_delimited", @@ -104,7 +129,7 @@ ] }, "compound_het_tsv": { - "value": "${resources_file_path}/slivar_small_variant/output/HG002-trio.GRCh38.small_variants.norm.slivar.compound_hets.tsv", + "value": "${resources_file_path}/slivar_small_variant/output/new_lof/HG002-trio.GRCh38.small_variants.norm.slivar.compound_hets.tsv", "test_tasks": [ "compare_file_basename", "check_tab_delimited", @@ -117,26 +142,51 @@ }, 
"svpack_filter_annotated": { "key": "svpack_filter_annotated", - "digest": "lljobpfqb23lu2zablgfstcozrrny5xt", + "digest": "dogptxnqarr6sgsxs53l4npm7mtktpdi", "tests": [ { "inputs": { - "sv_vcf": "${resources_file_path}/svpack_filter_annotated/input/HG002-trio.GRCh38.structural_variants.vcf.gz", - "pedigree": "${resources_file_path}/svpack_filter_annotated/input/HG002-trio.ped", + "sv_vcf": "${resources_file_path}/svpack_filter_annotated/input/sawfish/HG002.HG002-trio.joint.GRCh38.structural_variants.phased.vcf.gz", + "sample_metadata": [ + [ + "HG002-trio", + "HG002", + "HG003", + "HG004", + "1", + "2" + ], + [ + "HG002-trio", + "HG003", + ".", + ".", + "1", + "1" + ], + [ + "HG002-trio", + "HG004", + ".", + ".", + "2", + "1" + ] + ], "population_vcfs": [ "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz", - "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz" + "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz" ], "population_vcf_indices": [ "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi", - "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz.tbi" + "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi" ], "gff": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "svpack_vcf": { - "value": "${resources_file_path}/svpack_filter_annotated/output/HG002-trio.GRCh38.structural_variants.svpack.vcf.gz", + "value": "${resources_file_path}/svpack_filter_annotated/output/sawfish/HG002.HG002-trio.joint.GRCh38.structural_variants.phased.svpack.vcf.gz", "test_tasks": [ "compare_file_basename", 
"vcftools_validator", @@ -149,20 +199,45 @@ }, "slivar_svpack_tsv": { "key": "slivar_svpack_tsv", - "digest": "nyawjsg47zrbjodngljbicaww2mqsspc", + "digest": "tpp6xdwvkn22boyrs72t75y4fyrsjmv2", "tests": [ { "inputs": { - "filtered_vcf": "${resources_file_path}/slivar_svpack_tsv/input/HG002-trio.GRCh38.structural_variants.svpack.vcf.gz", - "pedigree": "${resources_file_path}/slivar_svpack_tsv/input/HG002-trio.ped", - "lof_lookup": "${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt", + "filtered_vcf": "${resources_file_path}/slivar_svpack_tsv/input/sawfish/HG002.HG002-trio.joint.GRCh38.structural_variants.phased.svpack.vcf.gz", + "sample_metadata": [ + [ + "HG002-trio", + "HG002", + "HG003", + "HG004", + "1", + "2" + ], + [ + "HG002-trio", + "HG003", + ".", + ".", + "1", + "1" + ], + [ + "HG002-trio", + "HG004", + ".", + ".", + "2", + "1" + ] + ], + "lof_lookup": "${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/lof.gnomadv4p1.lookup", "clinvar_lookup": "${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt", "phrank_lookup": "${resources_file_path}/slivar_svpack_tsv/input/HG002-trio_phrank.tsv", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "svpack_tsv": { - "value": "${resources_file_path}/slivar_svpack_tsv/output/HG002-trio.GRCh38.structural_variants.svpack.tsv", + "value": "${resources_file_path}/slivar_svpack_tsv/output/sawfish_new_lof/HG002.HG002-trio.joint.GRCh38.structural_variants.phased.svpack.tsv", "test_tasks": [ "compare_file_basename", "check_tab_delimited", @@ -187,6 +262,113 @@ "description": "", "tasks": {} }, + "workflows/wdl-common/wdl/tasks/bam_stats.wdl": { + "key": "workflows/wdl-common/wdl/tasks/bam_stats.wdl", + "name": "", + "description": "", + "tasks": { + "bam_stats": { + "key": "bam_stats", + "digest": "orfdav6zki7q3pvp6vpikfns2fjdkzrx", + "tests": [ + { + "inputs": { + "sample_id": "HG002", + "ref_name": "${ref_name}", + "bam": 
"${resources_file_path}/inputs/HG002.GRCh38.haplotagged.bam", + "bam_index": "${resources_file_path}/inputs/HG002.GRCh38.haplotagged.bam.bai", + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "bam_statistics": { + "value": "${resources_file_path}/bam_stats/output/HG002.GRCh38.bam_statistics.tsv.gz", + "test_tasks": [ + "compare_file_basename", + "check_tab_delimited", + "count_columns", + "check_gzip" + ] + }, + "read_length_plot": { + "value": "${resources_file_path}/bam_stats/output/HG002.read_length_histogram.png", + "test_tasks": [ + "calculate_md5sum", + "compare_file_basename", + "png_validator" + ] + }, + "read_quality_plot": { + "value": "${resources_file_path}/bam_stats/output/HG002.read_quality_histogram.png", + "test_tasks": [ + "calculate_md5sum", + "compare_file_basename", + "png_validator" + ] + }, + "mapq_distribution_plot": { + "value": "${resources_file_path}/bam_stats/output/HG002.GRCh38.mapq_distribution.png", + "test_tasks": [ + "calculate_md5sum", + "compare_file_basename", + "png_validator" + ] + }, + "mg_distribution_plot": { + "value": "${resources_file_path}/bam_stats/output/HG002.GRCh38.mg_distribution.png", + "test_tasks": [ + "calculate_md5sum", + "compare_file_basename", + "png_validator" + ] + }, + "stat_num_reads": { + "value": "27398", + "test_tasks": [ + "compare_string" + ] + }, + "stat_read_length_mean": { + "value": "14149.12", + "test_tasks": [ + "compare_string" + ] + }, + "stat_read_length_median": { + "value": "14666.5", + "test_tasks": [ + "compare_string" + ] + }, + "stat_read_quality_mean": { + "value": "35.91", + "test_tasks": [ + "compare_string" + ] + }, + "stat_read_quality_median": { + "value": "34.0", + "test_tasks": [ + "compare_string" + ] + }, + "stat_mapped_read_count": { + "value": "27398", + "test_tasks": [ + "compare_string" + ] + }, + "stat_mapped_percent": { + "value": "100.0", + "test_tasks": [ + "compare_string" + ] + } + } + } + ] + } + } + }, 
"workflows/wdl-common/wdl/tasks/bcftools.wdl": { "key": "workflows/wdl-common/wdl/tasks/bcftools.wdl", "name": "", @@ -194,7 +376,7 @@ "tasks": { "bcftools_stats_roh_small_variants": { "key": "bcftools_stats_roh_small_variants", - "digest": "abrbvw7exjpg4zshsttwosmgoo75f722", + "digest": "a2jlnfpnjbi7gmodm5ooyo2juxbgizzo", "tests": [ { "inputs": { @@ -343,7 +525,7 @@ }, "concat_pbsv_vcf": { "key": "concat_pbsv_vcf", - "digest": "xnv22objo4npixd7xx7rbiri7afensy6", + "digest": "dufpg3hbjkz4fmb53sdbok3hulivtier", "tests": [ { "inputs": { @@ -373,7 +555,7 @@ }, "split_vcf_by_sample": { "key": "split_vcf_by_sample", - "digest": "2hvcuqdk75vxcbe3svrz2je54o7x5umn", + "digest": "zfx6w6oiy3mzreyjkrxar7wigvygm3un", "tests": [ { "inputs": { @@ -430,6 +612,7 @@ "HG003.HG002-trio.GRCh38.structural_variants.chr6_10000000_20000000.vcf.gz.tbi", "HG004.HG002-trio.GRCh38.structural_variants.chr6_10000000_20000000.vcf.gz.tbi" ], + "exclude_uncalled": false, "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { @@ -451,7 +634,7 @@ }, "bcftools_merge": { "key": "bcftools_merge", - "digest": "7b7uqtr435y7abbnap6u5oo3jqlkyzpi", + "digest": "jrbphhh6bnjlyqyi6dxrmp7nreelbk4g", "tests": [ { "inputs": { @@ -535,28 +718,28 @@ }, "sv_stats": { "key": "sv_stats", - "digest": "sq4w257wawiwfuuquazhhuzlhdyiiwg3", + "digest": "y36kt5y2lun65yutrgqtspgheaezbbgh", "tests": [ { "inputs": { - "vcf": "${resources_file_path}/sv_stats/pbsv/HG002.GRCh38.pbsv.phased.vcf.gz", + "vcf": "${resources_file_path}/sawfish_call/output/HG002/HG002.GRCh38.structural_variants.vcf.gz", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "stat_sv_DUP_count": { - "value": "2", + "value": "0", "test_tasks": [ "compare_string" ] }, "stat_sv_DEL_count": { - "value": "19", + "value": "17", "test_tasks": [ "compare_string" ] }, "stat_sv_INS_count": { - "value": "37", + "value": "46", "test_tasks": [ "compare_string" ] @@ -568,6 +751,12 @@ ] }, "stat_sv_BND_count": { + "value": "2", + 
"test_tasks": [ + "compare_string" + ] + }, + "stat_sv_SWAP_count": { "value": "0", "test_tasks": [ "compare_string" @@ -586,7 +775,7 @@ "tasks": { "cpg_pileup": { "key": "cpg_pileup", - "digest": "3gzayuvi7ky5t3kaup3f4sgsoqsxnafq", + "digest": "iuyxupdqsyivt4ozffri4qhh6ps2ihmp", "tests": [ { "inputs": { @@ -707,7 +896,7 @@ "tasks": { "glnexus": { "key": "glnexus", - "digest": "3qe7sprlc7n62p6ozbxkva2xkkn4snlq", + "digest": "o2kth2c3iwky74yz5bfpsjymil3sbzd7", "tests": [ { "inputs": { @@ -740,93 +929,6 @@ } } }, - "workflows/wdl-common/wdl/tasks/hificnv.wdl": { - "key": "workflows/wdl-common/wdl/tasks/hificnv.wdl", - "name": "", - "description": "", - "tasks": { - "hificnv": { - "key": "hificnv", - "digest": "kswxkn6zzlkuucuumxqbiaq5cx2sdiki", - "tests": [ - { - "inputs": { - "sample_id": "HG002", - "sex": "MALE", - "aligned_bam": "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam", - "aligned_bam_index": "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam.bai", - "vcf": "${resources_file_path}/inputs/HG002.GRCh38.small_variants.vcf.gz", - "vcf_index": "${resources_file_path}/inputs/HG002.GRCh38.small_variants.vcf.gz", - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "exclude_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz", - "exclude_bed_index": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz.tbi", - "expected_male_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed", - "expected_female_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "cnv_vcf": { - "value": "${resources_file_path}/hificnv/HG002.GRCh38.hificnv.vcf.gz", - "test_tasks": [ - "compare_file_basename", - "vcftools_validator", - 
"check_gzip" - ] - }, - "copynum_bedgraph": { - "value": "${resources_file_path}/hificnv/HG002.GRCh38.hificnv.copynum.bedgraph", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_bed_columns" - ] - }, - "depth_bw": { - "value": "${resources_file_path}/hificnv/HG002.GRCh38.hificnv.depth.bw", - "test_tasks": [ - "compare_file_basename", - "bigwig_validator" - ] - }, - "maf_bw": { - "value": "${resources_file_path}/hificnv/HG002.GRCh38.hificnv.maf.bw", - "test_tasks": [ - "compare_file_basename", - "bigwig_validator" - ] - }, - "stat_DUP_count": { - "value": "0", - "test_tasks": [ - "compare_string" - ] - }, - "stat_DUP_sum": { - "value": "0", - "test_tasks": [ - "compare_string" - ] - }, - "stat_DEL_count": { - "value": "89", - "test_tasks": [ - "compare_string" - ] - }, - "stat_DEL_sum": { - "value": "2885534000", - "test_tasks": [ - "compare_string" - ] - } - } - } - ] - } - } - }, "workflows/wdl-common/wdl/tasks/hiphase.wdl": { "key": "workflows/wdl-common/wdl/tasks/hiphase.wdl", "name": "", @@ -834,7 +936,7 @@ "tasks": { "hiphase": { "key": "hiphase", - "digest": "evdi2klxze7sag3fs4p6g4h4ffqmiqzy", + "digest": "bi5osn4mwiklp3fre6mxcfpw5cmhjhcx", "tests": [ { "inputs": { @@ -910,46 +1012,6 @@ "count_columns", "check_gzip" ] - }, - "stat_phased_basepairs": { - "value": "8972304", - "test_tasks": [ - "compare_string" - ] - }, - "stat_phase_block_ng50": { - "value": "0", - "test_tasks": [ - "compare_string" - ] - }, - "stat_mapped_read_count": { - "value": "27398", - "test_tasks": [ - "compare_string" - ] - }, - "stat_mapped_percent": { - "value": "100", - "test_tasks": [ - "compare_string" - ] - }, - "mapq_distribution_plot": { - "value": "${resources_file_path}/hiphase/output/HG002/HG002.GRCh38.mapq_distribution.png", - "test_tasks": [ - "calculate_md5sum", - "compare_file_basename", - "png_validator" - ] - }, - "mg_distribution_plot": { - "value": "${resources_file_path}/hiphase/output/HG002/HG002.GRCh38.mg_distribution.png", - 
"test_tasks": [ - "calculate_md5sum", - "compare_file_basename", - "png_validator" - ] } } } @@ -957,263 +1019,59 @@ } } }, - "workflows/wdl-common/wdl/tasks/merge_bam_stats.wdl": { - "key": "workflows/wdl-common/wdl/tasks/merge_bam_stats.wdl", + "workflows/wdl-common/wdl/tasks/mosdepth.wdl": { + "key": "workflows/wdl-common/wdl/tasks/mosdepth.wdl", "name": "", "description": "", "tasks": { - "merge_bam_stats": { - "key": "merge_bam_stats", - "digest": "mjd6zpbxtabbulmq3kwhcx4cnubxaf74", + "mosdepth": { + "key": "mosdepth", + "digest": "4drmk2f7kwb57hftqv6udfy5fh4eol7d", "tests": [ { "inputs": { "sample_id": "HG002", - "bam_stats": [ - "${resources_file_path}/pbmm2_align_wgs/sequelii_kinetics_10k/HG00133.sequelii_kinetics_10k.hifi_reads.read_length_and_quality.tsv.gz" - ], + "ref_name": "${ref_name}", + "aligned_bam": "${resources_file_path}/pbmm2_align_wgs/vega_10k/HG002.vega_10k.hifi_reads.GRCh38.aligned.bam", + "aligned_bam_index": "${resources_file_path}/pbmm2_align_wgs/vega_10k/HG002.vega_10k.hifi_reads.GRCh38.aligned.bam.bai", + "infer_sex": true, "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { - "read_length_and_quality": { - "value": "${resources_file_path}/merge_bam_stats/one_input/HG002.read_length_and_quality.tsv.gz", + "summary": { + "value": "${resources_file_path}/mosdepth/infer_sex/HG002.GRCh38.mosdepth.summary.txt", "test_tasks": [ "compare_file_basename", "check_tab_delimited", "count_columns", - "check_gzip" + "calculate_md5sum" ] }, - "read_length_plot": { - "value": "${resources_file_path}/merge_bam_stats/one_input/HG002.read_length_histogram.png", + "region_bed": { + "value": "${resources_file_path}/mosdepth/infer_sex/HG002.GRCh38.regions.bed.gz", "test_tasks": [ - "calculate_md5sum", "compare_file_basename", - "png_validator" + "check_tab_delimited", + "count_bed_columns", + "check_gzip" ] }, - "read_quality_plot": { - "value": "${resources_file_path}/merge_bam_stats/one_input/HG002.read_quality_histogram.png", + 
"depth_distribution_plot": { + "value": "${resources_file_path}/mosdepth/infer_sex/HG002.GRCh38.depth_distribution.png", "test_tasks": [ "calculate_md5sum", "compare_file_basename", "png_validator" ] }, - "stat_num_reads": { - "value": "10000", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_length_mean": { - "value": "23508.73", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_length_median": { - "value": "22855.5", + "stat_mean_depth": { + "value": "0.07", "test_tasks": [ "compare_string" ] }, - "stat_read_quality_mean": { - "value": "26.97", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_quality_median": { - "value": "27.0", - "test_tasks": [ - "compare_string" - ] - } - } - }, - { - "inputs": { - "sample_id": "HG002", - "bam_stats": [ - "${resources_file_path}/pbmm2_align_wgs/sequelii_kinetics_10k/HG00133.sequelii_kinetics_10k.hifi_reads.read_length_and_quality.tsv.gz", - "${resources_file_path}/pbmm2_align_wgs/vega_10k/HG002.vega_10k.hifi_reads.read_length_and_quality.tsv.gz" - ], - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "read_length_and_quality": { - "value": "${resources_file_path}/merge_bam_stats/two_inputs/HG002.read_length_and_quality.tsv.gz", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "check_gzip" - ] - }, - "read_length_plot": { - "value": "${resources_file_path}/merge_bam_stats/two_inputs/HG002.read_length_histogram.png", - "test_tasks": [ - "calculate_md5sum", - "compare_file_basename", - "png_validator" - ] - }, - "read_quality_plot": { - "value": "${resources_file_path}/merge_bam_stats/two_inputs/HG002.read_quality_histogram.png", - "test_tasks": [ - "calculate_md5sum", - "compare_file_basename", - "png_validator" - ] - }, - "stat_num_reads": { - "value": "20000", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_length_mean": { - "value": "22673.07", - "test_tasks": [ - "compare_string" - ] - }, - 
"stat_read_length_median": { - "value": "22281.5", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_quality_mean": { - "value": "29.32", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_quality_median": { - "value": "29.0", - "test_tasks": [ - "compare_string" - ] - } - } - }, - { - "inputs": { - "sample_id": "HG002", - "bam_stats": [ - "${resources_file_path}/pbmm2_align_wgs/vega_10k_no_rq/HG002.vega_10k.no_rq.hifi_reads.read_length_and_quality.tsv.gz" - ], - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "read_length_and_quality": { - "value": "${resources_file_path}/merge_bam_stats/no_rq/HG002.read_length_and_quality.tsv.gz", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "check_gzip" - ] - }, - "read_length_plot": { - "value": "${resources_file_path}/merge_bam_stats/no_rq/HG002.read_length_histogram.png", - "test_tasks": [ - "calculate_md5sum", - "compare_file_basename", - "png_validator" - ] - }, - "stat_num_reads": { - "value": "10000", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_length_mean": { - "value": "21837.4", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_length_median": { - "value": "21385.0", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_quality_mean": { - "value": "nan", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_quality_median": { - "value": "nan", - "test_tasks": [ - "compare_string" - ] - } - } - } - ] - } - } - }, - "workflows/wdl-common/wdl/tasks/mosdepth.wdl": { - "key": "workflows/wdl-common/wdl/tasks/mosdepth.wdl", - "name": "", - "description": "", - "tasks": { - "mosdepth": { - "key": "mosdepth", - "digest": "57rmcwxx2lmhgupdu4unxkldevrr3n3u", - "tests": [ - { - "inputs": { - "sample_id": "HG002", - "ref_name": "${ref_name}", - "aligned_bam": "${resources_file_path}/pbmm2_align_wgs/vega_10k/HG002.vega_10k.hifi_reads.GRCh38.aligned.bam", - "aligned_bam_index": 
"${resources_file_path}/pbmm2_align_wgs/vega_10k/HG002.vega_10k.hifi_reads.GRCh38.aligned.bam.bai", - "infer_sex": true, - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "summary": { - "value": "${resources_file_path}/mosdepth/infer_sex/HG002.GRCh38.mosdepth.summary.txt", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - }, - "region_bed": { - "value": "${resources_file_path}/mosdepth/infer_sex/HG002.GRCh38.regions.bed.gz", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_bed_columns", - "check_gzip" - ] - }, - "depth_distribution_plot": { - "value": "${resources_file_path}/mosdepth/infer_sex/HG002.GRCh38.depth_distribution.png", - "test_tasks": [ - "calculate_md5sum", - "compare_file_basename", - "png_validator" - ] - }, - "stat_mean_depth": { - "value": "0.07", - "test_tasks": [ - "compare_string" - ] - }, - "inferred_sex": { - "value": "MALE", + "inferred_sex": { + "value": "MALE", "test_tasks": [ "compare_string" ] @@ -1258,213 +1116,53 @@ "stat_mean_depth": { "value": "0.07", "test_tasks": [ - "compare_string" - ] - }, - "inferred_sex": { - "value": "", - "test_tasks": [ - "compare_string" - ] - } - } - } - ] - } - } - }, - "workflows/wdl-common/wdl/tasks/paraphase.wdl": { - "key": "workflows/wdl-common/wdl/tasks/paraphase.wdl", - "name": "", - "description": "", - "tasks": { - "paraphase": { - "key": "paraphase", - "digest": "iz5imdibf3z2jszierngsa43lubsthss", - "tests": [ - { - "inputs": { - "sample_id": "HG002", - "aligned_bam": "${resources_file_path}/inputs/HG002.GRCh38.paraphase_regions.bam", - "aligned_bam_index": "${resources_file_path}/inputs/HG002.GRCh38.paraphase_regions.bam.bai", - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "out_json": { - "value": "${resources_file_path}/paraphase/HG002.paraphase.json", - "test_tasks": [ 
- "compare_file_basename", - "check_json" - ] - }, - "bam": { - "value": "${resources_file_path}/paraphase/HG002.paraphase.bam", - "test_tasks": [ - "compare_file_basename", - "samtools_quickcheck" - ] - }, - "vcfs_tar": { - "value": "${resources_file_path}/paraphase/HG002.paraphase_vcfs.tar.gz", - "test_tasks": [ - "compare_file_basename", - "check_gzip" - ] - } - } - } - ] - } - } - }, - "workflows/wdl-common/wdl/tasks/pbmm2.wdl": { - "key": "workflows/wdl-common/wdl/tasks/pbmm2.wdl", - "name": "", - "description": "", - "tasks": { - "pbmm2_align_wgs": { - "key": "pbmm2_align_wgs", - "digest": "4pbv52jhlacjaylj5tgqzp43a77it5xh", - "tests": [ - { - "inputs": { - "sample_id": "HG00733", - "bam": "${resources_file_path}/inputs/sequelii_aligned_10k.hifi_reads.bam", - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "aligned_bam": { - "value": "${resources_file_path}/pbmm2_align_wgs/sequelii_aligned_10k/HG00733.sequelii_aligned_10k.hifi_reads.GRCh38.aligned.bam", - "test_tasks": [ - "compare_file_basename", - "samtools_quickcheck" - ] - }, - "bam_stats": { - "value": "${resources_file_path}/pbmm2_align_wgs/sequelii_aligned_10k/HG00733.sequelii_aligned_10k.hifi_reads.read_length_and_quality.tsv.gz", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "check_gzip" - ] - } - } - }, - { - "inputs": { - "sample_id": "HG00133", - "bam": "${resources_file_path}/inputs/sequelii_kinetics_10k.hifi_reads.bam", - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "aligned_bam": { - "value": "${resources_file_path}/pbmm2_align_wgs/sequelii_kinetics_10k/HG00133.sequelii_kinetics_10k.hifi_reads.GRCh38.aligned.bam", - "test_tasks": [ - "compare_file_basename", - "samtools_quickcheck" - ] - }, - 
"bam_stats": { - "value": "${resources_file_path}/pbmm2_align_wgs/sequelii_kinetics_10k/HG00133.sequelii_kinetics_10k.hifi_reads.read_length_and_quality.tsv.gz", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "check_gzip" - ] - } - } - }, - { - "inputs": { - "sample_id": "HG00133", - "bam": "${resources_file_path}/inputs/sequelii_kinetics_10k.hifi_reads.bam", - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "strip_kinetics": false, - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "aligned_bam": { - "value": "${resources_file_path}/pbmm2_align_wgs/sequelii_kinetics_10k_strip_kinetics_false/HG00133.sequelii_kinetics_10k.hifi_reads.GRCh38.aligned.bam", - "test_tasks": [ - "compare_file_basename", - "samtools_quickcheck" - ] - }, - "bam_stats": { - "value": "${resources_file_path}/pbmm2_align_wgs/sequelii_kinetics_10k_strip_kinetics_false/HG00133.sequelii_kinetics_10k.hifi_reads.read_length_and_quality.tsv.gz", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "check_gzip" - ] - } - } - }, - { - "inputs": { - "sample_id": "HG002", - "bam": "${resources_file_path}/inputs/vega_10k.hifi_reads.bam", - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "aligned_bam": { - "value": "${resources_file_path}/pbmm2_align_wgs/vega_10k/HG002.vega_10k.hifi_reads.GRCh38.aligned.bam", - "test_tasks": [ - "compare_file_basename", - "samtools_quickcheck" + "compare_string" ] }, - "bam_stats": { - "value": "${resources_file_path}/pbmm2_align_wgs/vega_10k/HG002.vega_10k.hifi_reads.read_length_and_quality.tsv.gz", + "inferred_sex": { + "value": "", "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "check_gzip" + "compare_string" ] } } - }, + } + ] + } + } + }, + 
"workflows/wdl-common/wdl/tasks/mitorsaw.wdl": { + "key": "workflows/wdl-common/wdl/tasks/mitorsaw.wdl", + "name": "", + "description": "", + "tasks": { + "mitorsaw": { + "key": "mitorsaw", + "digest": "vzdinv7ullc2eobkrznr47mtjchvzuv4", + "tests": [ { "inputs": { - "sample_id": "HG002", - "bam": "${resources_file_path}/inputs/vega_10k.no_rq.hifi_reads.bam", + "aligned_bam": "${resources_file_path}/inputs/HG002-sprq.GRCh38.haplotagged.bam", + "aligned_bam_index": "${resources_file_path}/inputs/HG002-sprq.GRCh38.haplotagged.bam.bai", "ref_fasta": "${ref_fasta}", "ref_index": "${ref_index}", - "ref_name": "${ref_name}", + "out_prefix": "HG002.GRCh38", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { - "aligned_bam": { - "value": "${resources_file_path}/pbmm2_align_wgs/vega_10k_no_rq/HG002.vega_10k.no_rq.hifi_reads.GRCh38.aligned.bam", + "vcf": { + "value": "${resources_file_path}/mitorsaw/output/HG002.GRCh38.mitorsaw.vcf.gz", "test_tasks": [ "compare_file_basename", - "samtools_quickcheck" + "vcftools_validator", + "check_gzip" ] }, - "bam_stats": { - "value": "${resources_file_path}/pbmm2_align_wgs/vega_10k_no_rq/HG002.vega_10k.no_rq.hifi_reads.read_length_and_quality.tsv.gz", + "hap_stats": { + "value": "${resources_file_path}/mitorsaw/output/HG002.GRCh38.mitorsaw.json", "test_tasks": [ "compare_file_basename", - "check_tab_delimited", - "count_columns", - "check_gzip" + "check_json" ] } } @@ -1473,40 +1171,44 @@ } } }, - "workflows/wdl-common/wdl/tasks/pbstarphase.wdl": { - "key": "workflows/wdl-common/wdl/tasks/pbstarphase.wdl", + "workflows/wdl-common/wdl/tasks/paraphase.wdl": { + "key": "workflows/wdl-common/wdl/tasks/paraphase.wdl", "name": "", "description": "", "tasks": { - "pbstarphase_diplotype": { - "key": "pbstarphase_diplotype", - "digest": "u5goqyzkczomtamorb2oniq6altzayha", + "paraphase": { + "key": "paraphase", + "digest": "jzs4k5qgxgpjwv54mpw7vlf6nnlu2ki2", "tests": [ { "inputs": { "sample_id": "HG002", - "phased_vcf": 
"${resources_file_path}/pbstarphase_diplotype/input/HG002.GRCh38.subset.small_variants.vcf.gz", - "phased_vcf_index": "${resources_file_path}/pbstarphase_diplotype/input/HG002.GRCh38.subset.small_variants.vcf.gz.tbi", - "aligned_bam": "${resources_file_path}/pbstarphase_diplotype/input/HG002.GRCh38.subset.bam", - "aligned_bam_index": "${resources_file_path}/pbstarphase_diplotype/input/HG002.GRCh38.subset.bam.bai", + "aligned_bam": "${resources_file_path}/inputs/HG002.GRCh38.paraphase_regions.bam", + "aligned_bam_index": "${resources_file_path}/inputs/HG002.GRCh38.paraphase_regions.bam.bai", "ref_fasta": "${ref_fasta}", "ref_index": "${ref_index}", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "out_json": { - "value": "${resources_file_path}/pbstarphase_diplotype/output/HG002.pbstarphase.json", + "value": "${resources_file_path}/paraphase/HG002.paraphase.json", "test_tasks": [ "compare_file_basename", "check_json" ] }, - "pharmcat_tsv": { - "value": "${resources_file_path}/pbstarphase_diplotype/output/HG002.pharmcat.tsv", + "bam": { + "value": "${resources_file_path}/paraphase/HG002.paraphase.bam", "test_tasks": [ "compare_file_basename", - "check_tab_delimited", - "count_columns" + "samtools_quickcheck" + ] + }, + "vcfs_tar": { + "value": "${resources_file_path}/paraphase/HG002.paraphase_vcfs.tar.gz", + "test_tasks": [ + "compare_file_basename", + "check_gzip" ] } } @@ -1515,112 +1217,42 @@ } } }, - "workflows/wdl-common/wdl/tasks/pbsv.wdl": { - "key": "workflows/wdl-common/wdl/tasks/pbsv.wdl", + "workflows/wdl-common/wdl/tasks/pbstarphase.wdl": { + "key": "workflows/wdl-common/wdl/tasks/pbstarphase.wdl", "name": "", "description": "", "tasks": { - "pbsv_discover": { - "key": "pbsv_discover", - "digest": "whyivzym5tmmbwnlrne6l26w6rj3pyzj", - "tests": [ - { - "inputs": { - "aligned_bam": "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam", - "aligned_bam_index": 
"${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam.bai", - "trf_bed": "${datasets_file_path}/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "svsig": { - "value": "${resources_file_path}/pbsv_discover/HG002.GRCh38.chr6_10000000_20000000.svsig.gz", - "test_tasks": [ - "compare_file_basename", - "check_gzip", - "check_empty_lines" - ] - } - } - } - ] - }, - "pbsv_call": { - "key": "pbsv_call", - "digest": "whqgkxlukuxpojvpg6tz6rzqxmtw3lcd", + "pbstarphase_diplotype": { + "key": "pbstarphase_diplotype", + "digest": "qlxl34shcpfmx4dluqdogguhh2mcekda", "tests": [ { "inputs": { "sample_id": "HG002", - "svsigs": [ - "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.svsig.gz" - ], - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "shard_index": 5, - "regions": [ - "chr6" - ], - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "vcf": { - "value": "${resources_file_path}/pbsv_call/singleton/HG002.GRCh38.5.pbsv.vcf.gz", - "test_tasks": [ - "compare_file_basename", - "vcftools_validator", - "check_gzip" - ] - } - } - }, - { - "inputs": { - "sample_id": "HG002", - "svsigs": [ - "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.svsig.gz" - ], + "phased_small_variant_vcf": "${resources_file_path}/pbstarphase_diplotype/input/pbstarphase_1.3/HG002.GRCh38.subset.small_variants.vcf.gz", + "phased_small_variant_vcf_index": "${resources_file_path}/pbstarphase_diplotype/input/pbstarphase_1.3/HG002.GRCh38.subset.small_variants.vcf.gz.tbi", + "phased_structural_variant_vcf": "${resources_file_path}/pbstarphase_diplotype/input/pbstarphase_1.3/HG002.GRCh38.subset.structural_variants.vcf.gz", + "phased_structural_variant_vcf_index": "${resources_file_path}/pbstarphase_diplotype/input/pbstarphase_1.3/HG002.GRCh38.subset.structural_variants.vcf.gz.tbi", + "aligned_bam": 
"${resources_file_path}/pbstarphase_diplotype/input/pbstarphase_1.3/HG002.GRCh38.subset.bam", + "aligned_bam_index": "${resources_file_path}/pbstarphase_diplotype/input/pbstarphase_1.3/HG002.GRCh38.subset.bam.bai", "ref_fasta": "${ref_fasta}", "ref_index": "${ref_index}", - "ref_name": "${ref_name}", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { - "vcf": { - "value": "${resources_file_path}/pbsv_call/singleton_no_shard/HG002.GRCh38.pbsv.vcf.gz", + "out_json": { + "value": "${resources_file_path}/pbstarphase_diplotype/output/pbstarphase_1.3/HG002.pbstarphase.json", "test_tasks": [ "compare_file_basename", - "vcftools_validator", - "check_gzip" + "check_json" ] - } - } - }, - { - "inputs": { - "sample_id": "HG002-trio", - "svsigs": [ - "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.svsig.gz", - "${resources_file_path}/inputs/HG003.GRCh38.chr6_10000000_20000000.svsig.gz", - "${resources_file_path}/inputs/HG004.GRCh38.chr6_10000000_20000000.svsig.gz" - ], - "sample_count": 3, - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "shard_index": 5, - "regions": [ - "chr6" - ], - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "vcf": { - "value": "${resources_file_path}/pbsv_call/trio/HG002-trio.GRCh38.5.pbsv.vcf.gz", + }, + "pharmcat_tsv": { + "value": "${resources_file_path}/pbstarphase_diplotype/output/pbstarphase_1.3/HG002.pharmcat.tsv", "test_tasks": [ "compare_file_basename", - "vcftools_validator", - "check_gzip" + "check_tab_delimited", + "count_columns" ] } } @@ -1636,7 +1268,7 @@ "tasks": { "samtools_merge": { "key": "samtools_merge", - "digest": "lmc2sfyir2g2cile2vwmaatp46vzddbs", + "digest": "uenrqqsd3frv7cc4rriwaljrh2kpdq2d", "tests": [ { "inputs": { @@ -1649,7 +1281,7 @@ }, "output_tests": { "merged_bam": { - "value": "${resources_file_path}/samtools_merge/output/two_inputs/HG002HG00133.GRCh38.bam", + "value": 
"${resources_file_path}/samtools_merge/output/two_inputs_combine_rg_pg/HG002HG00133.GRCh38.bam", "test_tasks": [ "compare_file_basename", "samtools_quickcheck" @@ -1678,7 +1310,7 @@ "tasks": { "trgt": { "key": "trgt", - "digest": "h4unpn2yf3cc23yiieabdgov7bm7pyip", + "digest": "knw5lxj7lvuspa7bfxum7zm3n5a5hjgo", "tests": [ { "inputs": { @@ -1767,7 +1399,7 @@ }, "trgt_merge": { "key": "trgt_merge", - "digest": "yvjau74pm7ylqj2p47l2lyqyn35kozv7", + "digest": "jywsabnjsznsx6g6rxf7w5zmwee4fgkn", "tests": [ { "inputs": { @@ -1801,7 +1433,7 @@ }, "coverage_dropouts": { "key": "coverage_dropouts", - "digest": "wkgohijal32smgigy7olmcrdxpmibpwt", + "digest": "rtq5s4pftxqs4d7xkpz3dyyct3a2seyx", "tests": [ { "inputs": { @@ -1833,7 +1465,7 @@ "tasks": { "split_string": { "key": "split_string", - "digest": "q7byrkraoxoid54g6rnw32zejprmobk6", + "digest": "s4v67veguw3zkfbixrfgn5foulf7kv4p", "tests": [ { "inputs": { @@ -1846,317 +1478,80 @@ "value": [ "hello", "world" - ], - "test_tasks": [ - "compare_string" - ] - } - } - } - ] - }, - "consolidate_stats": { - "key": "consolidate_stats", - "digest": "3w2ttlxevhy2vkao2q2tj7c6n5qilxni", - "tests": [ - { - "inputs": { - "id": "String", - "stats": { - "letters": [ - "a", - "b", - "c" - ], - "numbers": [ - "1", - "2", - "3" - ] - }, - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "output_tsv": { - "value": "${resources_file_path}/consolidate_stats/String.stats.txt", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns" - ] - } - } - } - ] - } - } - }, - "workflows/wdl-common/wdl/tasks/write_ped_phrank.wdl": { - "key": "workflows/wdl-common/wdl/tasks/write_ped_phrank.wdl", - "name": "", - "description": "", - "tasks": { - "write_ped_phrank": { - "key": "write_ped_phrank", - "digest": "d3b7uvydynagkxs2w546ozi7q6oa7cim", - "tests": [ - { - "inputs": { - "id": "HG002", - "sex": "MALE", - "phenotypes": "HP:0000001", - "runtime_attributes": "${default_runtime_attributes}" - 
}, - "output_tests": { - "pedigree": { - "value": "${resources_file_path}/write_ped_phrank/output/singleton/HG002.ped", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - }, - "phrank_lookup": { - "value": "${resources_file_path}/write_ped_phrank/output/singleton/HG002_phrank.tsv", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - } - } - }, - { - "inputs": { - "id": "HG002", - "phenotypes": "HP:0000001", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "pedigree": { - "value": "${resources_file_path}/write_ped_phrank/output/singleton_no_sex/HG002.ped", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - }, - "phrank_lookup": { - "value": "${resources_file_path}/write_ped_phrank/output/singleton_no_sex/HG002_phrank.tsv", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - } - } - }, - { - "inputs": { - "id": "HG002-trio", - "family": { - "family_id": "HG002-trio", - "samples": [ - { - "sample_id": "HG002", - "sex": "MALE", - "affected": true, - "father_id": "HG003", - "mother_id": "HG004", - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - }, - { - "sample_id": "HG003", - "sex": "MALE", - "affected": false, - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - }, - { - "sample_id": "HG004", - "sex": "FEMALE", - "affected": false, - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - } - ] - }, - "phenotypes": "HP:0000001", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "pedigree": { - "value": "${resources_file_path}/write_ped_phrank/output/trio/HG002-trio.ped", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - 
"calculate_md5sum" - ] - }, - "phrank_lookup": { - "value": "${resources_file_path}/write_ped_phrank/output/trio/HG002-trio_phrank.tsv", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - } - } - }, - { - "inputs": { - "id": "HG002-trio", - "family": { - "family_id": "HG002-trio", - "samples": [ - { - "sample_id": "HG002", - "sex": "MALE", - "affected": true, - "mother_id": "HG004", - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - }, - { - "sample_id": "HG004", - "sex": "FEMALE", - "affected": false, - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - } - ] - }, - "phenotypes": "HP:0000001", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "pedigree": { - "value": "${resources_file_path}/write_ped_phrank/output/trio_no_father/HG002-trio.ped", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - }, - "phrank_lookup": { - "value": "${resources_file_path}/write_ped_phrank/output/trio_no_father/HG002-trio_phrank.tsv", + ], "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" + "compare_string" ] } } - }, + } + ] + }, + "consolidate_stats": { + "key": "consolidate_stats", + "digest": "pot7gxel373bywwte62q3rc7qyb37jgb", + "tests": [ { "inputs": { - "id": "HG002-trio", - "family": { - "family_id": "HG002-trio", - "samples": [ - { - "sample_id": "HG002", - "sex": "MALE", - "affected": true, - "father_id": "HG003", - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - }, - { - "sample_id": "HG003", - "sex": "MALE", - "affected": false, - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - } + "id": "String", + "stats": { + "letters": [ + "a", + "b", + "c" + ], + "numbers": [ + "1", + "2", + "3" ] }, - "phenotypes": "HP:0000001", + 
"msg_array": [ + "hello", + "", + "world" + ], "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { - "pedigree": { - "value": "${resources_file_path}/write_ped_phrank/output/trio_no_mother/HG002-trio.ped", + "output_tsv": { + "value": "${resources_file_path}/consolidate_stats/String.stats.txt", "test_tasks": [ "compare_file_basename", "check_tab_delimited", - "count_columns", - "calculate_md5sum" + "count_columns" ] }, - "phrank_lookup": { - "value": "${resources_file_path}/write_ped_phrank/output/trio_no_mother/HG002-trio_phrank.tsv", + "messages": { + "value": "${resources_file_path}/consolidate_stats/String.messages.txt", "test_tasks": [ "compare_file_basename", - "check_tab_delimited", - "count_columns", "calculate_md5sum" ] } } - }, + } + ] + } + } + }, + "workflows/wdl-common/wdl/tasks/write_phrank.wdl": { + "key": "workflows/wdl-common/wdl/tasks/write_phrank.wdl", + "name": "", + "description": "", + "tasks": { + "write_phrank": { + "key": "write_phrank", + "digest": "qkxmuzajagtxsk3lfl4nqif6i5zmgj6f", + "tests": [ { "inputs": { - "id": "HG002-trio", - "family": { - "family_id": "HG002-trio", - "samples": [ - { - "sample_id": "HG002", - "affected": true, - "father_id": "HG003", - "mother_id": "HG004", - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - }, - { - "sample_id": "HG003", - "affected": false, - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - }, - { - "sample_id": "HG004", - "affected": false, - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - } - ] - }, "phenotypes": "HP:0000001", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { - "pedigree": { - "value": "${resources_file_path}/write_ped_phrank/output/trio_no_sex/HG002-trio.ped", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - }, "phrank_lookup": { - "value": 
"${resources_file_path}/write_ped_phrank/output/trio_no_sex/HG002-trio_phrank.tsv", + "value": "${resources_file_path}/write_ped_phrank/output/singleton/phrank.tsv", "test_tasks": [ "compare_file_basename", "check_tab_delimited", @@ -2183,7 +1578,7 @@ "tasks": { "deepvariant_make_examples": { "key": "deepvariant_make_examples", - "digest": "54kvpa3bz3cciywbjcwtjyb5k4ifpzhc", + "digest": "zcg7prtdcfoir3lj7kvsb2mssi4gcquh", "tests": [ { "inputs": { @@ -2199,19 +1594,19 @@ "task_start_index": 0, "tasks_per_shard": 8, "total_deepvariant_tasks": 64, - "docker_image": "google/deepvariant:1.6.1", + "docker_image": "google/deepvariant:1.9.0", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "example_tfrecord_tar": { - "value": "${resources_file_path}/deepvariant_make_examples/output/shard_0/HG002.0.example_tfrecords.tar.gz", + "value": "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.0.example_tfrecords.tar.gz", "test_tasks": [ "compare_file_basename", "check_gzip" ] }, "nonvariant_site_tfrecord_tar": { - "value": "${resources_file_path}/deepvariant_make_examples/output/shard_0/HG002.0.nonvariant_site_tfrecords.tar.gz", + "value": "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.0.nonvariant_site_tfrecords.tar.gz", "test_tasks": [ "compare_file_basename", "check_gzip" @@ -2223,29 +1618,29 @@ }, "deepvariant_call_variants_cpu": { "key": "deepvariant_call_variants_cpu", - "digest": "vapflxnbarkmdemttze7e6f4svrtkpmw", + "digest": "krunswnjbnsletxpge2egpzflynzjjyr", "tests": [ { "inputs": { "sample_id": "HG002", "ref_name": "${ref_name}", "example_tfrecord_tars": [ - "${resources_file_path}/deepvariant_call_variants_cpu/input/HG002.0.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_cpu/input/HG002.8.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_cpu/input/HG002.16.example_tfrecords.tar.gz", - 
"${resources_file_path}/deepvariant_call_variants_cpu/input/HG002.24.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_cpu/input/HG002.32.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_cpu/input/HG002.40.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_cpu/input/HG002.48.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_cpu/input/HG002.56.example_tfrecords.tar.gz" + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.0.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.8.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.16.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.24.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.32.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.40.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.48.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.56.example_tfrecords.tar.gz" ], "total_deepvariant_tasks": 64, - "docker_image": "google/deepvariant:1.6.1", + "docker_image": "google/deepvariant:1.9.0", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "tfrecords_tar": { - "value": "${resources_file_path}/deepvariant_call_variants_cpu/output/HG002/HG002.GRCh38.call_variants_output.tar.gz", + "value": "${resources_file_path}/deepvariant_call_variants_cpu/output/v1p9p0/HG002.GRCh38.call_variants_output.tar.gz", "test_tasks": [ "compare_file_basename", "check_gzip" @@ -2257,29 +1652,29 @@ }, "deepvariant_call_variants_gpu": { "key": "deepvariant_call_variants_gpu", - "digest": "4tcavedqaa6xpqet6ip4f5jxvrucjdcg", + "digest": 
"oruoeho4i2o5kuzfey4f6z5or22mvrhl", "tests": [ { "inputs": { "sample_id": "HG002", "ref_name": "${ref_name}", "example_tfrecord_tars": [ - "${resources_file_path}/deepvariant_call_variants_gpu/input/HG002.0.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_gpu/input/HG002.8.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_gpu/input/HG002.16.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_gpu/input/HG002.24.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_gpu/input/HG002.32.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_gpu/input/HG002.40.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_gpu/input/HG002.48.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_gpu/input/HG002.56.example_tfrecords.tar.gz" + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.0.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.8.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.16.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.24.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.32.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.40.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.48.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.56.example_tfrecords.tar.gz" ], "total_deepvariant_tasks": 64, - "docker_image": "google/deepvariant:1.6.1-gpu", + "docker_image": "google/deepvariant:1.9.0-gpu", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "tfrecords_tar": { - "value": 
"${resources_file_path}/deepvariant_call_variants_gpu/output/HG002/HG002.GRCh38.call_variants_output.tar.gz", + "value": "${resources_file_path}/deepvariant_call_variants_gpu/output/v1p9p0/HG002.GRCh38.call_variants_output.tar.gz", "test_tasks": [ "compare_file_basename", "check_gzip" @@ -2291,32 +1686,42 @@ }, "deepvariant_postprocess_variants": { "key": "deepvariant_postprocess_variants", - "digest": "xrdc7ay2sjrniggltopieawihwkunoil", + "digest": "7j2ndq2oubqpprkj3xqp7z32eszuqb44", "tests": [ { "inputs": { "sample_id": "HG002", - "tfrecords_tar": "${resources_file_path}/deepvariant_postprocess_variants/input/HG002.GRCh38.call_variants_output.tar.gz", + "tfrecords_tar": "${resources_file_path}/deepvariant_call_variants_gpu/output/v1p9p0/HG002.GRCh38.call_variants_output.tar.gz", + "example_tfrecord_tars": [ + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.0.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.8.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.16.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.24.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.32.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.40.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.48.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.56.example_tfrecords.tar.gz" + ], "nonvariant_site_tfrecord_tars": [ - "${resources_file_path}/deepvariant_postprocess_variants/input/HG002.0.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_postprocess_variants/input/HG002.8.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_postprocess_variants/input/HG002.16.nonvariant_site_tfrecords.tar.gz", 
- "${resources_file_path}/deepvariant_postprocess_variants/input/HG002.24.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_postprocess_variants/input/HG002.32.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_postprocess_variants/input/HG002.40.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_postprocess_variants/input/HG002.48.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_postprocess_variants/input/HG002.56.nonvariant_site_tfrecords.tar.gz" + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.0.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.8.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.16.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.24.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.32.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.40.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.48.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.56.nonvariant_site_tfrecords.tar.gz" ], "ref_fasta": "${ref_fasta}", "ref_index": "${ref_index}", "ref_name": "${ref_name}", "total_deepvariant_tasks": 64, - "docker_image": "google/deepvariant:1.6.1", + "docker_image": "google/deepvariant:1.9.0", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "vcf": { - "value": "${resources_file_path}/deepvariant_postprocess_variants/output/HG002/HG002.GRCh38.small_variants.vcf.gz", + "value": "${resources_file_path}/deepvariant_postprocess_variants/output/v1p9p0/HG002.GRCh38.small_variants.vcf.gz", "test_tasks": [ "compare_file_basename", 
"vcftools_validator", @@ -2324,7 +1729,7 @@ ] }, "gvcf": { - "value": "${resources_file_path}/deepvariant_postprocess_variants/output/HG002/HG002.GRCh38.small_variants.g.vcf.gz", + "value": "${resources_file_path}/deepvariant_postprocess_variants/output/v1p9p0/HG002.GRCh38.small_variants.g.vcf.gz", "test_tasks": [ "compare_file_basename", "vcftools_validator", @@ -2337,15 +1742,86 @@ } } }, - "workflows/wdl-common/wdl/workflows/get_pbsv_splits/get_pbsv_splits.wdl": { - "key": "workflows/wdl-common/wdl/workflows/get_pbsv_splits/get_pbsv_splits.wdl", + "workflows/wdl-common/wdl/workflows/pbmm2/pbmm2.wdl": { + "key": "workflows/wdl-common/wdl/workflows/pbmm2/pbmm2.wdl", "name": "", "description": "", "tasks": { - "read_pbsv_splits": { - "key": "read_pbsv_splits", - "digest": "", - "tests": [] + "pbmm2_align_wgs": { + "key": "pbmm2_align_wgs", + "digest": "kobmohrwnpy2pzlsd7l2qvoo7ulkxth2", + "tests": [ + { + "inputs": { + "sample_id": "HG002", + "bam": "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_00.bam", + "ref_fasta": "${ref_fasta}", + "ref_index": "${ref_index}", + "ref_name": "${ref_name}", + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "aligned_bam": { + "value": "${resources_file_path}/pbmm2_align_wgs/split_vega_1k/HG002.vega_10k.hifi_reads.chunk_00.GRCh38.aligned.bam", + "test_tasks": [ + "compare_file_basename", + "samtools_quickcheck" + ] + } + } + } + ] + }, + "split_input_bam": { + "key": "split_input_bam", + "digest": "uzdcqs4d3i62rs62hwoa5pe65uv7l2cm", + "tests": [ + { + "inputs": { + "bam": "${resources_file_path}/inputs/vega_10k.hifi_reads.bam", + "max_reads_per_chunk": 1000, + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "chunks": { + "value": [ + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_00.bam", + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_01.bam", 
+ "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_02.bam", + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_03.bam", + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_04.bam", + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_05.bam", + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_06.bam", + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_07.bam", + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_08.bam", + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_09.bam" + ], + "test_tasks": [ + "compare_file_basename", + "samtools_quickcheck" + ] + } + } + }, + { + "inputs": { + "bam": "${resources_file_path}/inputs/sequelii_aligned_10k.hifi_reads.bam", + "max_reads_per_chunk": 0, + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "chunks": { + "value": [ + "${resources_file_path}/split_input_bam/output/aligned_input/sequelii_aligned_10k.hifi_reads.reset.bam" + ], + "test_tasks": [ + "compare_file_basename", + "samtools_quickcheck" + ] + } + } + } + ] } } }, @@ -2356,7 +1832,7 @@ "tasks": { "pharmcat_preprocess": { "key": "pharmcat_preprocess", - "digest": "2cfpsi2cznnpivx2ehkthn22rv7hkvdr", + "digest": "laipqonb5jd34wvmzmaja6rbm6ndlsn6", "tests": [ { "inputs": { @@ -2390,7 +1866,7 @@ }, "filter_preprocessed_vcf": { "key": "filter_preprocessed_vcf", - "digest": "4jjtxpixe6gq6p3v7lnuyxhlzottg2ib", + "digest": "juenwyp6ursmgzc7q3rjbzgqsjgstzzr", "tests": [ { "inputs": { @@ -2415,7 +1891,7 @@ }, "run_pharmcat": { "key": "run_pharmcat", - "digest": "nsabvlc6qz2u6y7ikis4pa4oljfanewp", + "digest": "oqllo6jjt64smy5j34odu7n7xiv5nawn", "tests": [ { "inputs": { @@ -2502,6 +1978,215 @@ ] } } + }, + 
"workflows/wdl-common/wdl/tasks/sawfish.wdl": { + "key": "workflows/wdl-common/wdl/tasks/sawfish.wdl", + "name": "", + "description": "", + "tasks": { + "sawfish_discover": { + "key": "sawfish_discover", + "digest": "w6ibz5424mjp5xgg3t3cd3gjklbjbsj3", + "tests": [ + { + "inputs": { + "sample_id": "HG002-minimal", + "sex": "MALE", + "aligned_bam": "${resources_file_path}/sawfish_discover/inputs/HG002-minimal.m84039_241001_220042_s2.hifi_reads.minimal.GRCh38.aligned.bam", + "aligned_bam_index": "${resources_file_path}/sawfish_discover/inputs/HG002-minimal.m84039_241001_220042_s2.hifi_reads.minimal.GRCh38.aligned.bam.bai", + "ref_fasta": "${ref_fasta}", + "ref_index": "${ref_index}", + "exclude_bed": "${datasets_file_path}/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz", + "exclude_bed_index": "${datasets_file_path}/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz.tbi", + "expected_male_bed": "${datasets_file_path}/GRCh38/sawfish/expected_cn.hg38.XY.bed", + "expected_female_bed": "${datasets_file_path}/GRCh38/sawfish/expected_cn.hg38.XX.bed", + "small_variant_vcf": "${resources_file_path}/sawfish_discover/inputs/HG002-minimal.GRCh38.small_variants.vcf.gz", + "small_variant_vcf_index": "${resources_file_path}/sawfish_discover/inputs/HG002-minimal.GRCh38.small_variants.vcf.gz.tbi", + "out_prefix": "HG002-minimal", + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "discover_tar": { + "value": "${resources_file_path}/sawfish_call/input/sawfish-2/HG002-minimal.tar", + "test_tasks": [ + "compare_file_basename" + ] + } + } + } + ] + }, + "sawfish_call": { + "key": "sawfish_call", + "digest": "zm7w4vzg4ncahd3464imevnoihzvkn6q", + "tests": [ + { + "inputs": { + "sample_ids": [ + "HG002-minimal" + ], + "discover_tars": [ + "${resources_file_path}/sawfish_call/input/sawfish-2/HG002-minimal.tar" + ], + "aligned_bams": [ + 
"${resources_file_path}/sawfish_discover/inputs/HG002-minimal.m84039_241001_220042_s2.hifi_reads.minimal.GRCh38.aligned.bam" + ], + "aligned_bam_indices": [ + "${resources_file_path}/sawfish_discover/inputs/HG002-minimal.m84039_241001_220042_s2.hifi_reads.minimal.GRCh38.aligned.bam.bai" + ], + "ref_fasta": "${ref_fasta}", + "ref_index": "${ref_index}", + "out_prefix": "HG002-minimal.GRCh38.structural_variants", + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "vcf": { + "value": "${resources_file_path}/sawfish_call/output/HG002-minimal/HG002-minimal.GRCh38.structural_variants.vcf.gz", + "test_tasks": [ + "compare_file_basename", + "check_gzip", + "vcftools_validator" + ] + }, + "supporting_reads": { + "value": "${resources_file_path}/sawfish_call/output/HG002-minimal/HG002-minimal.GRCh38.structural_variants.supporting_reads.json.gz", + "test_tasks": [ + "compare_file_basename", + "check_gzip" + ] + }, + "copynum_bedgraph": { + "value": [ + "${resources_file_path}/sawfish_call/output/HG002-minimal/HG002-minimal.GRCh38.structural_variants.copynum.bedgraph" + ], + "test_tasks": [ + "compare_file_basename", + "check_tab_delimited", + "count_bed_columns" + ] + }, + "depth_bw": { + "value": [ + "${resources_file_path}/sawfish_call/output/HG002-minimal/HG002-minimal.GRCh38.structural_variants.depth.bw" + ], + "test_tasks": [ + "compare_file_basename", + "bigwig_validator" + ] + }, + "gc_bias_corrected_depth_bw": { + "value": [ + "${resources_file_path}/sawfish_call/output/HG002-minimal/HG002-minimal.GRCh38.structural_variants.gc_bias_corrected_depth.bw" + ], + "test_tasks": [ + "compare_file_basename", + "bigwig_validator" + ] + }, + "maf_bw": { + "value": [ + "${resources_file_path}/sawfish_call/output/HG002-minimal/HG002-minimal.GRCh38.structural_variants.maf.bw" + ], + "test_tasks": [ + "compare_file_basename", + "bigwig_validator" + ] + } + } + }, + { + "inputs": { + "sample_ids": [ + "HG002-minimal", + "HG003-minimal", + 
"HG004-minimal" + ], + "discover_tars": [ + "${resources_file_path}/sawfish_call/input/sawfish-2/HG002-minimal.tar", + "${resources_file_path}/sawfish_call/input/sawfish-2/HG003-minimal.tar", + "${resources_file_path}/sawfish_call/input/sawfish-2/HG004-minimal.tar" + ], + "aligned_bams": [ + "${resources_file_path}/sawfish_call/input/sawfish-2/HG002-minimal.m84039_241001_220042_s2.hifi_reads.minimal.GRCh38.aligned.bam", + "${resources_file_path}/sawfish_call/input/sawfish-2/HG003-minimal.m84039_241002_000337_s3.hifi_reads.minimal.GRCh38.aligned.bam", + "${resources_file_path}/sawfish_call/input/sawfish-2/HG004-minimal.m84039_241002_020632_s4.hifi_reads.minimal.GRCh38.aligned.bam" + ], + "aligned_bam_indices": [ + "${resources_file_path}/sawfish_call/input/sawfish-2/HG002-minimal.m84039_241001_220042_s2.hifi_reads.minimal.GRCh38.aligned.bam.bai", + "${resources_file_path}/sawfish_call/input/sawfish-2/HG003-minimal.m84039_241002_000337_s3.hifi_reads.minimal.GRCh38.aligned.bam.bai", + "${resources_file_path}/sawfish_call/input/sawfish-2/HG004-minimal.m84039_241002_020632_s4.hifi_reads.minimal.GRCh38.aligned.bam.bai" + ], + "ref_fasta": "${ref_fasta}", + "ref_index": "${ref_index}", + "out_prefix": "HG002-trio.joint.GRCh38.structural_variants", + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "vcf": { + "value": "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG002-trio.joint.GRCh38.structural_variants.vcf.gz", + "test_tasks": [ + "compare_file_basename", + "check_gzip", + "vcftools_validator" + ] + }, + "supporting_reads": { + "value": "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG002-trio.joint.GRCh38.structural_variants.supporting_reads.json.gz", + "test_tasks": [ + "compare_file_basename", + "check_gzip" + ] + }, + "copynum_bedgraph": { + "value": [ + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG002-minimal.HG002-trio.joint.GRCh38.structural_variants.copynum.bedgraph", + 
"${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG003-minimal.HG002-trio.joint.GRCh38.structural_variants.copynum.bedgraph", + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG004-minimal.HG002-trio.joint.GRCh38.structural_variants.copynum.bedgraph" + ], + "test_tasks": [ + "compare_file_basename", + "check_tab_delimited", + "count_bed_columns" + ] + }, + "depth_bw": { + "value": [ + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG002-minimal.HG002-trio.joint.GRCh38.structural_variants.depth.bw", + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG003-minimal.HG002-trio.joint.GRCh38.structural_variants.depth.bw", + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG004-minimal.HG002-trio.joint.GRCh38.structural_variants.depth.bw" + ], + "test_tasks": [ + "compare_file_basename", + "bigwig_validator" + ] + }, + "gc_bias_corrected_depth_bw": { + "value": [ + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG002-minimal.HG002-trio.joint.GRCh38.structural_variants.gc_bias_corrected_depth.bw", + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG003-minimal.HG002-trio.joint.GRCh38.structural_variants.gc_bias_corrected_depth.bw", + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG004-minimal.HG002-trio.joint.GRCh38.structural_variants.gc_bias_corrected_depth.bw" + ], + "test_tasks": [ + "compare_file_basename", + "bigwig_validator" + ] + }, + "maf_bw": { + "value": [ + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG002-minimal.HG002-trio.joint.GRCh38.structural_variants.maf.bw", + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG003-minimal.HG002-trio.joint.GRCh38.structural_variants.maf.bw", + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG004-minimal.HG002-trio.joint.GRCh38.structural_variants.maf.bw" + ], + "test_tasks": [ + "compare_file_basename", + "bigwig_validator" + ] + } + } + } + ] + } + } 
} }, "engines": { @@ -2518,15 +2203,16 @@ "engine_params": { "pacbio-hpc": { "resources_file_path": "/pbi/vast-collections/appslabht/cromwell_tests/humanwgs", - "datasets_file_path": "/pbi/vast-collections/appslabht/cromwell_tests/humanwgs/hifi-wdl-resources-v2.0.0", - "ref_fasta": "/pbi/vast-collections/appslabht/cromwell_tests/humanwgs/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta", - "ref_index": "/pbi/vast-collections/appslabht/cromwell_tests/humanwgs/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai", + "datasets_file_path": "/pbi/vast-collections/appslabht/analysis_workflow_inputs/hifi-wgs-wdl-resources/", + "ref_fasta": "/pbi/vast-collections/appslabht/analysis_workflow_inputs/hifi-wgs-wdl-resources/GRCh38/human_GRCh38_no_alt_analysis_set.fasta", + "ref_index": "/pbi/vast-collections/appslabht/analysis_workflow_inputs/hifi-wgs-wdl-resources/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai", "default_runtime_attributes": { "backend": "HPC", "preemptible_tries": 0, "max_retries": 0, "zones": "", - "gpuType": "ampere", + "cpuPlatform": "", + "gpuType": "tesla", "container_registry": "quay.io/pacbio" }, "on_demand_runtime_attributes": { @@ -2534,7 +2220,8 @@ "preemptible_tries": 0, "max_retries": 0, "zones": "", - "gpuType": "ampere", + "cpuPlatform": "", + "gpuType": "tesla", "container_registry": "quay.io/pacbio" } } diff --git a/workflows/downstream/downstream.wdl b/workflows/downstream/downstream.wdl index 8c983756..494c110d 100644 --- a/workflows/downstream/downstream.wdl +++ b/workflows/downstream/downstream.wdl @@ -2,6 +2,7 @@ version 1.0 import "../wdl-common/wdl/structs.wdl" import "../wdl-common/wdl/tasks/hiphase.wdl" as Hiphase +import "../wdl-common/wdl/tasks/bam_stats.wdl" as Bamstats import "../wdl-common/wdl/tasks/trgt.wdl" as Trgt import "../wdl-common/wdl/tasks/bcftools.wdl" as Bcftools import "../wdl-common/wdl/tasks/cpg_pileup.wdl" as Cpgpileup @@ -41,9 +42,6 @@ workflow downstream { 
aligned_bam_index: { name: "Aligned BAI" } - pharmcat_version: { - name: "PharmCAT version" - } pharmcat_min_coverage: { name: "Minimum coverage for PharmCAT" } @@ -68,7 +66,6 @@ workflow downstream { File aligned_bam File aligned_bam_index - String pharmcat_version Int pharmcat_min_coverage File ref_map_file @@ -106,6 +103,15 @@ workflow downstream { # hiphase.phased_vcfs[1] -> phased SV VCF # hiphase.phased_vcfs[2] -> phased TRGT VCF + call Bamstats.bam_stats { + input: + sample_id = sample_id, + ref_name = ref_map["name"], + bam = hiphase.haplotagged_bam, + bam_index = hiphase.haplotagged_bam_index, + runtime_attributes = default_runtime_attributes + } + call Trgt.coverage_dropouts { input: aligned_bam = hiphase.haplotagged_bam, @@ -142,14 +148,16 @@ workflow downstream { call Pbstarphase.pbstarphase_diplotype { input: - sample_id = sample_id, - phased_vcf = hiphase.phased_vcfs[0], - phased_vcf_index = hiphase.phased_vcf_indices[0], - aligned_bam = hiphase.haplotagged_bam, - aligned_bam_index = hiphase.haplotagged_bam_index, - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion - runtime_attributes = default_runtime_attributes + sample_id = sample_id, + phased_small_variant_vcf = hiphase.phased_vcfs[0], + phased_small_variant_vcf_index = hiphase.phased_vcf_indices[0], + phased_structural_variant_vcf = hiphase.phased_vcfs[1], + phased_structural_variant_vcf_index = hiphase.phased_vcf_indices[1], + aligned_bam = hiphase.haplotagged_bam, + aligned_bam_index = hiphase.haplotagged_bam_index, + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + runtime_attributes = default_runtime_attributes } call Pharmcat.pharmcat { @@ -162,7 +170,6 @@ workflow downstream { input_tsvs = [pbstarphase_diplotype.pharmcat_tsv], ref_fasta = ref_map["fasta"], # !FileCoercion ref_index = ref_map["fasta_index"], # !FileCoercion - pharmcat_version = pharmcat_version, pharmcat_positions = 
ref_map["pharmcat_positions_vcf"], # !FileCoercion pharmcat_positions_index = ref_map["pharmcat_positions_vcf_index"], # !FileCoercion pharmcat_min_coverage = pharmcat_min_coverage, @@ -184,11 +191,21 @@ workflow downstream { File phase_haplotags = hiphase.phase_haplotags String stat_phased_basepairs = hiphase.stat_phased_basepairs String stat_phase_block_ng50 = hiphase.stat_phase_block_ng50 - String stat_mapped_read_count = hiphase.stat_mapped_read_count - String stat_mapped_percent = hiphase.stat_mapped_percent - File mapq_distribution_plot = hiphase.mapq_distribution_plot - File mg_distribution_plot = hiphase.mg_distribution_plot - File trgt_coverage_dropouts = coverage_dropouts.dropouts + + # bam stats + File bam_statistics = bam_stats.bam_statistics + File read_length_plot = bam_stats.read_length_plot + File? read_quality_plot = bam_stats.read_quality_plot + File mapq_distribution_plot = bam_stats.mapq_distribution_plot + File mg_distribution_plot = bam_stats.mg_distribution_plot + String stat_num_reads = bam_stats.stat_num_reads + String stat_read_length_mean = bam_stats.stat_read_length_mean + String stat_read_length_median = bam_stats.stat_read_length_median + String stat_read_quality_mean = bam_stats.stat_read_quality_mean + String stat_read_quality_median = bam_stats.stat_read_quality_median + String stat_mapped_read_count = bam_stats.stat_mapped_read_count + String stat_mapped_percent = bam_stats.stat_mapped_percent + File trgt_coverage_dropouts = coverage_dropouts.dropouts # small variant stats File small_variant_stats = bcftools_stats_roh_small_variants.stats @@ -202,11 +219,12 @@ workflow downstream { File indel_distribution_plot = bcftools_stats_roh_small_variants.indel_distribution_plot # sv stats - String stat_sv_DUP_count = sv_stats.stat_sv_DUP_count - String stat_sv_DEL_count = sv_stats.stat_sv_DEL_count - String stat_sv_INS_count = sv_stats.stat_sv_INS_count - String stat_sv_INV_count = sv_stats.stat_sv_INV_count - String stat_sv_BND_count = 
sv_stats.stat_sv_BND_count + String stat_sv_DUP_count = sv_stats.stat_sv_DUP_count + String stat_sv_DEL_count = sv_stats.stat_sv_DEL_count + String stat_sv_INS_count = sv_stats.stat_sv_INS_count + String stat_sv_INV_count = sv_stats.stat_sv_INV_count + String stat_sv_BND_count = sv_stats.stat_sv_BND_count + String stat_sv_SWAP_count = sv_stats.stat_sv_SWAP_count # cpg_pileup outputs File? cpg_combined_bed = cpg_pileup.combined_bed diff --git a/workflows/downstream/inputs.json b/workflows/downstream/inputs.json index 80474908..c8907973 100644 --- a/workflows/downstream/inputs.json +++ b/workflows/downstream/inputs.json @@ -8,7 +8,6 @@ "downstream.trgt_vcf_index": "File", "downstream.aligned_bam": "File", "downstream.aligned_bam_index": "File", - "downstream.pharmcat_version": "String", "downstream.pharmcat_min_coverage": "Int", "downstream.ref_map_file": "File", "downstream.default_runtime_attributes": { @@ -17,6 +16,7 @@ "gpuType": "String", "backend": "String", "preemptible_tries": "Int", - "zones": "String" + "zones": "String", + "cpuPlatform": "String" } } \ No newline at end of file diff --git a/workflows/family.inputs.json b/workflows/family.inputs.json index ec555a3c..0768bb4b 100644 --- a/workflows/family.inputs.json +++ b/workflows/family.inputs.json @@ -16,16 +16,14 @@ }, "humanwgs_family.phenotypes": "String? (optional)", "humanwgs_family.ref_map_file": "File", - "humanwgs_family.deepvariant_version": "String (optional, default = \"1.6.1\")", - "humanwgs_family.custom_deepvariant_model_tar": "File? (optional)", - "humanwgs_family.pharmcat_version": "String (optional, default = \"2.15.0\")", - "humanwgs_family.pharmcat_min_coverage": "Int (optional, default = 10)", "humanwgs_family.tertiary_map_file": "File? (optional)", + "humanwgs_family.max_reads_per_alignment_chunk": "Int (optional, default = 500000)", + "humanwgs_family.pharmcat_min_coverage": "Int (optional, default = 10)", "humanwgs_family.glnexus_mem_gb": "Int? 
(optional)", - "humanwgs_family.pbsv_call_mem_gb": "Int? (optional)", "humanwgs_family.gpu": "Boolean (optional, default = false)", "humanwgs_family.backend": "String", "humanwgs_family.zones": "String? (optional)", + "humanwgs_family.cpuPlatform": "String? (optional)", "humanwgs_family.gpuType": "String? (optional)", "humanwgs_family.container_registry": "String? (optional)", "humanwgs_family.container_namespace": "String? (optional)", diff --git a/workflows/family.wdl b/workflows/family.wdl index 789e1396..e7e3932e 100644 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -7,11 +7,9 @@ import "joint/joint.wdl" as Joint import "downstream/downstream.wdl" as Downstream import "wdl-common/wdl/tasks/bcftools.wdl" as Bcftools import "wdl-common/wdl/tasks/trgt.wdl" as Trgt -import "wdl-common/wdl/tasks/write_ped_phrank.wdl" as Write_ped_phrank import "tertiary/tertiary.wdl" as TertiaryAnalysis import "wdl-common/wdl/tasks/utilities.wdl" as Utilities - workflow humanwgs_family { meta { description: "PacBio HiFi human whole genome sequencing pipeline, with joint calling for related samples." 
@@ -21,33 +19,24 @@ workflow humanwgs_family { family: { name: "Family struct describing samples, relationships, and unaligned BAM paths" } + phenotypes: { + name: "Comma-delimited list of HPO codes for phenotypes" + } ref_map_file: { name: "TSV containing reference genome file paths; must match backend" } - deepvariant_version: { - name: "DeepVariant version" - } - custom_deepvariant_model_tar: { - name: "Custom DeepVariant model tarball" + tertiary_map_file: { + name: "TSV containing tertiary analysis file paths and thresholds; must match backend" } - pharmcat_version: { - name: "PharmCAT version" + max_reads_per_alignment_chunk: { + name: "Maximum reads per alignment chunk" } pharmcat_min_coverage: { name: "Minimum coverage for PharmCAT" } - phenotypes: { - name: "Comma-delimited list of HPO codes for phenotypes" - } - tertiary_map_file: { - name: "TSV containing tertiary analysis file paths and thresholds; must match backend" - } glnexus_mem_gb: { name: "Override GLnexus memory request (GB)" } - pbsv_call_mem_gb: { - name: "Override PBSV call memory request (GB)" - } gpu: { name: "Use GPU when possible" } @@ -58,6 +47,9 @@ workflow humanwgs_family { zones: { name: "Zones where compute will take place; required if backend is set to 'GCP'" } + cpuPlatform: { + name: "Optional minimum CPU platform to use for tasks on GCP" + } gpuType: { name: "GPU type to use; required if gpu is set to `true` for cloud backends; must match backend" } @@ -75,28 +67,21 @@ workflow humanwgs_family { input { Family family - File ref_map_file - - # These options are only intended for testing purposes. - # There is no guarantee that the pipeline will work with - # other version of DeepVariant or with custom models. - String deepvariant_version = "1.6.1" - File? custom_deepvariant_model_tar - - String pharmcat_version = "2.15.4" - Int pharmcat_min_coverage = 10 - String phenotypes = "HP:0000001" + + File ref_map_file File? 
tertiary_map_file + Int max_reads_per_alignment_chunk = 500000 + Int pharmcat_min_coverage = 10 Int? glnexus_mem_gb - Int? pbsv_call_mem_gb Boolean gpu = false # Backend configuration String backend String? zones + String? cpuPlatform String? gpuType String? container_registry @@ -109,6 +94,7 @@ workflow humanwgs_family { input: backend = backend, zones = zones, + cpuPlatform = cpuPlatform, gpuType = gpuType, container_registry = container_registry } @@ -119,20 +105,39 @@ workflow humanwgs_family { Boolean single_sample = length(family.samples) == 1 + Map[String, String] pedigree_sex = { + "MALE": "1", + "FEMALE": "2", + "": "." + } + scatter (sample in family.samples) { String sample_id = sample.sample_id + Boolean is_trio_kid = defined(sample.father_id) && defined(sample.mother_id) # !UnusedDeclaration + Boolean is_duo_kid = defined(sample.father_id) != defined(sample.mother_id) # !UnusedDeclaration + call Upstream.upstream { input: - sample_id = sample.sample_id, - sex = sample.sex, - hifi_reads = sample.hifi_reads, - ref_map_file = ref_map_file, - deepvariant_version = deepvariant_version, - custom_deepvariant_model_tar = custom_deepvariant_model_tar, - single_sample = single_sample, - gpu = gpu, - default_runtime_attributes = default_runtime_attributes + sample_id = sample.sample_id, + sex = sample.sex, + hifi_reads = sample.hifi_reads, + ref_map_file = ref_map_file, + max_reads_per_alignment_chunk = max_reads_per_alignment_chunk, + single_sample = single_sample, + gpu = gpu, + default_runtime_attributes = default_runtime_attributes } + + # write sample metadata similar to pedigree format + # family_id, sample_id, father_id, mother_id, sex, affected + Array[String] sample_metadata = [ + family.family_id, + sample.sample_id, + select_first([sample.father_id, "."]), + select_first([sample.mother_id, "."]), + pedigree_sex[upstream.inferred_sex], + if sample.affected then "2" else "1" + ] } if (!single_sample) { @@ -142,10 +147,11 @@ workflow humanwgs_family { 
sample_ids = sample_id, gvcfs = upstream.small_variant_gvcf, gvcf_indices = upstream.small_variant_gvcf_index, - svsigs = flatten(upstream.svsigs), + discover_tars = upstream.discover_tar, + aligned_bams = upstream.out_bam, + aligned_bam_indices = upstream.out_bam_index, ref_map_file = ref_map_file, glnexus_mem_gb = glnexus_mem_gb, - pbsv_call_mem_gb = pbsv_call_mem_gb, default_runtime_attributes = default_runtime_attributes } } @@ -162,53 +168,12 @@ workflow humanwgs_family { trgt_vcf_index = upstream.trgt_vcf_index[sample_index], aligned_bam = upstream.out_bam[sample_index], aligned_bam_index = upstream.out_bam_index[sample_index], - pharmcat_version = pharmcat_version, pharmcat_min_coverage = pharmcat_min_coverage, ref_map_file = ref_map_file, default_runtime_attributes = default_runtime_attributes } } - Map[String, Array[String]] stats = { - 'sample_id': sample_id, - 'num_reads': upstream.stat_num_reads, - 'read_length_mean': upstream.stat_read_length_mean, - 'read_length_median': upstream.stat_read_length_median, - 'read_quality_mean': upstream.stat_read_quality_mean, - 'read_quality_median': upstream.stat_read_quality_median, - 'mapped_read_count': downstream.stat_mapped_read_count, - 'mapped_percent': downstream.stat_mapped_percent, - 'mean_depth': upstream.stat_mean_depth, - 'inferred_sex': upstream.inferred_sex, - 'stat_phased_basepairs': downstream.stat_phased_basepairs, - 'phase_block_ng50': downstream.stat_phase_block_ng50, - 'cpg_combined_count': downstream.stat_combined_cpg_count, - 'cpg_hap1_count': downstream.stat_hap1_cpg_count, - 'cpg_hap2_count': downstream.stat_hap2_cpg_count, - 'SNV_count': downstream.stat_SNV_count, - 'TSTV_ratio': downstream.stat_TSTV_ratio, - 'HETHOM_ratio': downstream.stat_HETHOM_ratio, - 'INDEL_count': downstream.stat_INDEL_count, - 'sv_DUP_count': downstream.stat_sv_DUP_count, - 'sv_DEL_count': downstream.stat_sv_DEL_count, - 'sv_INS_count': downstream.stat_sv_INS_count, - 'sv_INV_count': downstream.stat_sv_INV_count, - 
'sv_BND_count': downstream.stat_sv_BND_count, - 'cnv_DUP_count': upstream.stat_cnv_DUP_count, - 'cnv_DEL_count': upstream.stat_cnv_DEL_count, - 'cnv_DUP_sum': upstream.stat_cnv_DUP_sum, - 'cnv_DEL_sum': upstream.stat_cnv_DEL_sum, - 'trgt_genotyped_count': upstream.stat_trgt_genotyped_count, - 'trgt_uncalled_count': upstream.stat_trgt_uncalled_count - } - - call Utilities.consolidate_stats { - input: - id = family.family_id, - stats = stats, - runtime_attributes = default_runtime_attributes - } - if (!single_sample) { call Bcftools.bcftools_merge as merge_small_variant_vcfs { input: @@ -238,23 +203,12 @@ workflow humanwgs_family { } if (defined(tertiary_map_file)) { - scatter (sample in family.samples) { - Array[File] hifi_reads = sample.hifi_reads - } - - call Write_ped_phrank.write_ped_phrank { - input: - id = family.family_id, - family = family, - phenotypes = phenotypes, - disk_size = ceil(size(flatten(hifi_reads), "GB")) + 10, - runtime_attributes = default_runtime_attributes - } - call TertiaryAnalysis.tertiary_analysis { input: - pedigree = write_ped_phrank.pedigree, - phrank_lookup = write_ped_phrank.phrank_lookup, + sample_metadata = sample_metadata, + phenotypes = phenotypes, + is_trio_kid = is_trio_kid, + is_duo_kid = is_duo_kid, small_variant_vcf = select_first([merge_small_variant_vcfs.merged_vcf, downstream.phased_small_variant_vcf[0]]), small_variant_vcf_index = select_first([merge_small_variant_vcfs.merged_vcf_index, downstream.phased_small_variant_vcf_index[0]]), sv_vcf = select_first([merge_sv_vcfs.merged_vcf, downstream.phased_sv_vcf[0]]), @@ -265,28 +219,67 @@ workflow humanwgs_family { } } + Map[String, Array[String]] stats = { + 'sample_id': sample_id, + 'num_reads': downstream.stat_num_reads, + 'read_length_mean': downstream.stat_read_length_mean, + 'read_length_median': downstream.stat_read_length_median, + 'read_quality_mean': downstream.stat_read_quality_mean, + 'read_quality_median': downstream.stat_read_quality_median, + 
'mapped_read_count': downstream.stat_mapped_read_count, + 'mapped_percent': downstream.stat_mapped_percent, + 'mean_depth': upstream.stat_mean_depth, + 'inferred_sex': upstream.inferred_sex, + 'stat_phased_basepairs': downstream.stat_phased_basepairs, + 'phase_block_ng50': downstream.stat_phase_block_ng50, + 'cpg_combined_count': downstream.stat_combined_cpg_count, + 'cpg_hap1_count': downstream.stat_hap1_cpg_count, + 'cpg_hap2_count': downstream.stat_hap2_cpg_count, + 'SNV_count': downstream.stat_SNV_count, + 'TSTV_ratio': downstream.stat_TSTV_ratio, + 'HETHOM_ratio': downstream.stat_HETHOM_ratio, + 'INDEL_count': downstream.stat_INDEL_count, + 'sv_DUP_count': downstream.stat_sv_DUP_count, + 'sv_DEL_count': downstream.stat_sv_DEL_count, + 'sv_INS_count': downstream.stat_sv_INS_count, + 'sv_INV_count': downstream.stat_sv_INV_count, + 'sv_SWAP_count': downstream.stat_sv_SWAP_count, + 'sv_BND_count': downstream.stat_sv_BND_count, + 'trgt_genotyped_count': upstream.stat_trgt_genotyped_count, + 'trgt_uncalled_count': upstream.stat_trgt_uncalled_count + } + + call Utilities.consolidate_stats { + input: + id = family.family_id, + stats = stats, + msg_array = flatten([flatten(upstream.msg)]), + runtime_attributes = default_runtime_attributes + } + output { # to maintain order of samples Array[String] sample_ids = sample_id - File stats_file = consolidate_stats.output_tsv + File stats_file = consolidate_stats.output_tsv + File msg_file = consolidate_stats.messages # bam stats - Array[File] bam_stats = upstream.read_length_and_quality - Array[File] read_length_plot = upstream.read_length_plot - Array[File?] 
read_quality_plot = upstream.read_quality_plot - Array[String] stat_num_reads = upstream.stat_num_reads - Array[String] stat_read_length_mean = upstream.stat_read_length_mean - Array[String] stat_read_length_median = upstream.stat_read_length_median - Array[String] stat_read_quality_mean = upstream.stat_read_quality_mean - Array[String] stat_read_quality_median = upstream.stat_read_quality_median + Array[File] bam_statistics = downstream.bam_statistics + Array[File] read_length_plot = downstream.read_length_plot + Array[File?] read_quality_plot = downstream.read_quality_plot + Array[File] mapq_distribution_plot = downstream.mapq_distribution_plot + Array[File] mg_distribution_plot = downstream.mg_distribution_plot + Array[String] stat_num_reads = downstream.stat_num_reads + Array[String] stat_read_length_mean = downstream.stat_read_length_mean + Array[String] stat_read_length_median = downstream.stat_read_length_median + Array[String] stat_read_quality_mean = downstream.stat_read_quality_mean + Array[String] stat_read_quality_median = downstream.stat_read_quality_median + Array[String] stat_mapped_read_count = downstream.stat_mapped_read_count + Array[String] stat_mapped_percent = downstream.stat_mapped_percent # merged, haplotagged alignments Array[File] merged_haplotagged_bam = downstream.merged_haplotagged_bam Array[File] merged_haplotagged_bam_index = downstream.merged_haplotagged_bam_index - Array[String] stat_mapped_read_count = downstream.stat_mapped_read_count - Array[String] stat_mapped_percent = downstream.stat_mapped_percent - Array[File] mapq_distribution_plot = downstream.mapq_distribution_plot - Array[File] mg_distribution_plot = downstream.mg_distribution_plot # mosdepth outputs Array[File] mosdepth_summary = upstream.mosdepth_summary @@ -318,15 +311,21 @@ workflow humanwgs_family { Array[String] stat_cpg_combined_count = downstream.stat_combined_cpg_count # sv outputs - Array[File] phased_sv_vcf = downstream.phased_sv_vcf - Array[File] 
phased_sv_vcf_index = downstream.phased_sv_vcf_index + Array[File] phased_sv_vcf = downstream.phased_sv_vcf + Array[File] phased_sv_vcf_index = downstream.phased_sv_vcf_index + File sv_supporting_reads = select_first([joint.sv_supporting_reads, upstream.sv_supporting_reads[0]]) + Array[File] sv_copynum_bedgraph = select_first([joint.sv_copynum_bedgraph, select_all(upstream.sv_copynum_bedgraph)]) + Array[File] sv_depth_bw = select_first([joint.sv_depth_bw, select_all(upstream.sv_depth_bw)]) + Array[File] sv_gc_bias_corrected_depth_bw = select_first([joint.sv_gc_bias_corrected_depth_bw, select_all(upstream.sv_gc_bias_corrected_depth_bw)]) + Array[File] sv_maf_bw = select_first([joint.sv_maf_bw, select_all(upstream.sv_maf_bw)]) # sv stats - Array[String] stat_sv_DUP_count = downstream.stat_sv_DUP_count - Array[String] stat_sv_DEL_count = downstream.stat_sv_DEL_count - Array[String] stat_sv_INS_count = downstream.stat_sv_INS_count - Array[String] stat_sv_INV_count = downstream.stat_sv_INV_count - Array[String] stat_sv_BND_count = downstream.stat_sv_BND_count + Array[String] stat_sv_DUP_count = downstream.stat_sv_DUP_count + Array[String] stat_sv_DEL_count = downstream.stat_sv_DEL_count + Array[String] stat_sv_INS_count = downstream.stat_sv_INS_count + Array[String] stat_sv_INV_count = downstream.stat_sv_INV_count + Array[String] stat_sv_SWAP_count = downstream.stat_sv_SWAP_count + Array[String] stat_sv_BND_count = downstream.stat_sv_BND_count # small variant outputs Array[File] phased_small_variant_vcf = downstream.phased_small_variant_vcf @@ -355,21 +354,15 @@ workflow humanwgs_family { Array[String] stat_trgt_uncalled_count = upstream.stat_trgt_uncalled_count # paraphase outputs - Array[File] paraphase_output_json = upstream.paraphase_output_json - Array[File] paraphase_realigned_bam = upstream.paraphase_realigned_bam - Array[File] paraphase_realigned_bam_index = upstream.paraphase_realigned_bam_index + Array[File?] 
paraphase_output_json = upstream.paraphase_output_json + Array[File?] paraphase_realigned_bam = upstream.paraphase_realigned_bam + Array[File?] paraphase_realigned_bam_index = upstream.paraphase_realigned_bam_index Array[File?] paraphase_vcfs = upstream.paraphase_vcfs - # per sample cnv outputs - Array[File] cnv_vcf = upstream.cnv_vcf - Array[File] cnv_vcf_index = upstream.cnv_vcf_index - Array[File] cnv_copynum_bedgraph = upstream.cnv_copynum_bedgraph - Array[File] cnv_depth_bw = upstream.cnv_depth_bw - Array[File] cnv_maf_bw = upstream.cnv_maf_bw - Array[String] stat_cnv_DUP_count = upstream.stat_cnv_DUP_count - Array[String] stat_cnv_DEL_count = upstream.stat_cnv_DEL_count - Array[String] stat_cnv_DUP_sum = upstream.stat_cnv_DUP_sum - Array[String] stat_cnv_DEL_sum = upstream.stat_cnv_DEL_sum + # per sample mitorsaw outputs + Array[File] mitorsaw_vcf = upstream.mitorsaw_vcf + Array[File] mitorsaw_vcf_index = upstream.mitorsaw_vcf_index + Array[File] mitorsaw_hap_stats = upstream.mitorsaw_hap_stats # PGx outputs Array[File] pbstarphase_json = downstream.pbstarphase_json @@ -387,7 +380,6 @@ workflow humanwgs_family { File? joint_trgt_vcf_index = trgt_merge.merged_vcf_index # tertiary analysis outputs - File? pedigree = write_ped_phrank.pedigree File? tertiary_small_variant_filtered_vcf = tertiary_analysis.small_variant_filtered_vcf File? tertiary_small_variant_filtered_vcf_index = tertiary_analysis.small_variant_filtered_vcf_index File? tertiary_small_variant_filtered_tsv = tertiary_analysis.small_variant_filtered_tsv @@ -398,8 +390,15 @@ workflow humanwgs_family { File? tertiary_sv_filtered_vcf_index = tertiary_analysis.sv_filtered_vcf_index File? 
tertiary_sv_filtered_tsv = tertiary_analysis.sv_filtered_tsv + # qc messages + Array[String] msg = flatten( + [ + flatten(upstream.msg) + ] + ) + # workflow metadata String workflow_name = "humanwgs_family" - String workflow_version = "v2.1.1" + if defined(debug_version) then "~{"-" + debug_version}" else "" + String workflow_version = "v3.0.2" + if defined(debug_version) then "~{"-" + debug_version}" else "" } } \ No newline at end of file diff --git a/workflows/joint/inputs.json b/workflows/joint/inputs.json index 9a0ab408..c779750b 100644 --- a/workflows/joint/inputs.json +++ b/workflows/joint/inputs.json @@ -3,16 +3,18 @@ "joint.sample_ids": "Array[String]", "joint.gvcfs": "Array[File]", "joint.gvcf_indices": "Array[File]", - "joint.svsigs": "Array[File]", + "joint.discover_tars": "Array[File]", + "joint.aligned_bams": "Array[File]", + "joint.aligned_bam_indices": "Array[File]", "joint.ref_map_file": "File", "joint.glnexus_mem_gb": "Int? (optional)", - "joint.pbsv_call_mem_gb": "Int? 
(optional)", "joint.default_runtime_attributes": { "max_retries": "Int", "container_registry": "String", "gpuType": "String", "backend": "String", "preemptible_tries": "Int", - "zones": "String" + "zones": "String", + "cpuPlatform": "String" } } \ No newline at end of file diff --git a/workflows/joint/joint.wdl b/workflows/joint/joint.wdl index 6f1f32b3..e1934121 100644 --- a/workflows/joint/joint.wdl +++ b/workflows/joint/joint.wdl @@ -2,9 +2,8 @@ version 1.0 import "../wdl-common/wdl/structs.wdl" import "../wdl-common/wdl/tasks/glnexus.wdl" as Glnexus -import "../wdl-common/wdl/tasks/pbsv.wdl" as Pbsv +import "../wdl-common/wdl/tasks/sawfish.wdl" as Sawfish import "../wdl-common/wdl/tasks/bcftools.wdl" as Bcftools -import "../wdl-common/wdl/workflows/get_pbsv_splits/get_pbsv_splits.wdl" as Pbsv_splits workflow joint { meta { @@ -24,8 +23,14 @@ workflow joint { gvcf_indices: { name: "GVCF Indices" } - svsigs: { - name: "SV Signatures" + discover_tars: { + name: "Sawfish discover output tarballs" + } + aligned_bams: { + name: "Aligned BAMs" + } + aligned_bam_indices: { + name: "Aligned BAM Indices" } ref_map_file: { name: "Reference Map File" @@ -33,9 +38,6 @@ workflow joint { glnexus_mem_gb: { name: "GLnexus Memory (GB)" } - pbsv_call_mem_gb: { - name: "PBSV Call Memory (GB)" - } default_runtime_attributes: { name: "Default Runtime Attribute Struct" } @@ -51,6 +53,18 @@ workflow joint { split_joint_small_variant_vcf_indices: { name: "Joint-call small variant VCF indices, split by sample" } + sv_supporting_reads: { + name: "Supporting reads JSON" + } + sv_copynum_bedgraph: { + name: "Copy number bedgraph" + } + sv_depth_bw: { + name: "Depth bedgraph" + } + sv_maf_bw: { + name: "MAF bedgraph" + } } input { @@ -60,65 +74,46 @@ workflow joint { Array[File] gvcfs Array[File] gvcf_indices - Array[File] svsigs + Array[File] discover_tars + Array[File] aligned_bams + Array[File] aligned_bam_indices File ref_map_file Int? glnexus_mem_gb - Int? 
pbsv_call_mem_gb RuntimeAttributes default_runtime_attributes } Map[String, String] ref_map = read_map(ref_map_file) - call Pbsv_splits.get_pbsv_splits { + call Sawfish.sawfish_call { input: - pbsv_splits_file = ref_map["pbsv_splits"], # !FileCoercion - default_runtime_attributes = default_runtime_attributes - } - - scatter (shard_index in range(length(get_pbsv_splits.pbsv_splits))) { - Array[String] region_set = get_pbsv_splits.pbsv_splits[shard_index] - - call Pbsv.pbsv_call { - input: - sample_id = family_id + ".joint", - svsigs = svsigs, - sample_count = length(sample_ids), - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion - ref_name = ref_map["name"], - shard_index = shard_index, - regions = region_set, - mem_gb = pbsv_call_mem_gb, - runtime_attributes = default_runtime_attributes - } - } - - # concatenate pbsv vcfs - call Bcftools.concat_pbsv_vcf { - input: - vcfs = pbsv_call.vcf, - vcf_indices = pbsv_call.vcf_index, - out_prefix = "~{family_id}.joint.~{ref_map['name']}.structural_variants", - runtime_attributes = default_runtime_attributes + sample_ids = sample_ids, + discover_tars = discover_tars, + aligned_bams = aligned_bams, + aligned_bam_indices = aligned_bam_indices, + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + out_prefix = "~{family_id}.joint.~{ref_map['name']}.structural_variants", + runtime_attributes = default_runtime_attributes } - String sv_vcf_basename = basename(concat_pbsv_vcf.concatenated_vcf, ".vcf.gz") + String sv_vcf_basename = basename(sawfish_call.vcf, ".vcf.gz") scatter (sample_id in sample_ids) { String split_sv_vcf_name = "~{sample_id}.~{sv_vcf_basename}.vcf.gz" String split_sv_vcf_index_name = "~{sample_id}.~{sv_vcf_basename}.vcf.gz.tbi" } - call Bcftools.split_vcf_by_sample as split_pbsv { + call Bcftools.split_vcf_by_sample as split_sawfish { input: sample_ids = sample_ids, - vcf = concat_pbsv_vcf.concatenated_vcf, - 
vcf_index = concat_pbsv_vcf.concatenated_vcf_index, + vcf = sawfish_call.vcf, + vcf_index = sawfish_call.vcf_index, split_vcf_names = split_sv_vcf_name, split_vcf_index_names = split_sv_vcf_index_name, + exclude_uncalled = false, runtime_attributes = default_runtime_attributes } @@ -150,9 +145,14 @@ workflow joint { } output { - Array[File] split_joint_structural_variant_vcfs = split_pbsv.split_vcfs - Array[File] split_joint_structural_variant_vcf_indices = split_pbsv.split_vcf_indices + Array[File] split_joint_structural_variant_vcfs = split_sawfish.split_vcfs + Array[File] split_joint_structural_variant_vcf_indices = split_sawfish.split_vcf_indices Array[File] split_joint_small_variant_vcfs = split_glnexus.split_vcfs Array[File] split_joint_small_variant_vcf_indices = split_glnexus.split_vcf_indices + File sv_supporting_reads = select_first([sawfish_call.supporting_reads]) + Array[File] sv_copynum_bedgraph = sawfish_call.copynum_bedgraph + Array[File] sv_depth_bw = sawfish_call.depth_bw + Array[File] sv_gc_bias_corrected_depth_bw = sawfish_call.gc_bias_corrected_depth_bw + Array[File] sv_maf_bw = sawfish_call.maf_bw } } diff --git a/workflows/singleton.inputs.json b/workflows/singleton.inputs.json index 472b8867..3e0d1b29 100644 --- a/workflows/singleton.inputs.json +++ b/workflows/singleton.inputs.json @@ -6,14 +6,13 @@ ], "humanwgs_singleton.phenotypes": "String? (optional)", "humanwgs_singleton.ref_map_file": "File", - "humanwgs_singleton.deepvariant_version": "String (optional, default = \"1.6.1\")", - "humanwgs_singleton.custom_deepvariant_model_tar": "File? (optional)", - "humanwgs_singleton.pharmcat_version": "String (optional, default = \"2.15.0\")", - "humanwgs_singleton.pharmcat_min_coverage": "Int (optional, default = 10)", "humanwgs_singleton.tertiary_map_file": "File? 
(optional)", + "humanwgs_singleton.max_reads_per_alignment_chunk": "Int (optional, default = 500000)", + "humanwgs_singleton.pharmcat_min_coverage": "Int (optional, default = 10)", "humanwgs_singleton.gpu": "Boolean (optional, default = false)", "humanwgs_singleton.backend": "String", "humanwgs_singleton.zones": "String? (optional)", + "humanwgs_singleton.cpuPlatform": "String? (optional)", "humanwgs_singleton.gpuType": "String? (optional)", "humanwgs_singleton.container_registry": "String? (optional)", "humanwgs_singleton.container_namespace": "String? (optional)", diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl index d8fd9a3a..2191606b 100644 --- a/workflows/singleton.wdl +++ b/workflows/singleton.wdl @@ -4,7 +4,6 @@ import "humanwgs_structs.wdl" import "wdl-common/wdl/workflows/backend_configuration/backend_configuration.wdl" as BackendConfiguration import "upstream/upstream.wdl" as Upstream import "downstream/downstream.wdl" as Downstream -import "wdl-common/wdl/tasks/write_ped_phrank.wdl" as Write_ped_phrank import "tertiary/tertiary.wdl" as TertiaryAnalysis import "wdl-common/wdl/tasks/utilities.wdl" as Utilities @@ -25,27 +24,21 @@ workflow humanwgs_singleton { hifi_reads: { name: "Array of paths to HiFi reads in unaligned BAM format." 
} + phenotypes: { + name: "Comma-delimited list of HPO codes for phenotypes" + } ref_map_file: { name: "TSV containing reference genome file paths; must match backend" } - deepvariant_version: { - name: "DeepVariant version" - } - custom_deepvariant_model_tar: { - name: "Custom DeepVariant model tarball" + tertiary_map_file: { + name: "TSV containing tertiary analysis file paths and thresholds; must match backend" } - pharmcat_version: { - name: "PharmCAT version" + max_reads_per_alignment_chunk: { + name: "Maximum reads per alignment chunk" } pharmcat_min_coverage: { name: "Minimum coverage for PharmCAT" } - phenotypes: { - name: "Comma-delimited list of HPO codes for phenotypes" - } - tertiary_map_file: { - name: "TSV containing tertiary analysis file paths and thresholds; must match backend" - } gpu: { name: "Use GPU when possible" } @@ -56,6 +49,9 @@ workflow humanwgs_singleton { zones: { name: "Zones where compute will take place; required if backend is set to 'GCP'" } + cpuPlatform: { + help: "Optional minimum CPU platform to use for tasks on GCP" + } gpuType: { name: "GPU type to use; required if gpu is set to `true` for cloud backends; must match backend" } @@ -76,25 +72,20 @@ workflow humanwgs_singleton { String? sex Array[File] hifi_reads - File ref_map_file + String phenotypes = "HP:0000001" - # These options are only intended for testing purposes. - # There is no guarantee that the pipeline will work with - # other version of DeepVariant or with custom models. - String deepvariant_version = "1.6.1" - File? custom_deepvariant_model_tar + File ref_map_file + File? tertiary_map_file - String pharmcat_version = "2.15.4" + Int max_reads_per_alignment_chunk = 500000 Int pharmcat_min_coverage = 10 - String phenotypes = "HP:0000001" - File? tertiary_map_file - Boolean gpu = false # Backend configuration String backend String? zones + String? cpuPlatform String? gpuType String? 
container_registry @@ -107,6 +98,7 @@ workflow humanwgs_singleton { input: backend = backend, zones = zones, + cpuPlatform = cpuPlatform, gpuType = gpuType, container_registry = container_registry } @@ -115,15 +107,14 @@ workflow humanwgs_singleton { call Upstream.upstream { input: - sample_id = sample_id, - sex = sex, - hifi_reads = hifi_reads, - ref_map_file = ref_map_file, - deepvariant_version = deepvariant_version, - custom_deepvariant_model_tar = custom_deepvariant_model_tar, - single_sample = true, - gpu = gpu, - default_runtime_attributes = default_runtime_attributes + sample_id = sample_id, + sex = sex, + hifi_reads = hifi_reads, + ref_map_file = ref_map_file, + max_reads_per_alignment_chunk = max_reads_per_alignment_chunk, + single_sample = true, + gpu = gpu, + default_runtime_attributes = default_runtime_attributes } call Downstream.downstream { @@ -137,19 +128,49 @@ workflow humanwgs_singleton { trgt_vcf_index = upstream.trgt_vcf_index, aligned_bam = upstream.out_bam, aligned_bam_index = upstream.out_bam_index, - pharmcat_version = pharmcat_version, pharmcat_min_coverage = pharmcat_min_coverage, ref_map_file = ref_map_file, default_runtime_attributes = default_runtime_attributes } + Map[String, String] pedigree_sex = { + "MALE": "1", + "FEMALE": "2", + "": "." 
+ } + + # write sample metadata similar to pedigree format + # family_id, sample_id, father_id, mother_id, sex, affected + Array[String] sample_metadata = [ + sample_id, sample_id, + ".", ".", + pedigree_sex[upstream.inferred_sex], "2" + ] + + if (defined(tertiary_map_file)) { + call TertiaryAnalysis.tertiary_analysis { + input: + sample_metadata = [sample_metadata], + phenotypes = phenotypes, + is_trio_kid = [false], + is_duo_kid = [false], + small_variant_vcf = downstream.phased_small_variant_vcf, + small_variant_vcf_index = downstream.phased_small_variant_vcf_index, + sv_vcf = downstream.phased_sv_vcf, + sv_vcf_index = downstream.phased_sv_vcf_index, + ref_map_file = ref_map_file, + tertiary_map_file = select_first([tertiary_map_file]), + default_runtime_attributes = default_runtime_attributes + } + } + Map[String, Array[String]] stats = { 'sample_id': [sample_id], - 'num_reads': [upstream.stat_num_reads], - 'read_length_mean': [upstream.stat_read_length_mean], - 'read_length_median': [upstream.stat_read_length_median], - 'read_quality_mean': [upstream.stat_read_quality_mean], - 'read_quality_median': [upstream.stat_read_quality_median], + 'num_reads': [downstream.stat_num_reads], + 'read_length_mean': [downstream.stat_read_length_mean], + 'read_length_median': [downstream.stat_read_length_median], + 'read_quality_mean': [downstream.stat_read_quality_mean], + 'read_quality_median': [downstream.stat_read_quality_median], 'mapped_read_count': [downstream.stat_mapped_read_count], 'mapped_percent': [downstream.stat_mapped_percent], 'mean_depth': [upstream.stat_mean_depth], @@ -167,11 +188,8 @@ workflow humanwgs_singleton { 'sv_DEL_count': [downstream.stat_sv_DEL_count], 'sv_INS_count': [downstream.stat_sv_INS_count], 'sv_INV_count': [downstream.stat_sv_INV_count], + 'sv_SWAP_count': [downstream.stat_sv_SWAP_count], 'sv_BND_count': [downstream.stat_sv_BND_count], - 'cnv_DUP_count': [upstream.stat_cnv_DUP_count], - 'cnv_DEL_count': [upstream.stat_cnv_DEL_count], - 
'cnv_DUP_sum': [upstream.stat_cnv_DUP_sum], - 'cnv_DEL_sum': [upstream.stat_cnv_DEL_sum], 'trgt_genotyped_count': [upstream.stat_trgt_genotyped_count], 'trgt_uncalled_count': [upstream.stat_trgt_uncalled_count] } @@ -180,53 +198,32 @@ workflow humanwgs_singleton { input: id = sample_id, stats = stats, + msg_array = flatten([upstream.msg]), runtime_attributes = default_runtime_attributes } - if (defined(tertiary_map_file)) { - call Write_ped_phrank.write_ped_phrank { - input: - id = sample_id, - sex = select_first([sex, upstream.inferred_sex]), - phenotypes = phenotypes, - runtime_attributes = default_runtime_attributes - } - - call TertiaryAnalysis.tertiary_analysis { - input: - pedigree = write_ped_phrank.pedigree, - phrank_lookup = write_ped_phrank.phrank_lookup, - small_variant_vcf = downstream.phased_small_variant_vcf, - small_variant_vcf_index = downstream.phased_small_variant_vcf_index, - sv_vcf = downstream.phased_sv_vcf, - sv_vcf_index = downstream.phased_sv_vcf_index, - ref_map_file = ref_map_file, - tertiary_map_file = select_first([tertiary_map_file]), - default_runtime_attributes = default_runtime_attributes - } - } - output { # consolidated stats File stats_file = consolidate_stats.output_tsv + File msg_file = consolidate_stats.messages # bam stats - File bam_stats = upstream.read_length_and_quality - File read_length_plot = upstream.read_length_plot - File? read_quality_plot = upstream.read_quality_plot - String stat_num_reads = upstream.stat_num_reads - String stat_read_length_mean = upstream.stat_read_length_mean - String stat_read_length_median = upstream.stat_read_length_median - String stat_read_quality_mean = upstream.stat_read_quality_mean - String stat_read_quality_median = upstream.stat_read_quality_median + File bam_statistics = downstream.bam_statistics + File read_length_plot = downstream.read_length_plot + File? 
read_quality_plot = downstream.read_quality_plot + File mapq_distribution_plot = downstream.mapq_distribution_plot + File mg_distribution_plot = downstream.mg_distribution_plot + String stat_num_reads = downstream.stat_num_reads + String stat_read_length_mean = downstream.stat_read_length_mean + String stat_read_length_median = downstream.stat_read_length_median + String stat_read_quality_mean = downstream.stat_read_quality_mean + String stat_read_quality_median = downstream.stat_read_quality_median + String stat_mapped_read_count = downstream.stat_mapped_read_count + String stat_mapped_percent = downstream.stat_mapped_percent # merged, haplotagged alignments File merged_haplotagged_bam = downstream.merged_haplotagged_bam File merged_haplotagged_bam_index = downstream.merged_haplotagged_bam_index - String stat_mapped_read_count = downstream.stat_mapped_read_count - String stat_mapped_percent = downstream.stat_mapped_percent - File mapq_distribution_plot = downstream.mapq_distribution_plot - File mg_distribution_plot = downstream.mg_distribution_plot # mosdepth outputs File mosdepth_summary = upstream.mosdepth_summary @@ -258,15 +255,21 @@ workflow humanwgs_singleton { String stat_cpg_combined_count = downstream.stat_combined_cpg_count # sv outputs - File phased_sv_vcf = downstream.phased_sv_vcf - File phased_sv_vcf_index = downstream.phased_sv_vcf_index + File phased_sv_vcf = downstream.phased_sv_vcf + File phased_sv_vcf_index = downstream.phased_sv_vcf_index + File sv_supporting_reads = select_first([upstream.sv_supporting_reads]) + File sv_copynum_bedgraph = select_first([upstream.sv_copynum_bedgraph]) + File sv_depth_bw = select_first([upstream.sv_depth_bw]) + File sv_gc_bias_corrected_depth_bw = select_first([upstream.sv_gc_bias_corrected_depth_bw]) + File sv_maf_bw = select_first([upstream.sv_maf_bw]) # sv stats - String stat_sv_DUP_count = downstream.stat_sv_DUP_count - String stat_sv_DEL_count = downstream.stat_sv_DEL_count - String stat_sv_INS_count = 
downstream.stat_sv_INS_count - String stat_sv_INV_count = downstream.stat_sv_INV_count - String stat_sv_BND_count = downstream.stat_sv_BND_count + String stat_sv_DUP_count = downstream.stat_sv_DUP_count + String stat_sv_DEL_count = downstream.stat_sv_DEL_count + String stat_sv_INS_count = downstream.stat_sv_INS_count + String stat_sv_INV_count = downstream.stat_sv_INV_count + String stat_sv_SWAP_count = downstream.stat_sv_SWAP_count + String stat_sv_BND_count = downstream.stat_sv_BND_count # small variant outputs File phased_small_variant_vcf = downstream.phased_small_variant_vcf @@ -295,21 +298,15 @@ workflow humanwgs_singleton { String stat_trgt_uncalled_count = upstream.stat_trgt_uncalled_count # paraphase outputs - File paraphase_output_json = upstream.paraphase_output_json - File paraphase_realigned_bam = upstream.paraphase_realigned_bam - File paraphase_realigned_bam_index = upstream.paraphase_realigned_bam_index + File? paraphase_output_json = upstream.paraphase_output_json + File? paraphase_realigned_bam = upstream.paraphase_realigned_bam + File? paraphase_realigned_bam_index = upstream.paraphase_realigned_bam_index File? paraphase_vcfs = upstream.paraphase_vcfs - # per sample cnv outputs - File cnv_vcf = upstream.cnv_vcf - File cnv_vcf_index = upstream.cnv_vcf_index - File cnv_copynum_bedgraph = upstream.cnv_copynum_bedgraph - File cnv_depth_bw = upstream.cnv_depth_bw - File cnv_maf_bw = upstream.cnv_maf_bw - String stat_cnv_DUP_count = upstream.stat_cnv_DUP_count - String stat_cnv_DEL_count = upstream.stat_cnv_DEL_count - String stat_cnv_DUP_sum = upstream.stat_cnv_DUP_sum - String stat_cnv_DEL_sum = upstream.stat_cnv_DEL_sum + # per sample mitorsaw outputs + File mitorsaw_vcf = upstream.mitorsaw_vcf + File mitorsaw_vcf_index = upstream.mitorsaw_vcf_index + File mitorsaw_hap_stats = upstream.mitorsaw_hap_stats # PGx outputs File pbstarphase_json = downstream.pbstarphase_json @@ -319,7 +316,6 @@ workflow humanwgs_singleton { File? 
pharmcat_report_json = downstream.pharmcat_report_json # tertiary analysis outputs - File? pedigree = write_ped_phrank.pedigree File? tertiary_small_variant_filtered_vcf = tertiary_analysis.small_variant_filtered_vcf File? tertiary_small_variant_filtered_vcf_index = tertiary_analysis.small_variant_filtered_vcf_index File? tertiary_small_variant_filtered_tsv = tertiary_analysis.small_variant_filtered_tsv @@ -330,8 +326,15 @@ workflow humanwgs_singleton { File? tertiary_sv_filtered_vcf_index = tertiary_analysis.sv_filtered_vcf_index File? tertiary_sv_filtered_tsv = tertiary_analysis.sv_filtered_tsv + # qc messages + Array[String] msg = flatten( + [ + upstream.msg + ] + ) + # workflow metadata - String workflow_name = "humanwgs_family" - String workflow_version = "v2.1.1" + if defined(debug_version) then "~{"-" + debug_version}" else "" + String workflow_name = "humanwgs_singleton" + String workflow_version = "v3.0.2" + if defined(debug_version) then "~{"-" + debug_version}" else "" } } diff --git a/workflows/tertiary/inputs.json b/workflows/tertiary/inputs.json index 3116e69f..8e76a0c9 100644 --- a/workflows/tertiary/inputs.json +++ b/workflows/tertiary/inputs.json @@ -13,6 +13,7 @@ "gpuType": "String", "backend": "String", "preemptible_tries": "Int", - "zones": "String" + "zones": "String", + "cpuPlatform": "String" } } \ No newline at end of file diff --git a/workflows/tertiary/tertiary.wdl b/workflows/tertiary/tertiary.wdl index 3dce4dac..71241495 100644 --- a/workflows/tertiary/tertiary.wdl +++ b/workflows/tertiary/tertiary.wdl @@ -1,6 +1,7 @@ version 1.0 import "../humanwgs_structs.wdl" +import "../wdl-common/wdl/tasks/write_phrank.wdl" as Write_phrank import "../wdl-common/wdl/tasks/utilities.wdl" as Utilities workflow tertiary_analysis { @@ -9,11 +10,17 @@ workflow tertiary_analysis { } parameter_meta { - pedigree: { - name: "PLINK pedigree (PED) format" + sample_metadata: { + name: "PLINK pedigree (PED) formatted lines." 
} - phrank_lookup: { - name: "Gene symbol -> Phrank phenotype rank score lookup table" + phenotypes: { + name: "Comma-delimited list of HPO codes for phenotypes" + } + is_trio_kid: { + name: "Boolean array indicating if the sample is a child with both parents defined" + } + is_duo_kid: { + name: "Boolean array indicating if the sample is a child with only one parent defined" } small_variant_vcf: { name: "Small variant VCF" @@ -66,8 +73,11 @@ workflow tertiary_analysis { } input { - File pedigree - File phrank_lookup + Array[Array[String]] sample_metadata + String phenotypes + + Array[Boolean] is_trio_kid # !UnusedDeclaration + Array[Boolean] is_duo_kid # !UnusedDeclaration File small_variant_vcf File small_variant_vcf_index @@ -83,6 +93,12 @@ workflow tertiary_analysis { Map[String, String] ref_map = read_map(ref_map_file) Map[String, String] tertiary_map = read_map(tertiary_map_file) + call Write_phrank.write_phrank { + input: + phenotypes = phenotypes, + runtime_attributes = default_runtime_attributes + } + call Utilities.split_string as split_gnotate_files { input: concatenated_string = tertiary_map["slivar_gnotate_files"], @@ -114,8 +130,8 @@ workflow tertiary_analysis { input: vcf = small_variant_vcf, vcf_index = small_variant_vcf_index, - pedigree = pedigree, - phrank_lookup = phrank_lookup, + sample_metadata = sample_metadata, + phrank_lookup = write_phrank.phrank_lookup, reference = ref_map["fasta"], # !FileCoercion reference_index = ref_map["fasta_index"], # !FileCoercion gff = tertiary_map["ensembl_gff"], # !FileCoercion @@ -148,7 +164,7 @@ workflow tertiary_analysis { call svpack_filter_annotated { input: sv_vcf = sv_vcf, - pedigree = pedigree, + sample_metadata = sample_metadata, population_vcfs = split_sv_vcfs.array, # !FileCoercion population_vcf_indices = split_sv_vcf_indices.array, # !FileCoercion gff = tertiary_map["ensembl_gff"], # !FileCoercion @@ -158,10 +174,10 @@ workflow tertiary_analysis { call slivar_svpack_tsv { input: filtered_vcf = 
svpack_filter_annotated.svpack_vcf, - pedigree = pedigree, + sample_metadata = sample_metadata, lof_lookup = tertiary_map["lof_lookup"], # !FileCoercion clinvar_lookup = tertiary_map["clinvar_lookup"], # !FileCoercion - phrank_lookup = phrank_lookup, + phrank_lookup = write_phrank.phrank_lookup, runtime_attributes = default_runtime_attributes } @@ -191,8 +207,8 @@ task slivar_small_variant { vcf_index: { name: "Small variant VCF index" } - pedigree: { - name: "PLINK pedigree (PED) format" + sample_metadata: { + name: "PLINK pedigree (PED) formatted lines." } phrank_lookup: { name: "Gene symbol -> Phrank phenotype rank score lookup table" @@ -257,7 +273,7 @@ task slivar_small_variant { File vcf File vcf_index - File pedigree + Array[Array[String]] sample_metadata File phrank_lookup File reference @@ -319,12 +335,17 @@ task slivar_small_variant { String vcf_basename = basename(vcf, ".vcf.gz") Int threads = 8 - Int mem_gb = 2 * threads + Int mem_gb = 16 Int disk_size = ceil((size(vcf, "GB") + size(reference, "GB") + size(gnotate_files, "GB") + size(gff, "GB") + size(lof_lookup, "GB") + size(clinvar_lookup, "GB") + size(phrank_lookup, "GB")) * 2 + 20) command <<< set -euo pipefail + cut -f1,2 ~{lof_lookup} > pli.lookup + cut -f1,3 ~{lof_lookup} > oe.lookup + cut -f1,4 ~{lof_lookup} > loeuf.lookup + cut -f1,5 ~{lof_lookup} > loeuf_decile.lookup + bcftools --version bcftools norm \ @@ -356,7 +377,7 @@ task slivar_small_variant { --sample-expr '~{sep=" && " sample_expr}' \ ~{sep=" " prefix("--gnotate ", gnotate_files)} \ --vcf ~{vcf_basename}.norm.bcf \ - --ped ~{pedigree} \ + --ped ~{write_tsv(sample_metadata)} \ | bcftools csq \ --local-csq \ --samples - \ @@ -376,7 +397,7 @@ task slivar_small_variant { --skip ~{sep=',' skip_list} \ --vcf ~{vcf_basename}.norm.slivar.vcf.gz \ --sample-field comphet_side \ - --ped ~{pedigree} \ + --ped ~{write_tsv(sample_metadata)} \ --allow-non-trios \ | add_comphet_phase.py \ | bcftools view \ @@ -392,13 +413,16 @@ task 
slivar_small_variant { --sample-field dominant \ --sample-field recessive \ --csq-field BCSQ \ - --gene-description ~{lof_lookup} \ + --gene-description pli.lookup \ + --gene-description oe.lookup \ + --gene-description loeuf.lookup \ + --gene-description loeuf_decile.lookup \ --gene-description ~{clinvar_lookup} \ --gene-description ~{phrank_lookup} \ - --ped ~{pedigree} \ + --ped ~{write_tsv(sample_metadata)} \ --out /dev/stdout \ ~{vcf_basename}.norm.slivar.vcf.gz \ - | sed '1 s/gene_description_1/lof/;s/gene_description_2/clinvar/;s/gene_description_3/phrank/;' \ + | sed '1 s/gene_description_1/pLI/;s/gene_description_2/oe.lof/;s/gene_description_3/LOEUF/;s/gene_description_4/LOEUF_decile/;s/gene_description_5/clinvar/;s/gene_description_6/phrank/;' \ > ~{vcf_basename}.norm.slivar.tsv slivar tsv \ @@ -406,13 +430,16 @@ task slivar_small_variant { --sample-field slivar_comphet \ --info-field slivar_comphet \ --csq-field BCSQ \ - --gene-description ~{lof_lookup} \ + --gene-description pli.lookup \ + --gene-description oe.lookup \ + --gene-description loeuf.lookup \ + --gene-description loeuf_decile.lookup \ --gene-description ~{clinvar_lookup} \ --gene-description ~{phrank_lookup} \ - --ped ~{pedigree} \ + --ped ~{write_tsv(sample_metadata)} \ --out /dev/stdout \ ~{vcf_basename}.norm.slivar.compound_hets.vcf.gz \ - | sed '1 s/gene_description_1/lof/;s/gene_description_2/clinvar/;s/gene_description_3/phrank/;' \ + | sed '1 s/gene_description_1/pLI/;s/gene_description_2/oe.lof/;s/gene_description_3/LOEUF/;s/gene_description_4/LOEUF_decile/;s/gene_description_5/clinvar/;s/gene_description_6/phrank/;' \ > ~{vcf_basename}.norm.slivar.compound_hets.tsv >>> @@ -428,13 +455,14 @@ task slivar_small_variant { runtime { docker: "~{runtime_attributes.container_registry}/slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa" cpu: threads - memory: mem_gb + " GB" + memory: mem_gb + " GiB" disk: disk_size + " GB" disks: "local-disk " + disk_size + " 
HDD" preemptible: runtime_attributes.preemptible_tries maxRetries: runtime_attributes.max_retries awsBatchRetryAttempts: runtime_attributes.max_retries zones: runtime_attributes.zones + cpuPlatform: runtime_attributes.cpuPlatform } } @@ -444,8 +472,8 @@ task svpack_filter_annotated { } parameter_meta { - pedigree: { - name: "PLINK pedigree (PED) format" + sample_metadata: { + name: "PLINK pedigree (PED) formatted lines." } sv_vcf: { name: "Structural variant VCF" @@ -472,7 +500,7 @@ task svpack_filter_annotated { input { File sv_vcf - File pedigree + Array[Array[String]] sample_metadata Array[File] population_vcfs Array[File] population_vcf_indices @@ -492,7 +520,7 @@ task svpack_filter_annotated { echo "svpack version:" cat /opt/svpack/.git/HEAD - affected=$(awk -F'\t' '$6 ~ /2/ {{ print $2 }}' ~{pedigree} | paste -sd',') # TODO: potentially replace awk + affected=$(awk -F'\t' '$6 ~ /2/ {{ print $2 }}' ~{write_tsv(sample_metadata)} | paste -sd',') # TODO: potentially replace awk svpack \ filter \ @@ -527,13 +555,14 @@ task svpack_filter_annotated { runtime { docker: "~{runtime_attributes.container_registry}/svpack@sha256:628e9851e425ed8044a907d33de04043d1ef02d4d2b2667cf2e9a389bb011eba" cpu: threads - memory: mem_gb + " GB" + memory: mem_gb + " GiB" disk: disk_size + " GB" disks: "local-disk " + disk_size + " HDD" preemptible: runtime_attributes.preemptible_tries maxRetries: runtime_attributes.max_retries awsBatchRetryAttempts: runtime_attributes.max_retries zones: runtime_attributes.zones + cpuPlatform: runtime_attributes.cpuPlatform } } @@ -546,8 +575,8 @@ task slivar_svpack_tsv { filtered_vcf : { name: "Filtered and annotated structural variant VCF" } - pedigree: { - name: "PLINK pedigree (PED) format" + sample_metadata: { + name: "PLINK pedigree (PED) formatted lines." 
} lof_lookup: { name: "Gene symbol -> LoF score lookup table" @@ -569,7 +598,7 @@ task slivar_svpack_tsv { input { File filtered_vcf - File pedigree + Array[Array[String]] sample_metadata File lof_lookup File clinvar_lookup File phrank_lookup @@ -580,8 +609,6 @@ task slivar_svpack_tsv { Array[String] info_fields = [ 'SVTYPE', 'SVLEN', - 'SVANN', - 'CIPOS', 'MATEID', 'END' ] @@ -595,6 +622,11 @@ task slivar_svpack_tsv { command <<< set -euo pipefail + cut -f1,2 ~{lof_lookup} > pli.lookup + cut -f1,3 ~{lof_lookup} > oe.lookup + cut -f1,4 ~{lof_lookup} > loeuf.lookup + cut -f1,5 ~{lof_lookup} > loeuf_decile.lookup + # slivar has no version option slivar expr 2>&1 | grep -Eo 'slivar version: [0-9.]+ [0-9a-f]+' @@ -603,13 +635,16 @@ task slivar_svpack_tsv { --sample-field hetalt \ --sample-field homalt \ --csq-field BCSQ \ - --gene-description ~{lof_lookup} \ + --gene-description pli.lookup \ + --gene-description oe.lookup \ + --gene-description loeuf.lookup \ + --gene-description loeuf_decile.lookup \ --gene-description ~{clinvar_lookup} \ --gene-description ~{phrank_lookup} \ - --ped ~{pedigree} \ + --ped ~{write_tsv(sample_metadata)} \ --out /dev/stdout \ ~{filtered_vcf} \ - | sed '1 s/gene_description_1/lof/;s/gene_description_2/clinvar/;s/gene_description_3/phrank/;' \ + | sed '1 s/gene_description_1/pLI/;s/gene_description_2/oe.lof/;s/gene_description_3/LOEUF/;s/gene_description_4/LOEUF_decile/;s/gene_description_5/clinvar/;s/gene_description_6/phrank/;' \ > ~{filtered_vcf_basename}.tsv >>> @@ -620,12 +655,13 @@ task slivar_svpack_tsv { runtime { docker: "~{runtime_attributes.container_registry}/slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa" cpu: threads - memory: mem_gb + " GB" + memory: mem_gb + " GiB" disk: disk_size + " GB" disks: "local-disk " + disk_size + " HDD" preemptible: runtime_attributes.preemptible_tries maxRetries: runtime_attributes.max_retries awsBatchRetryAttempts: runtime_attributes.max_retries zones: 
runtime_attributes.zones + cpuPlatform: runtime_attributes.cpuPlatform } } diff --git a/workflows/upstream/inputs.json b/workflows/upstream/inputs.json index ac0324b3..f007688a 100644 --- a/workflows/upstream/inputs.json +++ b/workflows/upstream/inputs.json @@ -3,8 +3,7 @@ "upstream.sex": "String? (optional)", "upstream.hifi_reads": "Array[File]", "upstream.ref_map_file": "File", - "upstream.deepvariant_version": "String", - "upstream.custom_deepvariant_model_tar": "File? (optional)", + "upstream.max_reads_per_alignment_chunk": "Int", "upstream.single_sample": "Boolean (optional, default = false)", "upstream.gpu": "Boolean", "upstream.default_runtime_attributes": { @@ -13,6 +12,7 @@ "gpuType": "String", "backend": "String", "preemptible_tries": "Int", - "zones": "String" + "zones": "String", + "cpuPlatform": "String" } } \ No newline at end of file diff --git a/workflows/upstream/upstream.wdl b/workflows/upstream/upstream.wdl index c2c56dbf..577f046b 100644 --- a/workflows/upstream/upstream.wdl +++ b/workflows/upstream/upstream.wdl @@ -1,17 +1,14 @@ version 1.0 import "../wdl-common/wdl/structs.wdl" -import "../wdl-common/wdl/tasks/pbmm2.wdl" as Pbmm2 -import "../wdl-common/wdl/tasks/merge_bam_stats.wdl" as MergeBamStats -import "../wdl-common/wdl/tasks/pbsv.wdl" as Pbsv -import "../wdl-common/wdl/tasks/bcftools.wdl" as Bcftools +import "../wdl-common/wdl/workflows/pbmm2/pbmm2.wdl" as Pbmm2 +import "../wdl-common/wdl/tasks/sawfish.wdl" as Sawfish import "../wdl-common/wdl/workflows/deepvariant/deepvariant.wdl" as DeepVariant import "../wdl-common/wdl/tasks/samtools.wdl" as Samtools import "../wdl-common/wdl/tasks/mosdepth.wdl" as Mosdepth import "../wdl-common/wdl/tasks/trgt.wdl" as Trgt import "../wdl-common/wdl/tasks/paraphase.wdl" as Paraphase -import "../wdl-common/wdl/tasks/hificnv.wdl" as Hificnv -import "../wdl-common/wdl/workflows/get_pbsv_splits/get_pbsv_splits.wdl" as Pbsv_splits +import "../wdl-common/wdl/tasks/mitorsaw.wdl" as Mitorsaw workflow upstream 
{ meta { @@ -32,11 +29,8 @@ workflow upstream { ref_map_file: { name: "TSV containing reference genome information" } - deepvariant_version: { - name: "DeepVariant version" - } - custom_deepvariant_model_tar: { - name: "Custom DeepVariant model tarball" + max_reads_per_alignment_chunk: { + name: "Maximum reads per alignment chunk" } single_sample: { name: "Single sample workflow" @@ -56,8 +50,7 @@ workflow upstream { File ref_map_file - String deepvariant_version - File? custom_deepvariant_model_tar + Int max_reads_per_alignment_chunk Boolean single_sample = false @@ -69,44 +62,31 @@ workflow upstream { Map[String, String] ref_map = read_map(ref_map_file) scatter (hifi_read_bam in hifi_reads) { - call Pbmm2.pbmm2_align_wgs as pbmm2_align { - input: - sample_id = sample_id, - bam = hifi_read_bam, - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion - ref_name = ref_map["name"], - runtime_attributes = default_runtime_attributes - } - call Pbsv.pbsv_discover { + call Pbmm2.pbmm2 as pbmm2 { input: - aligned_bam = pbmm2_align.aligned_bam, - aligned_bam_index = pbmm2_align.aligned_bam_index, - trf_bed = ref_map["pbsv_tandem_repeat_bed"], # !FileCoercion - runtime_attributes = default_runtime_attributes + sample_id = sample_id, + bam = hifi_read_bam, + max_reads_per_chunk = max_reads_per_alignment_chunk, + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + ref_name = ref_map["name"], + default_runtime_attributes = default_runtime_attributes } } - call MergeBamStats.merge_bam_stats { - input: - sample_id = sample_id, - bam_stats = pbmm2_align.bam_stats, - runtime_attributes = default_runtime_attributes - } - # merge aligned bams if there are multiple - if (length(pbmm2_align.aligned_bam) > 1) { + if (length(flatten(pbmm2.aligned_bams)) > 1) { call Samtools.samtools_merge { input: - bams = pbmm2_align.aligned_bam, + bams = flatten(pbmm2.aligned_bams), out_prefix = 
"~{sample_id}.~{ref_map['name']}", runtime_attributes = default_runtime_attributes } } # select the merged bam if it exists, otherwise select the first (only) aligned bam - File aligned_bam_data = select_first([samtools_merge.merged_bam, pbmm2_align.aligned_bam[0]]) - File aligned_bam_index = select_first([samtools_merge.merged_bam_index, pbmm2_align.aligned_bam_index[0]]) + File aligned_bam_data = select_first([samtools_merge.merged_bam, flatten(pbmm2.aligned_bams)[0]]) + File aligned_bam_index = select_first([samtools_merge.merged_bam_index, flatten(pbmm2.aligned_bam_indices)[0]]) call Mosdepth.mosdepth { input: @@ -118,29 +98,50 @@ workflow upstream { runtime_attributes = default_runtime_attributes } + String qc_sex = + if (defined(sex) && (mosdepth.inferred_sex != sex)) + then "~{sample_id}: Reported sex ~{sex} does not match inferred sex ~{mosdepth.inferred_sex}." + else "" + call DeepVariant.deepvariant { input: - sample_id = sample_id, - aligned_bams = [aligned_bam_data], - aligned_bam_indices = [aligned_bam_index], - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion - ref_name = ref_map["name"], - deepvariant_version = deepvariant_version, - custom_deepvariant_model_tar = custom_deepvariant_model_tar, - gpu = gpu, - default_runtime_attributes = default_runtime_attributes + sample_id = sample_id, + aligned_bams = [aligned_bam_data], + aligned_bam_indices = [aligned_bam_index], + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + ref_name = ref_map["name"], + gpu = gpu, + default_runtime_attributes = default_runtime_attributes + } + + call Sawfish.sawfish_discover { + input: + sample_id = sample_id, + sex = mosdepth.inferred_sex, + aligned_bam = aligned_bam_data, + aligned_bam_index = aligned_bam_index, + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + exclude_bed = ref_map["sawfish_exclude_bed"], # 
!FileCoercion + exclude_bed_index = ref_map["sawfish_exclude_bed_index"], # !FileCoercion + expected_male_bed = ref_map["sawfish_expected_bed_male"], # !FileCoercion + expected_female_bed = ref_map["sawfish_expected_bed_female"], # !FileCoercion + small_variant_vcf = deepvariant.vcf, + small_variant_vcf_index = deepvariant.vcf_index, + out_prefix = "~{sample_id}", + runtime_attributes = default_runtime_attributes } call Trgt.trgt { input: sample_id = sample_id, - sex = select_first([sex, mosdepth.inferred_sex]), + sex = mosdepth.inferred_sex, aligned_bam = aligned_bam_data, aligned_bam_index = aligned_bam_index, - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion - trgt_bed = ref_map["trgt_tandem_repeat_bed"], # !FileCoercion + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + trgt_bed = ref_map["trgt_tandem_repeat_bed"], # !FileCoercion out_prefix = "~{sample_id}.~{ref_map['name']}", runtime_attributes = default_runtime_attributes } @@ -155,68 +156,36 @@ workflow upstream { runtime_attributes = default_runtime_attributes } - call Hificnv.hificnv { + call Mitorsaw.mitorsaw { input: - sample_id = sample_id, - sex = select_first([sex, mosdepth.inferred_sex]), - aligned_bam = aligned_bam_data, - aligned_bam_index = aligned_bam_index, - vcf = deepvariant.vcf, - vcf_index = deepvariant.vcf_index, - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion - ref_name = ref_map["name"], - exclude_bed = ref_map["hificnv_exclude_bed"], # !FileCoercion - exclude_bed_index = ref_map["hificnv_exclude_bed_index"], # !FileCoercion - expected_male_bed = ref_map["hificnv_expected_bed_male"], # !FileCoercion - expected_female_bed = ref_map["hificnv_expected_bed_female"], # !FileCoercion - runtime_attributes = default_runtime_attributes + aligned_bam = aligned_bam_data, + aligned_bam_index = aligned_bam_index, + ref_fasta = ref_map["fasta"], 
# !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + out_prefix = "~{sample_id}.~{ref_map['name']}", + runtime_attributes = default_runtime_attributes } if (single_sample) { - call Pbsv_splits.get_pbsv_splits { + call Sawfish.sawfish_call { input: - pbsv_splits_file = ref_map["pbsv_splits"], # !FileCoercion - default_runtime_attributes = default_runtime_attributes - } - - scatter (shard_index in range(length(get_pbsv_splits.pbsv_splits))) { - Array[String] region_set = get_pbsv_splits.pbsv_splits[shard_index] - - call Pbsv.pbsv_call { - input: - sample_id = sample_id, - svsigs = pbsv_discover.svsig, - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion - ref_name = ref_map["name"], - shard_index = shard_index, - regions = region_set, - runtime_attributes = default_runtime_attributes - } + sample_ids = [sample_id], + discover_tars = [sawfish_discover.discover_tar], + aligned_bams = [aligned_bam_data], + aligned_bam_indices = [aligned_bam_index], + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + out_prefix = "~{sample_id}.~{ref_map['name']}.structural_variants", + runtime_attributes = default_runtime_attributes } - # concatenate pbsv vcfs - call Bcftools.concat_pbsv_vcf { - input: - vcfs = pbsv_call.vcf, - vcf_indices = pbsv_call.vcf_index, - out_prefix = "~{sample_id}.~{ref_map['name']}.structural_variants", - runtime_attributes = default_runtime_attributes - } + File copynum_bedgraph_output = sawfish_call.copynum_bedgraph[0] + File depth_bw_output = sawfish_call.depth_bw[0] + File gc_bias_corrected_depth_bw_output = sawfish_call.gc_bias_corrected_depth_bw[0] + File maf_bw_output = sawfish_call.maf_bw[0] } output { - # bam stats - File read_length_and_quality = merge_bam_stats.read_length_and_quality - File read_length_plot = merge_bam_stats.read_length_plot - File? 
read_quality_plot = merge_bam_stats.read_quality_plot - String stat_num_reads = merge_bam_stats.stat_num_reads - String stat_read_length_mean = merge_bam_stats.stat_read_length_mean - String stat_read_length_median = merge_bam_stats.stat_read_length_median - String stat_read_quality_mean = merge_bam_stats.stat_read_quality_mean - String stat_read_quality_median = merge_bam_stats.stat_read_quality_median - # alignments File out_bam = aligned_bam_data File out_bam_index = aligned_bam_index @@ -229,13 +198,17 @@ workflow upstream { String inferred_sex = mosdepth.inferred_sex String stat_mean_depth = mosdepth.stat_mean_depth - # per movie sv signatures - # if we've already called variants, no need to keep these - Array[File] svsigs = if single_sample then [] else pbsv_discover.svsig + # per sample sv signatures + File discover_tar = sawfish_discover.discover_tar - # pbsv outputs for single sample - File? sv_vcf = concat_pbsv_vcf.concatenated_vcf - File? sv_vcf_index = concat_pbsv_vcf.concatenated_vcf_index + # sawfish outputs for single sample + File? sv_vcf = sawfish_call.vcf + File? sv_vcf_index = sawfish_call.vcf_index + File? sv_supporting_reads = sawfish_call.supporting_reads + File? sv_copynum_bedgraph = copynum_bedgraph_output + File? sv_depth_bw = depth_bw_output + File? sv_gc_bias_corrected_depth_bw = gc_bias_corrected_depth_bw_output + File? sv_maf_bw = maf_bw_output # small variant outputs File small_variant_vcf = deepvariant.vcf @@ -252,20 +225,24 @@ workflow upstream { String stat_trgt_uncalled_count = trgt.stat_uncalled_count # paraphase outputs - File paraphase_output_json = paraphase.out_json - File paraphase_realigned_bam = paraphase.bam - File paraphase_realigned_bam_index = paraphase.bam_index + File? paraphase_output_json = paraphase.out_json + File? paraphase_realigned_bam = paraphase.bam + File? paraphase_realigned_bam_index = paraphase.bam_index File? 
paraphase_vcfs = paraphase.vcfs_tar - # per sample hificnv outputs - File cnv_vcf = hificnv.cnv_vcf - File cnv_vcf_index = hificnv.cnv_vcf_index - File cnv_copynum_bedgraph = hificnv.copynum_bedgraph - File cnv_depth_bw = hificnv.depth_bw - File cnv_maf_bw = hificnv.maf_bw - String stat_cnv_DUP_count = hificnv.stat_DUP_count - String stat_cnv_DEL_count = hificnv.stat_DEL_count - String stat_cnv_DUP_sum = hificnv.stat_DUP_sum - String stat_cnv_DEL_sum = hificnv.stat_DEL_sum + # per sample mitorsaw outputs + File mitorsaw_vcf = mitorsaw.vcf + File mitorsaw_vcf_index = mitorsaw.vcf_index + File mitorsaw_hap_stats = mitorsaw.hap_stats + + # qc messages + Array[String] msg = flatten( + [ + flatten(pbmm2.msg), + [qc_sex], + trgt.msg, + sawfish_discover.msg + ] + ) } } diff --git a/workflows/wdl-common b/workflows/wdl-common index 7dc8a8ca..2ae390d4 160000 --- a/workflows/wdl-common +++ b/workflows/wdl-common @@ -1 +1 @@ -Subproject commit 7dc8a8cab22fcd8b5e4c68fd55afdf9630c3dc3c +Subproject commit 2ae390d4ed6b80dd2a2ef10c960832ffa8c7d1d3