From b92092c17646107fa52c7e4d33118b6b38b1315e Mon Sep 17 00:00:00 2001 From: William Rowell Date: Tue, 10 Dec 2024 15:38:25 -0800 Subject: [PATCH 01/61] Added sawfish. --- wdl-ci.config.json | 287 ++++++++++++++++------------ workflows/downstream/downstream.wdl | 11 +- workflows/family.inputs.json | 1 - workflows/family.wdl | 27 ++- workflows/joint/inputs.json | 5 +- workflows/joint/joint.wdl | 71 +++---- workflows/singleton.wdl | 14 +- workflows/upstream/upstream.wdl | 73 +++---- workflows/wdl-common | 2 +- 9 files changed, 253 insertions(+), 238 deletions(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index de19f1d8..7af2bc10 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -535,11 +535,11 @@ }, "sv_stats": { "key": "sv_stats", - "digest": "sq4w257wawiwfuuquazhhuzlhdyiiwg3", + "digest": "foqixa2ryrx7e64ymqylurlfad7gpdpi", "tests": [ { "inputs": { - "vcf": "${resources_file_path}/sv_stats/pbsv/HG002.GRCh38.pbsv.phased.vcf.gz", + "vcf": "${resources_file_path}/sawfish_call/output/HG002/HG002.GRCh38.structural_variants.vcf.gz", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { @@ -550,13 +550,13 @@ ] }, "stat_sv_DEL_count": { - "value": "19", + "value": "17", "test_tasks": [ "compare_string" ] }, "stat_sv_INS_count": { - "value": "37", + "value": "46", "test_tasks": [ "compare_string" ] @@ -568,7 +568,13 @@ ] }, "stat_sv_BND_count": { - "value": "0", + "value": "2", + "test_tasks": [ + "compare_string" + ] + }, + "stat_sv_INVBND_count": { + "value": "4", "test_tasks": [ "compare_string" ] @@ -1515,120 +1521,6 @@ } } }, - "workflows/wdl-common/wdl/tasks/pbsv.wdl": { - "key": "workflows/wdl-common/wdl/tasks/pbsv.wdl", - "name": "", - "description": "", - "tasks": { - "pbsv_discover": { - "key": "pbsv_discover", - "digest": "whyivzym5tmmbwnlrne6l26w6rj3pyzj", - "tests": [ - { - "inputs": { - "aligned_bam": "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam", - "aligned_bam_index": 
"${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam.bai", - "trf_bed": "${datasets_file_path}/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "svsig": { - "value": "${resources_file_path}/pbsv_discover/HG002.GRCh38.chr6_10000000_20000000.svsig.gz", - "test_tasks": [ - "compare_file_basename", - "check_gzip", - "check_empty_lines" - ] - } - } - } - ] - }, - "pbsv_call": { - "key": "pbsv_call", - "digest": "whqgkxlukuxpojvpg6tz6rzqxmtw3lcd", - "tests": [ - { - "inputs": { - "sample_id": "HG002", - "svsigs": [ - "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.svsig.gz" - ], - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "shard_index": 5, - "regions": [ - "chr6" - ], - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "vcf": { - "value": "${resources_file_path}/pbsv_call/singleton/HG002.GRCh38.5.pbsv.vcf.gz", - "test_tasks": [ - "compare_file_basename", - "vcftools_validator", - "check_gzip" - ] - } - } - }, - { - "inputs": { - "sample_id": "HG002", - "svsigs": [ - "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.svsig.gz" - ], - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "vcf": { - "value": "${resources_file_path}/pbsv_call/singleton_no_shard/HG002.GRCh38.pbsv.vcf.gz", - "test_tasks": [ - "compare_file_basename", - "vcftools_validator", - "check_gzip" - ] - } - } - }, - { - "inputs": { - "sample_id": "HG002-trio", - "svsigs": [ - "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.svsig.gz", - "${resources_file_path}/inputs/HG003.GRCh38.chr6_10000000_20000000.svsig.gz", - "${resources_file_path}/inputs/HG004.GRCh38.chr6_10000000_20000000.svsig.gz" - ], - "sample_count": 3, - "ref_fasta": "${ref_fasta}", - 
"ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "shard_index": 5, - "regions": [ - "chr6" - ], - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "vcf": { - "value": "${resources_file_path}/pbsv_call/trio/HG002-trio.GRCh38.5.pbsv.vcf.gz", - "test_tasks": [ - "compare_file_basename", - "vcftools_validator", - "check_gzip" - ] - } - } - } - ] - } - } - }, "workflows/wdl-common/wdl/tasks/samtools.wdl": { "key": "workflows/wdl-common/wdl/tasks/samtools.wdl", "name": "", @@ -2502,6 +2394,163 @@ ] } } + }, + "workflows/wdl-common/wdl/tasks/sawfish.wdl": { + "key": "workflows/wdl-common/wdl/tasks/sawfish.wdl", + "name": "", + "description": "", + "tasks": { + "sawfish_discover": { + "key": "sawfish_discover", + "digest": "eh67skuq3swjgkbrinqhzfxf2wfea2hp", + "tests": [ + { + "inputs": { + "sex": "MALE", + "aligned_bam": "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam", + "aligned_bam_index": "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam.bai", + "ref_fasta": "${ref_fasta}", + "ref_index": "${ref_index}", + "expected_male_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed", + "expected_female_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed", + "out_prefix": "HG002.GRCh38", + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "discover_tar": { + "value": "${resources_file_path}/sawfish_discover/output/HG002/HG002.GRCh38.tar", + "test_tasks": [ + "compare_file_basename" + ] + } + } + }, + { + "inputs": { + "sex": "MALE", + "aligned_bam": "${resources_file_path}/inputs/HG003.GRCh38.chr6_10000000_20000000.bam", + "aligned_bam_index": "${resources_file_path}/inputs/HG003.GRCh38.chr6_10000000_20000000.bam.bai", + "ref_fasta": "${ref_fasta}", + "ref_index": "${ref_index}", + "expected_male_bed": 
"${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed", + "expected_female_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed", + "out_prefix": "HG003.GRCh38", + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "discover_tar": { + "value": "${resources_file_path}/sawfish_discover/output/HG003/HG003.GRCh38.tar", + "test_tasks": [ + "compare_file_basename" + ] + } + } + }, + { + "inputs": { + "aligned_bam": "${resources_file_path}/inputs/HG004.GRCh38.chr6_10000000_20000000.bam", + "aligned_bam_index": "${resources_file_path}/inputs/HG004.GRCh38.chr6_10000000_20000000.bam.bai", + "ref_fasta": "${ref_fasta}", + "ref_index": "${ref_index}", + "expected_male_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed", + "expected_female_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed", + "out_prefix": "HG004.GRCh38", + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "discover_tar": { + "value": "${resources_file_path}/sawfish_discover/output/HG004/HG004.GRCh38.tar", + "test_tasks": [ + "compare_file_basename" + ] + } + } + } + ] + }, + "sawfish_call": { + "key": "sawfish_call", + "digest": "gcc2gfurgryq2ziqgyfgxc5a2k3dtkyo", + "tests": [ + { + "inputs": { + "discover_tars": [ + "${resources_file_path}/sawfish_call/input/HG002.GRCh38.tar" + ], + "aligned_bams": [ + "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam" + ], + "aligned_bam_indices": [ + "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam.bai" + ], + "ref_fasta": "${ref_fasta}", + "ref_index": "${ref_index}", + "ref_name": "${ref_name}", + "out_prefix": "HG002.GRCh38.structural_variants", + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "vcf": { + "value": 
"${resources_file_path}/sawfish_call/output/HG002/HG002.GRCh38.structural_variants.vcf.gz", + "test_tasks": [ + "compare_file_basename", + "check_gzip", + "vcftools_validator" + ] + }, + "supporting_reads": { + "value": "${resources_file_path}/sawfish_call/output/HG002/HG002.GRCh38.structural_variants.supporting_reads.json.gz", + "test_tasks": [ + "compare_file_basename", + "check_gzip" + ] + } + } + }, + { + "inputs": { + "discover_tars": [ + "${resources_file_path}/sawfish_call/input/HG002.GRCh38.tar", + "${resources_file_path}/sawfish_call/input/HG003.GRCh38.tar", + "${resources_file_path}/sawfish_call/input/HG004.GRCh38.tar" + ], + "aligned_bams": [ + "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam", + "${resources_file_path}/inputs/HG003.GRCh38.chr6_10000000_20000000.bam", + "${resources_file_path}/inputs/HG004.GRCh38.chr6_10000000_20000000.bam" + ], + "aligned_bam_indices": [ + "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam.bai", + "${resources_file_path}/inputs/HG003.GRCh38.chr6_10000000_20000000.bam.bai", + "${resources_file_path}/inputs/HG004.GRCh38.chr6_10000000_20000000.bam.bai" + ], + "ref_fasta": "${ref_fasta}", + "ref_index": "${ref_index}", + "ref_name": "${ref_name}", + "out_prefix": "HG002-trio.joint.GRCh38.structural_variants", + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "vcf": { + "value": "${resources_file_path}/sawfish_call/output/HG002-trio/HG002-trio.joint.GRCh38.structural_variants.vcf.gz", + "test_tasks": [ + "compare_file_basename", + "check_gzip", + "vcftools_validator" + ] + }, + "supporting_reads": { + "value": "${resources_file_path}/sawfish_call/output/HG002-trio/HG002-trio.joint.GRCh38.structural_variants.supporting_reads.json.gz", + "test_tasks": [ + "compare_file_basename", + "check_gzip" + ] + } + } + } + ] + } + } } }, "engines": { diff --git a/workflows/downstream/downstream.wdl b/workflows/downstream/downstream.wdl index 8c983756..829f527e 
100644 --- a/workflows/downstream/downstream.wdl +++ b/workflows/downstream/downstream.wdl @@ -202,11 +202,12 @@ workflow downstream { File indel_distribution_plot = bcftools_stats_roh_small_variants.indel_distribution_plot # sv stats - String stat_sv_DUP_count = sv_stats.stat_sv_DUP_count - String stat_sv_DEL_count = sv_stats.stat_sv_DEL_count - String stat_sv_INS_count = sv_stats.stat_sv_INS_count - String stat_sv_INV_count = sv_stats.stat_sv_INV_count - String stat_sv_BND_count = sv_stats.stat_sv_BND_count + String stat_sv_DUP_count = sv_stats.stat_sv_DUP_count + String stat_sv_DEL_count = sv_stats.stat_sv_DEL_count + String stat_sv_INS_count = sv_stats.stat_sv_INS_count + String stat_sv_INV_count = sv_stats.stat_sv_INV_count + String stat_sv_INVBND_count = sv_stats.stat_sv_INVBND_count + String stat_sv_BND_count = sv_stats.stat_sv_BND_count # cpg_pileup outputs File? cpg_combined_bed = cpg_pileup.combined_bed diff --git a/workflows/family.inputs.json b/workflows/family.inputs.json index ec555a3c..90bcb20f 100644 --- a/workflows/family.inputs.json +++ b/workflows/family.inputs.json @@ -22,7 +22,6 @@ "humanwgs_family.pharmcat_min_coverage": "Int (optional, default = 10)", "humanwgs_family.tertiary_map_file": "File? (optional)", "humanwgs_family.glnexus_mem_gb": "Int? (optional)", - "humanwgs_family.pbsv_call_mem_gb": "Int? (optional)", "humanwgs_family.gpu": "Boolean (optional, default = false)", "humanwgs_family.backend": "String", "humanwgs_family.zones": "String? (optional)", diff --git a/workflows/family.wdl b/workflows/family.wdl index 789e1396..7324b796 100644 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -45,9 +45,6 @@ workflow humanwgs_family { glnexus_mem_gb: { name: "Override GLnexus memory request (GB)" } - pbsv_call_mem_gb: { - name: "Override PBSV call memory request (GB)" - } gpu: { name: "Use GPU when possible" } @@ -90,7 +87,6 @@ workflow humanwgs_family { File? tertiary_map_file Int? glnexus_mem_gb - Int? 
pbsv_call_mem_gb Boolean gpu = false @@ -140,12 +136,13 @@ workflow humanwgs_family { input: family_id = family.family_id, sample_ids = sample_id, - gvcfs = upstream.small_variant_gvcf, - gvcf_indices = upstream.small_variant_gvcf_index, - svsigs = flatten(upstream.svsigs), + gvcfs = upstream.small_variant_vcf, + gvcf_indices = upstream.small_variant_vcf_index, + discover_tars = upstream.discover_tar, + aligned_bams = upstream.out_bam, + aligned_bam_indices = upstream.out_bam_index, ref_map_file = ref_map_file, glnexus_mem_gb = glnexus_mem_gb, - pbsv_call_mem_gb = pbsv_call_mem_gb, default_runtime_attributes = default_runtime_attributes } } @@ -193,6 +190,7 @@ workflow humanwgs_family { 'sv_DEL_count': downstream.stat_sv_DEL_count, 'sv_INS_count': downstream.stat_sv_INS_count, 'sv_INV_count': downstream.stat_sv_INV_count, + 'sv_INVBND_count': downstream.stat_sv_INVBND_count, 'sv_BND_count': downstream.stat_sv_BND_count, 'cnv_DUP_count': upstream.stat_cnv_DUP_count, 'cnv_DEL_count': upstream.stat_cnv_DEL_count, @@ -322,11 +320,12 @@ workflow humanwgs_family { Array[File] phased_sv_vcf_index = downstream.phased_sv_vcf_index # sv stats - Array[String] stat_sv_DUP_count = downstream.stat_sv_DUP_count - Array[String] stat_sv_DEL_count = downstream.stat_sv_DEL_count - Array[String] stat_sv_INS_count = downstream.stat_sv_INS_count - Array[String] stat_sv_INV_count = downstream.stat_sv_INV_count - Array[String] stat_sv_BND_count = downstream.stat_sv_BND_count + Array[String] stat_sv_DUP_count = downstream.stat_sv_DUP_count + Array[String] stat_sv_DEL_count = downstream.stat_sv_DEL_count + Array[String] stat_sv_INS_count = downstream.stat_sv_INS_count + Array[String] stat_sv_INV_count = downstream.stat_sv_INV_count + Array[String] stat_sv_INVBND_count = downstream.stat_sv_INVBND_count + Array[String] stat_sv_BND_count = downstream.stat_sv_BND_count # small variant outputs Array[File] phased_small_variant_vcf = downstream.phased_small_variant_vcf @@ -400,6 +399,6 @@ workflow 
humanwgs_family { # workflow metadata String workflow_name = "humanwgs_family" - String workflow_version = "v2.1.1" + if defined(debug_version) then "~{"-" + debug_version}" else "" + String workflow_version = "v3.0.0-alpha1" + if defined(debug_version) then "~{"-" + debug_version}" else "" } } \ No newline at end of file diff --git a/workflows/joint/inputs.json b/workflows/joint/inputs.json index 9a0ab408..90e3de13 100644 --- a/workflows/joint/inputs.json +++ b/workflows/joint/inputs.json @@ -3,10 +3,11 @@ "joint.sample_ids": "Array[String]", "joint.gvcfs": "Array[File]", "joint.gvcf_indices": "Array[File]", - "joint.svsigs": "Array[File]", + "joint.discover_tars": "Array[File]", + "joint.aligned_bams": "Array[File]", + "joint.aligned_bam_indices": "Array[File]", "joint.ref_map_file": "File", "joint.glnexus_mem_gb": "Int? (optional)", - "joint.pbsv_call_mem_gb": "Int? (optional)", "joint.default_runtime_attributes": { "max_retries": "Int", "container_registry": "String", diff --git a/workflows/joint/joint.wdl b/workflows/joint/joint.wdl index 6f1f32b3..dac45709 100644 --- a/workflows/joint/joint.wdl +++ b/workflows/joint/joint.wdl @@ -2,9 +2,8 @@ version 1.0 import "../wdl-common/wdl/structs.wdl" import "../wdl-common/wdl/tasks/glnexus.wdl" as Glnexus -import "../wdl-common/wdl/tasks/pbsv.wdl" as Pbsv +import "../wdl-common/wdl/tasks/sawfish.wdl" as Sawfish import "../wdl-common/wdl/tasks/bcftools.wdl" as Bcftools -import "../wdl-common/wdl/workflows/get_pbsv_splits/get_pbsv_splits.wdl" as Pbsv_splits workflow joint { meta { @@ -24,8 +23,14 @@ workflow joint { gvcf_indices: { name: "GVCF Indices" } - svsigs: { - name: "SV Signatures" + discover_tars: { + name: "Sawfish discover output tarballs" + } + aligned_bams: { + name: "Aligned BAMs" + } + aligned_bam_indices: { + name: "Aligned BAM Indices" } ref_map_file: { name: "Reference Map File" @@ -33,9 +38,6 @@ workflow joint { glnexus_mem_gb: { name: "GLnexus Memory (GB)" } - pbsv_call_mem_gb: { - name: "PBSV Call 
Memory (GB)" - } default_runtime_attributes: { name: "Default Runtime Attribute Struct" } @@ -60,63 +62,42 @@ workflow joint { Array[File] gvcfs Array[File] gvcf_indices - Array[File] svsigs + Array[File] discover_tars + Array[File] aligned_bams + Array[File] aligned_bam_indices File ref_map_file Int? glnexus_mem_gb - Int? pbsv_call_mem_gb RuntimeAttributes default_runtime_attributes } Map[String, String] ref_map = read_map(ref_map_file) - call Pbsv_splits.get_pbsv_splits { - input: - pbsv_splits_file = ref_map["pbsv_splits"], # !FileCoercion - default_runtime_attributes = default_runtime_attributes - } - - scatter (shard_index in range(length(get_pbsv_splits.pbsv_splits))) { - Array[String] region_set = get_pbsv_splits.pbsv_splits[shard_index] - - call Pbsv.pbsv_call { - input: - sample_id = family_id + ".joint", - svsigs = svsigs, - sample_count = length(sample_ids), - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion - ref_name = ref_map["name"], - shard_index = shard_index, - regions = region_set, - mem_gb = pbsv_call_mem_gb, - runtime_attributes = default_runtime_attributes - } - } - - # concatenate pbsv vcfs - call Bcftools.concat_pbsv_vcf { + call Sawfish.sawfish_call { input: - vcfs = pbsv_call.vcf, - vcf_indices = pbsv_call.vcf_index, - out_prefix = "~{family_id}.joint.~{ref_map['name']}.structural_variants", - runtime_attributes = default_runtime_attributes + discover_tars = discover_tars, + aligned_bams = aligned_bams, + aligned_bam_indices = aligned_bam_indices, + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + out_prefix = "~{family_id}.joint.~{ref_map['name']}.structural_variants", + runtime_attributes = default_runtime_attributes } - String sv_vcf_basename = basename(concat_pbsv_vcf.concatenated_vcf, ".vcf.gz") + String sv_vcf_basename = basename(sawfish_call.vcf, ".vcf.gz") scatter (sample_id in sample_ids) { String split_sv_vcf_name = 
"~{sample_id}.~{sv_vcf_basename}.vcf.gz" String split_sv_vcf_index_name = "~{sample_id}.~{sv_vcf_basename}.vcf.gz.tbi" } - call Bcftools.split_vcf_by_sample as split_pbsv { + call Bcftools.split_vcf_by_sample as split_sawfish { input: sample_ids = sample_ids, - vcf = concat_pbsv_vcf.concatenated_vcf, - vcf_index = concat_pbsv_vcf.concatenated_vcf_index, + vcf = sawfish_call.vcf, + vcf_index = sawfish_call.vcf_index, split_vcf_names = split_sv_vcf_name, split_vcf_index_names = split_sv_vcf_index_name, runtime_attributes = default_runtime_attributes @@ -150,8 +131,8 @@ workflow joint { } output { - Array[File] split_joint_structural_variant_vcfs = split_pbsv.split_vcfs - Array[File] split_joint_structural_variant_vcf_indices = split_pbsv.split_vcf_indices + Array[File] split_joint_structural_variant_vcfs = split_sawfish.split_vcfs + Array[File] split_joint_structural_variant_vcf_indices = split_sawfish.split_vcf_indices Array[File] split_joint_small_variant_vcfs = split_glnexus.split_vcfs Array[File] split_joint_small_variant_vcf_indices = split_glnexus.split_vcf_indices } diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl index d8fd9a3a..9f81e1d6 100644 --- a/workflows/singleton.wdl +++ b/workflows/singleton.wdl @@ -167,6 +167,7 @@ workflow humanwgs_singleton { 'sv_DEL_count': [downstream.stat_sv_DEL_count], 'sv_INS_count': [downstream.stat_sv_INS_count], 'sv_INV_count': [downstream.stat_sv_INV_count], + 'sv_INVBND_count': [downstream.stat_sv_INVBND_count], 'sv_BND_count': [downstream.stat_sv_BND_count], 'cnv_DUP_count': [upstream.stat_cnv_DUP_count], 'cnv_DEL_count': [upstream.stat_cnv_DEL_count], @@ -262,11 +263,12 @@ workflow humanwgs_singleton { File phased_sv_vcf_index = downstream.phased_sv_vcf_index # sv stats - String stat_sv_DUP_count = downstream.stat_sv_DUP_count - String stat_sv_DEL_count = downstream.stat_sv_DEL_count - String stat_sv_INS_count = downstream.stat_sv_INS_count - String stat_sv_INV_count = downstream.stat_sv_INV_count - String 
stat_sv_BND_count = downstream.stat_sv_BND_count + String stat_sv_DUP_count = downstream.stat_sv_DUP_count + String stat_sv_DEL_count = downstream.stat_sv_DEL_count + String stat_sv_INS_count = downstream.stat_sv_INS_count + String stat_sv_INV_count = downstream.stat_sv_INV_count + String stat_sv_INVBND_count = downstream.stat_sv_INVBND_count + String stat_sv_BND_count = downstream.stat_sv_BND_count # small variant outputs File phased_small_variant_vcf = downstream.phased_small_variant_vcf @@ -332,6 +334,6 @@ workflow humanwgs_singleton { # workflow metadata String workflow_name = "humanwgs_family" - String workflow_version = "v2.1.1" + if defined(debug_version) then "~{"-" + debug_version}" else "" + String workflow_version = "v3.0.0-alpha1" + if defined(debug_version) then "~{"-" + debug_version}" else "" } } diff --git a/workflows/upstream/upstream.wdl b/workflows/upstream/upstream.wdl index c2c56dbf..9da7be73 100644 --- a/workflows/upstream/upstream.wdl +++ b/workflows/upstream/upstream.wdl @@ -3,15 +3,13 @@ version 1.0 import "../wdl-common/wdl/structs.wdl" import "../wdl-common/wdl/tasks/pbmm2.wdl" as Pbmm2 import "../wdl-common/wdl/tasks/merge_bam_stats.wdl" as MergeBamStats -import "../wdl-common/wdl/tasks/pbsv.wdl" as Pbsv -import "../wdl-common/wdl/tasks/bcftools.wdl" as Bcftools +import "../wdl-common/wdl/tasks/sawfish.wdl" as Sawfish import "../wdl-common/wdl/workflows/deepvariant/deepvariant.wdl" as DeepVariant import "../wdl-common/wdl/tasks/samtools.wdl" as Samtools import "../wdl-common/wdl/tasks/mosdepth.wdl" as Mosdepth import "../wdl-common/wdl/tasks/trgt.wdl" as Trgt import "../wdl-common/wdl/tasks/paraphase.wdl" as Paraphase import "../wdl-common/wdl/tasks/hificnv.wdl" as Hificnv -import "../wdl-common/wdl/workflows/get_pbsv_splits/get_pbsv_splits.wdl" as Pbsv_splits workflow upstream { meta { @@ -78,13 +76,6 @@ workflow upstream { ref_name = ref_map["name"], runtime_attributes = default_runtime_attributes } - call Pbsv.pbsv_discover { - input: 
- aligned_bam = pbmm2_align.aligned_bam, - aligned_bam_index = pbmm2_align.aligned_bam_index, - trf_bed = ref_map["pbsv_tandem_repeat_bed"], # !FileCoercion - runtime_attributes = default_runtime_attributes - } } call MergeBamStats.merge_bam_stats { @@ -132,6 +123,19 @@ workflow upstream { default_runtime_attributes = default_runtime_attributes } + call Sawfish.sawfish_discover { + input: + sex = select_first([sex, mosdepth.inferred_sex]), + aligned_bam = aligned_bam_data, + aligned_bam_index = aligned_bam_index, + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + out_prefix = "~{sample_id}.~{ref_map['name']}", + expected_male_bed = ref_map["hificnv_expected_bed_male"], # !FileCoercion + expected_female_bed = ref_map["hificnv_expected_bed_female"], # !FileCoercion + runtime_attributes = default_runtime_attributes + } + call Trgt.trgt { input: sample_id = sample_id, @@ -174,35 +178,15 @@ workflow upstream { } if (single_sample) { - call Pbsv_splits.get_pbsv_splits { - input: - pbsv_splits_file = ref_map["pbsv_splits"], # !FileCoercion - default_runtime_attributes = default_runtime_attributes - } - - scatter (shard_index in range(length(get_pbsv_splits.pbsv_splits))) { - Array[String] region_set = get_pbsv_splits.pbsv_splits[shard_index] - - call Pbsv.pbsv_call { - input: - sample_id = sample_id, - svsigs = pbsv_discover.svsig, - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion - ref_name = ref_map["name"], - shard_index = shard_index, - regions = region_set, - runtime_attributes = default_runtime_attributes - } - } - - # concatenate pbsv vcfs - call Bcftools.concat_pbsv_vcf { - input: - vcfs = pbsv_call.vcf, - vcf_indices = pbsv_call.vcf_index, - out_prefix = "~{sample_id}.~{ref_map['name']}.structural_variants", - runtime_attributes = default_runtime_attributes + call Sawfish.sawfish_call { + input: + discover_tars = [sawfish_discover.discover_tar], + aligned_bams 
= [aligned_bam_data], + aligned_bam_indices = [aligned_bam_index], + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + out_prefix = "~{sample_id}.~{ref_map['name']}.structural_variants", + runtime_attributes = default_runtime_attributes } } @@ -229,13 +213,12 @@ workflow upstream { String inferred_sex = mosdepth.inferred_sex String stat_mean_depth = mosdepth.stat_mean_depth - # per movie sv signatures - # if we've already called variants, no need to keep these - Array[File] svsigs = if single_sample then [] else pbsv_discover.svsig + # per sample sv signatures + File discover_tar = sawfish_discover.discover_tar - # pbsv outputs for single sample - File? sv_vcf = concat_pbsv_vcf.concatenated_vcf - File? sv_vcf_index = concat_pbsv_vcf.concatenated_vcf_index + # sawfish outputs for single sample + File? sv_vcf = sawfish_call.vcf + File? sv_vcf_index = sawfish_call.vcf_index # small variant outputs File small_variant_vcf = deepvariant.vcf diff --git a/workflows/wdl-common b/workflows/wdl-common index 7dc8a8ca..e503a1ca 160000 --- a/workflows/wdl-common +++ b/workflows/wdl-common @@ -1 +1 @@ -Subproject commit 7dc8a8cab22fcd8b5e4c68fd55afdf9630c3dc3c +Subproject commit e503a1ca877f6ab6a72bbf4a2d48dfedf7c9fdec From 77098082d2370c0b5302a16e910c008d07630800 Mon Sep 17 00:00:00 2001 From: William Rowell Date: Tue, 11 Feb 2025 18:46:14 -0800 Subject: [PATCH 02/61] Update docs to replace pbsv with Sawfish. 
--- docs/family.md | 9 +++++---- docs/singleton.md | 6 +++--- docs/tools_containers.md | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/family.md b/docs/family.md index a9fa639c..f7b59e04 100644 --- a/docs/family.md +++ b/docs/family.md @@ -27,7 +27,7 @@ flowchart TD subgraph "`**Upstream of Phasing (per-sample)**`" subgraph "per-movie" ubam[/"HiFi uBAM"/] --> pbmm2_align["pbmm2 align"] - pbmm2_align --> pbsv_discover["PBSV discover"] + pbmm2_align --> sawfish_discover["Sawfish discover"] end pbmm2_align --> merge_read_stats["merge read statistics"] pbmm2_align --> samtools_merge["samtools merge"] @@ -41,14 +41,14 @@ flowchart TD end subgraph "`**Joint Calling**`" deepvariant --> glnexus["GLnexus (joint-call small variants)"] - pbsv_discover --> pbsv_call["PBSV call"] + sawfish_discover --> sawfish_call["Sawfish call"] glnexus --> split_glnexus["split small variant vcf by sample"] - pbsv_call --> split_pbsv["split SV vcf by sample"] + sawfish_call --> split_sawfish["split SV vcf by sample"] end subgraph "`**Phasing and Downstream (per-sample)**`" split_glnexus --> hiphase trgt --> hiphase - split_pbsv --> hiphase + split_sawfish --> hiphase hiphase --> bcftools_roh["bcftools roh"] hiphase --> bcftools_stats["bcftools stats\n(small variants)"] hiphase --> sv_stats["SV stats"] @@ -167,6 +167,7 @@ The `Sample` struct contains sample specific data and metadata. 
The struct has t | Array\[String\] | stat_sv_DEL_count | Structural variant DEL count | (PASS variants) | | Array\[String\] | stat_sv_INS_count | Structural variant INS count | (PASS variants) | | Array\[String\] | stat_sv_INV_count | Structural variant INV count | (PASS variants) | +| Array\[String\] | stat_sv_INVBND_count | Structural variant INVBND count | (PASS variants) | | Array\[String\] | stat_sv_BND_count | Structural variant BND count | (PASS variants) | | Array\[File\] | bcftools_roh_out | ROH calling | `bcftools roh` | | Array\[File\] | bcftools_roh_bed | Generated from above, without filtering | | diff --git a/docs/singleton.md b/docs/singleton.md index f5b9b7e6..823ac121 100644 --- a/docs/singleton.md +++ b/docs/singleton.md @@ -25,7 +25,7 @@ flowchart TD subgraph "`**Upstream of Phasing**`" subgraph "per-movie" ubam[/"HiFi uBAM"/] --> pbmm2_align["pbmm2 align"] - pbmm2_align --> pbsv_discover["PBSV discover"] + pbmm2_align --> sawfish_discover["Sawfish discover"] end pbmm2_align --> merge_read_stats["merge read statistics"] pbmm2_align --> samtools_merge["samtools merge"] @@ -35,8 +35,7 @@ flowchart TD samtools_merge --> trgt["TRGT"] samtools_merge --> trgt_dropouts["TR coverage dropouts"] samtools_merge --> deepvariant["DeepVariant"] - samtools_merge --> hiphase["HiPhase"] - pbsv_discover --> pbsv_call["PBSV call"] + sawfish_discover --> sawfish_call["Sawfish call"] end subgraph "`**Phasing and Downstream**`" deepvariant --> hiphase @@ -130,6 +129,7 @@ flowchart TD | String | stat_sv_DEL_count | Structural variant DEL count | (PASS variants) | | String | stat_sv_INS_count | Structural variant INS count | (PASS variants) | | String | stat_sv_INV_count | Structural variant INV count | (PASS variants) | +| String | stat_sv_INVBND_count | Structural variant INVBND count | (PASS variants) | | String | stat_sv_BND_count | Structural variant BND count | (PASS variants) | | File | bcftools_roh_out | ROH calling | `bcftools roh` | | File | bcftools_roh_bed | 
Generated from above, without filtering | | diff --git a/docs/tools_containers.md b/docs/tools_containers.md index bd6026d2..126bf611 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -13,7 +13,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | pb_wdl_base | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/6b13cc246dd44e41903d17a660bb5432cdd18dbe/docker/pb_wdl_base) | [sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87](https://quay.io/repository/pacbio/pb_wdl_base/manifest/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87) | | pbmm2 | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/9591749da92ca57f7283ca1c2268789c45fa341d/docker/pbmm2) | [pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859](https://quay.io/repository/pacbio/pbmm2/manifest/sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859) | | mosdepth | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/fa84fbf582738c05c750e667ff43d11552ad4183/docker/mosdepth) | [mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1](https://quay.io/repository/pacbio/mosdepth/manifest/sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1) | -| pbsv | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/89fe9bce4e056f487fa665f53ba1e9253701124e/docker/pbsv) | [pbsv@sha256:2134be37f71b5b2cb41f364736fe5ea14cf8e70403ba41af5005ce50b64086e4](https://quay.io/repository/pacbio/pbsv/manifest/sha256:2134be37f71b5b2cb41f364736fe5ea14cf8e70403ba41af5005ce50b64086e4) | +| sawfish | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/74325e0c73d53da2fef43d01ce9d0c7d0cd49c7a/docker/sawfish) | 
[sawfish@sha256:fcd5d091908322ddeb2c86b7217b7cfdef9a103944adb3e87c76d495eb3fea5b](https://quay.io/repository/pacbio/sawfish/manifest/sha256:fcd5d091908322ddeb2c86b7217b7cfdef9a103944adb3e87c76d495eb3fea5b) | | trgt | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/641ed67d29128381f27daeca9936fbc1e41bf58d/docker/trgt) | [trgt@sha256:be7e6ef589a31f4de5d2ed4725dfb34b4b23cb9a440577b606e8f7bfee06526b](https://quay.io/repository/pacbio/trgt/manifest/sha256:be7e6ef589a31f4de5d2ed4725dfb34b4b23cb9a440577b606e8f7bfee06526b) | | hiphase | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/1051d12818e165a2145526e0b58f0ed0d0dc023a/docker/hiphase) | [hiphase@sha256:47fe7d42aea6b1b2e6d3c7401bc35a184464c3f647473d0525c00f3c968b40ad](https://quay.io/repository/pacbio/hiphase/manifest/sha256:47fe7d42aea6b1b2e6d3c7401bc35a184464c3f647473d0525c00f3c968b40ad) | | hificnv | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/a58f8b44cf8fd09c39c90e07076dbb418188084d/docker/hificnv) | [hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d](https://quay.io/repository/pacbio/hificnv/manifest/sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d) | From a038bed76abdbb34107277c28e90c3385d52959b Mon Sep 17 00:00:00 2001 From: William Rowell Date: Wed, 19 Feb 2025 11:21:21 -0800 Subject: [PATCH 03/61] Remove INFO fields absent in sawfish VCFs. 
--- wdl-ci.config.json | 8 ++++---- workflows/tertiary/tertiary.wdl | 2 -- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 7af2bc10..c1d437f5 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -121,7 +121,7 @@ "tests": [ { "inputs": { - "sv_vcf": "${resources_file_path}/svpack_filter_annotated/input/HG002-trio.GRCh38.structural_variants.vcf.gz", + "sv_vcf": "${resources_file_path}/svpack_filter_annotated/input/sawfish/HG002.HG002-trio.joint.GRCh38.structural_variants.phased.vcf.gz", "pedigree": "${resources_file_path}/svpack_filter_annotated/input/HG002-trio.ped", "population_vcfs": [ "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz", @@ -136,7 +136,7 @@ }, "output_tests": { "svpack_vcf": { - "value": "${resources_file_path}/svpack_filter_annotated/output/HG002-trio.GRCh38.structural_variants.svpack.vcf.gz", + "value": "${resources_file_path}/svpack_filter_annotated/output/sawfish/HG002.HG002-trio.joint.GRCh38.structural_variants.phased.svpack.vcf.gz", "test_tasks": [ "compare_file_basename", "vcftools_validator", @@ -153,7 +153,7 @@ "tests": [ { "inputs": { - "filtered_vcf": "${resources_file_path}/slivar_svpack_tsv/input/HG002-trio.GRCh38.structural_variants.svpack.vcf.gz", + "filtered_vcf": "${resources_file_path}/slivar_svpack_tsv/input/sawfish/HG002.HG002-trio.joint.GRCh38.structural_variants.phased.svpack.vcf.gz", "pedigree": "${resources_file_path}/slivar_svpack_tsv/input/HG002-trio.ped", "lof_lookup": "${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt", "clinvar_lookup": "${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt", @@ -162,7 +162,7 @@ }, "output_tests": { "svpack_tsv": { - "value": "${resources_file_path}/slivar_svpack_tsv/output/HG002-trio.GRCh38.structural_variants.svpack.tsv", + "value": 
"${resources_file_path}/slivar_svpack_tsv/output/sawfish/HG002.HG002-trio.joint.GRCh38.structural_variants.phased.svpack.tsv", "test_tasks": [ "compare_file_basename", "check_tab_delimited", diff --git a/workflows/tertiary/tertiary.wdl b/workflows/tertiary/tertiary.wdl index 3dce4dac..224ccfe4 100644 --- a/workflows/tertiary/tertiary.wdl +++ b/workflows/tertiary/tertiary.wdl @@ -580,8 +580,6 @@ task slivar_svpack_tsv { Array[String] info_fields = [ 'SVTYPE', 'SVLEN', - 'SVANN', - 'CIPOS', 'MATEID', 'END' ] From e465b99d8ea03b048f964f4a1d6ff581b5bb23b6 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 19 Feb 2025 22:10:10 +0000 Subject: [PATCH 04/61] update wdl-ci config file after successful tests --- wdl-ci.config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index c1d437f5..033ae14f 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -149,7 +149,7 @@ }, "slivar_svpack_tsv": { "key": "slivar_svpack_tsv", - "digest": "nyawjsg47zrbjodngljbicaww2mqsspc", + "digest": "mdnthjkki3cj62rev5nl7le3nv4cgzbt", "tests": [ { "inputs": { From 57e25c10f6109061c3e6a567261e71c0bee64348 Mon Sep 17 00:00:00 2001 From: William Rowell Date: Tue, 11 Feb 2025 22:16:59 -0800 Subject: [PATCH 05/61] Update to DeepVariant 1.8.0. - remove the deepvariant_version option and hard-code the default into the subworkflow - remove the custom_deepvariant_model_tar option - update the version to 1.8.0 - modify task command calls to 1.8.0 options - use generic tfrecord filenames Remove the pharmcat_version option and provide the default directly within the subworkflow. 
--- docs/tools_containers.md | 4 +- scripts/create_image_manifest.sh | 4 +- wdl-ci.config.json | 88 ++++++++++++++++------------- workflows/downstream/downstream.wdl | 5 -- workflows/downstream/inputs.json | 1 - workflows/family.inputs.json | 3 - workflows/family.wdl | 19 ------- workflows/singleton.inputs.json | 3 - workflows/singleton.wdl | 19 ------- workflows/upstream/inputs.json | 2 - workflows/upstream/upstream.wdl | 11 ---- 11 files changed, 53 insertions(+), 106 deletions(-) diff --git a/docs/tools_containers.md b/docs/tools_containers.md index 126bf611..f9c3494b 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -23,7 +23,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | wgs_tertiary |
  • `/opt/scripts/calculate_phrank.py` 2.0.0
  • `/opt/scripts/json2ped.py` 0.5.0
Last built 2021-09-17:
  • ensembl -> HGNC
  • ensembl -> HPO
  • HGNC -> inheritance
  • HPO DAG
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/fd70e2872bd3c6bb705faff5bc68374116d7d62f/docker/wgs_tertiary) | [wgs_tertiary@sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136](https://quay.io/repository/pacbio/wgs_tertiary/manifest/sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136) | | slivar |
  • slivar 0.3.1
  • `/opt/scripts/add_comphet_phase.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/5e1094fd6755203b4971fdac6dcb951bbc098bed/docker/slivar) | [slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa](https://quay.io/repository/pacbio/slivar/manifest/sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa) | | svpack |
  • svpack 54b54db
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/6fc750b0c65b4a5c1eb65791eab9eed89864d858/docker/svpack) | [svpack@sha256:628e9851e425ed8044a907d33de04043d1ef02d4d2b2667cf2e9a389bb011eba](https://quay.io/repository/pacbio/svpack/manifest/sha256:628e9851e425ed8044a907d33de04043d1ef02d4d2b2667cf2e9a389bb011eba) | -| deepvariant |
  • DeepVariant 1.6.1
| | [deepvariant:1.6.1](https://hub.docker.com/layers/google/deepvariant/1.6.1/images/sha256-ccab95548e6c3ec28c75232987f31209ff1392027d67732435ce1ba3d0b55c68) | -| deepvariant-gpu |
  • DeepVariant 1.6.1
| | [deepvariant:1.6.1-gpu](https://hub.docker.com/layers/google/deepvariant/1.6.1-gpu/images/sha256-7929c55106d3739daa18d52802913c43af4ca2879db29656056f59005d1d46cb) | +| deepvariant |
  • DeepVariant 1.8.0
| | [deepvariant:1.8.0](https://hub.docker.com/layers/google/deepvariant/1.8.0/images/sha256-eb223b3c487be43d34cc7b08c906b9c558d195716e10672db1bd6e910dc3a00a) | +| deepvariant-gpu |
  • DeepVariant 1.8.0
| | [deepvariant:1.8.0-gpu](https://hub.docker.com/layers/google/deepvariant/1.8.0-gpu/images/sha256-892a6a8a92865f40229ee7fcaeea48f61673d8f8273a643b1f55c4fe0543d3c7) | | pharmcat |
  • PharmCat 2.15.4
| | [pharmcat:2.15.4](https://hub.docker.com/layers/pgkb/pharmcat/2.15.4/images/sha256-5b58ae959b4cd85986546c2d67e3596f33097dedc40dfe57dd845b6e78781eb6) | | glnexus |
  • GLnexus 1.4.3
| | [glnexus:1.4.3](https://quay.io/repository/pacbio/glnexus/manifest/sha256:ce6fecf59dddc6089a8100b31c29c1e6ed50a0cf123da9f2bc589ee4b0c69c8e) | diff --git a/scripts/create_image_manifest.sh b/scripts/create_image_manifest.sh index a1704f82..66f23975 100644 --- a/scripts/create_image_manifest.sh +++ b/scripts/create_image_manifest.sh @@ -10,9 +10,9 @@ grep '@sha' -h -r workflows/ \ | sort --unique \ > ./image_manifest.txt -deepvariant_version=$(grep -m1 'String deepvariant_version' workflows/singleton.wdl | tr -s ' ' | cut -f5 -d' ' | sed 's/"//g') +deepvariant_version=1.8.0 echo "google/deepvariant:${deepvariant_version}" >> ./image_manifest.txt echo "google/deepvariant:${deepvariant_version}-gpu" >> ./image_manifest.txt -pharmcat_version=$(grep -m1 'String pharmcat_version' workflows/singleton.wdl | tr -s ' ' | cut -f5 -d' ' | sed 's/"//g') +pharmcat_version=2.15.4 echo "pgkb/pharmcat:${pharmcat_version}" >> ./image_manifest.txt \ No newline at end of file diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 033ae14f..102c861d 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -2075,7 +2075,7 @@ "tasks": { "deepvariant_make_examples": { "key": "deepvariant_make_examples", - "digest": "54kvpa3bz3cciywbjcwtjyb5k4ifpzhc", + "digest": "", "tests": [ { "inputs": { @@ -2091,19 +2091,19 @@ "task_start_index": 0, "tasks_per_shard": 8, "total_deepvariant_tasks": 64, - "docker_image": "google/deepvariant:1.6.1", + "docker_image": "google/deepvariant:1.8.0", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "example_tfrecord_tar": { - "value": "${resources_file_path}/deepvariant_make_examples/output/shard_0/HG002.0.example_tfrecords.tar.gz", + "value": "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.0.example_tfrecords.tar.gz", "test_tasks": [ "compare_file_basename", "check_gzip" ] }, "nonvariant_site_tfrecord_tar": { - "value": 
"${resources_file_path}/deepvariant_make_examples/output/shard_0/HG002.0.nonvariant_site_tfrecords.tar.gz", + "value": "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.0.nonvariant_site_tfrecords.tar.gz", "test_tasks": [ "compare_file_basename", "check_gzip" @@ -2115,29 +2115,29 @@ }, "deepvariant_call_variants_cpu": { "key": "deepvariant_call_variants_cpu", - "digest": "vapflxnbarkmdemttze7e6f4svrtkpmw", + "digest": "", "tests": [ { "inputs": { "sample_id": "HG002", "ref_name": "${ref_name}", "example_tfrecord_tars": [ - "${resources_file_path}/deepvariant_call_variants_cpu/input/HG002.0.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_cpu/input/HG002.8.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_cpu/input/HG002.16.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_cpu/input/HG002.24.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_cpu/input/HG002.32.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_cpu/input/HG002.40.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_cpu/input/HG002.48.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_cpu/input/HG002.56.example_tfrecords.tar.gz" + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.0.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.8.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.16.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.24.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.32.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.40.example_tfrecords.tar.gz", + 
"${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.48.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.56.example_tfrecords.tar.gz" ], "total_deepvariant_tasks": 64, - "docker_image": "google/deepvariant:1.6.1", + "docker_image": "google/deepvariant:1.8.0", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "tfrecords_tar": { - "value": "${resources_file_path}/deepvariant_call_variants_cpu/output/HG002/HG002.GRCh38.call_variants_output.tar.gz", + "value": "${resources_file_path}/deepvariant_call_variants_cpu/output/v1p8p0/HG002.GRCh38.call_variants_output.tar.gz", "test_tasks": [ "compare_file_basename", "check_gzip" @@ -2149,29 +2149,29 @@ }, "deepvariant_call_variants_gpu": { "key": "deepvariant_call_variants_gpu", - "digest": "4tcavedqaa6xpqet6ip4f5jxvrucjdcg", + "digest": "", "tests": [ { "inputs": { "sample_id": "HG002", "ref_name": "${ref_name}", "example_tfrecord_tars": [ - "${resources_file_path}/deepvariant_call_variants_gpu/input/HG002.0.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_gpu/input/HG002.8.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_gpu/input/HG002.16.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_gpu/input/HG002.24.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_gpu/input/HG002.32.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_gpu/input/HG002.40.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_gpu/input/HG002.48.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_call_variants_gpu/input/HG002.56.example_tfrecords.tar.gz" + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.0.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.8.example_tfrecords.tar.gz", + 
"${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.16.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.24.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.32.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.40.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.48.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.56.example_tfrecords.tar.gz" ], "total_deepvariant_tasks": 64, - "docker_image": "google/deepvariant:1.6.1-gpu", + "docker_image": "google/deepvariant:1.8.0-gpu", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "tfrecords_tar": { - "value": "${resources_file_path}/deepvariant_call_variants_gpu/output/HG002/HG002.GRCh38.call_variants_output.tar.gz", + "value": "${resources_file_path}/deepvariant_call_variants_gpu/output/v1p8p0/HG002.GRCh38.call_variants_output.tar.gz", "test_tasks": [ "compare_file_basename", "check_gzip" @@ -2183,32 +2183,42 @@ }, "deepvariant_postprocess_variants": { "key": "deepvariant_postprocess_variants", - "digest": "xrdc7ay2sjrniggltopieawihwkunoil", + "digest": "", "tests": [ { "inputs": { "sample_id": "HG002", - "tfrecords_tar": "${resources_file_path}/deepvariant_postprocess_variants/input/HG002.GRCh38.call_variants_output.tar.gz", + "tfrecords_tar": "${resources_file_path}/deepvariant_call_variants_gpu/output/v1p8p0/HG002.GRCh38.call_variants_output.tar.gz", + "example_tfrecord_tars": [ + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.0.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.8.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.16.example_tfrecords.tar.gz", + 
"${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.24.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.32.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.40.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.48.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.56.example_tfrecords.tar.gz" + ], "nonvariant_site_tfrecord_tars": [ - "${resources_file_path}/deepvariant_postprocess_variants/input/HG002.0.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_postprocess_variants/input/HG002.8.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_postprocess_variants/input/HG002.16.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_postprocess_variants/input/HG002.24.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_postprocess_variants/input/HG002.32.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_postprocess_variants/input/HG002.40.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_postprocess_variants/input/HG002.48.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_postprocess_variants/input/HG002.56.nonvariant_site_tfrecords.tar.gz" + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.0.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.8.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.16.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.24.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.32.nonvariant_site_tfrecords.tar.gz", + 
"${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.40.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.48.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.56.nonvariant_site_tfrecords.tar.gz" ], "ref_fasta": "${ref_fasta}", "ref_index": "${ref_index}", "ref_name": "${ref_name}", "total_deepvariant_tasks": 64, - "docker_image": "google/deepvariant:1.6.1", + "docker_image": "google/deepvariant:1.8.0", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "vcf": { - "value": "${resources_file_path}/deepvariant_postprocess_variants/output/HG002/HG002.GRCh38.small_variants.vcf.gz", + "value": "${resources_file_path}/deepvariant_postprocess_variants/output/v1p8p0/HG002.GRCh38.small_variants.vcf.gz", "test_tasks": [ "compare_file_basename", "vcftools_validator", @@ -2216,7 +2226,7 @@ ] }, "gvcf": { - "value": "${resources_file_path}/deepvariant_postprocess_variants/output/HG002/HG002.GRCh38.small_variants.g.vcf.gz", + "value": "${resources_file_path}/deepvariant_postprocess_variants/output/v1p8p0/HG002.GRCh38.small_variants.g.vcf.gz", "test_tasks": [ "compare_file_basename", "vcftools_validator", diff --git a/workflows/downstream/downstream.wdl b/workflows/downstream/downstream.wdl index 829f527e..7971521e 100644 --- a/workflows/downstream/downstream.wdl +++ b/workflows/downstream/downstream.wdl @@ -41,9 +41,6 @@ workflow downstream { aligned_bam_index: { name: "Aligned BAI" } - pharmcat_version: { - name: "PharmCAT version" - } pharmcat_min_coverage: { name: "Minimum coverage for PharmCAT" } @@ -68,7 +65,6 @@ workflow downstream { File aligned_bam File aligned_bam_index - String pharmcat_version Int pharmcat_min_coverage File ref_map_file @@ -162,7 +158,6 @@ workflow downstream { input_tsvs = [pbstarphase_diplotype.pharmcat_tsv], ref_fasta = ref_map["fasta"], # !FileCoercion ref_index = ref_map["fasta_index"], # 
!FileCoercion - pharmcat_version = pharmcat_version, pharmcat_positions = ref_map["pharmcat_positions_vcf"], # !FileCoercion pharmcat_positions_index = ref_map["pharmcat_positions_vcf_index"], # !FileCoercion pharmcat_min_coverage = pharmcat_min_coverage, diff --git a/workflows/downstream/inputs.json b/workflows/downstream/inputs.json index 80474908..7bbf1517 100644 --- a/workflows/downstream/inputs.json +++ b/workflows/downstream/inputs.json @@ -8,7 +8,6 @@ "downstream.trgt_vcf_index": "File", "downstream.aligned_bam": "File", "downstream.aligned_bam_index": "File", - "downstream.pharmcat_version": "String", "downstream.pharmcat_min_coverage": "Int", "downstream.ref_map_file": "File", "downstream.default_runtime_attributes": { diff --git a/workflows/family.inputs.json b/workflows/family.inputs.json index 90bcb20f..cf47370c 100644 --- a/workflows/family.inputs.json +++ b/workflows/family.inputs.json @@ -16,9 +16,6 @@ }, "humanwgs_family.phenotypes": "String? (optional)", "humanwgs_family.ref_map_file": "File", - "humanwgs_family.deepvariant_version": "String (optional, default = \"1.6.1\")", - "humanwgs_family.custom_deepvariant_model_tar": "File? (optional)", - "humanwgs_family.pharmcat_version": "String (optional, default = \"2.15.0\")", "humanwgs_family.pharmcat_min_coverage": "Int (optional, default = 10)", "humanwgs_family.tertiary_map_file": "File? (optional)", "humanwgs_family.glnexus_mem_gb": "Int? 
(optional)", diff --git a/workflows/family.wdl b/workflows/family.wdl index 7324b796..0b612805 100644 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -24,15 +24,6 @@ workflow humanwgs_family { ref_map_file: { name: "TSV containing reference genome file paths; must match backend" } - deepvariant_version: { - name: "DeepVariant version" - } - custom_deepvariant_model_tar: { - name: "Custom DeepVariant model tarball" - } - pharmcat_version: { - name: "PharmCAT version" - } pharmcat_min_coverage: { name: "Minimum coverage for PharmCAT" } @@ -74,13 +65,6 @@ workflow humanwgs_family { File ref_map_file - # These options are only intended for testing purposes. - # There is no guarantee that the pipeline will work with - # other version of DeepVariant or with custom models. - String deepvariant_version = "1.6.1" - File? custom_deepvariant_model_tar - - String pharmcat_version = "2.15.4" Int pharmcat_min_coverage = 10 String phenotypes = "HP:0000001" @@ -123,8 +107,6 @@ workflow humanwgs_family { sex = sample.sex, hifi_reads = sample.hifi_reads, ref_map_file = ref_map_file, - deepvariant_version = deepvariant_version, - custom_deepvariant_model_tar = custom_deepvariant_model_tar, single_sample = single_sample, gpu = gpu, default_runtime_attributes = default_runtime_attributes @@ -159,7 +141,6 @@ workflow humanwgs_family { trgt_vcf_index = upstream.trgt_vcf_index[sample_index], aligned_bam = upstream.out_bam[sample_index], aligned_bam_index = upstream.out_bam_index[sample_index], - pharmcat_version = pharmcat_version, pharmcat_min_coverage = pharmcat_min_coverage, ref_map_file = ref_map_file, default_runtime_attributes = default_runtime_attributes diff --git a/workflows/singleton.inputs.json b/workflows/singleton.inputs.json index 472b8867..57f5e614 100644 --- a/workflows/singleton.inputs.json +++ b/workflows/singleton.inputs.json @@ -6,9 +6,6 @@ ], "humanwgs_singleton.phenotypes": "String? 
(optional)", "humanwgs_singleton.ref_map_file": "File", - "humanwgs_singleton.deepvariant_version": "String (optional, default = \"1.6.1\")", - "humanwgs_singleton.custom_deepvariant_model_tar": "File? (optional)", - "humanwgs_singleton.pharmcat_version": "String (optional, default = \"2.15.0\")", "humanwgs_singleton.pharmcat_min_coverage": "Int (optional, default = 10)", "humanwgs_singleton.tertiary_map_file": "File? (optional)", "humanwgs_singleton.gpu": "Boolean (optional, default = false)", diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl index 9f81e1d6..15e52dc7 100644 --- a/workflows/singleton.wdl +++ b/workflows/singleton.wdl @@ -28,15 +28,6 @@ workflow humanwgs_singleton { ref_map_file: { name: "TSV containing reference genome file paths; must match backend" } - deepvariant_version: { - name: "DeepVariant version" - } - custom_deepvariant_model_tar: { - name: "Custom DeepVariant model tarball" - } - pharmcat_version: { - name: "PharmCAT version" - } pharmcat_min_coverage: { name: "Minimum coverage for PharmCAT" } @@ -78,13 +69,6 @@ workflow humanwgs_singleton { File ref_map_file - # These options are only intended for testing purposes. - # There is no guarantee that the pipeline will work with - # other version of DeepVariant or with custom models. - String deepvariant_version = "1.6.1" - File? 
custom_deepvariant_model_tar - - String pharmcat_version = "2.15.4" Int pharmcat_min_coverage = 10 String phenotypes = "HP:0000001" @@ -119,8 +103,6 @@ workflow humanwgs_singleton { sex = sex, hifi_reads = hifi_reads, ref_map_file = ref_map_file, - deepvariant_version = deepvariant_version, - custom_deepvariant_model_tar = custom_deepvariant_model_tar, single_sample = true, gpu = gpu, default_runtime_attributes = default_runtime_attributes @@ -137,7 +119,6 @@ workflow humanwgs_singleton { trgt_vcf_index = upstream.trgt_vcf_index, aligned_bam = upstream.out_bam, aligned_bam_index = upstream.out_bam_index, - pharmcat_version = pharmcat_version, pharmcat_min_coverage = pharmcat_min_coverage, ref_map_file = ref_map_file, default_runtime_attributes = default_runtime_attributes diff --git a/workflows/upstream/inputs.json b/workflows/upstream/inputs.json index ac0324b3..5174d8d2 100644 --- a/workflows/upstream/inputs.json +++ b/workflows/upstream/inputs.json @@ -3,8 +3,6 @@ "upstream.sex": "String? (optional)", "upstream.hifi_reads": "Array[File]", "upstream.ref_map_file": "File", - "upstream.deepvariant_version": "String", - "upstream.custom_deepvariant_model_tar": "File? (optional)", "upstream.single_sample": "Boolean (optional, default = false)", "upstream.gpu": "Boolean", "upstream.default_runtime_attributes": { diff --git a/workflows/upstream/upstream.wdl b/workflows/upstream/upstream.wdl index 9da7be73..11efaff5 100644 --- a/workflows/upstream/upstream.wdl +++ b/workflows/upstream/upstream.wdl @@ -30,12 +30,6 @@ workflow upstream { ref_map_file: { name: "TSV containing reference genome information" } - deepvariant_version: { - name: "DeepVariant version" - } - custom_deepvariant_model_tar: { - name: "Custom DeepVariant model tarball" - } single_sample: { name: "Single sample workflow" } @@ -54,9 +48,6 @@ workflow upstream { File ref_map_file - String deepvariant_version - File? 
custom_deepvariant_model_tar - Boolean single_sample = false Boolean gpu @@ -117,8 +108,6 @@ workflow upstream { ref_fasta = ref_map["fasta"], # !FileCoercion ref_index = ref_map["fasta_index"], # !FileCoercion ref_name = ref_map["name"], - deepvariant_version = deepvariant_version, - custom_deepvariant_model_tar = custom_deepvariant_model_tar, gpu = gpu, default_runtime_attributes = default_runtime_attributes } From 0618d4d96b95a253e8cdec476d0f58f62f91654c Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 13 Feb 2025 22:25:30 +0000 Subject: [PATCH 06/61] update wdl-ci config file after successful tests --- wdl-ci.config.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 102c861d..4244be5a 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -2075,7 +2075,7 @@ "tasks": { "deepvariant_make_examples": { "key": "deepvariant_make_examples", - "digest": "", + "digest": "kenkd6hpkprqjxzrr67ws7jsyflofniu", "tests": [ { "inputs": { @@ -2115,7 +2115,7 @@ }, "deepvariant_call_variants_cpu": { "key": "deepvariant_call_variants_cpu", - "digest": "", + "digest": "7atkl2o6dbmjxf7uxae2bjnfnt4lwan4", "tests": [ { "inputs": { @@ -2149,7 +2149,7 @@ }, "deepvariant_call_variants_gpu": { "key": "deepvariant_call_variants_gpu", - "digest": "", + "digest": "nsrzbzbmry52xlmgtq3nokymfcynkesh", "tests": [ { "inputs": { @@ -2183,7 +2183,7 @@ }, "deepvariant_postprocess_variants": { "key": "deepvariant_postprocess_variants", - "digest": "", + "digest": "nx7xlzdxdgso2oviegbruwywwd7cjdgl", "tests": [ { "inputs": { From c03d62c348d6112f767cd446a9e8937e10f8daaf Mon Sep 17 00:00:00 2001 From: William Rowell Date: Thu, 20 Feb 2025 13:01:44 -0800 Subject: [PATCH 07/61] Update to CoLoRSdb v1.2.0 --- GRCh38.tertiary_map.v2p0p0.template.tsv | 6 +++--- .../GRCh38.tertiary_map.v2p0p0.aws.tsv | 6 +++--- .../azure/GRCh38.tertiary_map.v2p0p0.azure.tsv | 14 
+++++++------- backends/gcp/GRCh38.tertiary_map.v2p0p0.gcp.tsv | 6 +++--- backends/hpc/GRCh38.tertiary_map.v2p0p0.hpc.tsv | 6 +++--- docs/tertiary.md | 2 +- wdl-ci.config.json | 6 +++--- 7 files changed, 23 insertions(+), 23 deletions(-) diff --git a/GRCh38.tertiary_map.v2p0p0.template.tsv b/GRCh38.tertiary_map.v2p0p0.template.tsv index 86b7b3b6..af86967d 100644 --- a/GRCh38.tertiary_map.v2p0p0.template.tsv +++ b/GRCh38.tertiary_map.v2p0p0.template.tsv @@ -2,11 +2,11 @@ slivar_js /hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v0.2.8.js ensembl_gff /hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz lof_lookup /hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt clinvar_lookup /hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt -slivar_gnotate_files /hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.1.0.deepvariant.glnexus.zip +slivar_gnotate_files /hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip slivar_gnotate_prefixes gnomad,colors slivar_max_af 0.03 slivar_max_nhomalt 4 slivar_max_ac 4 slivar_min_gq 5 -svpack_pop_vcfs /hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz -svpack_pop_vcf_indices /hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz.tbi \ No newline at end of file +svpack_pop_vcfs /hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz +svpack_pop_vcf_indices 
/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi \ No newline at end of file diff --git a/backends/aws-healthomics/GRCh38.tertiary_map.v2p0p0.aws.tsv b/backends/aws-healthomics/GRCh38.tertiary_map.v2p0p0.aws.tsv index 8bbb9b1d..a8770690 100644 --- a/backends/aws-healthomics/GRCh38.tertiary_map.v2p0p0.aws.tsv +++ b/backends/aws-healthomics/GRCh38.tertiary_map.v2p0p0.aws.tsv @@ -2,11 +2,11 @@ slivar_js s3:///hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v ensembl_gff s3:///hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz lof_lookup s3:///hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt clinvar_lookup s3:///hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt -slivar_gnotate_files s3:///hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,s3:///hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.1.0.deepvariant.glnexus.zip +slivar_gnotate_files s3:///hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,s3:///hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip slivar_gnotate_prefixes gnomad,colors slivar_max_af 0.03 slivar_max_nhomalt 4 slivar_max_ac 4 slivar_min_gq 5 -svpack_pop_vcfs s3:///hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,s3:///hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz -svpack_pop_vcf_indices s3:///hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,s3:///hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz.tbi +svpack_pop_vcfs s3:///hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,s3:///hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz +svpack_pop_vcf_indices 
s3:///hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,s3:///hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi diff --git a/backends/azure/GRCh38.tertiary_map.v2p0p0.azure.tsv b/backends/azure/GRCh38.tertiary_map.v2p0p0.azure.tsv index f3ce0508..3743aaed 100644 --- a/backends/azure/GRCh38.tertiary_map.v2p0p0.azure.tsv +++ b/backends/azure/GRCh38.tertiary_map.v2p0p0.azure.tsv @@ -1,12 +1,12 @@ -slivar_js https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v0.2.8.js -ensembl_gff https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz -lof_lookup https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt -clinvar_lookup https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt -slivar_gnotate_files https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.1.0.deepvariant.glnexus.zip +slivar_js /datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v0.2.8.js +ensembl_gff /datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz +lof_lookup /datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt +clinvar_lookup /datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt +slivar_gnotate_files 
/datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,/datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip slivar_gnotate_prefixes gnomad,colors slivar_max_af 0.03 slivar_max_nhomalt 4 slivar_max_ac 4 slivar_min_gq 5 -svpack_pop_vcfs https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz -svpack_pop_vcf_indices https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz.tbi +svpack_pop_vcfs /datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,/datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz +svpack_pop_vcf_indices /datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,/datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi diff --git a/backends/gcp/GRCh38.tertiary_map.v2p0p0.gcp.tsv b/backends/gcp/GRCh38.tertiary_map.v2p0p0.gcp.tsv index bf5d42a9..1e916b9c 100644 --- a/backends/gcp/GRCh38.tertiary_map.v2p0p0.gcp.tsv +++ b/backends/gcp/GRCh38.tertiary_map.v2p0p0.gcp.tsv @@ -2,11 +2,11 @@ slivar_js gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v0.2 ensembl_gff gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz lof_lookup 
gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt clinvar_lookup gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt -slivar_gnotate_files gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.1.0.deepvariant.glnexus.zip +slivar_gnotate_files gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip slivar_gnotate_prefixes gnomad,colors slivar_max_af 0.03 slivar_max_nhomalt 4 slivar_max_ac 4 slivar_min_gq 5 -svpack_pop_vcfs gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz -svpack_pop_vcf_indices gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz.tbi +svpack_pop_vcfs gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz +svpack_pop_vcf_indices gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi diff --git a/backends/hpc/GRCh38.tertiary_map.v2p0p0.hpc.tsv b/backends/hpc/GRCh38.tertiary_map.v2p0p0.hpc.tsv index f7a2d2f3..3a2f37dc 100644 --- a/backends/hpc/GRCh38.tertiary_map.v2p0p0.hpc.tsv +++ b/backends/hpc/GRCh38.tertiary_map.v2p0p0.hpc.tsv @@ -2,11 +2,11 @@ slivar_js /hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v0.2.8.js ensembl_gff 
/hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz lof_lookup /hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt clinvar_lookup /hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt -slivar_gnotate_files /hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.1.0.deepvariant.glnexus.zip +slivar_gnotate_files /hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip slivar_gnotate_prefixes gnomad,colors slivar_max_af 0.03 slivar_max_nhomalt 4 slivar_max_ac 4 slivar_min_gq 5 -svpack_pop_vcfs /hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz -svpack_pop_vcf_indices /hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz.tbi +svpack_pop_vcfs /hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz +svpack_pop_vcf_indices /hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi diff --git a/docs/tertiary.md b/docs/tertiary.md index ff158789..2442ed69 100644 --- a/docs/tertiary.md +++ b/docs/tertiary.md @@ -8,7 +8,7 @@ This is a simple, opinionated subworkflow for tertiary analysis in rare disease - We generate a pedigree describing sample relationships and phenotype status, based on the input provided to the entrypoint workflow. In the case of a singleton, the pedigree is a single row. 
- Using the comma-delimited list of HPO terms provided to the entrypoint workflow, we generate a Phenotype Rank (Phrank) lookup table, a simple two column lookup table mapping gene symbols to Phrank score. Phrank scores are positive real numbers (or null) such that higher scores indicate a gene is more likely to be relevant to the phenotypes. The Phrank lookup is used to prioritize variants based on the predicted impact on the gene and the gene's relevance to the phenotype. Phrank scores are not normalized, and providing more phenotypes for a sample will result in a higher maximum Phrank score. - Reference data is provided by the [`ref_map_file`](./ref_map.md) input. This workflow is currently only compatible with the GRCh38 human reference. -- Population data, other supplemental data, and allele thresholds are provided by the [`tertiary_map_file`](./tertiary_map.md) input. We provide a version of this file that uses population data from [gnomAD v4.1](https://gnomad.broadinstitute.org/news/2024-05-gnomad-v4-1-updates/) and [CoLoRSdb](https://colorsdb.org) v1.1.0 [10.5281/zenodo.13145123](https://zenodo.org/records/13145123). We provide the ability to tweak the allele thresholds, but the default values are recommended, as increasing these will result in much higher resource usage. +- Population data, other supplemental data, and allele thresholds are provided by the [`tertiary_map_file`](./tertiary_map.md) input. We provide a version of this file that uses population data from [gnomAD v4.1](https://gnomad.broadinstitute.org/news/2024-05-gnomad-v4-1-updates/) and [CoLoRSdb](https://colorsdb.org) v1.2.0 [10.5281/zenodo.14814308](https://zenodo.org/records/14814308). We provide the ability to tweak the allele thresholds, but the default values are recommended, as increasing these will result in much higher resource usage. 
## Process diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 4244be5a..746805b4 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -53,7 +53,7 @@ "slivar_js": "${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v0.2.8.js", "gnotate_files": [ "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip", - "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.1.0.deepvariant.glnexus.zip" + "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip" ], "af_expr": [ "INFO.gnomad_af <= 0.03", @@ -125,11 +125,11 @@ "pedigree": "${resources_file_path}/svpack_filter_annotated/input/HG002-trio.ped", "population_vcfs": [ "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz", - "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz" + "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz" ], "population_vcf_indices": [ "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi", - "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.1.0.pbsv.jasmine.vcf.gz.tbi" + "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi" ], "gff": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz", "runtime_attributes": "${default_runtime_attributes}" From a818b97307bbcdaf61e03c1d152a973db4d4ed07 Mon Sep 17 00:00:00 2001 From: William Rowell Date: Thu, 20 Feb 2025 13:06:41 -0800 Subject: [PATCH 08/61] Force tests. 
--- wdl-ci.config.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 746805b4..fc5695d2 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -37,7 +37,7 @@ "tasks": { "slivar_small_variant": { "key": "slivar_small_variant", - "digest": "nz7zrdaatasxka5rziksdmc2oq4tr23z", + "digest": "", "tests": [ { "inputs": { @@ -117,7 +117,7 @@ }, "svpack_filter_annotated": { "key": "svpack_filter_annotated", - "digest": "lljobpfqb23lu2zablgfstcozrrny5xt", + "digest": "", "tests": [ { "inputs": { From 41401022b5ac91a48616a921a92bd84c54be94a5 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 20 Feb 2025 21:15:50 +0000 Subject: [PATCH 09/61] update wdl-ci config file after successful tests --- wdl-ci.config.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index fc5695d2..746805b4 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -37,7 +37,7 @@ "tasks": { "slivar_small_variant": { "key": "slivar_small_variant", - "digest": "", + "digest": "nz7zrdaatasxka5rziksdmc2oq4tr23z", "tests": [ { "inputs": { @@ -117,7 +117,7 @@ }, "svpack_filter_annotated": { "key": "svpack_filter_annotated", - "digest": "", + "digest": "lljobpfqb23lu2zablgfstcozrrny5xt", "tests": [ { "inputs": { From f0c065877dbc62d9915c81ad07892c92cca61ba3 Mon Sep 17 00:00:00 2001 From: William Rowell Date: Fri, 21 Feb 2025 13:41:12 -0800 Subject: [PATCH 10/61] Update docs to refer to new reference bundle on Zenodo. 
--- README.md | 2 +- docs/backend-hpc.md | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index dd7b1fca..a8b3792b 100644 --- a/README.md +++ b/README.md @@ -127,7 +127,7 @@ At a high level, we have two types of inputs files: The resource bundle containing the GRCh38 reference and other files used in this workflow can be downloaded from Zenodo: -[10.5281/zenodo.14027047](https://zenodo.org/records/14027047) +[10.5281/zenodo.14908106](https://zenodo.org/records/14908106) # Tool versions and Docker images diff --git a/docs/backend-hpc.md b/docs/backend-hpc.md index e7c2883e..5ec63247 100644 --- a/docs/backend-hpc.md +++ b/docs/backend-hpc.md @@ -56,14 +56,14 @@ cromwell run workflows/singleton.wdl --input ## Reference data bundle -[10.5281/zenodo.14027047](https://zenodo.org/records/14027047) +[10.5281/zenodo.14908106](https://zenodo.org/records/14908106) -Reference data is hosted on Zenodo at [10.5281/zenodo.14027047](https://zenodo.org/record/14027047). Download the reference data bundle and extract it to a location on your HPC, then update the input template file with the path to the reference data. +Reference data is hosted on Zenodo at [10.5281/zenodo.14908106](https://zenodo.org/record/14908106). Download the reference data bundle and extract it to a location on your HPC, then update the input template file with the path to the reference data. ```bash ## download the reference data bundle -wget https://zenodo.org/record/14027047/files/hifi-wdl-resources-v2.0.0.tar +wget https://zenodo.org/record/14908106/files/hifi-wdl-resources-v2.1.0.tar ## extract the reference data bundle and rename as dataset -tar -xvf hifi-wdl-resources-v2.0.0.tar +tar -xvf hifi-wdl-resources-v2.1.0.tar ``` From db51534f25d03da87ab88b14f0ef5d699927ef6d Mon Sep 17 00:00:00 2001 From: William Rowell Date: Fri, 7 Feb 2025 17:38:38 -0800 Subject: [PATCH 11/61] Replace json->pedigree with simple WDL native functions. 
PEDigree file syntax is generated directly within WDL for each sample independently. PED files are written within tasks by `write_tsv()` by concatenating sample lines. `is_duo_kid` and `is_trio_kid` arrays are generated for convenience in preparation for duo-/trio-specific secondary/tertiary analyses. Move phenotype lookup to tertiary subworkflow. Adjust tests for changes to write_phrank. Fix write_phrank test. If sex cannot be inferred by mosdepth, output is empty string. Change `pedigree_sex["UNKNOWN"]` to `pedigree_sex[""]`. --- wdl-ci.config.json | 348 ++++++++------------------------ workflows/family.wdl | 41 ++-- workflows/singleton.wdl | 30 +-- workflows/tertiary/tertiary.wdl | 68 ++++--- 4 files changed, 171 insertions(+), 316 deletions(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 746805b4..042cab3a 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -37,13 +37,38 @@ "tasks": { "slivar_small_variant": { "key": "slivar_small_variant", - "digest": "nz7zrdaatasxka5rziksdmc2oq4tr23z", + "digest": "pssprgcwn5b4liynj6ujl4poz6xx6tay", "tests": [ { "inputs": { "vcf": "${resources_file_path}/slivar_small_variant/input/HG002-trio.GRCh38.small_variants.vcf.gz", "vcf_index": "${resources_file_path}/slivar_small_variant/input/HG002-trio.GRCh38.small_variants.vcf.gz.tbi", - "pedigree": "${resources_file_path}/slivar_small_variant/input/HG002-trio.ped", + "sample_metadata": [ + [ + "HG002-trio", + "HG002", + "HG003", + "HG004", + "1", + "2" + ], + [ + "HG002-trio", + "HG003", + ".", + ".", + "1", + "1" + ], + [ + "HG002-trio", + "HG004", + ".", + ".", + "2", + "1" + ] + ], "phrank_lookup": "${resources_file_path}/slivar_small_variant/input/HG002-trio_phrank.tsv", "reference": "${ref_fasta}", "reference_index": "${ref_index}", @@ -117,12 +142,37 @@ }, "svpack_filter_annotated": { "key": "svpack_filter_annotated", - "digest": "lljobpfqb23lu2zablgfstcozrrny5xt", + "digest": "62s5ilqjjfenj2earblxjez57cca6xrp", "tests": [ { "inputs": { "sv_vcf": 
"${resources_file_path}/svpack_filter_annotated/input/sawfish/HG002.HG002-trio.joint.GRCh38.structural_variants.phased.vcf.gz", - "pedigree": "${resources_file_path}/svpack_filter_annotated/input/HG002-trio.ped", + "sample_metadata": [ + [ + "HG002-trio", + "HG002", + "HG003", + "HG004", + "1", + "2" + ], + [ + "HG002-trio", + "HG003", + ".", + ".", + "1", + "1" + ], + [ + "HG002-trio", + "HG004", + ".", + ".", + "2", + "1" + ] + ], "population_vcfs": [ "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz", "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz" @@ -149,12 +199,37 @@ }, "slivar_svpack_tsv": { "key": "slivar_svpack_tsv", - "digest": "mdnthjkki3cj62rev5nl7le3nv4cgzbt", + "digest": "bm3dufvjozq4pr2dkopvk3htphiruyjn", "tests": [ { "inputs": { "filtered_vcf": "${resources_file_path}/slivar_svpack_tsv/input/sawfish/HG002.HG002-trio.joint.GRCh38.structural_variants.phased.svpack.vcf.gz", - "pedigree": "${resources_file_path}/slivar_svpack_tsv/input/HG002-trio.ped", + "sample_metadata": [ + [ + "HG002-trio", + "HG002", + "HG003", + "HG004", + "1", + "2" + ], + [ + "HG002-trio", + "HG003", + ".", + ".", + "1", + "1" + ], + [ + "HG002-trio", + "HG004", + ".", + ".", + "2", + "1" + ] + ], "lof_lookup": "${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt", "clinvar_lookup": "${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt", "phrank_lookup": "${resources_file_path}/slivar_svpack_tsv/input/HG002-trio_phrank.tsv", @@ -1783,272 +1858,23 @@ } } }, - "workflows/wdl-common/wdl/tasks/write_ped_phrank.wdl": { - "key": "workflows/wdl-common/wdl/tasks/write_ped_phrank.wdl", + "workflows/wdl-common/wdl/tasks/write_phrank.wdl": { + "key": "workflows/wdl-common/wdl/tasks/write_phrank.wdl", "name": "", "description": "", "tasks": { - "write_ped_phrank": { - "key": "write_ped_phrank", - "digest": 
"d3b7uvydynagkxs2w546ozi7q6oa7cim", + "write_phrank": { + "key": "write_phrank", + "digest": "hfknzcdeg3y5whspi5ndsoo2eut7wpd7", "tests": [ { "inputs": { - "id": "HG002", - "sex": "MALE", - "phenotypes": "HP:0000001", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "pedigree": { - "value": "${resources_file_path}/write_ped_phrank/output/singleton/HG002.ped", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - }, - "phrank_lookup": { - "value": "${resources_file_path}/write_ped_phrank/output/singleton/HG002_phrank.tsv", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - } - } - }, - { - "inputs": { - "id": "HG002", - "phenotypes": "HP:0000001", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "pedigree": { - "value": "${resources_file_path}/write_ped_phrank/output/singleton_no_sex/HG002.ped", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - }, - "phrank_lookup": { - "value": "${resources_file_path}/write_ped_phrank/output/singleton_no_sex/HG002_phrank.tsv", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - } - } - }, - { - "inputs": { - "id": "HG002-trio", - "family": { - "family_id": "HG002-trio", - "samples": [ - { - "sample_id": "HG002", - "sex": "MALE", - "affected": true, - "father_id": "HG003", - "mother_id": "HG004", - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - }, - { - "sample_id": "HG003", - "sex": "MALE", - "affected": false, - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - }, - { - "sample_id": "HG004", - "sex": "FEMALE", - "affected": false, - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - } - ] - }, - "phenotypes": 
"HP:0000001", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "pedigree": { - "value": "${resources_file_path}/write_ped_phrank/output/trio/HG002-trio.ped", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - }, - "phrank_lookup": { - "value": "${resources_file_path}/write_ped_phrank/output/trio/HG002-trio_phrank.tsv", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - } - } - }, - { - "inputs": { - "id": "HG002-trio", - "family": { - "family_id": "HG002-trio", - "samples": [ - { - "sample_id": "HG002", - "sex": "MALE", - "affected": true, - "mother_id": "HG004", - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - }, - { - "sample_id": "HG004", - "sex": "FEMALE", - "affected": false, - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - } - ] - }, - "phenotypes": "HP:0000001", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "pedigree": { - "value": "${resources_file_path}/write_ped_phrank/output/trio_no_father/HG002-trio.ped", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - }, - "phrank_lookup": { - "value": "${resources_file_path}/write_ped_phrank/output/trio_no_father/HG002-trio_phrank.tsv", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - } - } - }, - { - "inputs": { - "id": "HG002-trio", - "family": { - "family_id": "HG002-trio", - "samples": [ - { - "sample_id": "HG002", - "sex": "MALE", - "affected": true, - "father_id": "HG003", - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - }, - { - "sample_id": "HG003", - "sex": "MALE", - "affected": false, - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - } - ] - 
}, - "phenotypes": "HP:0000001", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "pedigree": { - "value": "${resources_file_path}/write_ped_phrank/output/trio_no_mother/HG002-trio.ped", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - }, - "phrank_lookup": { - "value": "${resources_file_path}/write_ped_phrank/output/trio_no_mother/HG002-trio_phrank.tsv", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - } - } - }, - { - "inputs": { - "id": "HG002-trio", - "family": { - "family_id": "HG002-trio", - "samples": [ - { - "sample_id": "HG002", - "affected": true, - "father_id": "HG003", - "mother_id": "HG004", - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - }, - { - "sample_id": "HG003", - "affected": false, - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - }, - { - "sample_id": "HG004", - "affected": false, - "hifi_reads": [ - "${resources_file_path}/write_ped_phrank/input/dummy.bam" - ] - } - ] - }, "phenotypes": "HP:0000001", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { - "pedigree": { - "value": "${resources_file_path}/write_ped_phrank/output/trio_no_sex/HG002-trio.ped", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "calculate_md5sum" - ] - }, "phrank_lookup": { - "value": "${resources_file_path}/write_ped_phrank/output/trio_no_sex/HG002-trio_phrank.tsv", + "value": "${resources_file_path}/write_ped_phrank/output/singleton/phrank.tsv", "test_tasks": [ "compare_file_basename", "check_tab_delimited", diff --git a/workflows/family.wdl b/workflows/family.wdl index 0b612805..47f6f644 100644 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -7,7 +7,6 @@ import "joint/joint.wdl" as Joint import "downstream/downstream.wdl" as Downstream import 
"wdl-common/wdl/tasks/bcftools.wdl" as Bcftools import "wdl-common/wdl/tasks/trgt.wdl" as Trgt -import "wdl-common/wdl/tasks/write_ped_phrank.wdl" as Write_ped_phrank import "tertiary/tertiary.wdl" as TertiaryAnalysis import "wdl-common/wdl/tasks/utilities.wdl" as Utilities @@ -99,8 +98,17 @@ workflow humanwgs_family { Boolean single_sample = length(family.samples) == 1 + Map[String, String] pedigree_sex = { + "MALE": "1", + "FEMALE": "2", + "": "." + } + scatter (sample in family.samples) { String sample_id = sample.sample_id + Boolean is_trio_kid = defined(sample.father_id) && defined(sample.mother_id) # !UnusedDeclaration + Boolean is_duo_kid = defined(sample.father_id) != defined(sample.mother_id) # !UnusedDeclaration + call Upstream.upstream { input: sample_id = sample.sample_id, @@ -111,6 +119,17 @@ workflow humanwgs_family { gpu = gpu, default_runtime_attributes = default_runtime_attributes } + + # write sample metadata similar to pedigree format + # family_id, sample_id, father_id, mother_id, sex, affected + Array[String] sample_metadata = [ + family.family_id, + sample.sample_id, + select_first([sample.father_id, "."]), + select_first([sample.mother_id, "."]), + pedigree_sex[upstream.inferred_sex], + if sample.affected then "2" else "1" + ] } if (!single_sample) { @@ -217,23 +236,12 @@ workflow humanwgs_family { } if (defined(tertiary_map_file)) { - scatter (sample in family.samples) { - Array[File] hifi_reads = sample.hifi_reads - } - - call Write_ped_phrank.write_ped_phrank { - input: - id = family.family_id, - family = family, - phenotypes = phenotypes, - disk_size = ceil(size(flatten(hifi_reads), "GB")) + 10, - runtime_attributes = default_runtime_attributes - } - call TertiaryAnalysis.tertiary_analysis { input: - pedigree = write_ped_phrank.pedigree, - phrank_lookup = write_ped_phrank.phrank_lookup, + sample_metadata = sample_metadata, + phenotypes = phenotypes, + is_trio_kid = is_trio_kid, + is_duo_kid = is_duo_kid, small_variant_vcf = 
select_first([merge_small_variant_vcfs.merged_vcf, downstream.phased_small_variant_vcf[0]]), small_variant_vcf_index = select_first([merge_small_variant_vcfs.merged_vcf_index, downstream.phased_small_variant_vcf_index[0]]), sv_vcf = select_first([merge_sv_vcfs.merged_vcf, downstream.phased_sv_vcf[0]]), @@ -367,7 +375,6 @@ workflow humanwgs_family { File? joint_trgt_vcf_index = trgt_merge.merged_vcf_index # tertiary analysis outputs - File? pedigree = write_ped_phrank.pedigree File? tertiary_small_variant_filtered_vcf = tertiary_analysis.small_variant_filtered_vcf File? tertiary_small_variant_filtered_vcf_index = tertiary_analysis.small_variant_filtered_vcf_index File? tertiary_small_variant_filtered_tsv = tertiary_analysis.small_variant_filtered_tsv diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl index 15e52dc7..351414a1 100644 --- a/workflows/singleton.wdl +++ b/workflows/singleton.wdl @@ -4,7 +4,6 @@ import "humanwgs_structs.wdl" import "wdl-common/wdl/workflows/backend_configuration/backend_configuration.wdl" as BackendConfiguration import "upstream/upstream.wdl" as Upstream import "downstream/downstream.wdl" as Downstream -import "wdl-common/wdl/tasks/write_ped_phrank.wdl" as Write_ped_phrank import "tertiary/tertiary.wdl" as TertiaryAnalysis import "wdl-common/wdl/tasks/utilities.wdl" as Utilities @@ -165,19 +164,27 @@ workflow humanwgs_singleton { runtime_attributes = default_runtime_attributes } - if (defined(tertiary_map_file)) { - call Write_ped_phrank.write_ped_phrank { - input: - id = sample_id, - sex = select_first([sex, upstream.inferred_sex]), - phenotypes = phenotypes, - runtime_attributes = default_runtime_attributes - } + Map[String, String] pedigree_sex = { + "MALE": "1", + "FEMALE": "2", + "": "." 
+ } + + # write sample metadata similar to pedigree format + # family_id, sample_id, father_id, mother_id, sex, affected + Array[String] sample_metadata = [ + sample_id, sample_id, + ".", ".", + pedigree_sex[upstream.inferred_sex], "2" + ] + if (defined(tertiary_map_file)) { call TertiaryAnalysis.tertiary_analysis { input: - pedigree = write_ped_phrank.pedigree, - phrank_lookup = write_ped_phrank.phrank_lookup, + sample_metadata = [sample_metadata], + phenotypes = phenotypes, + is_trio_kid = [false], + is_duo_kid = [false], small_variant_vcf = downstream.phased_small_variant_vcf, small_variant_vcf_index = downstream.phased_small_variant_vcf_index, sv_vcf = downstream.phased_sv_vcf, @@ -302,7 +309,6 @@ workflow humanwgs_singleton { File? pharmcat_report_json = downstream.pharmcat_report_json # tertiary analysis outputs - File? pedigree = write_ped_phrank.pedigree File? tertiary_small_variant_filtered_vcf = tertiary_analysis.small_variant_filtered_vcf File? tertiary_small_variant_filtered_vcf_index = tertiary_analysis.small_variant_filtered_vcf_index File? tertiary_small_variant_filtered_tsv = tertiary_analysis.small_variant_filtered_tsv diff --git a/workflows/tertiary/tertiary.wdl b/workflows/tertiary/tertiary.wdl index 224ccfe4..d283287c 100644 --- a/workflows/tertiary/tertiary.wdl +++ b/workflows/tertiary/tertiary.wdl @@ -1,6 +1,7 @@ version 1.0 import "../humanwgs_structs.wdl" +import "../wdl-common/wdl/tasks/write_phrank.wdl" as Write_phrank import "../wdl-common/wdl/tasks/utilities.wdl" as Utilities workflow tertiary_analysis { @@ -9,11 +10,17 @@ workflow tertiary_analysis { } parameter_meta { - pedigree: { - name: "PLINK pedigree (PED) format" + sample_metadata: { + name: "PLINK pedigree (PED) formatted lines." 
} - phrank_lookup: { - name: "Gene symbol -> Phrank phenotype rank score lookup table" + phenotypes: { + name: "Comma-delimited list of HPO codes for phenotypes" + } + is_trio_kid: { + name: "Boolean array indicating if the sample is a child with both parents defined" + } + is_duo_kid: { + name: "Boolean array indicating if the sample is a child with only one parent defined" } small_variant_vcf: { name: "Small variant VCF" @@ -66,8 +73,11 @@ workflow tertiary_analysis { } input { - File pedigree - File phrank_lookup + Array[Array[String]] sample_metadata + String phenotypes + + Array[Boolean] is_trio_kid # !UnusedDeclaration + Array[Boolean] is_duo_kid # !UnusedDeclaration File small_variant_vcf File small_variant_vcf_index @@ -83,6 +93,12 @@ workflow tertiary_analysis { Map[String, String] ref_map = read_map(ref_map_file) Map[String, String] tertiary_map = read_map(tertiary_map_file) + call Write_phrank.write_phrank { + input: + phenotypes = phenotypes, + runtime_attributes = default_runtime_attributes + } + call Utilities.split_string as split_gnotate_files { input: concatenated_string = tertiary_map["slivar_gnotate_files"], @@ -114,8 +130,8 @@ workflow tertiary_analysis { input: vcf = small_variant_vcf, vcf_index = small_variant_vcf_index, - pedigree = pedigree, - phrank_lookup = phrank_lookup, + sample_metadata = sample_metadata, + phrank_lookup = write_phrank.phrank_lookup, reference = ref_map["fasta"], # !FileCoercion reference_index = ref_map["fasta_index"], # !FileCoercion gff = tertiary_map["ensembl_gff"], # !FileCoercion @@ -148,7 +164,7 @@ workflow tertiary_analysis { call svpack_filter_annotated { input: sv_vcf = sv_vcf, - pedigree = pedigree, + sample_metadata = sample_metadata, population_vcfs = split_sv_vcfs.array, # !FileCoercion population_vcf_indices = split_sv_vcf_indices.array, # !FileCoercion gff = tertiary_map["ensembl_gff"], # !FileCoercion @@ -158,10 +174,10 @@ workflow tertiary_analysis { call slivar_svpack_tsv { input: filtered_vcf = 
svpack_filter_annotated.svpack_vcf, - pedigree = pedigree, + sample_metadata = sample_metadata, lof_lookup = tertiary_map["lof_lookup"], # !FileCoercion clinvar_lookup = tertiary_map["clinvar_lookup"], # !FileCoercion - phrank_lookup = phrank_lookup, + phrank_lookup = write_phrank.phrank_lookup, runtime_attributes = default_runtime_attributes } @@ -191,8 +207,8 @@ task slivar_small_variant { vcf_index: { name: "Small variant VCF index" } - pedigree: { - name: "PLINK pedigree (PED) format" + sample_metadata: { + name: "PLINK pedigree (PED) formatted lines." } phrank_lookup: { name: "Gene symbol -> Phrank phenotype rank score lookup table" @@ -257,7 +273,7 @@ task slivar_small_variant { File vcf File vcf_index - File pedigree + Array[Array[String]] sample_metadata File phrank_lookup File reference @@ -356,7 +372,7 @@ task slivar_small_variant { --sample-expr '~{sep=" && " sample_expr}' \ ~{sep=" " prefix("--gnotate ", gnotate_files)} \ --vcf ~{vcf_basename}.norm.bcf \ - --ped ~{pedigree} \ + --ped ~{write_tsv(sample_metadata)} \ | bcftools csq \ --local-csq \ --samples - \ @@ -376,7 +392,7 @@ task slivar_small_variant { --skip ~{sep=',' skip_list} \ --vcf ~{vcf_basename}.norm.slivar.vcf.gz \ --sample-field comphet_side \ - --ped ~{pedigree} \ + --ped ~{write_tsv(sample_metadata)} \ --allow-non-trios \ | add_comphet_phase.py \ | bcftools view \ @@ -395,7 +411,7 @@ task slivar_small_variant { --gene-description ~{lof_lookup} \ --gene-description ~{clinvar_lookup} \ --gene-description ~{phrank_lookup} \ - --ped ~{pedigree} \ + --ped ~{write_tsv(sample_metadata)} \ --out /dev/stdout \ ~{vcf_basename}.norm.slivar.vcf.gz \ | sed '1 s/gene_description_1/lof/;s/gene_description_2/clinvar/;s/gene_description_3/phrank/;' \ @@ -409,7 +425,7 @@ task slivar_small_variant { --gene-description ~{lof_lookup} \ --gene-description ~{clinvar_lookup} \ --gene-description ~{phrank_lookup} \ - --ped ~{pedigree} \ + --ped ~{write_tsv(sample_metadata)} \ --out /dev/stdout \ 
~{vcf_basename}.norm.slivar.compound_hets.vcf.gz \ | sed '1 s/gene_description_1/lof/;s/gene_description_2/clinvar/;s/gene_description_3/phrank/;' \ @@ -444,8 +460,8 @@ task svpack_filter_annotated { } parameter_meta { - pedigree: { - name: "PLINK pedigree (PED) format" + sample_metadata: { + name: "PLINK pedigree (PED) formatted lines." } sv_vcf: { name: "Structural variant VCF" @@ -472,7 +488,7 @@ task svpack_filter_annotated { input { File sv_vcf - File pedigree + Array[Array[String]] sample_metadata Array[File] population_vcfs Array[File] population_vcf_indices @@ -492,7 +508,7 @@ task svpack_filter_annotated { echo "svpack version:" cat /opt/svpack/.git/HEAD - affected=$(awk -F'\t' '$6 ~ /2/ {{ print $2 }}' ~{pedigree} | paste -sd',') # TODO: potentially replace awk + affected=$(awk -F'\t' '$6 ~ /2/ {{ print $2 }}' ~{write_tsv(sample_metadata)} | paste -sd',') # TODO: potentially replace awk svpack \ filter \ @@ -546,8 +562,8 @@ task slivar_svpack_tsv { filtered_vcf : { name: "Filtered and annotated structural variant VCF" } - pedigree: { - name: "PLINK pedigree (PED) format" + sample_metadata: { + name: "PLINK pedigree (PED) formatted lines." 
} lof_lookup: { name: "Gene symbol -> LoF score lookup table" @@ -569,7 +585,7 @@ task slivar_svpack_tsv { input { File filtered_vcf - File pedigree + Array[Array[String]] sample_metadata File lof_lookup File clinvar_lookup File phrank_lookup @@ -604,7 +620,7 @@ task slivar_svpack_tsv { --gene-description ~{lof_lookup} \ --gene-description ~{clinvar_lookup} \ --gene-description ~{phrank_lookup} \ - --ped ~{pedigree} \ + --ped ~{write_tsv(sample_metadata)} \ --out /dev/stdout \ ~{filtered_vcf} \ | sed '1 s/gene_description_1/lof/;s/gene_description_2/clinvar/;s/gene_description_3/phrank/;' \ From 9e13d20f125563ca28a68d782a3da9ea9acae078 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 20 Feb 2025 20:23:35 +0000 Subject: [PATCH 12/61] update wdl-ci config file after successful tests --- wdl-ci.config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 042cab3a..1c1c0b83 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -199,7 +199,7 @@ }, "slivar_svpack_tsv": { "key": "slivar_svpack_tsv", - "digest": "bm3dufvjozq4pr2dkopvk3htphiruyjn", + "digest": "zr3mfwgv2xkjpnrlpdo4odn25ojbiyoj", "tests": [ { "inputs": { From 9dc9fec7384b05c77fa022129e58f0fcd855da39 Mon Sep 17 00:00:00 2001 From: William Rowell Date: Fri, 21 Feb 2025 15:52:59 -0800 Subject: [PATCH 13/61] Update image manifest. 
--- image_manifest.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/image_manifest.txt b/image_manifest.txt index 66f4700f..abe5dbb4 100644 --- a/image_manifest.txt +++ b/image_manifest.txt @@ -6,12 +6,12 @@ quay.io/pacbio/paraphase@sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e quay.io/pacbio/pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c quay.io/pacbio/pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859 quay.io/pacbio/pbstarphase@sha256:426764fb09eadbc5de8aea2450b5fe55000c1dd7aaa2ea7b0f5438f34ea63e3d -quay.io/pacbio/pbsv@sha256:2134be37f71b5b2cb41f364736fe5ea14cf8e70403ba41af5005ce50b64086e4 quay.io/pacbio/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87 +quay.io/pacbio/sawfish@sha256:fcd5d091908322ddeb2c86b7217b7cfdef9a103944adb3e87c76d495eb3fea5b quay.io/pacbio/slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa quay.io/pacbio/svpack@sha256:628e9851e425ed8044a907d33de04043d1ef02d4d2b2667cf2e9a389bb011eba quay.io/pacbio/trgt@sha256:be7e6ef589a31f4de5d2ed4725dfb34b4b23cb9a440577b606e8f7bfee06526b quay.io/pacbio/wgs_tertiary@sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136 -google/deepvariant:1.6.1 -google/deepvariant:1.6.1-gpu +google/deepvariant:1.8.0 +google/deepvariant:1.8.0-gpu pgkb/pharmcat:2.15.4 From 7c0e40c02c225e9c62af3ecd1a1cff7664ae9ca5 Mon Sep 17 00:00:00 2001 From: William Rowell Date: Fri, 21 Feb 2025 15:53:12 -0800 Subject: [PATCH 14/61] Update version number in README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a8b3792b..9fd07700 100644 --- a/README.md +++ b/README.md @@ -24,18 +24,18 @@ Both workflows are designed to analyze human PacBio whole genome sequencing (WGS This is an actively developed workflow with multiple versioned releases, and we make use of git submodules for common tasks 
that are shared by multiple workflows. There are two ways to ensure you are using a supported release of the workflow and ensure that the submodules are correctly initialized: -1) Download the release zips directly from a [supported release](https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/tag/v2.1.2): +1) Download the release zips directly from a [supported release](https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/tag/v3.0.0-alpha1): ```bash - wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v2.1.2/hifi-human-wgs-singleton.zip - wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v2.1.2/hifi-human-wgs-family.zip + wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.0-alpha1/hifi-human-wgs-singleton.zip + wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.0-alpha1/hifi-human-wgs-family.zip ``` 2) Clone the repository and initialize the submodules: ```bash git clone \ - --depth 1 --branch v2.1.2 \ + --depth 1 --branch v3.0.0-alpha1 \ --recursive \ https://github.com/PacificBiosciences/HiFi-human-WGS-WDL.git ``` From 031b8b3cba36a501b49f65f13ac8a892f01b9651 Mon Sep 17 00:00:00 2001 From: William Rowell Date: Tue, 25 Feb 2025 11:32:53 -0800 Subject: [PATCH 15/61] Updated to StarPhase 1.3.1. - now accepts phased small variant and structural variant VCFs --- docs/tools_containers.md | 2 +- wdl-ci.config.json | 14 ++++++++------ workflows/downstream/downstream.wdl | 18 ++++++++++-------- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/docs/tools_containers.md b/docs/tools_containers.md index f9c3494b..3d540dd0 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -18,7 +18,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | hiphase |
  • hiphase 1.4.5
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/1051d12818e165a2145526e0b58f0ed0d0dc023a/docker/hiphase) | [hiphase@sha256:47fe7d42aea6b1b2e6d3c7401bc35a184464c3f647473d0525c00f3c968b40ad](https://quay.io/repository/pacbio/hiphase/manifest/sha256:47fe7d42aea6b1b2e6d3c7401bc35a184464c3f647473d0525c00f3c968b40ad) | | hificnv |
  • hificnv 1.0.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/a58f8b44cf8fd09c39c90e07076dbb418188084d/docker/hificnv) | [hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d](https://quay.io/repository/pacbio/hificnv/manifest/sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d) | | paraphase |
  • paraphase 3.2.1
  • minimap 2.28
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/0c8cf2ab0732fd610c9b91a4423a22731314f3f7/docker/paraphase) | [paraphase@sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e](https://quay.io/repository/pacbio/paraphase/manifest/sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e) | -| pbstarphase |
  • pbstarphase 1.1.0
  • Database 20250110
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/c92f009d1214cfac0c636dae8b94cb330767fc53/docker/pbstarphase) | [pbstarphase@sha256:426764fb09eadbc5de8aea2450b5fe55000c1dd7aaa2ea7b0f5438f34ea63e3d](https://quay.io/repository/pacbio/pbstarphase/manifest/sha256:426764fb09eadbc5de8aea2450b5fe55000c1dd7aaa2ea7b0f5438f34ea63e3d) | +| pbstarphase |
  • pbstarphase 1.3.1
  • Database 20250224
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/2750a36b40d319a52c550c2fabbd50060587a1a1/docker/pbstarphase) | [pbstarphase@sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b](https://quay.io/repository/pacbio/pbstarphase/manifest/sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b) | | pb-cpg-tools |
  • pb-cpg-tools 3.0.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/330b99b79f32b2d2598e812779f3c64460739e6c/docker/pb-cpg-tools) | [pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c](https://quay.io/repository/pacbio/pb-cpg-tools/manifest/sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c) | | wgs_tertiary |
  • `/opt/scripts/calculate_phrank.py` 2.0.0
  • `/opt/scripts/json2ped.py` 0.5.0
Last built 2021-09-17:
  • ensembl -> HGNC
  • ensembl -> HPO
  • HGNC -> inheritance
  • HPO DAG
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/fd70e2872bd3c6bb705faff5bc68374116d7d62f/docker/wgs_tertiary) | [wgs_tertiary@sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136](https://quay.io/repository/pacbio/wgs_tertiary/manifest/sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136) | | slivar |
  • slivar 0.3.1
  • `/opt/scripts/add_comphet_phase.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/5e1094fd6755203b4971fdac6dcb951bbc098bed/docker/slivar) | [slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa](https://quay.io/repository/pacbio/slivar/manifest/sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa) | diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 1c1c0b83..f36aedb5 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -1566,24 +1566,26 @@ { "inputs": { "sample_id": "HG002", - "phased_vcf": "${resources_file_path}/pbstarphase_diplotype/input/HG002.GRCh38.subset.small_variants.vcf.gz", - "phased_vcf_index": "${resources_file_path}/pbstarphase_diplotype/input/HG002.GRCh38.subset.small_variants.vcf.gz.tbi", - "aligned_bam": "${resources_file_path}/pbstarphase_diplotype/input/HG002.GRCh38.subset.bam", - "aligned_bam_index": "${resources_file_path}/pbstarphase_diplotype/input/HG002.GRCh38.subset.bam.bai", + "phased_small_variant_vcf": "${resources_file_path}/pbstarphase_diplotype/input/pbstarphase_1.3/HG002.GRCh38.subset.small_variants.vcf.gz", + "phased_small_variant_vcf_index": "${resources_file_path}/pbstarphase_diplotype/input/pbstarphase_1.3/HG002.GRCh38.subset.small_variants.vcf.gz.tbi", + "phased_structural_variant_vcf": "${resources_file_path}/pbstarphase_diplotype/input/pbstarphase_1.3/HG002.GRCh38.subset.structural_variants.vcf.gz", + "phased_structural_variant_vcf_index": "${resources_file_path}/pbstarphase_diplotype/input/pbstarphase_1.3/HG002.GRCh38.subset.structural_variants.vcf.gz.tbi", + "aligned_bam": "${resources_file_path}/pbstarphase_diplotype/input/pbstarphase_1.3/HG002.GRCh38.subset.bam", + "aligned_bam_index": "${resources_file_path}/pbstarphase_diplotype/input/pbstarphase_1.3/HG002.GRCh38.subset.bam.bai", "ref_fasta": "${ref_fasta}", "ref_index": "${ref_index}", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "out_json": { - "value": 
"${resources_file_path}/pbstarphase_diplotype/output/HG002.pbstarphase.json", + "value": "${resources_file_path}/pbstarphase_diplotype/output/pbstarphase_1.3/HG002.pbstarphase.json", "test_tasks": [ "compare_file_basename", "check_json" ] }, "pharmcat_tsv": { - "value": "${resources_file_path}/pbstarphase_diplotype/output/HG002.pharmcat.tsv", + "value": "${resources_file_path}/pbstarphase_diplotype/output/pbstarphase_1.3/HG002.pharmcat.tsv", "test_tasks": [ "compare_file_basename", "check_tab_delimited", diff --git a/workflows/downstream/downstream.wdl b/workflows/downstream/downstream.wdl index 7971521e..8d72263a 100644 --- a/workflows/downstream/downstream.wdl +++ b/workflows/downstream/downstream.wdl @@ -138,14 +138,16 @@ workflow downstream { call Pbstarphase.pbstarphase_diplotype { input: - sample_id = sample_id, - phased_vcf = hiphase.phased_vcfs[0], - phased_vcf_index = hiphase.phased_vcf_indices[0], - aligned_bam = hiphase.haplotagged_bam, - aligned_bam_index = hiphase.haplotagged_bam_index, - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion - runtime_attributes = default_runtime_attributes + sample_id = sample_id, + phased_small_variant_vcf = hiphase.phased_vcfs[0], + phased_small_variant_vcf_index = hiphase.phased_vcf_indices[0], + phased_structural_variant_vcf = hiphase.phased_vcfs[1], + phased_structural_variant_vcf_index = hiphase.phased_vcf_indices[1], + aligned_bam = hiphase.haplotagged_bam, + aligned_bam_index = hiphase.haplotagged_bam_index, + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + runtime_attributes = default_runtime_attributes } call Pharmcat.pharmcat { From 036dd903ce9b04cff810ab4bf6b291f5d21cdd21 Mon Sep 17 00:00:00 2001 From: William Rowell Date: Tue, 25 Feb 2025 11:36:59 -0800 Subject: [PATCH 16/61] Update image manifest. 
--- image_manifest.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/image_manifest.txt b/image_manifest.txt index abe5dbb4..e697b50b 100644 --- a/image_manifest.txt +++ b/image_manifest.txt @@ -4,8 +4,8 @@ quay.io/pacbio/hiphase@sha256:47fe7d42aea6b1b2e6d3c7401bc35a184464c3f647473d0525 quay.io/pacbio/mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1 quay.io/pacbio/paraphase@sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e quay.io/pacbio/pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c -quay.io/pacbio/pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859 -quay.io/pacbio/pbstarphase@sha256:426764fb09eadbc5de8aea2450b5fe55000c1dd7aaa2ea7b0f5438f34ea63e3d +quay.io/pacbio/pbmm2@sha256:b58eef0645dd9adca850c9b4811f289ddcd57f630596d9f524701bc787467f30 +quay.io/pacbio/pbstarphase@sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b quay.io/pacbio/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87 quay.io/pacbio/sawfish@sha256:fcd5d091908322ddeb2c86b7217b7cfdef9a103944adb3e87c76d495eb3fea5b quay.io/pacbio/slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa From b1368d4b8aeeddf3a0e850c75a9c26e700c0572e Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 25 Feb 2025 19:58:31 +0000 Subject: [PATCH 17/61] update wdl-ci config file after successful tests --- wdl-ci.config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index f36aedb5..f4e90c71 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -1561,7 +1561,7 @@ "tasks": { "pbstarphase_diplotype": { "key": "pbstarphase_diplotype", - "digest": "u5goqyzkczomtamorb2oniq6altzayha", + "digest": "2l7wicfoxgyl52na34gxo77yw2kuz2pm", "tests": [ { "inputs": { From 
7d1fc952be4f0db73be58a9ca4defe1294b48789 Mon Sep 17 00:00:00 2001 From: William Rowell Date: Wed, 26 Feb 2025 14:13:56 -0800 Subject: [PATCH 18/61] Update docs and image_manifest. --- image_manifest.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/image_manifest.txt b/image_manifest.txt index e697b50b..ffb1d46f 100644 --- a/image_manifest.txt +++ b/image_manifest.txt @@ -4,7 +4,7 @@ quay.io/pacbio/hiphase@sha256:47fe7d42aea6b1b2e6d3c7401bc35a184464c3f647473d0525 quay.io/pacbio/mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1 quay.io/pacbio/paraphase@sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e quay.io/pacbio/pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c -quay.io/pacbio/pbmm2@sha256:b58eef0645dd9adca850c9b4811f289ddcd57f630596d9f524701bc787467f30 +quay.io/pacbio/pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859 quay.io/pacbio/pbstarphase@sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b quay.io/pacbio/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87 quay.io/pacbio/sawfish@sha256:fcd5d091908322ddeb2c86b7217b7cfdef9a103944adb3e87c76d495eb3fea5b From 1381c48c99fa9623d32e83f1a2fb8a895e4dd5ad Mon Sep 17 00:00:00 2001 From: William Rowell Date: Tue, 18 Mar 2025 14:18:11 -0700 Subject: [PATCH 19/61] correctly link samtools_merge and hiphase in DAG --- docs/singleton.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/singleton.md b/docs/singleton.md index 823ac121..3ba39a91 100644 --- a/docs/singleton.md +++ b/docs/singleton.md @@ -35,6 +35,7 @@ flowchart TD samtools_merge --> trgt["TRGT"] samtools_merge --> trgt_dropouts["TR coverage dropouts"] samtools_merge --> deepvariant["DeepVariant"] + samtools_merge --> hiphase["HiPhase"] sawfish_discover --> sawfish_call["Sawfish call"] end subgraph "`**Phasing and Downstream**`" From 94c9792a9e7d6326f46fe9b08df6778dcca6cb46 
Mon Sep 17 00:00:00 2001 From: William Rowell Date: Tue, 18 Mar 2025 18:57:41 -0700 Subject: [PATCH 20/61] All tasks using sex now use the inferred sex from chrY coverage rather than the reported sex. Inferred sex is compared to the reported sex, and if there is a disagreement, a new `String qc_sex` message is emitted by upstream. These messages are gathered at top level `Array[String?] qc_messages` and emitted as output. --- workflows/family.wdl | 3 +++ workflows/singleton.wdl | 3 +++ workflows/upstream/upstream.wdl | 13 ++++++++++--- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/workflows/family.wdl b/workflows/family.wdl index 47f6f644..f1289feb 100644 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -385,6 +385,9 @@ workflow humanwgs_family { File? tertiary_sv_filtered_vcf_index = tertiary_analysis.sv_filtered_vcf_index File? tertiary_sv_filtered_tsv = tertiary_analysis.sv_filtered_tsv + # qc messages + Array[String?] qc_messages = flatten([upstream.msg_qc_sex]) + # workflow metadata String workflow_name = "humanwgs_family" String workflow_version = "v3.0.0-alpha1" + if defined(debug_version) then "~{"-" + debug_version}" else "" diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl index 351414a1..17fac42e 100644 --- a/workflows/singleton.wdl +++ b/workflows/singleton.wdl @@ -319,6 +319,9 @@ workflow humanwgs_singleton { File? tertiary_sv_filtered_vcf_index = tertiary_analysis.sv_filtered_vcf_index File? tertiary_sv_filtered_tsv = tertiary_analysis.sv_filtered_tsv + # qc messages + Array[String?] 
qc_messages = [upstream.msg_qc_sex] + # workflow metadata String workflow_name = "humanwgs_family" String workflow_version = "v3.0.0-alpha1" + if defined(debug_version) then "~{"-" + debug_version}" else "" diff --git a/workflows/upstream/upstream.wdl b/workflows/upstream/upstream.wdl index 11efaff5..6bc1cc14 100644 --- a/workflows/upstream/upstream.wdl +++ b/workflows/upstream/upstream.wdl @@ -100,6 +100,10 @@ workflow upstream { runtime_attributes = default_runtime_attributes } + if (defined(sex) && (mosdepth.inferred_sex != sex)) { + String qc_sex = "~{sample_id}: Reported sex ~{sex} does not match inferred sex ~{mosdepth.inferred_sex}." + } + call DeepVariant.deepvariant { input: sample_id = sample_id, @@ -114,7 +118,7 @@ workflow upstream { call Sawfish.sawfish_discover { input: - sex = select_first([sex, mosdepth.inferred_sex]), + sex = mosdepth.inferred_sex, aligned_bam = aligned_bam_data, aligned_bam_index = aligned_bam_index, ref_fasta = ref_map["fasta"], # !FileCoercion @@ -128,7 +132,7 @@ workflow upstream { call Trgt.trgt { input: sample_id = sample_id, - sex = select_first([sex, mosdepth.inferred_sex]), + sex = mosdepth.inferred_sex, aligned_bam = aligned_bam_data, aligned_bam_index = aligned_bam_index, ref_fasta = ref_map["fasta"], # !FileCoercion @@ -151,7 +155,7 @@ workflow upstream { call Hificnv.hificnv { input: sample_id = sample_id, - sex = select_first([sex, mosdepth.inferred_sex]), + sex = mosdepth.inferred_sex, aligned_bam = aligned_bam_data, aligned_bam_index = aligned_bam_index, vcf = deepvariant.vcf, @@ -239,5 +243,8 @@ workflow upstream { String stat_cnv_DEL_count = hificnv.stat_DEL_count String stat_cnv_DUP_sum = hificnv.stat_DUP_sum String stat_cnv_DEL_sum = hificnv.stat_DEL_sum + + # qc messages + String? 
msg_qc_sex = qc_sex } } From c977dee6f55ba8bf395c7a7b8b4df9e89224f4f0 Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Tue, 25 Mar 2025 15:49:41 -0700 Subject: [PATCH 21/61] Refactor tasks that extract stats from BAM files so that we only iterate through the BAM once. (#198) * Refactor tasks that extract stats from BAM files so that we only iterate through the BAM once. - remove components from pbmm2_align task that gather stats about read length and quality - remove task to merge per-bam stats and plot read length and quality - remove components from hiphase task that gather stats about mapping and alignment - add a new bam_stats task that extracts all stats in a single pass through the BAM and replaces the outputs from the removed components * update wdl-ci config file after successful tests --------- Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> --- wdl-ci.config.json | 400 ++++++++-------------------- workflows/downstream/downstream.wdl | 30 ++- workflows/family.wdl | 34 +-- workflows/singleton.wdl | 34 +-- workflows/upstream/upstream.wdl | 18 -- 5 files changed, 168 insertions(+), 348 deletions(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index f4e90c71..957f2314 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -262,6 +262,113 @@ "description": "", "tasks": {} }, + "workflows/wdl-common/wdl/tasks/bam_stats.wdl": { + "key": "workflows/wdl-common/wdl/tasks/bam_stats.wdl", + "name": "", + "description": "", + "tasks": { + "bam_stats": { + "key": "bam_stats", + "digest": "uep66lgfvrh34igqnllxgz5buvwrajha", + "tests": [ + { + "inputs": { + "sample_id": "HG002", + "ref_name": "${ref_name}", + "bam": "${resources_file_path}/inputs/HG002.GRCh38.haplotagged.bam", + "bam_index": "${resources_file_path}/inputs/HG002.GRCh38.haplotagged.bam.bai", + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "bam_statistics": { + "value":
"${resources_file_path}/bam_stats/output/HG002.GRCh38.bam_statistics.tsv.gz", + "test_tasks": [ + "compare_file_basename", + "check_tab_delimited", + "count_columns", + "check_gzip" + ] + }, + "read_length_plot": { + "value": "${resources_file_path}/bam_stats/output/HG002.read_length_histogram.png", + "test_tasks": [ + "calculate_md5sum", + "compare_file_basename", + "png_validator" + ] + }, + "read_quality_plot": { + "value": "${resources_file_path}/bam_stats/output/HG002.read_quality_histogram.png", + "test_tasks": [ + "calculate_md5sum", + "compare_file_basename", + "png_validator" + ] + }, + "mapq_distribution_plot": { + "value": "${resources_file_path}/bam_stats/output/HG002.GRCh38.mapq_distribution.png", + "test_tasks": [ + "calculate_md5sum", + "compare_file_basename", + "png_validator" + ] + }, + "mg_distribution_plot": { + "value": "${resources_file_path}/bam_stats/output/HG002.GRCh38.mg_distribution.png", + "test_tasks": [ + "calculate_md5sum", + "compare_file_basename", + "png_validator" + ] + }, + "stat_num_reads": { + "value": "27398", + "test_tasks": [ + "compare_string" + ] + }, + "stat_read_length_mean": { + "value": "14149.12", + "test_tasks": [ + "compare_string" + ] + }, + "stat_read_length_median": { + "value": "14666.5", + "test_tasks": [ + "compare_string" + ] + }, + "stat_read_quality_mean": { + "value": "35.91", + "test_tasks": [ + "compare_string" + ] + }, + "stat_read_quality_median": { + "value": "34.0", + "test_tasks": [ + "compare_string" + ] + }, + "stat_mapped_read_count": { + "value": "27398", + "test_tasks": [ + "compare_string" + ] + }, + "stat_mapped_percent": { + "value": "100.0", + "test_tasks": [ + "compare_string" + ] + } + } + } + ] + } + } + }, "workflows/wdl-common/wdl/tasks/bcftools.wdl": { "key": "workflows/wdl-common/wdl/tasks/bcftools.wdl", "name": "", @@ -915,7 +1022,7 @@ "tasks": { "hiphase": { "key": "hiphase", - "digest": "evdi2klxze7sag3fs4p6g4h4ffqmiqzy", + "digest": "gmsoetfaqun5ppmdxwkrpu5z2jgnyoiq", "tests": [ 
{ "inputs": { @@ -991,250 +1098,6 @@ "count_columns", "check_gzip" ] - }, - "stat_phased_basepairs": { - "value": "8972304", - "test_tasks": [ - "compare_string" - ] - }, - "stat_phase_block_ng50": { - "value": "0", - "test_tasks": [ - "compare_string" - ] - }, - "stat_mapped_read_count": { - "value": "27398", - "test_tasks": [ - "compare_string" - ] - }, - "stat_mapped_percent": { - "value": "100", - "test_tasks": [ - "compare_string" - ] - }, - "mapq_distribution_plot": { - "value": "${resources_file_path}/hiphase/output/HG002/HG002.GRCh38.mapq_distribution.png", - "test_tasks": [ - "calculate_md5sum", - "compare_file_basename", - "png_validator" - ] - }, - "mg_distribution_plot": { - "value": "${resources_file_path}/hiphase/output/HG002/HG002.GRCh38.mg_distribution.png", - "test_tasks": [ - "calculate_md5sum", - "compare_file_basename", - "png_validator" - ] - } - } - } - ] - } - } - }, - "workflows/wdl-common/wdl/tasks/merge_bam_stats.wdl": { - "key": "workflows/wdl-common/wdl/tasks/merge_bam_stats.wdl", - "name": "", - "description": "", - "tasks": { - "merge_bam_stats": { - "key": "merge_bam_stats", - "digest": "mjd6zpbxtabbulmq3kwhcx4cnubxaf74", - "tests": [ - { - "inputs": { - "sample_id": "HG002", - "bam_stats": [ - "${resources_file_path}/pbmm2_align_wgs/sequelii_kinetics_10k/HG00133.sequelii_kinetics_10k.hifi_reads.read_length_and_quality.tsv.gz" - ], - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "read_length_and_quality": { - "value": "${resources_file_path}/merge_bam_stats/one_input/HG002.read_length_and_quality.tsv.gz", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "check_gzip" - ] - }, - "read_length_plot": { - "value": "${resources_file_path}/merge_bam_stats/one_input/HG002.read_length_histogram.png", - "test_tasks": [ - "calculate_md5sum", - "compare_file_basename", - "png_validator" - ] - }, - "read_quality_plot": { - "value": 
"${resources_file_path}/merge_bam_stats/one_input/HG002.read_quality_histogram.png", - "test_tasks": [ - "calculate_md5sum", - "compare_file_basename", - "png_validator" - ] - }, - "stat_num_reads": { - "value": "10000", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_length_mean": { - "value": "23508.73", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_length_median": { - "value": "22855.5", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_quality_mean": { - "value": "26.97", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_quality_median": { - "value": "27.0", - "test_tasks": [ - "compare_string" - ] - } - } - }, - { - "inputs": { - "sample_id": "HG002", - "bam_stats": [ - "${resources_file_path}/pbmm2_align_wgs/sequelii_kinetics_10k/HG00133.sequelii_kinetics_10k.hifi_reads.read_length_and_quality.tsv.gz", - "${resources_file_path}/pbmm2_align_wgs/vega_10k/HG002.vega_10k.hifi_reads.read_length_and_quality.tsv.gz" - ], - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "read_length_and_quality": { - "value": "${resources_file_path}/merge_bam_stats/two_inputs/HG002.read_length_and_quality.tsv.gz", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "check_gzip" - ] - }, - "read_length_plot": { - "value": "${resources_file_path}/merge_bam_stats/two_inputs/HG002.read_length_histogram.png", - "test_tasks": [ - "calculate_md5sum", - "compare_file_basename", - "png_validator" - ] - }, - "read_quality_plot": { - "value": "${resources_file_path}/merge_bam_stats/two_inputs/HG002.read_quality_histogram.png", - "test_tasks": [ - "calculate_md5sum", - "compare_file_basename", - "png_validator" - ] - }, - "stat_num_reads": { - "value": "20000", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_length_mean": { - "value": "22673.07", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_length_median": { - "value": "22281.5", - "test_tasks": [ - 
"compare_string" - ] - }, - "stat_read_quality_mean": { - "value": "29.32", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_quality_median": { - "value": "29.0", - "test_tasks": [ - "compare_string" - ] - } - } - }, - { - "inputs": { - "sample_id": "HG002", - "bam_stats": [ - "${resources_file_path}/pbmm2_align_wgs/vega_10k_no_rq/HG002.vega_10k.no_rq.hifi_reads.read_length_and_quality.tsv.gz" - ], - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "read_length_and_quality": { - "value": "${resources_file_path}/merge_bam_stats/no_rq/HG002.read_length_and_quality.tsv.gz", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "check_gzip" - ] - }, - "read_length_plot": { - "value": "${resources_file_path}/merge_bam_stats/no_rq/HG002.read_length_histogram.png", - "test_tasks": [ - "calculate_md5sum", - "compare_file_basename", - "png_validator" - ] - }, - "stat_num_reads": { - "value": "10000", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_length_mean": { - "value": "21837.4", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_length_median": { - "value": "21385.0", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_quality_mean": { - "value": "nan", - "test_tasks": [ - "compare_string" - ] - }, - "stat_read_quality_median": { - "value": "nan", - "test_tasks": [ - "compare_string" - ] } } } @@ -1407,7 +1270,7 @@ "tasks": { "pbmm2_align_wgs": { "key": "pbmm2_align_wgs", - "digest": "4pbv52jhlacjaylj5tgqzp43a77it5xh", + "digest": "tr6fdqrq5p33b6zynfqgpqlua3lodnos", "tests": [ { "inputs": { @@ -1425,15 +1288,6 @@ "compare_file_basename", "samtools_quickcheck" ] - }, - "bam_stats": { - "value": "${resources_file_path}/pbmm2_align_wgs/sequelii_aligned_10k/HG00733.sequelii_aligned_10k.hifi_reads.read_length_and_quality.tsv.gz", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "check_gzip" - ] } } }, @@ -1453,15 +1307,6 @@ 
"compare_file_basename", "samtools_quickcheck" ] - }, - "bam_stats": { - "value": "${resources_file_path}/pbmm2_align_wgs/sequelii_kinetics_10k/HG00133.sequelii_kinetics_10k.hifi_reads.read_length_and_quality.tsv.gz", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "check_gzip" - ] } } }, @@ -1482,15 +1327,6 @@ "compare_file_basename", "samtools_quickcheck" ] - }, - "bam_stats": { - "value": "${resources_file_path}/pbmm2_align_wgs/sequelii_kinetics_10k_strip_kinetics_false/HG00133.sequelii_kinetics_10k.hifi_reads.read_length_and_quality.tsv.gz", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "check_gzip" - ] } } }, @@ -1510,15 +1346,6 @@ "compare_file_basename", "samtools_quickcheck" ] - }, - "bam_stats": { - "value": "${resources_file_path}/pbmm2_align_wgs/vega_10k/HG002.vega_10k.hifi_reads.read_length_and_quality.tsv.gz", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "check_gzip" - ] } } }, @@ -1538,15 +1365,6 @@ "compare_file_basename", "samtools_quickcheck" ] - }, - "bam_stats": { - "value": "${resources_file_path}/pbmm2_align_wgs/vega_10k_no_rq/HG002.vega_10k.no_rq.hifi_reads.read_length_and_quality.tsv.gz", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_columns", - "check_gzip" - ] } } } diff --git a/workflows/downstream/downstream.wdl b/workflows/downstream/downstream.wdl index 8d72263a..dd742c7a 100644 --- a/workflows/downstream/downstream.wdl +++ b/workflows/downstream/downstream.wdl @@ -2,6 +2,7 @@ version 1.0 import "../wdl-common/wdl/structs.wdl" import "../wdl-common/wdl/tasks/hiphase.wdl" as Hiphase +import "../wdl-common/wdl/tasks/bam_stats.wdl" as Bamstats import "../wdl-common/wdl/tasks/trgt.wdl" as Trgt import "../wdl-common/wdl/tasks/bcftools.wdl" as Bcftools import "../wdl-common/wdl/tasks/cpg_pileup.wdl" as Cpgpileup @@ -102,6 +103,15 @@ workflow downstream { # hiphase.phased_vcfs[1] 
-> phased SV VCF # hiphase.phased_vcfs[2] -> phased TRGT VCF + call Bamstats.bam_stats { + input: + sample_id = sample_id, + ref_name = ref_map["name"], + bam = hiphase.haplotagged_bam, + bam_index = hiphase.haplotagged_bam_index, + runtime_attributes = default_runtime_attributes + } + call Trgt.coverage_dropouts { input: aligned_bam = hiphase.haplotagged_bam, @@ -181,11 +191,21 @@ workflow downstream { File phase_haplotags = hiphase.phase_haplotags String stat_phased_basepairs = hiphase.stat_phased_basepairs String stat_phase_block_ng50 = hiphase.stat_phase_block_ng50 - String stat_mapped_read_count = hiphase.stat_mapped_read_count - String stat_mapped_percent = hiphase.stat_mapped_percent - File mapq_distribution_plot = hiphase.mapq_distribution_plot - File mg_distribution_plot = hiphase.mg_distribution_plot - File trgt_coverage_dropouts = coverage_dropouts.dropouts + + # bam stats + File bam_statistics = bam_stats.bam_statistics + File read_length_plot = bam_stats.read_length_plot + File? 
read_quality_plot = bam_stats.read_quality_plot + File mapq_distribution_plot = bam_stats.mapq_distribution_plot + File mg_distribution_plot = bam_stats.mg_distribution_plot + String stat_num_reads = bam_stats.stat_num_reads + String stat_read_length_mean = bam_stats.stat_read_length_mean + String stat_read_length_median = bam_stats.stat_read_length_median + String stat_read_quality_mean = bam_stats.stat_read_quality_mean + String stat_read_quality_median = bam_stats.stat_read_quality_median + String stat_mapped_read_count = bam_stats.stat_mapped_read_count + String stat_mapped_percent = bam_stats.stat_mapped_percent + File trgt_coverage_dropouts = coverage_dropouts.dropouts # small variant stats File small_variant_stats = bcftools_stats_roh_small_variants.stats diff --git a/workflows/family.wdl b/workflows/family.wdl index f1289feb..7375540c 100644 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -168,11 +168,11 @@ workflow humanwgs_family { Map[String, Array[String]] stats = { 'sample_id': sample_id, - 'num_reads': upstream.stat_num_reads, - 'read_length_mean': upstream.stat_read_length_mean, - 'read_length_median': upstream.stat_read_length_median, - 'read_quality_mean': upstream.stat_read_quality_mean, - 'read_quality_median': upstream.stat_read_quality_median, + 'num_reads': downstream.stat_num_reads, + 'read_length_mean': downstream.stat_read_length_mean, + 'read_length_median': downstream.stat_read_length_median, + 'read_quality_mean': downstream.stat_read_quality_mean, + 'read_quality_median': downstream.stat_read_quality_median, 'mapped_read_count': downstream.stat_mapped_read_count, 'mapped_percent': downstream.stat_mapped_percent, 'mean_depth': upstream.stat_mean_depth, @@ -258,22 +258,22 @@ workflow humanwgs_family { File stats_file = consolidate_stats.output_tsv # bam stats - Array[File] bam_stats = upstream.read_length_and_quality - Array[File] read_length_plot = upstream.read_length_plot - Array[File?] 
read_quality_plot = upstream.read_quality_plot - Array[String] stat_num_reads = upstream.stat_num_reads - Array[String] stat_read_length_mean = upstream.stat_read_length_mean - Array[String] stat_read_length_median = upstream.stat_read_length_median - Array[String] stat_read_quality_mean = upstream.stat_read_quality_mean - Array[String] stat_read_quality_median = upstream.stat_read_quality_median + Array[File] bam_statistics = downstream.bam_statistics + Array[File] read_length_plot = downstream.read_length_plot + Array[File?] read_quality_plot = downstream.read_quality_plot + Array[File] mapq_distribution_plot = downstream.mapq_distribution_plot + Array[File] mg_distribution_plot = downstream.mg_distribution_plot + Array[String] stat_num_reads = downstream.stat_num_reads + Array[String] stat_read_length_mean = downstream.stat_read_length_mean + Array[String] stat_read_length_median = downstream.stat_read_length_median + Array[String] stat_read_quality_mean = downstream.stat_read_quality_mean + Array[String] stat_read_quality_median = downstream.stat_read_quality_median + Array[String] stat_mapped_read_count = downstream.stat_mapped_read_count + Array[String] stat_mapped_percent = downstream.stat_mapped_percent # merged, haplotagged alignments Array[File] merged_haplotagged_bam = downstream.merged_haplotagged_bam Array[File] merged_haplotagged_bam_index = downstream.merged_haplotagged_bam_index - Array[String] stat_mapped_read_count = downstream.stat_mapped_read_count - Array[String] stat_mapped_percent = downstream.stat_mapped_percent - Array[File] mapq_distribution_plot = downstream.mapq_distribution_plot - Array[File] mg_distribution_plot = downstream.mg_distribution_plot # mosdepth outputs Array[File] mosdepth_summary = upstream.mosdepth_summary diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl index 17fac42e..3222904b 100644 --- a/workflows/singleton.wdl +++ b/workflows/singleton.wdl @@ -125,11 +125,11 @@ workflow humanwgs_singleton { Map[String, 
Array[String]] stats = { 'sample_id': [sample_id], - 'num_reads': [upstream.stat_num_reads], - 'read_length_mean': [upstream.stat_read_length_mean], - 'read_length_median': [upstream.stat_read_length_median], - 'read_quality_mean': [upstream.stat_read_quality_mean], - 'read_quality_median': [upstream.stat_read_quality_median], + 'num_reads': [downstream.stat_num_reads], + 'read_length_mean': [downstream.stat_read_length_mean], + 'read_length_median': [downstream.stat_read_length_median], + 'read_quality_mean': [downstream.stat_read_quality_mean], + 'read_quality_median': [downstream.stat_read_quality_median], 'mapped_read_count': [downstream.stat_mapped_read_count], 'mapped_percent': [downstream.stat_mapped_percent], 'mean_depth': [upstream.stat_mean_depth], @@ -200,22 +200,22 @@ workflow humanwgs_singleton { File stats_file = consolidate_stats.output_tsv # bam stats - File bam_stats = upstream.read_length_and_quality - File read_length_plot = upstream.read_length_plot - File? read_quality_plot = upstream.read_quality_plot - String stat_num_reads = upstream.stat_num_reads - String stat_read_length_mean = upstream.stat_read_length_mean - String stat_read_length_median = upstream.stat_read_length_median - String stat_read_quality_mean = upstream.stat_read_quality_mean - String stat_read_quality_median = upstream.stat_read_quality_median + File bam_statistics = downstream.bam_statistics + File read_length_plot = downstream.read_length_plot + File? 
read_quality_plot = downstream.read_quality_plot + File mapq_distribution_plot = downstream.mapq_distribution_plot + File mg_distribution_plot = downstream.mg_distribution_plot + String stat_num_reads = downstream.stat_num_reads + String stat_read_length_mean = downstream.stat_read_length_mean + String stat_read_length_median = downstream.stat_read_length_median + String stat_read_quality_mean = downstream.stat_read_quality_mean + String stat_read_quality_median = downstream.stat_read_quality_median + String stat_mapped_read_count = downstream.stat_mapped_read_count + String stat_mapped_percent = downstream.stat_mapped_percent # merged, haplotagged alignments File merged_haplotagged_bam = downstream.merged_haplotagged_bam File merged_haplotagged_bam_index = downstream.merged_haplotagged_bam_index - String stat_mapped_read_count = downstream.stat_mapped_read_count - String stat_mapped_percent = downstream.stat_mapped_percent - File mapq_distribution_plot = downstream.mapq_distribution_plot - File mg_distribution_plot = downstream.mg_distribution_plot # mosdepth outputs File mosdepth_summary = upstream.mosdepth_summary diff --git a/workflows/upstream/upstream.wdl b/workflows/upstream/upstream.wdl index 6bc1cc14..a9020c1c 100644 --- a/workflows/upstream/upstream.wdl +++ b/workflows/upstream/upstream.wdl @@ -2,7 +2,6 @@ version 1.0 import "../wdl-common/wdl/structs.wdl" import "../wdl-common/wdl/tasks/pbmm2.wdl" as Pbmm2 -import "../wdl-common/wdl/tasks/merge_bam_stats.wdl" as MergeBamStats import "../wdl-common/wdl/tasks/sawfish.wdl" as Sawfish import "../wdl-common/wdl/workflows/deepvariant/deepvariant.wdl" as DeepVariant import "../wdl-common/wdl/tasks/samtools.wdl" as Samtools @@ -69,13 +68,6 @@ workflow upstream { } } - call MergeBamStats.merge_bam_stats { - input: - sample_id = sample_id, - bam_stats = pbmm2_align.bam_stats, - runtime_attributes = default_runtime_attributes - } - # merge aligned bams if there are multiple if (length(pbmm2_align.aligned_bam) > 1) 
{ call Samtools.samtools_merge { @@ -184,16 +176,6 @@ workflow upstream { } output { - # bam stats - File read_length_and_quality = merge_bam_stats.read_length_and_quality - File read_length_plot = merge_bam_stats.read_length_plot - File? read_quality_plot = merge_bam_stats.read_quality_plot - String stat_num_reads = merge_bam_stats.stat_num_reads - String stat_read_length_mean = merge_bam_stats.stat_read_length_mean - String stat_read_length_median = merge_bam_stats.stat_read_length_median - String stat_read_quality_mean = merge_bam_stats.stat_read_quality_mean - String stat_read_quality_median = merge_bam_stats.stat_read_quality_median - # alignments File out_bam = aligned_bam_data File out_bam_index = aligned_bam_index From 04b4f14c400f4e0bd56ca8571a80e224624693be Mon Sep 17 00:00:00 2001 From: William Rowell Date: Mon, 31 Mar 2025 18:53:49 -0700 Subject: [PATCH 22/61] Update to HiPhase 1.5.0 --- docs/tools_containers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools_containers.md b/docs/tools_containers.md index 3d540dd0..213e9c8a 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -15,7 +15,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | mosdepth |
  • mosdepth 0.3.9
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/fa84fbf582738c05c750e667ff43d11552ad4183/docker/mosdepth) | [mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1](https://quay.io/repository/pacbio/mosdepth/manifest/sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1) | | sawfish |
  • sawfish 0.12.7
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/74325e0c73d53da2fef43d01ce9d0c7d0cd49c7a/docker/sawfish) | [sawfish@sha256:fcd5d091908322ddeb2c86b7217b7cfdef9a103944adb3e87c76d495eb3fea5b](https://quay.io/repository/pacbio/sawfish/manifest/sha256:fcd5d091908322ddeb2c86b7217b7cfdef9a103944adb3e87c76d495eb3fea5b) | | trgt |
  • trgt 1.5.1
  • `/opt/scripts/check_trgt_coverage.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/641ed67d29128381f27daeca9936fbc1e41bf58d/docker/trgt) | [trgt@sha256:be7e6ef589a31f4de5d2ed4725dfb34b4b23cb9a440577b606e8f7bfee06526b](https://quay.io/repository/pacbio/trgt/manifest/sha256:be7e6ef589a31f4de5d2ed4725dfb34b4b23cb9a440577b606e8f7bfee06526b) | -| hiphase |
  • hiphase 1.4.5
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/1051d12818e165a2145526e0b58f0ed0d0dc023a/docker/hiphase) | [hiphase@sha256:47fe7d42aea6b1b2e6d3c7401bc35a184464c3f647473d0525c00f3c968b40ad](https://quay.io/repository/pacbio/hiphase/manifest/sha256:47fe7d42aea6b1b2e6d3c7401bc35a184464c3f647473d0525c00f3c968b40ad) | +| hiphase |
  • hiphase 1.5.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/69039c010ada793bab4d38a9bd17a30562b9b671/docker/hiphase) | [hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482](https://quay.io/repository/pacbio/hiphase/manifest/sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482) | | hificnv |
  • hificnv 1.0.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/a58f8b44cf8fd09c39c90e07076dbb418188084d/docker/hificnv) | [hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d](https://quay.io/repository/pacbio/hificnv/manifest/sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d) | | paraphase |
  • paraphase 3.2.1
  • minimap 2.28
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/0c8cf2ab0732fd610c9b91a4423a22731314f3f7/docker/paraphase) | [paraphase@sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e](https://quay.io/repository/pacbio/paraphase/manifest/sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e) | | pbstarphase |
  • pbstarphase 1.3.1
  • Database 20250224
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/2750a36b40d319a52c550c2fabbd50060587a1a1/docker/pbstarphase) | [pbstarphase@sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b](https://quay.io/repository/pacbio/pbstarphase/manifest/sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b) | From 422ff1e779e18c3dd5c6c0b01a846427595f65dd Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 1 Apr 2025 02:14:42 +0000 Subject: [PATCH 23/61] update wdl-ci config file after successful tests --- wdl-ci.config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 957f2314..ca4727b8 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -1022,7 +1022,7 @@ "tasks": { "hiphase": { "key": "hiphase", - "digest": "gmsoetfaqun5ppmdxwkrpu5z2jgnyoiq", + "digest": "kavkm5q6klq4ozjxnkxneckbwsoybfnv", "tests": [ { "inputs": { From d100147c7ca643b15f1995467fd756daba0701d1 Mon Sep 17 00:00:00 2001 From: William Rowell Date: Mon, 31 Mar 2025 20:01:11 -0700 Subject: [PATCH 24/61] Update all runtime.memory attributes to use GiB instead of GB. Increase memory/cpu attributes for some tasks based on typical available cloud node types. 
--- workflows/tertiary/tertiary.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/workflows/tertiary/tertiary.wdl b/workflows/tertiary/tertiary.wdl index d283287c..8d4edc66 100644 --- a/workflows/tertiary/tertiary.wdl +++ b/workflows/tertiary/tertiary.wdl @@ -335,7 +335,7 @@ task slivar_small_variant { String vcf_basename = basename(vcf, ".vcf.gz") Int threads = 8 - Int mem_gb = 2 * threads + Int mem_gb = 16 Int disk_size = ceil((size(vcf, "GB") + size(reference, "GB") + size(gnotate_files, "GB") + size(gff, "GB") + size(lof_lookup, "GB") + size(clinvar_lookup, "GB") + size(phrank_lookup, "GB")) * 2 + 20) command <<< @@ -444,7 +444,7 @@ task slivar_small_variant { runtime { docker: "~{runtime_attributes.container_registry}/slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa" cpu: threads - memory: mem_gb + " GB" + memory: mem_gb + " GiB" disk: disk_size + " GB" disks: "local-disk " + disk_size + " HDD" preemptible: runtime_attributes.preemptible_tries @@ -543,7 +543,7 @@ task svpack_filter_annotated { runtime { docker: "~{runtime_attributes.container_registry}/svpack@sha256:628e9851e425ed8044a907d33de04043d1ef02d4d2b2667cf2e9a389bb011eba" cpu: threads - memory: mem_gb + " GB" + memory: mem_gb + " GiB" disk: disk_size + " GB" disks: "local-disk " + disk_size + " HDD" preemptible: runtime_attributes.preemptible_tries @@ -634,7 +634,7 @@ task slivar_svpack_tsv { runtime { docker: "~{runtime_attributes.container_registry}/slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa" cpu: threads - memory: mem_gb + " GB" + memory: mem_gb + " GiB" disk: disk_size + " GB" disks: "local-disk " + disk_size + " HDD" preemptible: runtime_attributes.preemptible_tries From 389f440d561f5114cf5890a1e746924a7d4818d3 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 1 Apr 2025 06:55:11 +0000 Subject: [PATCH 25/61] update wdl-ci config file after 
successful tests --- wdl-ci.config.json | 64 +++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index ca4727b8..9ae92a64 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -37,7 +37,7 @@ "tasks": { "slivar_small_variant": { "key": "slivar_small_variant", - "digest": "pssprgcwn5b4liynj6ujl4poz6xx6tay", + "digest": "jbr45hrej5peuxvjzmqswrzop3zuqinz", "tests": [ { "inputs": { @@ -142,7 +142,7 @@ }, "svpack_filter_annotated": { "key": "svpack_filter_annotated", - "digest": "62s5ilqjjfenj2earblxjez57cca6xrp", + "digest": "saeebupdw3mte56psdpfwoel7qfare26", "tests": [ { "inputs": { @@ -199,7 +199,7 @@ }, "slivar_svpack_tsv": { "key": "slivar_svpack_tsv", - "digest": "zr3mfwgv2xkjpnrlpdo4odn25ojbiyoj", + "digest": "qyrthqwtbry2csopde7ot4hm7lpokqqp", "tests": [ { "inputs": { @@ -269,7 +269,7 @@ "tasks": { "bam_stats": { "key": "bam_stats", - "digest": "uep66lgfvrh34igqnllxgz5buvwrajha", + "digest": "lynh3fgck5w62a7dosebmhyp35eq2cki", "tests": [ { "inputs": { @@ -376,7 +376,7 @@ "tasks": { "bcftools_stats_roh_small_variants": { "key": "bcftools_stats_roh_small_variants", - "digest": "abrbvw7exjpg4zshsttwosmgoo75f722", + "digest": "jjlxdrxewzw43s6sisczj7txhq55e6di", "tests": [ { "inputs": { @@ -525,7 +525,7 @@ }, "concat_pbsv_vcf": { "key": "concat_pbsv_vcf", - "digest": "xnv22objo4npixd7xx7rbiri7afensy6", + "digest": "pfr7cljxewp6n6pld4pl4wya3kzdy3jk", "tests": [ { "inputs": { @@ -555,7 +555,7 @@ }, "split_vcf_by_sample": { "key": "split_vcf_by_sample", - "digest": "2hvcuqdk75vxcbe3svrz2je54o7x5umn", + "digest": "dydfywxit4imoywu7rfyqyvk3wetiowk", "tests": [ { "inputs": { @@ -633,7 +633,7 @@ }, "bcftools_merge": { "key": "bcftools_merge", - "digest": "7b7uqtr435y7abbnap6u5oo3jqlkyzpi", + "digest": "hm5g7jj2hm73dmieksaauu3ufmvtytoc", "tests": [ { "inputs": { @@ -774,7 +774,7 @@ "tasks": { "cpg_pileup": { "key": "cpg_pileup", - "digest": 
"3gzayuvi7ky5t3kaup3f4sgsoqsxnafq", + "digest": "p36yif35pnwhh7ejpbsc2izmmye4aehs", "tests": [ { "inputs": { @@ -895,7 +895,7 @@ "tasks": { "glnexus": { "key": "glnexus", - "digest": "3qe7sprlc7n62p6ozbxkva2xkkn4snlq", + "digest": "hyyvwfb42obz4ieydkko4uef6hhc2vxl", "tests": [ { "inputs": { @@ -935,7 +935,7 @@ "tasks": { "hificnv": { "key": "hificnv", - "digest": "kswxkn6zzlkuucuumxqbiaq5cx2sdiki", + "digest": "bfpv64leadkxosmbgdg2viaugtmrmupo", "tests": [ { "inputs": { @@ -1022,7 +1022,7 @@ "tasks": { "hiphase": { "key": "hiphase", - "digest": "kavkm5q6klq4ozjxnkxneckbwsoybfnv", + "digest": "mcezwxqtvtt4hptlcegtkom322yf5jnb", "tests": [ { "inputs": { @@ -1112,7 +1112,7 @@ "tasks": { "mosdepth": { "key": "mosdepth", - "digest": "57rmcwxx2lmhgupdu4unxkldevrr3n3u", + "digest": "mwjp4365vu3k7nccm5k4rbd2ga2lqmgw", "tests": [ { "inputs": { @@ -1224,7 +1224,7 @@ "tasks": { "paraphase": { "key": "paraphase", - "digest": "iz5imdibf3z2jszierngsa43lubsthss", + "digest": "thdat4rkjeqlyo7fc3frju4caujej4rb", "tests": [ { "inputs": { @@ -1270,7 +1270,7 @@ "tasks": { "pbmm2_align_wgs": { "key": "pbmm2_align_wgs", - "digest": "tr6fdqrq5p33b6zynfqgpqlua3lodnos", + "digest": "eupj2k52j7y4oifnqvbz2bwvu5gmwzsx", "tests": [ { "inputs": { @@ -1379,7 +1379,7 @@ "tasks": { "pbstarphase_diplotype": { "key": "pbstarphase_diplotype", - "digest": "2l7wicfoxgyl52na34gxo77yw2kuz2pm", + "digest": "vz6illqka3w4pefnou4uoryckd6cr3hu", "tests": [ { "inputs": { @@ -1423,7 +1423,7 @@ "tasks": { "samtools_merge": { "key": "samtools_merge", - "digest": "lmc2sfyir2g2cile2vwmaatp46vzddbs", + "digest": "uuwtzd6saw3mxd5pbg4wvalvi4tmal7a", "tests": [ { "inputs": { @@ -1465,7 +1465,7 @@ "tasks": { "trgt": { "key": "trgt", - "digest": "h4unpn2yf3cc23yiieabdgov7bm7pyip", + "digest": "c63pmlsvyejtse23754nll467tk3x2qn", "tests": [ { "inputs": { @@ -1554,7 +1554,7 @@ }, "trgt_merge": { "key": "trgt_merge", - "digest": "yvjau74pm7ylqj2p47l2lyqyn35kozv7", + "digest": "wj3x5qdjnlsvhyo6mphy3hk6vvbbtjzk", "tests": [ { 
"inputs": { @@ -1588,7 +1588,7 @@ }, "coverage_dropouts": { "key": "coverage_dropouts", - "digest": "wkgohijal32smgigy7olmcrdxpmibpwt", + "digest": "5ekiibz2pvpouhtkk655lei5lk2ubtzg", "tests": [ { "inputs": { @@ -1620,7 +1620,7 @@ "tasks": { "split_string": { "key": "split_string", - "digest": "q7byrkraoxoid54g6rnw32zejprmobk6", + "digest": "sia2b6zbcnw34kvg5qsyawoe4w3dvpdu", "tests": [ { "inputs": { @@ -1644,7 +1644,7 @@ }, "consolidate_stats": { "key": "consolidate_stats", - "digest": "3w2ttlxevhy2vkao2q2tj7c6n5qilxni", + "digest": "olxrzy2hoyifsvy7unp7v6t6ur5hrujv", "tests": [ { "inputs": { @@ -1685,7 +1685,7 @@ "tasks": { "write_phrank": { "key": "write_phrank", - "digest": "hfknzcdeg3y5whspi5ndsoo2eut7wpd7", + "digest": "si2kfc6pzsrweaqed3jf5dpkpk4ysnhc", "tests": [ { "inputs": { @@ -1721,7 +1721,7 @@ "tasks": { "deepvariant_make_examples": { "key": "deepvariant_make_examples", - "digest": "kenkd6hpkprqjxzrr67ws7jsyflofniu", + "digest": "ldldviaalmgztiglsdugxauqgx6rfbrk", "tests": [ { "inputs": { @@ -1761,7 +1761,7 @@ }, "deepvariant_call_variants_cpu": { "key": "deepvariant_call_variants_cpu", - "digest": "7atkl2o6dbmjxf7uxae2bjnfnt4lwan4", + "digest": "3lfcmsaddhri77rtk4yecs77l7edxoc7", "tests": [ { "inputs": { @@ -1795,7 +1795,7 @@ }, "deepvariant_call_variants_gpu": { "key": "deepvariant_call_variants_gpu", - "digest": "nsrzbzbmry52xlmgtq3nokymfcynkesh", + "digest": "bhzsa64abtjmqd25mgknrrqu2qkwo3xe", "tests": [ { "inputs": { @@ -1829,7 +1829,7 @@ }, "deepvariant_postprocess_variants": { "key": "deepvariant_postprocess_variants", - "digest": "nx7xlzdxdgso2oviegbruwywwd7cjdgl", + "digest": "ih6q34f2apm6eqmyalje4cylajumvinw", "tests": [ { "inputs": { @@ -1904,7 +1904,7 @@ "tasks": { "pharmcat_preprocess": { "key": "pharmcat_preprocess", - "digest": "2cfpsi2cznnpivx2ehkthn22rv7hkvdr", + "digest": "y7e4xvcwoiod7gakvbbimnz5ylkalmhk", "tests": [ { "inputs": { @@ -1938,7 +1938,7 @@ }, "filter_preprocessed_vcf": { "key": "filter_preprocessed_vcf", - "digest": 
"4jjtxpixe6gq6p3v7lnuyxhlzottg2ib", + "digest": "qnxypqjvvofeigp4v27xpnp5dfv76duz", "tests": [ { "inputs": { @@ -1963,7 +1963,7 @@ }, "run_pharmcat": { "key": "run_pharmcat", - "digest": "nsabvlc6qz2u6y7ikis4pa4oljfanewp", + "digest": "itfw64hpuphtqb5t6xqspch4mtscaz7f", "tests": [ { "inputs": { @@ -2058,7 +2058,7 @@ "tasks": { "sawfish_discover": { "key": "sawfish_discover", - "digest": "eh67skuq3swjgkbrinqhzfxf2wfea2hp", + "digest": "fjjjgwzjez4nzzx43o67v5g6b75k3q42", "tests": [ { "inputs": { @@ -2126,7 +2126,7 @@ }, "sawfish_call": { "key": "sawfish_call", - "digest": "gcc2gfurgryq2ziqgyfgxc5a2k3dtkyo", + "digest": "z3l3faihcbvwpi2lkd6k4hsqmg6y552n", "tests": [ { "inputs": { From 9eafe70856a162e404302aabbefc4e61a3ec0285 Mon Sep 17 00:00:00 2001 From: William Rowell Date: Tue, 1 Apr 2025 08:02:01 -0700 Subject: [PATCH 26/61] Update test for sv_stats. --- wdl-ci.config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 9ae92a64..2010031d 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -726,7 +726,7 @@ }, "output_tests": { "stat_sv_DUP_count": { - "value": "2", + "value": "0", "test_tasks": [ "compare_string" ] From f2ae3d5fc6ddff42c4576661a57e7d6c7730af1a Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 1 Apr 2025 15:07:00 +0000 Subject: [PATCH 27/61] update wdl-ci config file after successful tests --- wdl-ci.config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 2010031d..1f14c772 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -717,7 +717,7 @@ }, "sv_stats": { "key": "sv_stats", - "digest": "foqixa2ryrx7e64ymqylurlfad7gpdpi", + "digest": "bru36vhjvldfcytkj2433t4naekjgctl", "tests": [ { "inputs": { From bb28a7a0e144142e4ec0335fa7d31123276e2576 Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Thu, 3 Apr 2025 14:25:47 -0700 Subject: [PATCH 
28/61] Add the `cpuPlatform` runtime attribute to support the workflow in GCP zones that lack n1 nodes. (#207) * Add the `cpuPlatform` runtime attribute to support the workflow in GCP zones that lack n1 nodes. - add cpuPlatform to every task - add cpuPlatform to backend_configuration - add cpuPlatform input to family and singleton entrypoints - document input in singleton and family docs - document input in backend-gcp doc * update wdl-ci config file after successful tests * Fix spacing for runtime.zones. Add cpuPlatform to subworkflow inputs. * update wdl-ci config file after successful tests --------- Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> --- docs/backend-gcp.md | 4 ++ docs/family.md | 1 + docs/singleton.md | 1 + wdl-ci.config.json | 68 ++++++++++++++++---------------- workflows/downstream/inputs.json | 3 +- workflows/family.inputs.json | 1 + workflows/family.wdl | 5 +++ workflows/joint/inputs.json | 3 +- workflows/singleton.inputs.json | 1 + workflows/singleton.wdl | 5 +++ workflows/tertiary/inputs.json | 3 +- workflows/tertiary/tertiary.wdl | 3 ++ workflows/upstream/inputs.json | 3 +- 13 files changed, 64 insertions(+), 37 deletions(-) diff --git a/docs/backend-gcp.md b/docs/backend-gcp.md index dca0abbf..692913a2 100644 --- a/docs/backend-gcp.md +++ b/docs/backend-gcp.md @@ -20,6 +20,10 @@ gcloud compute zones list | grep For example, the zones in region `us-central1` are `"us-central1-a us-central1-b us-central1c us-central1f"`. +#### Setting the optional cpuPlatform parameter + +Some GCP zones, for example those in the `me-central1` region, lack the n1 nodes used by many tasks in the workflow. As a workaround, you can set the minimum CPU platform used by the workflow to `"Intel Cascade Lake"`. There is no need to specify the `cpuPlatform` input unless you encounter this issue. 
+ ## Running the workflow via Google's genomics Pipelines API [Cromwell's documentation](https://cromwell.readthedocs.io/en/stable/tutorials/PipelinesApi101/) on getting started with Google's genomics Pipelines API can be used as an example for how to run the workflow. diff --git a/docs/family.md b/docs/family.md index f7b59e04..7a1d84d4 100644 --- a/docs/family.md +++ b/docs/family.md @@ -82,6 +82,7 @@ flowchart TD | Boolean | gpu | Use GPU when possible

Default: `false` | [GPU support](./gpu.md#gpu-support) | | String | backend | Backend where the workflow will be executed

`["GCP", "Azure", "AWS-HealthOmics", "HPC"]` | | | String? | zones | Zones where compute will take place; required if backend is set to 'AWS' or 'GCP'. | [Determining available zones in GCP](./backends.md/gcp#determining-available-zones) | +| String? | cpuPlatform | Minimum CPU platform to use for tasks on GCP | Optional, only necessary in certain zones lacking n1 nodes. | | String? | gpuType | GPU type to use; required if gpu is set to `true` for cloud backends; must match backend | [Available GPU types](./gpu.md#gpu-types) | | String? | container_registry | Container registry where workflow images are hosted.

Default: `"quay.io/pacbio"` | If omitted, [PacBio's public Quay.io registry](https://quay.io/organization/pacbio) will be used.

Custom container_registry must be set if backend is set to 'AWS-HealthOmics'. | | Boolean | preemptible | Where possible, run tasks preemptibly

`[true, false]`

Default: `true` | If set to `true`, run tasks preemptibly where possible. If set to `false`, on-demand VMs will be used for every task. Ignored if backend is set to HPC. | diff --git a/docs/singleton.md b/docs/singleton.md index 3ba39a91..64295a4a 100644 --- a/docs/singleton.md +++ b/docs/singleton.md @@ -70,6 +70,7 @@ flowchart TD | Boolean | gpu | Use GPU when possible

Default: `false` | [GPU support](./gpu.md#gpu-support) | | String | backend | Backend where the workflow will be executed

`["GCP", "Azure", "AWS-AGC", "AWS-HealthOmics", "HPC"]` | | | String? | zones | Zones where compute will take place; required if backend is set to 'AWS' or 'GCP'. | [Determining available zones in GCP](./backends/gcp.md#determining-available-zones) | +| String? | cpuPlatform | Minimum CPU platform to use for tasks on GCP | Optional, only necessary in certain zones lacking n1 nodes. | | String? | gpuType | GPU type to use; required if gpu is set to `true` for cloud backends; must match backend | [Available GPU types](./gpu.md#gpu-types) | | String? | container_registry | Container registry where workflow images are hosted.

Default: `"quay.io/pacbio"` | If omitted, [PacBio's public Quay.io registry](https://quay.io/organization/pacbio) will be used.

Custom container_registry must be set if backend is set to 'AWS-HealthOmics'. | | Boolean | preemptible | Where possible, run tasks preemptibly

`[true, false]`

Default: `true` | If set to `true`, run tasks preemptibly where possible. If set to `false`, on-demand VMs will be used for every task. Ignored if backend is set to HPC. | diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 1f14c772..b4a85144 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -37,7 +37,7 @@ "tasks": { "slivar_small_variant": { "key": "slivar_small_variant", - "digest": "jbr45hrej5peuxvjzmqswrzop3zuqinz", + "digest": "hk7smb3fjdzicbvjwes6oist7iwnefwm", "tests": [ { "inputs": { @@ -142,7 +142,7 @@ }, "svpack_filter_annotated": { "key": "svpack_filter_annotated", - "digest": "saeebupdw3mte56psdpfwoel7qfare26", + "digest": "dogptxnqarr6sgsxs53l4npm7mtktpdi", "tests": [ { "inputs": { @@ -199,7 +199,7 @@ }, "slivar_svpack_tsv": { "key": "slivar_svpack_tsv", - "digest": "qyrthqwtbry2csopde7ot4hm7lpokqqp", + "digest": "mo3z272srlp4old7xfa3cakvycayrfqg", "tests": [ { "inputs": { @@ -269,7 +269,7 @@ "tasks": { "bam_stats": { "key": "bam_stats", - "digest": "lynh3fgck5w62a7dosebmhyp35eq2cki", + "digest": "wcruy2g5ce7fexnf7sctfshfcj3alw5m", "tests": [ { "inputs": { @@ -376,7 +376,7 @@ "tasks": { "bcftools_stats_roh_small_variants": { "key": "bcftools_stats_roh_small_variants", - "digest": "jjlxdrxewzw43s6sisczj7txhq55e6di", + "digest": "a2jlnfpnjbi7gmodm5ooyo2juxbgizzo", "tests": [ { "inputs": { @@ -525,7 +525,7 @@ }, "concat_pbsv_vcf": { "key": "concat_pbsv_vcf", - "digest": "pfr7cljxewp6n6pld4pl4wya3kzdy3jk", + "digest": "dufpg3hbjkz4fmb53sdbok3hulivtier", "tests": [ { "inputs": { @@ -555,7 +555,7 @@ }, "split_vcf_by_sample": { "key": "split_vcf_by_sample", - "digest": "dydfywxit4imoywu7rfyqyvk3wetiowk", + "digest": "wrmspgbtubqm4xdskkbakz6krc5iw65k", "tests": [ { "inputs": { @@ -633,7 +633,7 @@ }, "bcftools_merge": { "key": "bcftools_merge", - "digest": "hm5g7jj2hm73dmieksaauu3ufmvtytoc", + "digest": "jrbphhh6bnjlyqyi6dxrmp7nreelbk4g", "tests": [ { "inputs": { @@ -717,7 +717,7 @@ }, "sv_stats": { "key": "sv_stats", - "digest": 
"bru36vhjvldfcytkj2433t4naekjgctl", + "digest": "i5iptmzk472kcck6varsvun7ip6pd4tf", "tests": [ { "inputs": { @@ -774,7 +774,7 @@ "tasks": { "cpg_pileup": { "key": "cpg_pileup", - "digest": "p36yif35pnwhh7ejpbsc2izmmye4aehs", + "digest": "iuyxupdqsyivt4ozffri4qhh6ps2ihmp", "tests": [ { "inputs": { @@ -895,7 +895,7 @@ "tasks": { "glnexus": { "key": "glnexus", - "digest": "hyyvwfb42obz4ieydkko4uef6hhc2vxl", + "digest": "o2kth2c3iwky74yz5bfpsjymil3sbzd7", "tests": [ { "inputs": { @@ -935,7 +935,7 @@ "tasks": { "hificnv": { "key": "hificnv", - "digest": "bfpv64leadkxosmbgdg2viaugtmrmupo", + "digest": "tt3lk4sjqfnfpuukaghkmg6v33bnt3ic", "tests": [ { "inputs": { @@ -1022,7 +1022,7 @@ "tasks": { "hiphase": { "key": "hiphase", - "digest": "mcezwxqtvtt4hptlcegtkom322yf5jnb", + "digest": "bi5osn4mwiklp3fre6mxcfpw5cmhjhcx", "tests": [ { "inputs": { @@ -1112,7 +1112,7 @@ "tasks": { "mosdepth": { "key": "mosdepth", - "digest": "mwjp4365vu3k7nccm5k4rbd2ga2lqmgw", + "digest": "4drmk2f7kwb57hftqv6udfy5fh4eol7d", "tests": [ { "inputs": { @@ -1224,7 +1224,7 @@ "tasks": { "paraphase": { "key": "paraphase", - "digest": "thdat4rkjeqlyo7fc3frju4caujej4rb", + "digest": "fpnnrdrj72irojbdzcrrf6ylan5ztrj3", "tests": [ { "inputs": { @@ -1270,7 +1270,7 @@ "tasks": { "pbmm2_align_wgs": { "key": "pbmm2_align_wgs", - "digest": "eupj2k52j7y4oifnqvbz2bwvu5gmwzsx", + "digest": "h4ehzyo7xln5zovolppsz3sk5tz3yvro", "tests": [ { "inputs": { @@ -1379,7 +1379,7 @@ "tasks": { "pbstarphase_diplotype": { "key": "pbstarphase_diplotype", - "digest": "vz6illqka3w4pefnou4uoryckd6cr3hu", + "digest": "6opgzazl7wm42vg4ftqbmvqlq3b4xsxl", "tests": [ { "inputs": { @@ -1423,7 +1423,7 @@ "tasks": { "samtools_merge": { "key": "samtools_merge", - "digest": "uuwtzd6saw3mxd5pbg4wvalvi4tmal7a", + "digest": "xpto234x4hmpw3uvsk3tgyc7tb2ytcaa", "tests": [ { "inputs": { @@ -1465,7 +1465,7 @@ "tasks": { "trgt": { "key": "trgt", - "digest": "c63pmlsvyejtse23754nll467tk3x2qn", + "digest": "tqgjvecbpne5cnz4ff4fhjztkpub4xtg", 
"tests": [ { "inputs": { @@ -1554,7 +1554,7 @@ }, "trgt_merge": { "key": "trgt_merge", - "digest": "wj3x5qdjnlsvhyo6mphy3hk6vvbbtjzk", + "digest": "dmgbkalwvudhxs6lr3xkwopq6hkwjttv", "tests": [ { "inputs": { @@ -1588,7 +1588,7 @@ }, "coverage_dropouts": { "key": "coverage_dropouts", - "digest": "5ekiibz2pvpouhtkk655lei5lk2ubtzg", + "digest": "oc5xxjmuyfnoobc6zqe5sqtfsfgkgak7", "tests": [ { "inputs": { @@ -1620,7 +1620,7 @@ "tasks": { "split_string": { "key": "split_string", - "digest": "sia2b6zbcnw34kvg5qsyawoe4w3dvpdu", + "digest": "s4v67veguw3zkfbixrfgn5foulf7kv4p", "tests": [ { "inputs": { @@ -1644,7 +1644,7 @@ }, "consolidate_stats": { "key": "consolidate_stats", - "digest": "olxrzy2hoyifsvy7unp7v6t6ur5hrujv", + "digest": "zri2goulcgcaueaezcic3po2ijgb72k4", "tests": [ { "inputs": { @@ -1685,7 +1685,7 @@ "tasks": { "write_phrank": { "key": "write_phrank", - "digest": "si2kfc6pzsrweaqed3jf5dpkpk4ysnhc", + "digest": "qkxmuzajagtxsk3lfl4nqif6i5zmgj6f", "tests": [ { "inputs": { @@ -1721,7 +1721,7 @@ "tasks": { "deepvariant_make_examples": { "key": "deepvariant_make_examples", - "digest": "ldldviaalmgztiglsdugxauqgx6rfbrk", + "digest": "zcg7prtdcfoir3lj7kvsb2mssi4gcquh", "tests": [ { "inputs": { @@ -1761,7 +1761,7 @@ }, "deepvariant_call_variants_cpu": { "key": "deepvariant_call_variants_cpu", - "digest": "3lfcmsaddhri77rtk4yecs77l7edxoc7", + "digest": "krunswnjbnsletxpge2egpzflynzjjyr", "tests": [ { "inputs": { @@ -1795,7 +1795,7 @@ }, "deepvariant_call_variants_gpu": { "key": "deepvariant_call_variants_gpu", - "digest": "bhzsa64abtjmqd25mgknrrqu2qkwo3xe", + "digest": "567aoezgiryo64cbsy2c2plt3vblrbpy", "tests": [ { "inputs": { @@ -1829,7 +1829,7 @@ }, "deepvariant_postprocess_variants": { "key": "deepvariant_postprocess_variants", - "digest": "ih6q34f2apm6eqmyalje4cylajumvinw", + "digest": "bpmzthuph6iudzhtwxu2uw5myotxykva", "tests": [ { "inputs": { @@ -1904,7 +1904,7 @@ "tasks": { "pharmcat_preprocess": { "key": "pharmcat_preprocess", - "digest": 
"y7e4xvcwoiod7gakvbbimnz5ylkalmhk", + "digest": "laipqonb5jd34wvmzmaja6rbm6ndlsn6", "tests": [ { "inputs": { @@ -1938,7 +1938,7 @@ }, "filter_preprocessed_vcf": { "key": "filter_preprocessed_vcf", - "digest": "qnxypqjvvofeigp4v27xpnp5dfv76duz", + "digest": "juenwyp6ursmgzc7q3rjbzgqsjgstzzr", "tests": [ { "inputs": { @@ -1963,7 +1963,7 @@ }, "run_pharmcat": { "key": "run_pharmcat", - "digest": "itfw64hpuphtqb5t6xqspch4mtscaz7f", + "digest": "oqllo6jjt64smy5j34odu7n7xiv5nawn", "tests": [ { "inputs": { @@ -2058,7 +2058,7 @@ "tasks": { "sawfish_discover": { "key": "sawfish_discover", - "digest": "fjjjgwzjez4nzzx43o67v5g6b75k3q42", + "digest": "dg2xlw6lyteu3qa544blaif6clnijkpr", "tests": [ { "inputs": { @@ -2126,7 +2126,7 @@ }, "sawfish_call": { "key": "sawfish_call", - "digest": "z3l3faihcbvwpi2lkd6k4hsqmg6y552n", + "digest": "wddutc4qigu7gtk6dgzwcvsumhu6p6u5", "tests": [ { "inputs": { @@ -2231,6 +2231,7 @@ "preemptible_tries": 0, "max_retries": 0, "zones": "", + "cpuPlatform": "", "gpuType": "ampere", "container_registry": "quay.io/pacbio" }, @@ -2239,6 +2240,7 @@ "preemptible_tries": 0, "max_retries": 0, "zones": "", + "cpuPlatform": "", "gpuType": "ampere", "container_registry": "quay.io/pacbio" } diff --git a/workflows/downstream/inputs.json b/workflows/downstream/inputs.json index 7bbf1517..c8907973 100644 --- a/workflows/downstream/inputs.json +++ b/workflows/downstream/inputs.json @@ -16,6 +16,7 @@ "gpuType": "String", "backend": "String", "preemptible_tries": "Int", - "zones": "String" + "zones": "String", + "cpuPlatform": "String" } } \ No newline at end of file diff --git a/workflows/family.inputs.json b/workflows/family.inputs.json index cf47370c..e1acea28 100644 --- a/workflows/family.inputs.json +++ b/workflows/family.inputs.json @@ -22,6 +22,7 @@ "humanwgs_family.gpu": "Boolean (optional, default = false)", "humanwgs_family.backend": "String", "humanwgs_family.zones": "String? (optional)", + "humanwgs_family.cpuPlatform": "String? 
(optional)", "humanwgs_family.gpuType": "String? (optional)", "humanwgs_family.container_registry": "String? (optional)", "humanwgs_family.container_namespace": "String? (optional)", diff --git a/workflows/family.wdl b/workflows/family.wdl index 7375540c..5c5335cc 100644 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -45,6 +45,9 @@ workflow humanwgs_family { zones: { name: "Zones where compute will take place; required if backend is set to 'GCP'" } + cpuPlatform: { + help: "Optional minimum CPU platform to use for tasks on GCP" + } gpuType: { name: "GPU type to use; required if gpu is set to `true` for cloud backends; must match backend" } @@ -76,6 +79,7 @@ workflow humanwgs_family { # Backend configuration String backend String? zones + String? cpuPlatform String? gpuType String? container_registry @@ -88,6 +92,7 @@ workflow humanwgs_family { input: backend = backend, zones = zones, + cpuPlatform = cpuPlatform, gpuType = gpuType, container_registry = container_registry } diff --git a/workflows/joint/inputs.json b/workflows/joint/inputs.json index 90e3de13..c779750b 100644 --- a/workflows/joint/inputs.json +++ b/workflows/joint/inputs.json @@ -14,6 +14,7 @@ "gpuType": "String", "backend": "String", "preemptible_tries": "Int", - "zones": "String" + "zones": "String", + "cpuPlatform": "String" } } \ No newline at end of file diff --git a/workflows/singleton.inputs.json b/workflows/singleton.inputs.json index 57f5e614..47b6ef50 100644 --- a/workflows/singleton.inputs.json +++ b/workflows/singleton.inputs.json @@ -11,6 +11,7 @@ "humanwgs_singleton.gpu": "Boolean (optional, default = false)", "humanwgs_singleton.backend": "String", "humanwgs_singleton.zones": "String? (optional)", + "humanwgs_singleton.cpuPlatform": "String? (optional)", "humanwgs_singleton.gpuType": "String? (optional)", "humanwgs_singleton.container_registry": "String? (optional)", "humanwgs_singleton.container_namespace": "String? 
(optional)", diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl index 3222904b..3443c220 100644 --- a/workflows/singleton.wdl +++ b/workflows/singleton.wdl @@ -46,6 +46,9 @@ workflow humanwgs_singleton { zones: { name: "Zones where compute will take place; required if backend is set to 'GCP'" } + cpuPlatform: { + help: "Optional minimum CPU platform to use for tasks on GCP" + } gpuType: { name: "GPU type to use; required if gpu is set to `true` for cloud backends; must match backend" } @@ -78,6 +81,7 @@ workflow humanwgs_singleton { # Backend configuration String backend String? zones + String? cpuPlatform String? gpuType String? container_registry @@ -90,6 +94,7 @@ workflow humanwgs_singleton { input: backend = backend, zones = zones, + cpuPlatform = cpuPlatform, gpuType = gpuType, container_registry = container_registry } diff --git a/workflows/tertiary/inputs.json b/workflows/tertiary/inputs.json index 3116e69f..8e76a0c9 100644 --- a/workflows/tertiary/inputs.json +++ b/workflows/tertiary/inputs.json @@ -13,6 +13,7 @@ "gpuType": "String", "backend": "String", "preemptible_tries": "Int", - "zones": "String" + "zones": "String", + "cpuPlatform": "String" } } \ No newline at end of file diff --git a/workflows/tertiary/tertiary.wdl b/workflows/tertiary/tertiary.wdl index 8d4edc66..7cf965fe 100644 --- a/workflows/tertiary/tertiary.wdl +++ b/workflows/tertiary/tertiary.wdl @@ -451,6 +451,7 @@ task slivar_small_variant { maxRetries: runtime_attributes.max_retries awsBatchRetryAttempts: runtime_attributes.max_retries zones: runtime_attributes.zones + cpuPlatform: runtime_attributes.cpuPlatform } } @@ -550,6 +551,7 @@ task svpack_filter_annotated { maxRetries: runtime_attributes.max_retries awsBatchRetryAttempts: runtime_attributes.max_retries zones: runtime_attributes.zones + cpuPlatform: runtime_attributes.cpuPlatform } } @@ -641,5 +643,6 @@ task slivar_svpack_tsv { maxRetries: runtime_attributes.max_retries awsBatchRetryAttempts: 
runtime_attributes.max_retries zones: runtime_attributes.zones + cpuPlatform: runtime_attributes.cpuPlatform } } diff --git a/workflows/upstream/inputs.json b/workflows/upstream/inputs.json index 5174d8d2..12fc93b6 100644 --- a/workflows/upstream/inputs.json +++ b/workflows/upstream/inputs.json @@ -11,6 +11,7 @@ "gpuType": "String", "backend": "String", "preemptible_tries": "Int", - "zones": "String" + "zones": "String", + "cpuPlatform": "String" } } \ No newline at end of file From 90781792696b3da2d6b408f271d6303685da1b65 Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Thu, 3 Apr 2025 18:08:56 -0700 Subject: [PATCH 29/61] Parallelize pbmm2 alignments by chunking input. (#205) * Change `msg` syntax. Change `msg` concept so that task output `msg` is always `Array[String]`. Any empty messages are pruned by the `Utilities.consolidate_stats` task, which outputs all raised messages to a text file. * Parallelize alignment by chunking input. - the split_input_bam task: - checks inputs for alignments and kinetics/basemod tags - resets alignments and kinetics tags - if max_reads_per_chunk is greater than 0, the input is indexed and split into chunks of no more than max_reads_per_chunk records Note: We are investigating native chunking features in pbmm2, so this implementation may change. 
--- wdl-ci.config.json | 216 ++++++++++++++------------------ workflows/family.inputs.json | 3 +- workflows/family.wdl | 130 ++++++++++--------- workflows/singleton.inputs.json | 3 +- workflows/singleton.wdl | 107 +++++++++------- workflows/upstream/inputs.json | 1 + workflows/upstream/upstream.wdl | 46 ++++--- 7 files changed, 259 insertions(+), 247 deletions(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index b4a85144..750cc4c9 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -269,7 +269,7 @@ "tasks": { "bam_stats": { "key": "bam_stats", - "digest": "wcruy2g5ce7fexnf7sctfshfcj3alw5m", + "digest": "orfdav6zki7q3pvp6vpikfns2fjdkzrx", "tests": [ { "inputs": { @@ -935,7 +935,7 @@ "tasks": { "hificnv": { "key": "hificnv", - "digest": "tt3lk4sjqfnfpuukaghkmg6v33bnt3ic", + "digest": "45dcdjjpmnwhymg3vfjeo7yxbyczxart", "tests": [ { "inputs": { @@ -1263,115 +1263,6 @@ } } }, - "workflows/wdl-common/wdl/tasks/pbmm2.wdl": { - "key": "workflows/wdl-common/wdl/tasks/pbmm2.wdl", - "name": "", - "description": "", - "tasks": { - "pbmm2_align_wgs": { - "key": "pbmm2_align_wgs", - "digest": "h4ehzyo7xln5zovolppsz3sk5tz3yvro", - "tests": [ - { - "inputs": { - "sample_id": "HG00733", - "bam": "${resources_file_path}/inputs/sequelii_aligned_10k.hifi_reads.bam", - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "aligned_bam": { - "value": "${resources_file_path}/pbmm2_align_wgs/sequelii_aligned_10k/HG00733.sequelii_aligned_10k.hifi_reads.GRCh38.aligned.bam", - "test_tasks": [ - "compare_file_basename", - "samtools_quickcheck" - ] - } - } - }, - { - "inputs": { - "sample_id": "HG00133", - "bam": "${resources_file_path}/inputs/sequelii_kinetics_10k.hifi_reads.bam", - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - 
"aligned_bam": { - "value": "${resources_file_path}/pbmm2_align_wgs/sequelii_kinetics_10k/HG00133.sequelii_kinetics_10k.hifi_reads.GRCh38.aligned.bam", - "test_tasks": [ - "compare_file_basename", - "samtools_quickcheck" - ] - } - } - }, - { - "inputs": { - "sample_id": "HG00133", - "bam": "${resources_file_path}/inputs/sequelii_kinetics_10k.hifi_reads.bam", - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "strip_kinetics": false, - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "aligned_bam": { - "value": "${resources_file_path}/pbmm2_align_wgs/sequelii_kinetics_10k_strip_kinetics_false/HG00133.sequelii_kinetics_10k.hifi_reads.GRCh38.aligned.bam", - "test_tasks": [ - "compare_file_basename", - "samtools_quickcheck" - ] - } - } - }, - { - "inputs": { - "sample_id": "HG002", - "bam": "${resources_file_path}/inputs/vega_10k.hifi_reads.bam", - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "aligned_bam": { - "value": "${resources_file_path}/pbmm2_align_wgs/vega_10k/HG002.vega_10k.hifi_reads.GRCh38.aligned.bam", - "test_tasks": [ - "compare_file_basename", - "samtools_quickcheck" - ] - } - } - }, - { - "inputs": { - "sample_id": "HG002", - "bam": "${resources_file_path}/inputs/vega_10k.no_rq.hifi_reads.bam", - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "aligned_bam": { - "value": "${resources_file_path}/pbmm2_align_wgs/vega_10k_no_rq/HG002.vega_10k.no_rq.hifi_reads.GRCh38.aligned.bam", - "test_tasks": [ - "compare_file_basename", - "samtools_quickcheck" - ] - } - } - } - ] - } - } - }, "workflows/wdl-common/wdl/tasks/pbstarphase.wdl": { "key": "workflows/wdl-common/wdl/tasks/pbstarphase.wdl", "name": "", @@ -1423,7 +1314,7 @@ "tasks": { 
"samtools_merge": { "key": "samtools_merge", - "digest": "xpto234x4hmpw3uvsk3tgyc7tb2ytcaa", + "digest": "uenrqqsd3frv7cc4rriwaljrh2kpdq2d", "tests": [ { "inputs": { @@ -1436,7 +1327,7 @@ }, "output_tests": { "merged_bam": { - "value": "${resources_file_path}/samtools_merge/output/two_inputs/HG002HG00133.GRCh38.bam", + "value": "${resources_file_path}/samtools_merge/output/two_inputs_combine_rg_pg/HG002HG00133.GRCh38.bam", "test_tasks": [ "compare_file_basename", "samtools_quickcheck" @@ -1465,7 +1356,7 @@ "tasks": { "trgt": { "key": "trgt", - "digest": "tqgjvecbpne5cnz4ff4fhjztkpub4xtg", + "digest": "g6ywayioaobdtzqdajkngnqfqwmxjtyk", "tests": [ { "inputs": { @@ -1644,7 +1535,7 @@ }, "consolidate_stats": { "key": "consolidate_stats", - "digest": "zri2goulcgcaueaezcic3po2ijgb72k4", + "digest": "pot7gxel373bywwte62q3rc7qyb37jgb", "tests": [ { "inputs": { @@ -1661,6 +1552,11 @@ "3" ] }, + "msg_array": [ + "hello", + "", + "world" + ], "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { @@ -1671,6 +1567,13 @@ "check_tab_delimited", "count_columns" ] + }, + "messages": { + "value": "${resources_file_path}/consolidate_stats/String.messages.txt", + "test_tasks": [ + "compare_file_basename", + "calculate_md5sum" + ] } } } @@ -1885,15 +1788,86 @@ } } }, - "workflows/wdl-common/wdl/workflows/get_pbsv_splits/get_pbsv_splits.wdl": { - "key": "workflows/wdl-common/wdl/workflows/get_pbsv_splits/get_pbsv_splits.wdl", + "workflows/wdl-common/wdl/workflows/pbmm2/pbmm2.wdl": { + "key": "workflows/wdl-common/wdl/workflows/pbmm2/pbmm2.wdl", "name": "", "description": "", "tasks": { - "read_pbsv_splits": { - "key": "read_pbsv_splits", - "digest": "", - "tests": [] + "pbmm2_align_wgs": { + "key": "pbmm2_align_wgs", + "digest": "kobmohrwnpy2pzlsd7l2qvoo7ulkxth2", + "tests": [ + { + "inputs": { + "sample_id": "HG002", + "bam": "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_00.bam", + "ref_fasta": "${ref_fasta}", + 
"ref_index": "${ref_index}", + "ref_name": "${ref_name}", + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "aligned_bam": { + "value": "${resources_file_path}/pbmm2_align_wgs/split_vega_1k/HG002.vega_10k.hifi_reads.chunk_00.GRCh38.aligned.bam", + "test_tasks": [ + "compare_file_basename", + "samtools_quickcheck" + ] + } + } + } + ] + }, + "split_input_bam": { + "key": "split_input_bam", + "digest": "b7xnykjq4l2ecpvku2gxuswdnrnk3xka", + "tests": [ + { + "inputs": { + "bam": "${resources_file_path}/inputs/vega_10k.hifi_reads.bam", + "max_reads_per_chunk": 1000, + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "chunks": { + "value": [ + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_00.bam", + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_01.bam", + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_02.bam", + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_03.bam", + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_04.bam", + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_05.bam", + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_06.bam", + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_07.bam", + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_08.bam", + "${resources_file_path}/split_input_bam/output/vega_10_chunks/vega_10k.hifi_reads.chunk_09.bam" + ], + "test_tasks": [ + "compare_file_basename", + "samtools_quickcheck" + ] + } + } + }, + { + "inputs": { + "bam": "${resources_file_path}/inputs/sequelii_aligned_10k.hifi_reads.bam", + "max_reads_per_chunk": 0, + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + 
"chunks": { + "value": [ + "${resources_file_path}/split_input_bam/output/aligned_input/sequelii_aligned_10k.hifi_reads.reset.bam" + ], + "test_tasks": [ + "compare_file_basename", + "samtools_quickcheck" + ] + } + } + } + ] } } }, diff --git a/workflows/family.inputs.json b/workflows/family.inputs.json index e1acea28..0768bb4b 100644 --- a/workflows/family.inputs.json +++ b/workflows/family.inputs.json @@ -16,8 +16,9 @@ }, "humanwgs_family.phenotypes": "String? (optional)", "humanwgs_family.ref_map_file": "File", - "humanwgs_family.pharmcat_min_coverage": "Int (optional, default = 10)", "humanwgs_family.tertiary_map_file": "File? (optional)", + "humanwgs_family.max_reads_per_alignment_chunk": "Int (optional, default = 500000)", + "humanwgs_family.pharmcat_min_coverage": "Int (optional, default = 10)", "humanwgs_family.glnexus_mem_gb": "Int? (optional)", "humanwgs_family.gpu": "Boolean (optional, default = false)", "humanwgs_family.backend": "String", diff --git a/workflows/family.wdl b/workflows/family.wdl index 5c5335cc..944aeaa8 100644 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -20,18 +20,21 @@ workflow humanwgs_family { family: { name: "Family struct describing samples, relationships, and unaligned BAM paths" } - ref_map_file: { - name: "TSV containing reference genome file paths; must match backend" - } - pharmcat_min_coverage: { - name: "Minimum coverage for PharmCAT" - } phenotypes: { name: "Comma-delimited list of HPO codes for phenotypes" } + ref_map_file: { + name: "TSV containing reference genome file paths; must match backend" + } tertiary_map_file: { name: "TSV containing tertiary analysis file paths and thresholds; must match backend" } + max_reads_per_alignment_chunk: { + name: "Maximum reads per alignment chunk" + } + pharmcat_min_coverage: { + name: "Minimum coverage for PharmCAT" + } glnexus_mem_gb: { name: "Override GLnexus memory request (GB)" } @@ -65,13 +68,13 @@ workflow humanwgs_family { input { Family family - File 
ref_map_file - - Int pharmcat_min_coverage = 10 - String phenotypes = "HP:0000001" + + File ref_map_file File? tertiary_map_file + Int max_reads_per_alignment_chunk = 500000 + Int pharmcat_min_coverage = 10 Int? glnexus_mem_gb Boolean gpu = false @@ -116,13 +119,14 @@ workflow humanwgs_family { call Upstream.upstream { input: - sample_id = sample.sample_id, - sex = sample.sex, - hifi_reads = sample.hifi_reads, - ref_map_file = ref_map_file, - single_sample = single_sample, - gpu = gpu, - default_runtime_attributes = default_runtime_attributes + sample_id = sample.sample_id, + sex = sample.sex, + hifi_reads = sample.hifi_reads, + ref_map_file = ref_map_file, + max_reads_per_alignment_chunk = max_reads_per_alignment_chunk, + single_sample = single_sample, + gpu = gpu, + default_runtime_attributes = default_runtime_attributes } # write sample metadata similar to pedigree format @@ -171,47 +175,6 @@ workflow humanwgs_family { } } - Map[String, Array[String]] stats = { - 'sample_id': sample_id, - 'num_reads': downstream.stat_num_reads, - 'read_length_mean': downstream.stat_read_length_mean, - 'read_length_median': downstream.stat_read_length_median, - 'read_quality_mean': downstream.stat_read_quality_mean, - 'read_quality_median': downstream.stat_read_quality_median, - 'mapped_read_count': downstream.stat_mapped_read_count, - 'mapped_percent': downstream.stat_mapped_percent, - 'mean_depth': upstream.stat_mean_depth, - 'inferred_sex': upstream.inferred_sex, - 'stat_phased_basepairs': downstream.stat_phased_basepairs, - 'phase_block_ng50': downstream.stat_phase_block_ng50, - 'cpg_combined_count': downstream.stat_combined_cpg_count, - 'cpg_hap1_count': downstream.stat_hap1_cpg_count, - 'cpg_hap2_count': downstream.stat_hap2_cpg_count, - 'SNV_count': downstream.stat_SNV_count, - 'TSTV_ratio': downstream.stat_TSTV_ratio, - 'HETHOM_ratio': downstream.stat_HETHOM_ratio, - 'INDEL_count': downstream.stat_INDEL_count, - 'sv_DUP_count': downstream.stat_sv_DUP_count, - 
'sv_DEL_count': downstream.stat_sv_DEL_count, - 'sv_INS_count': downstream.stat_sv_INS_count, - 'sv_INV_count': downstream.stat_sv_INV_count, - 'sv_INVBND_count': downstream.stat_sv_INVBND_count, - 'sv_BND_count': downstream.stat_sv_BND_count, - 'cnv_DUP_count': upstream.stat_cnv_DUP_count, - 'cnv_DEL_count': upstream.stat_cnv_DEL_count, - 'cnv_DUP_sum': upstream.stat_cnv_DUP_sum, - 'cnv_DEL_sum': upstream.stat_cnv_DEL_sum, - 'trgt_genotyped_count': upstream.stat_trgt_genotyped_count, - 'trgt_uncalled_count': upstream.stat_trgt_uncalled_count - } - - call Utilities.consolidate_stats { - input: - id = family.family_id, - stats = stats, - runtime_attributes = default_runtime_attributes - } - if (!single_sample) { call Bcftools.bcftools_merge as merge_small_variant_vcfs { input: @@ -257,10 +220,53 @@ workflow humanwgs_family { } } + Map[String, Array[String]] stats = { + 'sample_id': sample_id, + 'num_reads': downstream.stat_num_reads, + 'read_length_mean': downstream.stat_read_length_mean, + 'read_length_median': downstream.stat_read_length_median, + 'read_quality_mean': downstream.stat_read_quality_mean, + 'read_quality_median': downstream.stat_read_quality_median, + 'mapped_read_count': downstream.stat_mapped_read_count, + 'mapped_percent': downstream.stat_mapped_percent, + 'mean_depth': upstream.stat_mean_depth, + 'inferred_sex': upstream.inferred_sex, + 'stat_phased_basepairs': downstream.stat_phased_basepairs, + 'phase_block_ng50': downstream.stat_phase_block_ng50, + 'cpg_combined_count': downstream.stat_combined_cpg_count, + 'cpg_hap1_count': downstream.stat_hap1_cpg_count, + 'cpg_hap2_count': downstream.stat_hap2_cpg_count, + 'SNV_count': downstream.stat_SNV_count, + 'TSTV_ratio': downstream.stat_TSTV_ratio, + 'HETHOM_ratio': downstream.stat_HETHOM_ratio, + 'INDEL_count': downstream.stat_INDEL_count, + 'sv_DUP_count': downstream.stat_sv_DUP_count, + 'sv_DEL_count': downstream.stat_sv_DEL_count, + 'sv_INS_count': downstream.stat_sv_INS_count, + 'sv_INV_count': 
downstream.stat_sv_INV_count, + 'sv_INVBND_count': downstream.stat_sv_INVBND_count, + 'sv_BND_count': downstream.stat_sv_BND_count, + 'cnv_DUP_count': upstream.stat_cnv_DUP_count, + 'cnv_DEL_count': upstream.stat_cnv_DEL_count, + 'cnv_DUP_sum': upstream.stat_cnv_DUP_sum, + 'cnv_DEL_sum': upstream.stat_cnv_DEL_sum, + 'trgt_genotyped_count': upstream.stat_trgt_genotyped_count, + 'trgt_uncalled_count': upstream.stat_trgt_uncalled_count + } + + call Utilities.consolidate_stats { + input: + id = family.family_id, + stats = stats, + msg_array = flatten([flatten(upstream.msg)]), + runtime_attributes = default_runtime_attributes + } + output { # to maintain order of samples Array[String] sample_ids = sample_id - File stats_file = consolidate_stats.output_tsv + File stats_file = consolidate_stats.output_tsv + File msg_file = consolidate_stats.messages # bam stats Array[File] bam_statistics = downstream.bam_statistics @@ -391,7 +397,11 @@ workflow humanwgs_family { File? tertiary_sv_filtered_tsv = tertiary_analysis.sv_filtered_tsv # qc messages - Array[String?] qc_messages = flatten([upstream.msg_qc_sex]) + Array[String] msg = flatten( + [ + flatten(upstream.msg) + ] + ) # workflow metadata String workflow_name = "humanwgs_family" diff --git a/workflows/singleton.inputs.json b/workflows/singleton.inputs.json index 47b6ef50..3e0d1b29 100644 --- a/workflows/singleton.inputs.json +++ b/workflows/singleton.inputs.json @@ -6,8 +6,9 @@ ], "humanwgs_singleton.phenotypes": "String? (optional)", "humanwgs_singleton.ref_map_file": "File", - "humanwgs_singleton.pharmcat_min_coverage": "Int (optional, default = 10)", "humanwgs_singleton.tertiary_map_file": "File? (optional)", + "humanwgs_singleton.max_reads_per_alignment_chunk": "Int (optional, default = 500000)", + "humanwgs_singleton.pharmcat_min_coverage": "Int (optional, default = 10)", "humanwgs_singleton.gpu": "Boolean (optional, default = false)", "humanwgs_singleton.backend": "String", "humanwgs_singleton.zones": "String? 
(optional)", diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl index 3443c220..89f2d24c 100644 --- a/workflows/singleton.wdl +++ b/workflows/singleton.wdl @@ -24,18 +24,21 @@ workflow humanwgs_singleton { hifi_reads: { name: "Array of paths to HiFi reads in unaligned BAM format." } - ref_map_file: { - name: "TSV containing reference genome file paths; must match backend" - } - pharmcat_min_coverage: { - name: "Minimum coverage for PharmCAT" - } phenotypes: { name: "Comma-delimited list of HPO codes for phenotypes" } + ref_map_file: { + name: "TSV containing reference genome file paths; must match backend" + } tertiary_map_file: { name: "TSV containing tertiary analysis file paths and thresholds; must match backend" } + max_reads_per_alignment_chunk: { + name: "Maximum reads per alignment chunk" + } + pharmcat_min_coverage: { + name: "Minimum coverage for PharmCAT" + } gpu: { name: "Use GPU when possible" } @@ -69,13 +72,14 @@ workflow humanwgs_singleton { String? sex Array[File] hifi_reads + String phenotypes = "HP:0000001" + File ref_map_file + File? tertiary_map_file + Int max_reads_per_alignment_chunk = 500000 Int pharmcat_min_coverage = 10 - String phenotypes = "HP:0000001" - File? 
tertiary_map_file - Boolean gpu = false # Backend configuration @@ -103,13 +107,14 @@ workflow humanwgs_singleton { call Upstream.upstream { input: - sample_id = sample_id, - sex = sex, - hifi_reads = hifi_reads, - ref_map_file = ref_map_file, - single_sample = true, - gpu = gpu, - default_runtime_attributes = default_runtime_attributes + sample_id = sample_id, + sex = sex, + hifi_reads = hifi_reads, + ref_map_file = ref_map_file, + max_reads_per_alignment_chunk = max_reads_per_alignment_chunk, + single_sample = true, + gpu = gpu, + default_runtime_attributes = default_runtime_attributes } call Downstream.downstream { @@ -128,6 +133,37 @@ workflow humanwgs_singleton { default_runtime_attributes = default_runtime_attributes } + Map[String, String] pedigree_sex = { + "MALE": "1", + "FEMALE": "2", + "": "." + } + + # write sample metadata similar to pedigree format + # family_id, sample_id, father_id, mother_id, sex, affected + Array[String] sample_metadata = [ + sample_id, sample_id, + ".", ".", + pedigree_sex[upstream.inferred_sex], "2" + ] + + if (defined(tertiary_map_file)) { + call TertiaryAnalysis.tertiary_analysis { + input: + sample_metadata = [sample_metadata], + phenotypes = phenotypes, + is_trio_kid = [false], + is_duo_kid = [false], + small_variant_vcf = downstream.phased_small_variant_vcf, + small_variant_vcf_index = downstream.phased_small_variant_vcf_index, + sv_vcf = downstream.phased_sv_vcf, + sv_vcf_index = downstream.phased_sv_vcf_index, + ref_map_file = ref_map_file, + tertiary_map_file = select_first([tertiary_map_file]), + default_runtime_attributes = default_runtime_attributes + } + } + Map[String, Array[String]] stats = { 'sample_id': [sample_id], 'num_reads': [downstream.stat_num_reads], @@ -166,43 +202,14 @@ workflow humanwgs_singleton { input: id = sample_id, stats = stats, + msg_array = flatten([upstream.msg]), runtime_attributes = default_runtime_attributes } - Map[String, String] pedigree_sex = { - "MALE": "1", - "FEMALE": "2", - "": "." 
- } - - # write sample metadata similar to pedigree format - # family_id, sample_id, father_id, mother_id, sex, affected - Array[String] sample_metadata = [ - sample_id, sample_id, - ".", ".", - pedigree_sex[upstream.inferred_sex], "2" - ] - - if (defined(tertiary_map_file)) { - call TertiaryAnalysis.tertiary_analysis { - input: - sample_metadata = [sample_metadata], - phenotypes = phenotypes, - is_trio_kid = [false], - is_duo_kid = [false], - small_variant_vcf = downstream.phased_small_variant_vcf, - small_variant_vcf_index = downstream.phased_small_variant_vcf_index, - sv_vcf = downstream.phased_sv_vcf, - sv_vcf_index = downstream.phased_sv_vcf_index, - ref_map_file = ref_map_file, - tertiary_map_file = select_first([tertiary_map_file]), - default_runtime_attributes = default_runtime_attributes - } - } - output { # consolidated stats File stats_file = consolidate_stats.output_tsv + File msg_file = consolidate_stats.messages # bam stats File bam_statistics = downstream.bam_statistics @@ -325,7 +332,11 @@ workflow humanwgs_singleton { File? tertiary_sv_filtered_tsv = tertiary_analysis.sv_filtered_tsv # qc messages - Array[String?] qc_messages = [upstream.msg_qc_sex] + Array[String] msg = flatten( + [ + upstream.msg + ] + ) # workflow metadata String workflow_name = "humanwgs_family" diff --git a/workflows/upstream/inputs.json b/workflows/upstream/inputs.json index 12fc93b6..f007688a 100644 --- a/workflows/upstream/inputs.json +++ b/workflows/upstream/inputs.json @@ -3,6 +3,7 @@ "upstream.sex": "String? 
(optional)", "upstream.hifi_reads": "Array[File]", "upstream.ref_map_file": "File", + "upstream.max_reads_per_alignment_chunk": "Int", "upstream.single_sample": "Boolean (optional, default = false)", "upstream.gpu": "Boolean", "upstream.default_runtime_attributes": { diff --git a/workflows/upstream/upstream.wdl b/workflows/upstream/upstream.wdl index a9020c1c..b2240134 100644 --- a/workflows/upstream/upstream.wdl +++ b/workflows/upstream/upstream.wdl @@ -1,7 +1,7 @@ version 1.0 import "../wdl-common/wdl/structs.wdl" -import "../wdl-common/wdl/tasks/pbmm2.wdl" as Pbmm2 +import "../wdl-common/wdl/workflows/pbmm2/pbmm2.wdl" as Pbmm2 import "../wdl-common/wdl/tasks/sawfish.wdl" as Sawfish import "../wdl-common/wdl/workflows/deepvariant/deepvariant.wdl" as DeepVariant import "../wdl-common/wdl/tasks/samtools.wdl" as Samtools @@ -29,6 +29,9 @@ workflow upstream { ref_map_file: { name: "TSV containing reference genome information" } + max_reads_per_alignment_chunk: { + name: "Maximum reads per alignment chunk" + } single_sample: { name: "Single sample workflow" } @@ -47,6 +50,8 @@ workflow upstream { File ref_map_file + Int max_reads_per_alignment_chunk + Boolean single_sample = false Boolean gpu @@ -57,30 +62,31 @@ workflow upstream { Map[String, String] ref_map = read_map(ref_map_file) scatter (hifi_read_bam in hifi_reads) { - call Pbmm2.pbmm2_align_wgs as pbmm2_align { + call Pbmm2.pbmm2 as pbmm2 { input: - sample_id = sample_id, - bam = hifi_read_bam, - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion - ref_name = ref_map["name"], - runtime_attributes = default_runtime_attributes + sample_id = sample_id, + bam = hifi_read_bam, + max_reads_per_chunk = max_reads_per_alignment_chunk, + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + ref_name = ref_map["name"], + default_runtime_attributes = default_runtime_attributes } } # merge aligned bams if there are multiple - if 
(length(pbmm2_align.aligned_bam) > 1) { + if (length(flatten(pbmm2.aligned_bams)) > 1) { call Samtools.samtools_merge { input: - bams = pbmm2_align.aligned_bam, + bams = flatten(pbmm2.aligned_bams), out_prefix = "~{sample_id}.~{ref_map['name']}", runtime_attributes = default_runtime_attributes } } # select the merged bam if it exists, otherwise select the first (only) aligned bam - File aligned_bam_data = select_first([samtools_merge.merged_bam, pbmm2_align.aligned_bam[0]]) - File aligned_bam_index = select_first([samtools_merge.merged_bam_index, pbmm2_align.aligned_bam_index[0]]) + File aligned_bam_data = select_first([samtools_merge.merged_bam, flatten(pbmm2.aligned_bams)[0]]) + File aligned_bam_index = select_first([samtools_merge.merged_bam_index, flatten(pbmm2.aligned_bam_indices)[0]]) call Mosdepth.mosdepth { input: @@ -92,9 +98,10 @@ workflow upstream { runtime_attributes = default_runtime_attributes } - if (defined(sex) && (mosdepth.inferred_sex != sex)) { - String qc_sex = "~{sample_id}: Reported sex ~{sex} does not match inferred sex ~{mosdepth.inferred_sex}." - } + String qc_sex = + if (defined(sex) && (mosdepth.inferred_sex != sex)) + then "~{sample_id}: Reported sex ~{sex} does not match inferred sex ~{mosdepth.inferred_sex}." + else "" call DeepVariant.deepvariant { input: @@ -227,6 +234,13 @@ workflow upstream { String stat_cnv_DEL_sum = hificnv.stat_DEL_sum # qc messages - String? 
msg_qc_sex = qc_sex + Array[String] msg = flatten( + [ + flatten(pbmm2.msg), + [qc_sex], + trgt.msg, + hificnv.msg + ] + ) } } From 73ece45b84d88f7ebc45bb6669bd04b4b9ef1d9a Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Thu, 3 Apr 2025 18:44:50 -0700 Subject: [PATCH 30/61] Update to sawfish 0.12.10 (#206) * Update to sawfish 0.12.10 --- docs/tools_containers.md | 2 +- wdl-ci.config.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/tools_containers.md b/docs/tools_containers.md index 213e9c8a..376d5c98 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -13,7 +13,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | pb_wdl_base |
  • htslib 1.20
  • bcftools 1.20
  • samtools 1.20
  • bedtools 2.31.0
  • python3.9
  • numpy 1.24.24
  • pandas 2.0.3
  • matplotlib 3.7.5
  • seaborn 0.13.2
  • pysam 0.22.1
  • vcfpy 0.13.8
  • biopython 1.83
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/6b13cc246dd44e41903d17a660bb5432cdd18dbe/docker/pb_wdl_base) | [sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87](https://quay.io/repository/pacbio/pb_wdl_base/manifest/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87) | | pbmm2 |
  • pbmm2 1.17.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/9591749da92ca57f7283ca1c2268789c45fa341d/docker/pbmm2) | [pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859](https://quay.io/repository/pacbio/pbmm2/manifest/sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859) | | mosdepth |
  • mosdepth 0.3.9
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/fa84fbf582738c05c750e667ff43d11552ad4183/docker/mosdepth) | [mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1](https://quay.io/repository/pacbio/mosdepth/manifest/sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1) | -| sawfish |
  • sawfish 0.12.7
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/74325e0c73d53da2fef43d01ce9d0c7d0cd49c7a/docker/sawfish) | [sawfish@sha256:fcd5d091908322ddeb2c86b7217b7cfdef9a103944adb3e87c76d495eb3fea5b](https://quay.io/repository/pacbio/sawfish/manifest/sha256:fcd5d091908322ddeb2c86b7217b7cfdef9a103944adb3e87c76d495eb3fea5b) | +| sawfish |
  • sawfish 0.12.10
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/b20d14fc62026b465970db1bd98d78d516014030/docker/sawfish) | [sawfish@sha256:ba4334987f6ae249b615f2c6788499950a513ac9cbd06156c39086bd3ed015f5](https://quay.io/repository/pacbio/sawfish/manifest/sha256:ba4334987f6ae249b615f2c6788499950a513ac9cbd06156c39086bd3ed015f5) | | trgt |
  • trgt 1.5.1
  • `/opt/scripts/check_trgt_coverage.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/641ed67d29128381f27daeca9936fbc1e41bf58d/docker/trgt) | [trgt@sha256:be7e6ef589a31f4de5d2ed4725dfb34b4b23cb9a440577b606e8f7bfee06526b](https://quay.io/repository/pacbio/trgt/manifest/sha256:be7e6ef589a31f4de5d2ed4725dfb34b4b23cb9a440577b606e8f7bfee06526b) | | hiphase |
  • hiphase 1.5.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/69039c010ada793bab4d38a9bd17a30562b9b671/docker/hiphase) | [hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482](https://quay.io/repository/pacbio/hiphase/manifest/sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482) | | hificnv |
  • hificnv 1.0.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/a58f8b44cf8fd09c39c90e07076dbb418188084d/docker/hificnv) | [hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d](https://quay.io/repository/pacbio/hificnv/manifest/sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d) | diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 750cc4c9..fbf26478 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -2032,7 +2032,7 @@ "tasks": { "sawfish_discover": { "key": "sawfish_discover", - "digest": "dg2xlw6lyteu3qa544blaif6clnijkpr", + "digest": "wbvv3v5qtbdrgvqvtqgeeamqmhgzsw4p", "tests": [ { "inputs": { @@ -2100,7 +2100,7 @@ }, "sawfish_call": { "key": "sawfish_call", - "digest": "wddutc4qigu7gtk6dgzwcvsumhu6p6u5", + "digest": "rfuh5khjsyiajhrxz4the5j4fv3kzzhb", "tests": [ { "inputs": { From e900d1dfa443d112528827d430ec8efb4f334805 Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Tue, 8 Apr 2025 17:31:09 -0700 Subject: [PATCH 31/61] Updates to sawfish and sv_stats. (#208) * Update to sawfish and sv_stats. sawfish: - Remove sex from inputs. - Add `--verbose` to all fs operations. - Switch to long form arguments. - Clean up compressed/decompressed files at the end of each task. - Pass supporting_reads_json up to main workflow outputs. sv_stats: - Remove InversionBND filtered events. - Require that all events are FILTER="PASS" and non REF genotypes. - Parameterize minimum variant size and maximum "scar" size (for sequence swaps) - Filter "sequence swap" (len(REF) and len(ALT) both >10bp) complex variants from the INS and DEL counts, and put them in a separate SWAP category. These are variants where both the REF and ALT are longer than 1bp. 
* update wdl-ci config file after successful tests --------- Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> --- docs/family.md | 3 ++- docs/singleton.md | 3 ++- wdl-ci.config.json | 10 +++++----- workflows/downstream/downstream.wdl | 12 ++++++------ workflows/family.wdl | 15 ++++++++------- workflows/joint/joint.wdl | 4 ++++ workflows/singleton.wdl | 15 ++++++++------- workflows/upstream/upstream.wdl | 8 +++----- 8 files changed, 38 insertions(+), 32 deletions(-) diff --git a/docs/family.md b/docs/family.md index 7a1d84d4..d28e5b09 100644 --- a/docs/family.md +++ b/docs/family.md @@ -168,8 +168,9 @@ The `Sample` struct contains sample specific data and metadata. The struct has t | Array\[String\] | stat_sv_DEL_count | Structural variant DEL count | (PASS variants) | | Array\[String\] | stat_sv_INS_count | Structural variant INS count | (PASS variants) | | Array\[String\] | stat_sv_INV_count | Structural variant INV count | (PASS variants) | -| Array\[String\] | stat_sv_INVBND_count | Structural variant INVBND count | (PASS variants) | | Array\[String\] | stat_sv_BND_count | Structural variant BND count | (PASS variants) | +| Array\[String\] | stat_sv_SWAP_count | Structural variant sequence swap events | (PASS variants) | +| File | sv_supporting_reads | Supporting reads for structural variants | | | Array\[File\] | bcftools_roh_out | ROH calling | `bcftools roh` | | Array\[File\] | bcftools_roh_bed | Generated from above, without filtering | | | File? 
| joint_sv_vcf | Joint-called structural variant VCF | | diff --git a/docs/singleton.md b/docs/singleton.md index 64295a4a..c42642b4 100644 --- a/docs/singleton.md +++ b/docs/singleton.md @@ -131,8 +131,9 @@ flowchart TD | String | stat_sv_DEL_count | Structural variant DEL count | (PASS variants) | | String | stat_sv_INS_count | Structural variant INS count | (PASS variants) | | String | stat_sv_INV_count | Structural variant INV count | (PASS variants) | -| String | stat_sv_INVBND_count | Structural variant INVBND count | (PASS variants) | | String | stat_sv_BND_count | Structural variant BND count | (PASS variants) | +| String | stat_sv_SWAP_count | Structural variant sequence swap events | (PASS variants) | +| File | sv_supporting_reads | Supporting reads for structural variants | | | File | bcftools_roh_out | ROH calling | `bcftools roh` | | File | bcftools_roh_bed | Generated from above, without filtering | | diff --git a/wdl-ci.config.json b/wdl-ci.config.json index fbf26478..e09f004b 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -717,7 +717,7 @@ }, "sv_stats": { "key": "sv_stats", - "digest": "i5iptmzk472kcck6varsvun7ip6pd4tf", + "digest": "y36kt5y2lun65yutrgqtspgheaezbbgh", "tests": [ { "inputs": { @@ -755,8 +755,8 @@ "compare_string" ] }, - "stat_sv_INVBND_count": { - "value": "4", + "stat_sv_SWAP_count": { + "value": "0", "test_tasks": [ "compare_string" ] @@ -2032,7 +2032,7 @@ "tasks": { "sawfish_discover": { "key": "sawfish_discover", - "digest": "wbvv3v5qtbdrgvqvtqgeeamqmhgzsw4p", + "digest": "fhepxt5mh25uz4eg267h6al6nclhl7vl", "tests": [ { "inputs": { @@ -2100,7 +2100,7 @@ }, "sawfish_call": { "key": "sawfish_call", - "digest": "rfuh5khjsyiajhrxz4the5j4fv3kzzhb", + "digest": "kjha4llh7xjs55xbkowbvfikvfnwtct4", "tests": [ { "inputs": { diff --git a/workflows/downstream/downstream.wdl b/workflows/downstream/downstream.wdl index dd742c7a..494c110d 100644 --- a/workflows/downstream/downstream.wdl +++ b/workflows/downstream/downstream.wdl @@ 
-219,12 +219,12 @@ workflow downstream { File indel_distribution_plot = bcftools_stats_roh_small_variants.indel_distribution_plot # sv stats - String stat_sv_DUP_count = sv_stats.stat_sv_DUP_count - String stat_sv_DEL_count = sv_stats.stat_sv_DEL_count - String stat_sv_INS_count = sv_stats.stat_sv_INS_count - String stat_sv_INV_count = sv_stats.stat_sv_INV_count - String stat_sv_INVBND_count = sv_stats.stat_sv_INVBND_count - String stat_sv_BND_count = sv_stats.stat_sv_BND_count + String stat_sv_DUP_count = sv_stats.stat_sv_DUP_count + String stat_sv_DEL_count = sv_stats.stat_sv_DEL_count + String stat_sv_INS_count = sv_stats.stat_sv_INS_count + String stat_sv_INV_count = sv_stats.stat_sv_INV_count + String stat_sv_BND_count = sv_stats.stat_sv_BND_count + String stat_sv_SWAP_count = sv_stats.stat_sv_SWAP_count # cpg_pileup outputs File? cpg_combined_bed = cpg_pileup.combined_bed diff --git a/workflows/family.wdl b/workflows/family.wdl index 944aeaa8..d0f79a4e 100644 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -244,7 +244,7 @@ workflow humanwgs_family { 'sv_DEL_count': downstream.stat_sv_DEL_count, 'sv_INS_count': downstream.stat_sv_INS_count, 'sv_INV_count': downstream.stat_sv_INV_count, - 'sv_INVBND_count': downstream.stat_sv_INVBND_count, + 'sv_SWAP_count': downstream.stat_sv_SWAP_count, 'sv_BND_count': downstream.stat_sv_BND_count, 'cnv_DUP_count': upstream.stat_cnv_DUP_count, 'cnv_DEL_count': upstream.stat_cnv_DEL_count, @@ -318,14 +318,15 @@ workflow humanwgs_family { # sv outputs Array[File] phased_sv_vcf = downstream.phased_sv_vcf Array[File] phased_sv_vcf_index = downstream.phased_sv_vcf_index + File sv_supporting_reads = select_first([joint.sv_supporting_reads, upstream.sv_supporting_reads[0]]) # sv stats - Array[String] stat_sv_DUP_count = downstream.stat_sv_DUP_count - Array[String] stat_sv_DEL_count = downstream.stat_sv_DEL_count - Array[String] stat_sv_INS_count = downstream.stat_sv_INS_count - Array[String] stat_sv_INV_count = 
downstream.stat_sv_INV_count - Array[String] stat_sv_INVBND_count = downstream.stat_sv_INVBND_count - Array[String] stat_sv_BND_count = downstream.stat_sv_BND_count + Array[String] stat_sv_DUP_count = downstream.stat_sv_DUP_count + Array[String] stat_sv_DEL_count = downstream.stat_sv_DEL_count + Array[String] stat_sv_INS_count = downstream.stat_sv_INS_count + Array[String] stat_sv_INV_count = downstream.stat_sv_INV_count + Array[String] stat_sv_SWAP_count = downstream.stat_sv_SWAP_count + Array[String] stat_sv_BND_count = downstream.stat_sv_BND_count # small variant outputs Array[File] phased_small_variant_vcf = downstream.phased_small_variant_vcf diff --git a/workflows/joint/joint.wdl b/workflows/joint/joint.wdl index dac45709..215800fa 100644 --- a/workflows/joint/joint.wdl +++ b/workflows/joint/joint.wdl @@ -53,6 +53,9 @@ workflow joint { split_joint_small_variant_vcf_indices: { name: "Joint-call small variant VCF indices, split by sample" } + sv_supporting_reads: { + name: "Supporting reads JSON" + } } input { @@ -135,5 +138,6 @@ workflow joint { Array[File] split_joint_structural_variant_vcf_indices = split_sawfish.split_vcf_indices Array[File] split_joint_small_variant_vcfs = split_glnexus.split_vcfs Array[File] split_joint_small_variant_vcf_indices = split_glnexus.split_vcf_indices + File sv_supporting_reads = select_first([sawfish_call.supporting_reads]) } } diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl index 89f2d24c..189e271d 100644 --- a/workflows/singleton.wdl +++ b/workflows/singleton.wdl @@ -188,7 +188,7 @@ workflow humanwgs_singleton { 'sv_DEL_count': [downstream.stat_sv_DEL_count], 'sv_INS_count': [downstream.stat_sv_INS_count], 'sv_INV_count': [downstream.stat_sv_INV_count], - 'sv_INVBND_count': [downstream.stat_sv_INVBND_count], + 'sv_SWAP_count': [downstream.stat_sv_SWAP_count], 'sv_BND_count': [downstream.stat_sv_BND_count], 'cnv_DUP_count': [upstream.stat_cnv_DUP_count], 'cnv_DEL_count': [upstream.stat_cnv_DEL_count], @@ 
-261,14 +261,15 @@ workflow humanwgs_singleton { # sv outputs File phased_sv_vcf = downstream.phased_sv_vcf File phased_sv_vcf_index = downstream.phased_sv_vcf_index + File sv_supporting_reads = select_first([upstream.sv_supporting_reads]) # sv stats - String stat_sv_DUP_count = downstream.stat_sv_DUP_count - String stat_sv_DEL_count = downstream.stat_sv_DEL_count - String stat_sv_INS_count = downstream.stat_sv_INS_count - String stat_sv_INV_count = downstream.stat_sv_INV_count - String stat_sv_INVBND_count = downstream.stat_sv_INVBND_count - String stat_sv_BND_count = downstream.stat_sv_BND_count + String stat_sv_DUP_count = downstream.stat_sv_DUP_count + String stat_sv_DEL_count = downstream.stat_sv_DEL_count + String stat_sv_INS_count = downstream.stat_sv_INS_count + String stat_sv_INV_count = downstream.stat_sv_INV_count + String stat_sv_SWAP_count = downstream.stat_sv_SWAP_count + String stat_sv_BND_count = downstream.stat_sv_BND_count # small variant outputs File phased_small_variant_vcf = downstream.phased_small_variant_vcf diff --git a/workflows/upstream/upstream.wdl b/workflows/upstream/upstream.wdl index b2240134..e2a83609 100644 --- a/workflows/upstream/upstream.wdl +++ b/workflows/upstream/upstream.wdl @@ -117,14 +117,11 @@ workflow upstream { call Sawfish.sawfish_discover { input: - sex = mosdepth.inferred_sex, aligned_bam = aligned_bam_data, aligned_bam_index = aligned_bam_index, ref_fasta = ref_map["fasta"], # !FileCoercion ref_index = ref_map["fasta_index"], # !FileCoercion out_prefix = "~{sample_id}.~{ref_map['name']}", - expected_male_bed = ref_map["hificnv_expected_bed_male"], # !FileCoercion - expected_female_bed = ref_map["hificnv_expected_bed_female"], # !FileCoercion runtime_attributes = default_runtime_attributes } @@ -199,8 +196,9 @@ workflow upstream { File discover_tar = sawfish_discover.discover_tar # sawfish outputs for single sample - File? sv_vcf = sawfish_call.vcf - File? sv_vcf_index = sawfish_call.vcf_index + File? 
sv_vcf = sawfish_call.vcf + File? sv_vcf_index = sawfish_call.vcf_index + File? sv_supporting_reads = sawfish_call.supporting_reads # small variant outputs File small_variant_vcf = deepvariant.vcf From 9f742dc019140301925b4c4965f438ae7f06a7eb Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Thu, 10 Apr 2025 12:43:50 -0700 Subject: [PATCH 32/61] Update to trgt 2.0.0 (#210) * Update trgt to 2.0.0 --- docs/tools_containers.md | 2 +- wdl-ci.config.json | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/tools_containers.md b/docs/tools_containers.md index 376d5c98..ab7af2de 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -14,7 +14,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | pbmm2 |
  • pbmm2 1.17.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/9591749da92ca57f7283ca1c2268789c45fa341d/docker/pbmm2) | [pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859](https://quay.io/repository/pacbio/pbmm2/manifest/sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859) | | mosdepth |
  • mosdepth 0.3.9
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/fa84fbf582738c05c750e667ff43d11552ad4183/docker/mosdepth) | [mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1](https://quay.io/repository/pacbio/mosdepth/manifest/sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1) | | sawfish |
  • sawfish 0.12.10
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/b20d14fc62026b465970db1bd98d78d516014030/docker/sawfish) | [sawfish@sha256:ba4334987f6ae249b615f2c6788499950a513ac9cbd06156c39086bd3ed015f5](https://quay.io/repository/pacbio/sawfish/manifest/sha256:ba4334987f6ae249b615f2c6788499950a513ac9cbd06156c39086bd3ed015f5) | -| trgt |
  • trgt 1.5.1
  • `/opt/scripts/check_trgt_coverage.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/641ed67d29128381f27daeca9936fbc1e41bf58d/docker/trgt) | [trgt@sha256:be7e6ef589a31f4de5d2ed4725dfb34b4b23cb9a440577b606e8f7bfee06526b](https://quay.io/repository/pacbio/trgt/manifest/sha256:be7e6ef589a31f4de5d2ed4725dfb34b4b23cb9a440577b606e8f7bfee06526b) | +| trgt |
  • trgt 2.0.0
  • `/opt/scripts/check_trgt_coverage.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/9b84810374b3d3d89f466c2956e6deaaa611b5ee/docker/trgt) | [trgt@sha256:ff77823854be8ed63955fe361b53a3b8028d8e1a5e30cbd6f31ce07be62b2ed7](https://quay.io/repository/pacbio/trgt/manifest/sha256:ff77823854be8ed63955fe361b53a3b8028d8e1a5e30cbd6f31ce07be62b2ed7) | | hiphase |
  • hiphase 1.5.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/69039c010ada793bab4d38a9bd17a30562b9b671/docker/hiphase) | [hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482](https://quay.io/repository/pacbio/hiphase/manifest/sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482) | | hificnv |
  • hificnv 1.0.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/a58f8b44cf8fd09c39c90e07076dbb418188084d/docker/hificnv) | [hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d](https://quay.io/repository/pacbio/hificnv/manifest/sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d) | | paraphase |
  • paraphase 3.2.1
  • minimap 2.28
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/0c8cf2ab0732fd610c9b91a4423a22731314f3f7/docker/paraphase) | [paraphase@sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e](https://quay.io/repository/pacbio/paraphase/manifest/sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e) | diff --git a/wdl-ci.config.json b/wdl-ci.config.json index e09f004b..9d6b14b5 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -1356,7 +1356,7 @@ "tasks": { "trgt": { "key": "trgt", - "digest": "g6ywayioaobdtzqdajkngnqfqwmxjtyk", + "digest": "spklfnpuz2zbc4j4nucyklfi6evyinwx", "tests": [ { "inputs": { @@ -1445,7 +1445,7 @@ }, "trgt_merge": { "key": "trgt_merge", - "digest": "dmgbkalwvudhxs6lr3xkwopq6hkwjttv", + "digest": "yilkwgetphuphxypkjuipkiijphsndux", "tests": [ { "inputs": { @@ -1479,7 +1479,7 @@ }, "coverage_dropouts": { "key": "coverage_dropouts", - "digest": "oc5xxjmuyfnoobc6zqe5sqtfsfgkgak7", + "digest": "mxmpecq675an3ikpdkgda7pyow6ssrkp", "tests": [ { "inputs": { From f79d7b1ed97360ef5621823ece97ade2321d5b26 Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Thu, 10 Apr 2025 13:37:25 -0700 Subject: [PATCH 33/61] Add mitorsaw to workflow. 
(#211) --- docs/family.md | 9 +++++++ docs/singleton.md | 9 +++++++ docs/tools_containers.md | 1 + wdl-ci.config.json | 46 ++++++++++++++++++++++++++++++--- workflows/family.wdl | 6 ++++- workflows/singleton.wdl | 5 ++++ workflows/upstream/upstream.wdl | 16 ++++++++++++ 7 files changed, 88 insertions(+), 4 deletions(-) diff --git a/docs/family.md b/docs/family.md index d28e5b09..3217b542 100644 --- a/docs/family.md +++ b/docs/family.md @@ -34,6 +34,7 @@ flowchart TD samtools_merge --> mosdepth["mosdepth"] samtools_merge --> paraphase["Paraphase"] samtools_merge --> hificnv["HiFiCNV"] + samtools_merge --> mitorsaw["MitorSaw"] samtools_merge --> trgt["TRGT"] samtools_merge --> trgt_dropouts["TR coverage dropouts"] samtools_merge --> deepvariant["DeepVariant"] @@ -190,6 +191,14 @@ The `Sample` struct contains sample specific data and metadata. The struct has t | Array\[String\] | stat_cnv_DUP_sum | Sum of DUP bp | (PASS variants) | | Array\[String\] | stat_cnv_DEL_sum | Sum of DEL bp | (PASS variants) | +### Mitochondrial variants and haplotypes + +| Type | Name | Description | Notes | +| ---- | ---- | ----------- | ----- | +| Array\[File\] | mitorsaw_vcf | Mitochondrial variant VCF | | +| Array\[File\] | mitorsaw_vcf_index | Index for mitochondrial variant VCF | | +| Array\[File\] | mitorsaw_hap_stats | Mitochondrial haplotype stats | | + ### Tandem Repeat Genotyping | Type | Name | Description | Notes | diff --git a/docs/singleton.md b/docs/singleton.md index c42642b4..e964f1b8 100644 --- a/docs/singleton.md +++ b/docs/singleton.md @@ -32,6 +32,7 @@ flowchart TD samtools_merge --> mosdepth["mosdepth"] samtools_merge --> paraphase["Paraphase"] samtools_merge --> hificnv["HiFiCNV"] + samtools_merge --> mitorsaw["MitorSaw"] samtools_merge --> trgt["TRGT"] samtools_merge --> trgt_dropouts["TR coverage dropouts"] samtools_merge --> deepvariant["DeepVariant"] @@ -137,6 +138,14 @@ flowchart TD | File | bcftools_roh_out | ROH calling | `bcftools roh` | | File | 
bcftools_roh_bed | Generated from above, without filtering | | +### Mitochondrial variants and haplotypes + +| Type | Name | Description | Notes | +| ---- | ---- | ----------- | ----- | +| File | mitorsaw_vcf | Mitochondrial variant VCF | | +| File | mitorsaw_vcf_index | Index for mitochondrial variant VCF | | +| File | mitorsaw_hap_stats | Mitochondrial haplotype stats | | + ### Copy Number Variants (≥100 kb) | Type | Name | Description | Notes | diff --git a/docs/tools_containers.md b/docs/tools_containers.md index ab7af2de..f794bb68 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -17,6 +17,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | trgt |
  • trgt 2.0.0
  • `/opt/scripts/check_trgt_coverage.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/9b84810374b3d3d89f466c2956e6deaaa611b5ee/docker/trgt) | [trgt@sha256:ff77823854be8ed63955fe361b53a3b8028d8e1a5e30cbd6f31ce07be62b2ed7](https://quay.io/repository/pacbio/trgt/manifest/sha256:ff77823854be8ed63955fe361b53a3b8028d8e1a5e30cbd6f31ce07be62b2ed7) | | hiphase |
  • hiphase 1.5.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/69039c010ada793bab4d38a9bd17a30562b9b671/docker/hiphase) | [hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482](https://quay.io/repository/pacbio/hiphase/manifest/sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482) | | hificnv |
  • hificnv 1.0.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/a58f8b44cf8fd09c39c90e07076dbb418188084d/docker/hificnv) | [hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d](https://quay.io/repository/pacbio/hificnv/manifest/sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d) | +| mitorsaw |
  • mitorsaw 0.1.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/bacbc741dffb19d67f8660fe8ae9c1938c64471a/docker/mitorsaw) | [mitorsaw@sha256:e310f35d84fd81d4018a55c739f846b03374777dd5efcc9882acc5865f198c51](https://quay.io/repository/pacbio/mitorsaw/manifest/sha256:e310f35d84fd81d4018a55c739f846b03374777dd5efcc9882acc5865f198c51) | | paraphase |
  • paraphase 3.2.1
  • minimap 2.28
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/0c8cf2ab0732fd610c9b91a4423a22731314f3f7/docker/paraphase) | [paraphase@sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e](https://quay.io/repository/pacbio/paraphase/manifest/sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e) | | pbstarphase |
  • pbstarphase 1.3.1
  • Database 20250224
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/2750a36b40d319a52c550c2fabbd50060587a1a1/docker/pbstarphase) | [pbstarphase@sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b](https://quay.io/repository/pacbio/pbstarphase/manifest/sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b) | | pb-cpg-tools |
  • pb-cpg-tools 3.0.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/330b99b79f32b2d2598e812779f3c64460739e6c/docker/pb-cpg-tools) | [pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c](https://quay.io/repository/pacbio/pb-cpg-tools/manifest/sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c) | diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 9d6b14b5..06ac5389 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -1217,6 +1217,46 @@ } } }, + "workflows/wdl-common/wdl/tasks/mitorsaw.wdl": { + "key": "workflows/wdl-common/wdl/tasks/mitorsaw.wdl", + "name": "", + "description": "", + "tasks": { + "mitorsaw": { + "key": "mitorsaw", + "digest": "bfnbvu2pwnp734bxsb633n3phezm3j7a", + "tests": [ + { + "inputs": { + "aligned_bam": "${resources_file_path}/inputs/HG002-sprq.GRCh38.haplotagged.bam", + "aligned_bam_index": "${resources_file_path}/inputs/HG002-sprq.GRCh38.haplotagged.bam.bai", + "ref_fasta": "${ref_fasta}", + "ref_index": "${ref_index}", + "out_prefix": "HG002.GRCh38", + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "vcf": { + "value": "${resources_file_path}/mitorsaw/output/HG002.GRCh38.mitorsaw.vcf.gz", + "test_tasks": [ + "compare_file_basename", + "vcftools_validator", + "check_gzip" + ] + }, + "hap_stats": { + "value": "${resources_file_path}/mitorsaw/output/HG002.GRCh38.mitorsaw.json", + "test_tasks": [ + "compare_file_basename", + "check_json" + ] + } + } + } + ] + } + } + }, "workflows/wdl-common/wdl/tasks/paraphase.wdl": { "key": "workflows/wdl-common/wdl/tasks/paraphase.wdl", "name": "", @@ -1356,7 +1396,7 @@ "tasks": { "trgt": { "key": "trgt", - "digest": "spklfnpuz2zbc4j4nucyklfi6evyinwx", + "digest": "g6ywayioaobdtzqdajkngnqfqwmxjtyk", "tests": [ { "inputs": { @@ -1445,7 +1485,7 @@ }, "trgt_merge": { "key": "trgt_merge", - "digest": "yilkwgetphuphxypkjuipkiijphsndux", + "digest": "dmgbkalwvudhxs6lr3xkwopq6hkwjttv", "tests": [ { "inputs": { 
@@ -1479,7 +1519,7 @@ }, "coverage_dropouts": { "key": "coverage_dropouts", - "digest": "mxmpecq675an3ikpdkgda7pyow6ssrkp", + "digest": "oc5xxjmuyfnoobc6zqe5sqtfsfgkgak7", "tests": [ { "inputs": { diff --git a/workflows/family.wdl b/workflows/family.wdl index d0f79a4e..950ed8ab 100644 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -10,7 +10,6 @@ import "wdl-common/wdl/tasks/trgt.wdl" as Trgt import "tertiary/tertiary.wdl" as TertiaryAnalysis import "wdl-common/wdl/tasks/utilities.wdl" as Utilities - workflow humanwgs_family { meta { description: "PacBio HiFi human whole genome sequencing pipeline, with joint calling for related samples." @@ -371,6 +370,11 @@ workflow humanwgs_family { Array[String] stat_cnv_DUP_sum = upstream.stat_cnv_DUP_sum Array[String] stat_cnv_DEL_sum = upstream.stat_cnv_DEL_sum + # per sample mitorsaw outputs + Array[File] mitorsaw_vcf = upstream.mitorsaw_vcf + Array[File] mitorsaw_vcf_index = upstream.mitorsaw_vcf_index + Array[File] mitorsaw_hap_stats = upstream.mitorsaw_hap_stats + # PGx outputs Array[File] pbstarphase_json = downstream.pbstarphase_json Array[File?] pharmcat_match_json = downstream.pharmcat_match_json diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl index 189e271d..0424c9c9 100644 --- a/workflows/singleton.wdl +++ b/workflows/singleton.wdl @@ -314,6 +314,11 @@ workflow humanwgs_singleton { String stat_cnv_DUP_sum = upstream.stat_cnv_DUP_sum String stat_cnv_DEL_sum = upstream.stat_cnv_DEL_sum + # per sample mitorsaw outputs + File mitorsaw_vcf = upstream.mitorsaw_vcf + File mitorsaw_vcf_index = upstream.mitorsaw_vcf_index + File mitorsaw_hap_stats = upstream.mitorsaw_hap_stats + # PGx outputs File pbstarphase_json = downstream.pbstarphase_json File? 
pharmcat_match_json = downstream.pharmcat_match_json diff --git a/workflows/upstream/upstream.wdl b/workflows/upstream/upstream.wdl index e2a83609..d7590bf7 100644 --- a/workflows/upstream/upstream.wdl +++ b/workflows/upstream/upstream.wdl @@ -9,6 +9,7 @@ import "../wdl-common/wdl/tasks/mosdepth.wdl" as Mosdepth import "../wdl-common/wdl/tasks/trgt.wdl" as Trgt import "../wdl-common/wdl/tasks/paraphase.wdl" as Paraphase import "../wdl-common/wdl/tasks/hificnv.wdl" as Hificnv +import "../wdl-common/wdl/tasks/mitorsaw.wdl" as Mitorsaw workflow upstream { meta { @@ -166,6 +167,16 @@ workflow upstream { runtime_attributes = default_runtime_attributes } + call Mitorsaw.mitorsaw { + input: + aligned_bam = aligned_bam_data, + aligned_bam_index = aligned_bam_index, + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + out_prefix = "~{sample_id}.~{ref_map['name']}", + runtime_attributes = default_runtime_attributes + } + if (single_sample) { call Sawfish.sawfish_call { input: @@ -231,6 +242,11 @@ workflow upstream { String stat_cnv_DUP_sum = hificnv.stat_DUP_sum String stat_cnv_DEL_sum = hificnv.stat_DEL_sum + # per sample mitorsaw outputs + File mitorsaw_vcf = mitorsaw.vcf + File mitorsaw_vcf_index = mitorsaw.vcf_index + File mitorsaw_hap_stats = mitorsaw.hap_stats + # qc messages Array[String] msg = flatten( [ From db74a0f3c535edf1af11fbe358c9668f79d0185f Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Thu, 8 May 2025 11:36:41 -0700 Subject: [PATCH 34/61] bump: Update sawfish to v1.0 (#215) * Update sawfish to v1.0 --- docs/tools_containers.md | 2 +- image_manifest.txt | 2 +- wdl-ci.config.json | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/tools_containers.md b/docs/tools_containers.md index f794bb68..0f41d9b4 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -13,7 +13,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | pb_wdl_base |
  • htslib 1.20
  • bcftools 1.20
  • samtools 1.20
  • bedtools 2.31.0
  • python3.9
  • numpy 1.24.24
  • pandas 2.0.3
  • matplotlib 3.7.5
  • seaborn 0.13.2
  • pysam 0.22.1
  • vcfpy 0.13.8
  • biopython 1.83
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/6b13cc246dd44e41903d17a660bb5432cdd18dbe/docker/pb_wdl_base) | [sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87](https://quay.io/repository/pacbio/pb_wdl_base/manifest/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87) | | pbmm2 |
  • pbmm2 1.17.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/9591749da92ca57f7283ca1c2268789c45fa341d/docker/pbmm2) | [pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859](https://quay.io/repository/pacbio/pbmm2/manifest/sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859) | | mosdepth |
  • mosdepth 0.3.9
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/fa84fbf582738c05c750e667ff43d11552ad4183/docker/mosdepth) | [mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1](https://quay.io/repository/pacbio/mosdepth/manifest/sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1) | -| sawfish |
  • sawfish 0.12.10
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/b20d14fc62026b465970db1bd98d78d516014030/docker/sawfish) | [sawfish@sha256:ba4334987f6ae249b615f2c6788499950a513ac9cbd06156c39086bd3ed015f5](https://quay.io/repository/pacbio/sawfish/manifest/sha256:ba4334987f6ae249b615f2c6788499950a513ac9cbd06156c39086bd3ed015f5) | +| sawfish |
  • sawfish 1.0.2
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/d3d64bc7dd62a74369f4b7e9b2416b991691eedb/docker/sawfish) | [sawfish@sha256:f995aaf97f27b3a4bb9b0b453566ce0b797c126e06007a4fc95ffc7912d78d8e](https://quay.io/repository/pacbio/sawfish/manifest/sha256:f995aaf97f27b3a4bb9b0b453566ce0b797c126e06007a4fc95ffc7912d78d8e) | | trgt |
  • trgt 2.0.0
  • `/opt/scripts/check_trgt_coverage.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/9b84810374b3d3d89f466c2956e6deaaa611b5ee/docker/trgt) | [trgt@sha256:ff77823854be8ed63955fe361b53a3b8028d8e1a5e30cbd6f31ce07be62b2ed7](https://quay.io/repository/pacbio/trgt/manifest/sha256:ff77823854be8ed63955fe361b53a3b8028d8e1a5e30cbd6f31ce07be62b2ed7) | | hiphase |
  • hiphase 1.5.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/69039c010ada793bab4d38a9bd17a30562b9b671/docker/hiphase) | [hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482](https://quay.io/repository/pacbio/hiphase/manifest/sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482) | | hificnv |
  • hificnv 1.0.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/a58f8b44cf8fd09c39c90e07076dbb418188084d/docker/hificnv) | [hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d](https://quay.io/repository/pacbio/hificnv/manifest/sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d) | diff --git a/image_manifest.txt b/image_manifest.txt index ffb1d46f..ea8d7211 100644 --- a/image_manifest.txt +++ b/image_manifest.txt @@ -7,7 +7,7 @@ quay.io/pacbio/pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f7239587 quay.io/pacbio/pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859 quay.io/pacbio/pbstarphase@sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b quay.io/pacbio/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87 -quay.io/pacbio/sawfish@sha256:fcd5d091908322ddeb2c86b7217b7cfdef9a103944adb3e87c76d495eb3fea5b +quay.io/pacbio/sawfish@sha256:f995aaf97f27b3a4bb9b0b453566ce0b797c126e06007a4fc95ffc7912d78d8e quay.io/pacbio/slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa quay.io/pacbio/svpack@sha256:628e9851e425ed8044a907d33de04043d1ef02d4d2b2667cf2e9a389bb011eba quay.io/pacbio/trgt@sha256:be7e6ef589a31f4de5d2ed4725dfb34b4b23cb9a440577b606e8f7bfee06526b diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 06ac5389..4c559a6f 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -2072,7 +2072,7 @@ "tasks": { "sawfish_discover": { "key": "sawfish_discover", - "digest": "fhepxt5mh25uz4eg267h6al6nclhl7vl", + "digest": "bpppd3zrlqu4tpyls4fkrzq545ghyskz", "tests": [ { "inputs": { @@ -2140,7 +2140,7 @@ }, "sawfish_call": { "key": "sawfish_call", - "digest": "kjha4llh7xjs55xbkowbvfikvfnwtct4", + "digest": "jic2pwxzicg2ci3jz3agp43vep6mhrh6", "tests": [ { "inputs": { From 67d88c2b10f706f51d7854e942799754dd94df37 Mon Sep 17 00:00:00 2001 From: William Rowell Date: Fri, 11 Apr 2025 19:16:56 -0700 Subject: 
[PATCH 35/61] Update documentation to show chunking of alignments. --- docs/family.md | 2 +- docs/pbmm2.md | 17 +++++++++++++++++ docs/singleton.md | 2 +- 3 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 docs/pbmm2.md diff --git a/docs/family.md b/docs/family.md index 3217b542..bea3091a 100644 --- a/docs/family.md +++ b/docs/family.md @@ -29,7 +29,6 @@ flowchart TD ubam[/"HiFi uBAM"/] --> pbmm2_align["pbmm2 align"] pbmm2_align --> sawfish_discover["Sawfish discover"] end - pbmm2_align --> merge_read_stats["merge read statistics"] pbmm2_align --> samtools_merge["samtools merge"] samtools_merge --> mosdepth["mosdepth"] samtools_merge --> paraphase["Paraphase"] @@ -50,6 +49,7 @@ flowchart TD split_glnexus --> hiphase trgt --> hiphase split_sawfish --> hiphase + hiphase --> bam_stats["BAM stats"] hiphase --> bcftools_roh["bcftools roh"] hiphase --> bcftools_stats["bcftools stats\n(small variants)"] hiphase --> sv_stats["SV stats"] diff --git a/docs/pbmm2.md b/docs/pbmm2.md new file mode 100644 index 00000000..565c1558 --- /dev/null +++ b/docs/pbmm2.md @@ -0,0 +1,17 @@ +# pbmm2 alignment Subworkflow + +```mermaid +flowchart TD + hifi_reads[/"HiFi reads BAm"/] --> is_aligned{"is aligned?"} + is_aligned -- yes --> samtools_reset["samtools reset"] + is_aligned -- no --> has_kinetics{"kinetics?"} + has_kinetics -- yes --> samtools_reset + has_kinetics -- no --> count_records["count records"] + samtools_reset --> count_records + count_records --> compare_counts{"compare counts?"} + compare_counts -- yes --> chunk_bam["chunk BAM"] + compare_counts -- no --> pbmm2_align["pbmm2 align"] + chunk_bam --> pbmm2_align +``` + +This subworkflow checks an input BAM for evidence of alignment or kinetics. If it finds either of these, it strips alignment and kinetics information. 
Next, it counts the number of records in the BAM, and if chunking is enabled and the number of records is greater than `max_reads_per_chunk`, the BAM is split into chunks of no larger than `max_reads_per_chunk`. Finally, chunks are aligned to the reference with pbmm2. diff --git a/docs/singleton.md b/docs/singleton.md index e964f1b8..511174bf 100644 --- a/docs/singleton.md +++ b/docs/singleton.md @@ -28,7 +28,6 @@ flowchart TD pbmm2_align --> sawfish_discover["Sawfish discover"] end pbmm2_align --> merge_read_stats["merge read statistics"] - pbmm2_align --> samtools_merge["samtools merge"] samtools_merge --> mosdepth["mosdepth"] samtools_merge --> paraphase["Paraphase"] samtools_merge --> hificnv["HiFiCNV"] @@ -43,6 +42,7 @@ flowchart TD deepvariant --> hiphase trgt --> hiphase pbsv_call --> hiphase + hiphase --> bam_stats["BAM stats"] hiphase --> bcftools_roh["bcftools roh"] hiphase --> bcftools_stats["bcftools stats\n(small variants)"] hiphase --> sv_stats["SV stats"] From b5eba45e7699cfe15fb9935ac289607ed3910c6d Mon Sep 17 00:00:00 2001 From: Juniper Lake Date: Wed, 14 May 2025 15:28:07 -0700 Subject: [PATCH 36/61] bump: update trgt to v3.0.0 --- docs/tools_containers.md | 2 +- image_manifest.txt | 2 +- workflows/wdl-common | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/tools_containers.md b/docs/tools_containers.md index 0f41d9b4..0fac2e08 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -14,7 +14,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | pbmm2 |
  • pbmm2 1.17.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/9591749da92ca57f7283ca1c2268789c45fa341d/docker/pbmm2) | [pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859](https://quay.io/repository/pacbio/pbmm2/manifest/sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859) | | mosdepth |
  • mosdepth 0.3.9
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/fa84fbf582738c05c750e667ff43d11552ad4183/docker/mosdepth) | [mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1](https://quay.io/repository/pacbio/mosdepth/manifest/sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1) | | sawfish |
  • sawfish 1.0.2
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/d3d64bc7dd62a74369f4b7e9b2416b991691eedb/docker/sawfish) | [sawfish@sha256:f995aaf97f27b3a4bb9b0b453566ce0b797c126e06007a4fc95ffc7912d78d8e](https://quay.io/repository/pacbio/sawfish/manifest/sha256:f995aaf97f27b3a4bb9b0b453566ce0b797c126e06007a4fc95ffc7912d78d8e) | -| trgt |
  • trgt 2.0.0
  • `/opt/scripts/check_trgt_coverage.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/9b84810374b3d3d89f466c2956e6deaaa611b5ee/docker/trgt) | [trgt@sha256:ff77823854be8ed63955fe361b53a3b8028d8e1a5e30cbd6f31ce07be62b2ed7](https://quay.io/repository/pacbio/trgt/manifest/sha256:ff77823854be8ed63955fe361b53a3b8028d8e1a5e30cbd6f31ce07be62b2ed7) | +| trgt |
  • trgt 3.0.0
  • `/opt/scripts/check_trgt_coverage.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3c5ee05da7043bd03bd80959c3dd025e25468070/docker/trgt) | [trgt@sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284](https://quay.io/repository/pacbio/trgt/manifest/sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284) | | hiphase |
  • hiphase 1.5.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/69039c010ada793bab4d38a9bd17a30562b9b671/docker/hiphase) | [hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482](https://quay.io/repository/pacbio/hiphase/manifest/sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482) | | hificnv |
  • hificnv 1.0.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/a58f8b44cf8fd09c39c90e07076dbb418188084d/docker/hificnv) | [hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d](https://quay.io/repository/pacbio/hificnv/manifest/sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d) | | mitorsaw |
  • mitorsaw 0.1.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/bacbc741dffb19d67f8660fe8ae9c1938c64471a/docker/mitorsaw) | [mitorsaw@sha256:e310f35d84fd81d4018a55c739f846b03374777dd5efcc9882acc5865f198c51](https://quay.io/repository/pacbio/mitorsaw/manifest/sha256:e310f35d84fd81d4018a55c739f846b03374777dd5efcc9882acc5865f198c51) | diff --git a/image_manifest.txt b/image_manifest.txt index ea8d7211..b2b48653 100644 --- a/image_manifest.txt +++ b/image_manifest.txt @@ -10,7 +10,7 @@ quay.io/pacbio/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de7 quay.io/pacbio/sawfish@sha256:f995aaf97f27b3a4bb9b0b453566ce0b797c126e06007a4fc95ffc7912d78d8e quay.io/pacbio/slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa quay.io/pacbio/svpack@sha256:628e9851e425ed8044a907d33de04043d1ef02d4d2b2667cf2e9a389bb011eba -quay.io/pacbio/trgt@sha256:be7e6ef589a31f4de5d2ed4725dfb34b4b23cb9a440577b606e8f7bfee06526b +quay.io/pacbio/trgt@sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284 quay.io/pacbio/wgs_tertiary@sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136 google/deepvariant:1.8.0 google/deepvariant:1.8.0-gpu diff --git a/workflows/wdl-common b/workflows/wdl-common index e503a1ca..9d42a51c 160000 --- a/workflows/wdl-common +++ b/workflows/wdl-common @@ -1 +1 @@ -Subproject commit e503a1ca877f6ab6a72bbf4a2d48dfedf7c9fdec +Subproject commit 9d42a51c105e7b676c94b01b0bf59fa6c33894c7 From 0f666e5ec066a7d6ba9511fea5ee1525f62c5fae Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 14 May 2025 22:49:12 +0000 Subject: [PATCH 37/61] update wdl-ci config file after successful tests --- wdl-ci.config.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 4c559a6f..2022ba1d 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -1396,7 +1396,7 @@ "tasks": { "trgt": { 
"key": "trgt", - "digest": "g6ywayioaobdtzqdajkngnqfqwmxjtyk", + "digest": "knw5lxj7lvuspa7bfxum7zm3n5a5hjgo", "tests": [ { "inputs": { @@ -1485,7 +1485,7 @@ }, "trgt_merge": { "key": "trgt_merge", - "digest": "dmgbkalwvudhxs6lr3xkwopq6hkwjttv", + "digest": "jywsabnjsznsx6g6rxf7w5zmwee4fgkn", "tests": [ { "inputs": { @@ -1519,7 +1519,7 @@ }, "coverage_dropouts": { "key": "coverage_dropouts", - "digest": "oc5xxjmuyfnoobc6zqe5sqtfsfgkgak7", + "digest": "oecyhm3k4zvuuwkcd5x3jll37pwaewwk", "tests": [ { "inputs": { From 0dbc1f7f98d4307c465aafc096a07f82332bc87f Mon Sep 17 00:00:00 2001 From: "Juniper A. Lake" Date: Tue, 20 May 2025 10:37:12 -0700 Subject: [PATCH 38/61] bump: update mitorsaw to v0.2.0 (#217) * bump: update mitorsaw to v0.2.0 --- docs/tools_containers.md | 2 +- image_manifest.txt | 1 + wdl-ci.config.json | 2 +- workflows/wdl-common | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/tools_containers.md b/docs/tools_containers.md index 0fac2e08..60cdc2e7 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -17,7 +17,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | trgt |
  • trgt 3.0.0
  • `/opt/scripts/check_trgt_coverage.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3c5ee05da7043bd03bd80959c3dd025e25468070/docker/trgt) | [trgt@sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284](https://quay.io/repository/pacbio/trgt/manifest/sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284) | | hiphase |
  • hiphase 1.5.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/69039c010ada793bab4d38a9bd17a30562b9b671/docker/hiphase) | [hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482](https://quay.io/repository/pacbio/hiphase/manifest/sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482) | | hificnv |
  • hificnv 1.0.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/a58f8b44cf8fd09c39c90e07076dbb418188084d/docker/hificnv) | [hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d](https://quay.io/repository/pacbio/hificnv/manifest/sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d) | -| mitorsaw |
  • mitorsaw 0.1.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/bacbc741dffb19d67f8660fe8ae9c1938c64471a/docker/mitorsaw) | [mitorsaw@sha256:e310f35d84fd81d4018a55c739f846b03374777dd5efcc9882acc5865f198c51](https://quay.io/repository/pacbio/mitorsaw/manifest/sha256:e310f35d84fd81d4018a55c739f846b03374777dd5efcc9882acc5865f198c51) | +| mitorsaw |
  • mitorsaw 0.2.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/97c698b078b413a5718bf2721e4e10daf5ae4d68/docker/mitorsaw) | [mitorsaw@sha256:87c49411ab8fc82e2bd0ea9177c206a0f1b9dc972c5b5e64c534585581e10fe0](https://quay.io/repository/pacbio/mitorsaw/manifest/sha256:87c49411ab8fc82e2bd0ea9177c206a0f1b9dc972c5b5e64c534585581e10fe0) | | paraphase |
  • paraphase 3.2.1
  • minimap 2.28
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/0c8cf2ab0732fd610c9b91a4423a22731314f3f7/docker/paraphase) | [paraphase@sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e](https://quay.io/repository/pacbio/paraphase/manifest/sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e) | | pbstarphase |
  • pbstarphase 1.3.1
  • Database 20250224
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/2750a36b40d319a52c550c2fabbd50060587a1a1/docker/pbstarphase) | [pbstarphase@sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b](https://quay.io/repository/pacbio/pbstarphase/manifest/sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b) | | pb-cpg-tools |
  • pb-cpg-tools 3.0.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/330b99b79f32b2d2598e812779f3c64460739e6c/docker/pb-cpg-tools) | [pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c](https://quay.io/repository/pacbio/pb-cpg-tools/manifest/sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c) | diff --git a/image_manifest.txt b/image_manifest.txt index b2b48653..30d5e939 100644 --- a/image_manifest.txt +++ b/image_manifest.txt @@ -1,6 +1,7 @@ quay.io/pacbio/glnexus@sha256:ce6fecf59dddc6089a8100b31c29c1e6ed50a0cf123da9f2bc589ee4b0c69c8e quay.io/pacbio/hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d quay.io/pacbio/hiphase@sha256:47fe7d42aea6b1b2e6d3c7401bc35a184464c3f647473d0525c00f3c968b40ad +quay.io/pacbio/mitorsaw@sha256:87c49411ab8fc82e2bd0ea9177c206a0f1b9dc972c5b5e64c534585581e10fe0 quay.io/pacbio/mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1 quay.io/pacbio/paraphase@sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e quay.io/pacbio/pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 2022ba1d..289325c4 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -1224,7 +1224,7 @@ "tasks": { "mitorsaw": { "key": "mitorsaw", - "digest": "bfnbvu2pwnp734bxsb633n3phezm3j7a", + "digest": "jxaaawjrrt6zwbrvhkn732nzejadpjja", "tests": [ { "inputs": { diff --git a/workflows/wdl-common b/workflows/wdl-common index 9d42a51c..11ef6315 160000 --- a/workflows/wdl-common +++ b/workflows/wdl-common @@ -1 +1 @@ -Subproject commit 9d42a51c105e7b676c94b01b0bf59fa6c33894c7 +Subproject commit 11ef63150f396a35c3db9392e2ad88f4c1d57db5 From 88c0f7dc7af7b33f93971bf3ed13c7bf54719f01 Mon Sep 17 00:00:00 2001 From: "Juniper A. 
Lake" Date: Tue, 20 May 2025 14:51:05 -0700 Subject: [PATCH 39/61] bump: update paraphase to v3.3.1 (#219) * bump: update paraphase to v3.3.1 --- docs/tools_containers.md | 2 +- image_manifest.txt | 2 +- wdl-ci.config.json | 2 +- workflows/wdl-common | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/tools_containers.md b/docs/tools_containers.md index 60cdc2e7..3eddce76 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -18,7 +18,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | hiphase |
  • hiphase 1.5.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/69039c010ada793bab4d38a9bd17a30562b9b671/docker/hiphase) | [hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482](https://quay.io/repository/pacbio/hiphase/manifest/sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482) | | hificnv |
  • hificnv 1.0.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/a58f8b44cf8fd09c39c90e07076dbb418188084d/docker/hificnv) | [hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d](https://quay.io/repository/pacbio/hificnv/manifest/sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d) | | mitorsaw |
  • mitorsaw 0.2.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/97c698b078b413a5718bf2721e4e10daf5ae4d68/docker/mitorsaw) | [mitorsaw@sha256:87c49411ab8fc82e2bd0ea9177c206a0f1b9dc972c5b5e64c534585581e10fe0](https://quay.io/repository/pacbio/mitorsaw/manifest/sha256:87c49411ab8fc82e2bd0ea9177c206a0f1b9dc972c5b5e64c534585581e10fe0) | -| paraphase |
  • paraphase 3.2.1
  • minimap 2.28
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/0c8cf2ab0732fd610c9b91a4423a22731314f3f7/docker/paraphase) | [paraphase@sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e](https://quay.io/repository/pacbio/paraphase/manifest/sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e) | +| paraphase |
  • paraphase 3.3.1
  • minimap 2.28
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/2be56081bcf8c8186590189c939bc80cf83ab884/docker/paraphase) | [paraphase@sha256:bf15a5f977fa6ee34f335e5a695d5f9c73fb7b7092703fbf3c94594949ea50d7](https://quay.io/repository/pacbio/paraphase/manifest/sha256:bf15a5f977fa6ee34f335e5a695d5f9c73fb7b7092703fbf3c94594949ea50d7) | | pbstarphase |
  • pbstarphase 1.3.1
  • Database 20250224
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/2750a36b40d319a52c550c2fabbd50060587a1a1/docker/pbstarphase) | [pbstarphase@sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b](https://quay.io/repository/pacbio/pbstarphase/manifest/sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b) | | pb-cpg-tools |
  • pb-cpg-tools 3.0.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/330b99b79f32b2d2598e812779f3c64460739e6c/docker/pb-cpg-tools) | [pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c](https://quay.io/repository/pacbio/pb-cpg-tools/manifest/sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c) | | wgs_tertiary |
  • `/opt/scripts/calculate_phrank.py` 2.0.0
  • `/opt/scripts/json2ped.py` 0.5.0
Last built 2021-09-17:
  • ensembl -> HGNC
  • ensembl -> HPO
  • HGNC -> inheritance
  • HPO DAG
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/fd70e2872bd3c6bb705faff5bc68374116d7d62f/docker/wgs_tertiary) | [wgs_tertiary@sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136](https://quay.io/repository/pacbio/wgs_tertiary/manifest/sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136) | diff --git a/image_manifest.txt b/image_manifest.txt index 30d5e939..c64b897b 100644 --- a/image_manifest.txt +++ b/image_manifest.txt @@ -3,7 +3,7 @@ quay.io/pacbio/hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef quay.io/pacbio/hiphase@sha256:47fe7d42aea6b1b2e6d3c7401bc35a184464c3f647473d0525c00f3c968b40ad quay.io/pacbio/mitorsaw@sha256:87c49411ab8fc82e2bd0ea9177c206a0f1b9dc972c5b5e64c534585581e10fe0 quay.io/pacbio/mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1 -quay.io/pacbio/paraphase@sha256:2823f94682498704bd63fc95314095917fc1cb31a62a674e9d951cec469d2f3e +quay.io/pacbio/paraphase@sha256:bf15a5f977fa6ee34f335e5a695d5f9c73fb7b7092703fbf3c94594949ea50d7 quay.io/pacbio/pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c quay.io/pacbio/pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859 quay.io/pacbio/pbstarphase@sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 289325c4..f4b405a1 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -1264,7 +1264,7 @@ "tasks": { "paraphase": { "key": "paraphase", - "digest": "fpnnrdrj72irojbdzcrrf6ylan5ztrj3", + "digest": "ylpisqivvnxce7owitvjw64u44vz4uxl", "tests": [ { "inputs": { diff --git a/workflows/wdl-common b/workflows/wdl-common index 11ef6315..a8d7903d 160000 --- a/workflows/wdl-common +++ b/workflows/wdl-common @@ -1 +1 @@ -Subproject commit 11ef63150f396a35c3db9392e2ad88f4c1d57db5 +Subproject commit a8d7903d27d6368ea9c298fee96879cd6b457861 From 
4c9154508570344258323d2b3f3296bd48676efa Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Fri, 13 Jun 2025 15:40:03 -0400 Subject: [PATCH 40/61] bump: Upgrade DeepVariant to v1.9.0 (#221) * bump: Upgrade DeepVariant to v1.9.0 --- docs/tools_containers.md | 4 +- image_manifest.txt | 4 +- wdl-ci.config.json | 92 ++++++++++++++++++++-------------------- workflows/wdl-common | 2 +- 4 files changed, 51 insertions(+), 51 deletions(-) diff --git a/docs/tools_containers.md b/docs/tools_containers.md index 3eddce76..ee9c681c 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -24,7 +24,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | wgs_tertiary |
  • `/opt/scripts/calculate_phrank.py` 2.0.0
  • `/opt/scripts/json2ped.py` 0.5.0
Last built 2021-09-17:
  • ensembl -> HGNC
  • ensembl -> HPO
  • HGNC -> inheritance
  • HPO DAG
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/fd70e2872bd3c6bb705faff5bc68374116d7d62f/docker/wgs_tertiary) | [wgs_tertiary@sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136](https://quay.io/repository/pacbio/wgs_tertiary/manifest/sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136) | | slivar |
  • slivar 0.3.1
  • `/opt/scripts/add_comphet_phase.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/5e1094fd6755203b4971fdac6dcb951bbc098bed/docker/slivar) | [slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa](https://quay.io/repository/pacbio/slivar/manifest/sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa) | | svpack |
  • svpack 54b54db
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/6fc750b0c65b4a5c1eb65791eab9eed89864d858/docker/svpack) | [svpack@sha256:628e9851e425ed8044a907d33de04043d1ef02d4d2b2667cf2e9a389bb011eba](https://quay.io/repository/pacbio/svpack/manifest/sha256:628e9851e425ed8044a907d33de04043d1ef02d4d2b2667cf2e9a389bb011eba) | -| deepvariant |
  • DeepVariant 1.8.0
| | [deepvariant:1.8.0](https://hub.docker.com/layers/google/deepvariant/1.8.0/images/sha256-eb223b3c487be43d34cc7b08c906b9c558d195716e10672db1bd6e910dc3a00a) | -| deepvariant-gpu |
  • DeepVariant 1.8.0
| | [deepvariant:1.8.0-gpu](https://hub.docker.com/layers/google/deepvariant/1.8.0-gpu/images/sha256-892a6a8a92865f40229ee7fcaeea48f61673d8f8273a643b1f55c4fe0543d3c7) | +| deepvariant |
  • DeepVariant 1.9.0
| | [deepvariant:1.9.0](https://hub.docker.com/layers/google/deepvariant/1.9.0/images/sha256-07e95b34e40cc50074d23273d479934a27e80919ac75bd97bf39a731e3c2d6ad) | +| deepvariant-gpu |
  • DeepVariant 1.9.0
| | [deepvariant:1.9.0-gpu](https://hub.docker.com/layers/google/deepvariant/1.9.0-gpu/images/sha256-e0c8734b8700d945e3ee78d609acb90548f829c874596ffca436af8cf379f87a) | | pharmcat |
  • PharmCat 2.15.4
| | [pharmcat:2.15.4](https://hub.docker.com/layers/pgkb/pharmcat/2.15.4/images/sha256-5b58ae959b4cd85986546c2d67e3596f33097dedc40dfe57dd845b6e78781eb6) | | glnexus |
  • GLnexus 1.4.3
| | [glnexus:1.4.3](https://quay.io/repository/pacbio/glnexus/manifest/sha256:ce6fecf59dddc6089a8100b31c29c1e6ed50a0cf123da9f2bc589ee4b0c69c8e) | diff --git a/image_manifest.txt b/image_manifest.txt index c64b897b..6e76dfc8 100644 --- a/image_manifest.txt +++ b/image_manifest.txt @@ -13,6 +13,6 @@ quay.io/pacbio/slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef96 quay.io/pacbio/svpack@sha256:628e9851e425ed8044a907d33de04043d1ef02d4d2b2667cf2e9a389bb011eba quay.io/pacbio/trgt@sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284 quay.io/pacbio/wgs_tertiary@sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136 -google/deepvariant:1.8.0 -google/deepvariant:1.8.0-gpu +google/deepvariant:1.9.0 +google/deepvariant:1.9.0-gpu pgkb/pharmcat:2.15.4 diff --git a/wdl-ci.config.json b/wdl-ci.config.json index f4b405a1..4f79b938 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -1680,19 +1680,19 @@ "task_start_index": 0, "tasks_per_shard": 8, "total_deepvariant_tasks": 64, - "docker_image": "google/deepvariant:1.8.0", + "docker_image": "google/deepvariant:1.9.0", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "example_tfrecord_tar": { - "value": "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.0.example_tfrecords.tar.gz", + "value": "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.0.example_tfrecords.tar.gz", "test_tasks": [ "compare_file_basename", "check_gzip" ] }, "nonvariant_site_tfrecord_tar": { - "value": "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.0.nonvariant_site_tfrecords.tar.gz", + "value": "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.0.nonvariant_site_tfrecords.tar.gz", "test_tasks": [ "compare_file_basename", "check_gzip" @@ -1711,22 +1711,22 @@ "sample_id": "HG002", "ref_name": "${ref_name}", "example_tfrecord_tars": [ - 
"${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.0.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.8.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.16.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.24.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.32.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.40.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.48.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.56.example_tfrecords.tar.gz" + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.0.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.8.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.16.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.24.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.32.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.40.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.48.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.56.example_tfrecords.tar.gz" ], "total_deepvariant_tasks": 64, - "docker_image": "google/deepvariant:1.8.0", + "docker_image": "google/deepvariant:1.9.0", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "tfrecords_tar": { - "value": "${resources_file_path}/deepvariant_call_variants_cpu/output/v1p8p0/HG002.GRCh38.call_variants_output.tar.gz", + "value": 
"${resources_file_path}/deepvariant_call_variants_cpu/output/v1p9p0/HG002.GRCh38.call_variants_output.tar.gz", "test_tasks": [ "compare_file_basename", "check_gzip" @@ -1738,29 +1738,29 @@ }, "deepvariant_call_variants_gpu": { "key": "deepvariant_call_variants_gpu", - "digest": "567aoezgiryo64cbsy2c2plt3vblrbpy", + "digest": "oruoeho4i2o5kuzfey4f6z5or22mvrhl", "tests": [ { "inputs": { "sample_id": "HG002", "ref_name": "${ref_name}", "example_tfrecord_tars": [ - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.0.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.8.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.16.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.24.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.32.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.40.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.48.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.56.example_tfrecords.tar.gz" + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.0.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.8.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.16.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.24.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.32.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.40.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.48.example_tfrecords.tar.gz", + 
"${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.56.example_tfrecords.tar.gz" ], "total_deepvariant_tasks": 64, - "docker_image": "google/deepvariant:1.8.0-gpu", + "docker_image": "google/deepvariant:1.9.0-gpu", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "tfrecords_tar": { - "value": "${resources_file_path}/deepvariant_call_variants_gpu/output/v1p8p0/HG002.GRCh38.call_variants_output.tar.gz", + "value": "${resources_file_path}/deepvariant_call_variants_gpu/output/v1p9p0/HG002.GRCh38.call_variants_output.tar.gz", "test_tasks": [ "compare_file_basename", "check_gzip" @@ -1777,37 +1777,37 @@ { "inputs": { "sample_id": "HG002", - "tfrecords_tar": "${resources_file_path}/deepvariant_call_variants_gpu/output/v1p8p0/HG002.GRCh38.call_variants_output.tar.gz", + "tfrecords_tar": "${resources_file_path}/deepvariant_call_variants_gpu/output/v1p9p0/HG002.GRCh38.call_variants_output.tar.gz", "example_tfrecord_tars": [ - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.0.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.8.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.16.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.24.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.32.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.40.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.48.example_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.56.example_tfrecords.tar.gz" + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.0.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.8.example_tfrecords.tar.gz", + 
"${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.16.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.24.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.32.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.40.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.48.example_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.56.example_tfrecords.tar.gz" ], "nonvariant_site_tfrecord_tars": [ - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.0.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.8.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.16.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.24.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.32.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.40.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.48.nonvariant_site_tfrecords.tar.gz", - "${resources_file_path}/deepvariant_make_examples/output/v1p8p0/HG002.56.nonvariant_site_tfrecords.tar.gz" + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.0.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.8.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.16.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.24.nonvariant_site_tfrecords.tar.gz", + 
"${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.32.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.40.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.48.nonvariant_site_tfrecords.tar.gz", + "${resources_file_path}/deepvariant_make_examples/output/v1p9p0/HG002.56.nonvariant_site_tfrecords.tar.gz" ], "ref_fasta": "${ref_fasta}", "ref_index": "${ref_index}", "ref_name": "${ref_name}", "total_deepvariant_tasks": 64, - "docker_image": "google/deepvariant:1.8.0", + "docker_image": "google/deepvariant:1.9.0", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "vcf": { - "value": "${resources_file_path}/deepvariant_postprocess_variants/output/v1p8p0/HG002.GRCh38.small_variants.vcf.gz", + "value": "${resources_file_path}/deepvariant_postprocess_variants/output/v1p9p0/HG002.GRCh38.small_variants.vcf.gz", "test_tasks": [ "compare_file_basename", "vcftools_validator", @@ -1815,7 +1815,7 @@ ] }, "gvcf": { - "value": "${resources_file_path}/deepvariant_postprocess_variants/output/v1p8p0/HG002.GRCh38.small_variants.g.vcf.gz", + "value": "${resources_file_path}/deepvariant_postprocess_variants/output/v1p9p0/HG002.GRCh38.small_variants.g.vcf.gz", "test_tasks": [ "compare_file_basename", "vcftools_validator", @@ -2246,7 +2246,7 @@ "max_retries": 0, "zones": "", "cpuPlatform": "", - "gpuType": "ampere", + "gpuType": "tesla", "container_registry": "quay.io/pacbio" }, "on_demand_runtime_attributes": { @@ -2255,7 +2255,7 @@ "max_retries": 0, "zones": "", "cpuPlatform": "", - "gpuType": "ampere", + "gpuType": "tesla", "container_registry": "quay.io/pacbio" } } diff --git a/workflows/wdl-common b/workflows/wdl-common index a8d7903d..6e04f5dd 160000 --- a/workflows/wdl-common +++ b/workflows/wdl-common @@ -1 +1 @@ -Subproject commit a8d7903d27d6368ea9c298fee96879cd6b457861 +Subproject commit 
6e04f5dd00e4bfa9c90ca83bc5c4f866843cf81e From d0762d3d9cd3ba3db77c19ba7ecef5b6438802b6 Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Fri, 13 Jun 2025 17:13:56 -0400 Subject: [PATCH 41/61] bump: Update Paraphase to v3.3.2 (#223) - also make Paraphase outputs optional to allow it to fail without ending workflow in cases where the coverage is very low - updated docs - updated image manifest - add msg output to Paraphase to capture errors. --- docs/family.md | 6 +++--- docs/singleton.md | 6 +++--- docs/tools_containers.md | 2 +- image_manifest.txt | 2 +- wdl-ci.config.json | 2 +- workflows/family.wdl | 6 +++--- workflows/singleton.wdl | 6 +++--- workflows/upstream/upstream.wdl | 7 ++++--- workflows/wdl-common | 2 +- 9 files changed, 20 insertions(+), 19 deletions(-) diff --git a/docs/family.md b/docs/family.md index bea3091a..7310fa26 100644 --- a/docs/family.md +++ b/docs/family.md @@ -227,9 +227,9 @@ The `Sample` struct contains sample specific data and metadata. The struct has t | Type | Name | Description | Notes | | ---- | ---- | ----------- | ----- | -| Array\[File\] | paraphase_output_json | Paraphase output JSON | | -| Array\[File\] | paraphase_realigned_bam | Paraphase realigned BAM | | -| Array\[File\] | paraphase_realigned_bam_index | | | +| Array\[File?\] | paraphase_output_json | Paraphase output JSON | | +| Array\[File?\] | paraphase_realigned_bam | Paraphase realigned BAM | | +| Array\[File?\] | paraphase_realigned_bam_index | | | | Array\[File?\] | paraphase_vcfs | Paraphase VCFs | Compressed as `.tar.gz` | ### 5mCpG Methylation Calling diff --git a/docs/singleton.md b/docs/singleton.md index 511174bf..6e1c6fc8 100644 --- a/docs/singleton.md +++ b/docs/singleton.md @@ -186,9 +186,9 @@ flowchart TD | Type | Name | Description | Notes | | ---- | ---- | ----------- | ----- | -| File | paraphase_output_json | Paraphase output JSON | | -| File | paraphase_realigned_bam | Paraphase realigned BAM | | -| File | paraphase_realigned_bam_index | | | +| File? 
| paraphase_output_json | Paraphase output JSON | | +| File? | paraphase_realigned_bam | Paraphase realigned BAM | | +| File? | paraphase_realigned_bam_index | | | | File? | paraphase_vcfs | Paraphase VCFs | Compressed as `.tar.gz` | ### 5mCpG Methylation Calling diff --git a/docs/tools_containers.md b/docs/tools_containers.md index ee9c681c..218d23bb 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -18,7 +18,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | hiphase |
  • hiphase 1.5.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/69039c010ada793bab4d38a9bd17a30562b9b671/docker/hiphase) | [hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482](https://quay.io/repository/pacbio/hiphase/manifest/sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482) | | hificnv |
  • hificnv 1.0.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/a58f8b44cf8fd09c39c90e07076dbb418188084d/docker/hificnv) | [hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d](https://quay.io/repository/pacbio/hificnv/manifest/sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d) | | mitorsaw |
  • mitorsaw 0.2.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/97c698b078b413a5718bf2721e4e10daf5ae4d68/docker/mitorsaw) | [mitorsaw@sha256:87c49411ab8fc82e2bd0ea9177c206a0f1b9dc972c5b5e64c534585581e10fe0](https://quay.io/repository/pacbio/mitorsaw/manifest/sha256:87c49411ab8fc82e2bd0ea9177c206a0f1b9dc972c5b5e64c534585581e10fe0) | -| paraphase |
  • paraphase 3.3.1
  • minimap 2.28
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/2be56081bcf8c8186590189c939bc80cf83ab884/docker/paraphase) | [paraphase@sha256:bf15a5f977fa6ee34f335e5a695d5f9c73fb7b7092703fbf3c94594949ea50d7](https://quay.io/repository/pacbio/paraphase/manifest/sha256:bf15a5f977fa6ee34f335e5a695d5f9c73fb7b7092703fbf3c94594949ea50d7) | +| paraphase |
  • paraphase 3.3.2
  • minimap 2.28
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/28c84c386e28ce0a46587e4f1bf85db824bb4634/docker/paraphase) | [paraphase@sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920](https://quay.io/repository/pacbio/paraphase/manifest/sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920) | | pbstarphase |
  • pbstarphase 1.3.1
  • Database 20250224
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/2750a36b40d319a52c550c2fabbd50060587a1a1/docker/pbstarphase) | [pbstarphase@sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b](https://quay.io/repository/pacbio/pbstarphase/manifest/sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b) | | pb-cpg-tools |
  • pb-cpg-tools 3.0.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/330b99b79f32b2d2598e812779f3c64460739e6c/docker/pb-cpg-tools) | [pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c](https://quay.io/repository/pacbio/pb-cpg-tools/manifest/sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c) | | wgs_tertiary |
  • `/opt/scripts/calculate_phrank.py` 2.0.0
  • `/opt/scripts/json2ped.py` 0.5.0
Last built 2021-09-17:
  • ensembl -> HGNC
  • ensembl -> HPO
  • HGNC -> inheritance
  • HPO DAG
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/fd70e2872bd3c6bb705faff5bc68374116d7d62f/docker/wgs_tertiary) | [wgs_tertiary@sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136](https://quay.io/repository/pacbio/wgs_tertiary/manifest/sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136) | diff --git a/image_manifest.txt b/image_manifest.txt index 6e76dfc8..c3b07962 100644 --- a/image_manifest.txt +++ b/image_manifest.txt @@ -3,7 +3,7 @@ quay.io/pacbio/hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef quay.io/pacbio/hiphase@sha256:47fe7d42aea6b1b2e6d3c7401bc35a184464c3f647473d0525c00f3c968b40ad quay.io/pacbio/mitorsaw@sha256:87c49411ab8fc82e2bd0ea9177c206a0f1b9dc972c5b5e64c534585581e10fe0 quay.io/pacbio/mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1 -quay.io/pacbio/paraphase@sha256:bf15a5f977fa6ee34f335e5a695d5f9c73fb7b7092703fbf3c94594949ea50d7 +quay.io/pacbio/paraphase@sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920 quay.io/pacbio/pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c quay.io/pacbio/pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859 quay.io/pacbio/pbstarphase@sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 4f79b938..c1420ef0 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -1264,7 +1264,7 @@ "tasks": { "paraphase": { "key": "paraphase", - "digest": "ylpisqivvnxce7owitvjw64u44vz4uxl", + "digest": "jzs4k5qgxgpjwv54mpw7vlf6nnlu2ki2", "tests": [ { "inputs": { diff --git a/workflows/family.wdl b/workflows/family.wdl index 950ed8ab..d37ed80d 100644 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -354,9 +354,9 @@ workflow humanwgs_family { Array[String] stat_trgt_uncalled_count = upstream.stat_trgt_uncalled_count # paraphase outputs - Array[File] 
paraphase_output_json = upstream.paraphase_output_json - Array[File] paraphase_realigned_bam = upstream.paraphase_realigned_bam - Array[File] paraphase_realigned_bam_index = upstream.paraphase_realigned_bam_index + Array[File?] paraphase_output_json = upstream.paraphase_output_json + Array[File?] paraphase_realigned_bam = upstream.paraphase_realigned_bam + Array[File?] paraphase_realigned_bam_index = upstream.paraphase_realigned_bam_index Array[File?] paraphase_vcfs = upstream.paraphase_vcfs # per sample cnv outputs diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl index 0424c9c9..40247e6a 100644 --- a/workflows/singleton.wdl +++ b/workflows/singleton.wdl @@ -298,9 +298,9 @@ workflow humanwgs_singleton { String stat_trgt_uncalled_count = upstream.stat_trgt_uncalled_count # paraphase outputs - File paraphase_output_json = upstream.paraphase_output_json - File paraphase_realigned_bam = upstream.paraphase_realigned_bam - File paraphase_realigned_bam_index = upstream.paraphase_realigned_bam_index + File? paraphase_output_json = upstream.paraphase_output_json + File? paraphase_realigned_bam = upstream.paraphase_realigned_bam + File? paraphase_realigned_bam_index = upstream.paraphase_realigned_bam_index File? paraphase_vcfs = upstream.paraphase_vcfs # per sample cnv outputs diff --git a/workflows/upstream/upstream.wdl b/workflows/upstream/upstream.wdl index d7590bf7..3f055e50 100644 --- a/workflows/upstream/upstream.wdl +++ b/workflows/upstream/upstream.wdl @@ -226,9 +226,9 @@ workflow upstream { String stat_trgt_uncalled_count = trgt.stat_uncalled_count # paraphase outputs - File paraphase_output_json = paraphase.out_json - File paraphase_realigned_bam = paraphase.bam - File paraphase_realigned_bam_index = paraphase.bam_index + File? paraphase_output_json = paraphase.out_json + File? paraphase_realigned_bam = paraphase.bam + File? paraphase_realigned_bam_index = paraphase.bam_index File? 
paraphase_vcfs = paraphase.vcfs_tar # per sample hificnv outputs @@ -253,6 +253,7 @@ workflow upstream { flatten(pbmm2.msg), [qc_sex], trgt.msg, + paraphase.msg, hificnv.msg ] ) diff --git a/workflows/wdl-common b/workflows/wdl-common index 6e04f5dd..62943917 160000 --- a/workflows/wdl-common +++ b/workflows/wdl-common @@ -1 +1 @@ -Subproject commit 6e04f5dd00e4bfa9c90ca83bc5c4f866843cf81e +Subproject commit 629439179a9a03750230932623c6e071c38d50e2 From fc599fd4044792152888c6095f94e7e74cee93b0 Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Wed, 18 Jun 2025 15:42:32 -0400 Subject: [PATCH 42/61] bump: starphase-1.4.1 (#224) Update to StarPhase 1.4.1 --- docs/tools_containers.md | 2 +- image_manifest.txt | 2 +- wdl-ci.config.json | 2 +- workflows/wdl-common | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/tools_containers.md b/docs/tools_containers.md index 218d23bb..fc411616 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -19,7 +19,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | hificnv |
  • hificnv 1.0.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/a58f8b44cf8fd09c39c90e07076dbb418188084d/docker/hificnv) | [hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d](https://quay.io/repository/pacbio/hificnv/manifest/sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d) | | mitorsaw |
  • mitorsaw 0.2.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/97c698b078b413a5718bf2721e4e10daf5ae4d68/docker/mitorsaw) | [mitorsaw@sha256:87c49411ab8fc82e2bd0ea9177c206a0f1b9dc972c5b5e64c534585581e10fe0](https://quay.io/repository/pacbio/mitorsaw/manifest/sha256:87c49411ab8fc82e2bd0ea9177c206a0f1b9dc972c5b5e64c534585581e10fe0) | | paraphase |
  • paraphase 3.3.2
  • minimap 2.28
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/28c84c386e28ce0a46587e4f1bf85db824bb4634/docker/paraphase) | [paraphase@sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920](https://quay.io/repository/pacbio/paraphase/manifest/sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920) | -| pbstarphase |
  • pbstarphase 1.3.1
  • Database 20250224
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/2750a36b40d319a52c550c2fabbd50060587a1a1/docker/pbstarphase) | [pbstarphase@sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b](https://quay.io/repository/pacbio/pbstarphase/manifest/sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b) | +| pbstarphase |
  • pbstarphase 1.4.1
  • Database 20250515
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/c5166b28e43f36a381450ba479e2e34a841bb922/docker/pbstarphase) | [pbstarphase@sha256:7daaad3b617a3b8b5914ab0893ee7cc545fd2025a35619211a5b8e25e4c36ac4](https://quay.io/repository/pacbio/pbstarphase/manifest/sha256:7daaad3b617a3b8b5914ab0893ee7cc545fd2025a35619211a5b8e25e4c36ac4) | | pb-cpg-tools |
  • pb-cpg-tools 3.0.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/330b99b79f32b2d2598e812779f3c64460739e6c/docker/pb-cpg-tools) | [pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c](https://quay.io/repository/pacbio/pb-cpg-tools/manifest/sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c) | | wgs_tertiary |
  • `/opt/scripts/calculate_phrank.py` 2.0.0
  • `/opt/scripts/json2ped.py` 0.5.0
Last built 2021-09-17:
  • ensembl -> HGNC
  • ensembl -> HPO
  • HGNC -> inheritance
  • HPO DAG
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/fd70e2872bd3c6bb705faff5bc68374116d7d62f/docker/wgs_tertiary) | [wgs_tertiary@sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136](https://quay.io/repository/pacbio/wgs_tertiary/manifest/sha256:410597030e0c85cf16eb27a877d260e7e2824747f5e8b05566a1aaa729d71136) | | slivar |
  • slivar 0.3.1
  • `/opt/scripts/add_comphet_phase.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/5e1094fd6755203b4971fdac6dcb951bbc098bed/docker/slivar) | [slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa](https://quay.io/repository/pacbio/slivar/manifest/sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa) | diff --git a/image_manifest.txt b/image_manifest.txt index c3b07962..83d49a59 100644 --- a/image_manifest.txt +++ b/image_manifest.txt @@ -6,7 +6,7 @@ quay.io/pacbio/mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab quay.io/pacbio/paraphase@sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920 quay.io/pacbio/pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c quay.io/pacbio/pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859 -quay.io/pacbio/pbstarphase@sha256:f7bbbe3814ef318a5ee89dca7263d1afda00da501642604c193629303a2ada3b +quay.io/pacbio/pbstarphase@sha256:7daaad3b617a3b8b5914ab0893ee7cc545fd2025a35619211a5b8e25e4c36ac4 quay.io/pacbio/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87 quay.io/pacbio/sawfish@sha256:f995aaf97f27b3a4bb9b0b453566ce0b797c126e06007a4fc95ffc7912d78d8e quay.io/pacbio/slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa diff --git a/wdl-ci.config.json b/wdl-ci.config.json index c1420ef0..9b9faf54 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -1310,7 +1310,7 @@ "tasks": { "pbstarphase_diplotype": { "key": "pbstarphase_diplotype", - "digest": "6opgzazl7wm42vg4ftqbmvqlq3b4xsxl", + "digest": "qlxl34shcpfmx4dluqdogguhh2mcekda", "tests": [ { "inputs": { diff --git a/workflows/wdl-common b/workflows/wdl-common index 62943917..4a98066c 160000 --- a/workflows/wdl-common +++ b/workflows/wdl-common @@ -1 +1 @@ -Subproject commit 629439179a9a03750230932623c6e071c38d50e2 +Subproject commit 4a98066c5bd8e2ec06b0db3ea4ff58a803688326 From 
4ec1f3ad8b6b75a6ecbf9eb4a1ad45fcaa81b90b Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Thu, 26 Jun 2025 18:10:52 -0400 Subject: [PATCH 43/61] feat: Update to sawfish v2.0.0, which replaces hificnv functionality (#222) * feat: Update to sawfish v2.0.1, replace hificnv functionality - annotate ALU/L1/SVA annotations with sawshark - add gc_bias_corrected_depth.bw output --- GRCh38.ref_map.v2p0p0.template.tsv | 12 - GRCh38.ref_map.v3p0p0.template.tsv | 12 + GRCh38.tertiary_map.v2p0p0.template.tsv | 12 - GRCh38.tertiary_map.v3p0p0.template.tsv | 12 + .../GRCh38.ref_map.v2p0p0.aws.tsv | 12 - .../GRCh38.ref_map.v3p0p0.aws.tsv | 12 + .../GRCh38.tertiary_map.v2p0p0.aws.tsv | 12 - .../GRCh38.tertiary_map.v3p0p0.aws.tsv | 12 + .../azure/GRCh38.ref_map.v2p0p0.azure.tsv | 12 - .../azure/GRCh38.ref_map.v3p0p0.azure.tsv | 12 + ...v => GRCh38.tertiary_map.v3p0p0.azure.tsv} | 14 +- backends/gcp/GRCh38.ref_map.v2p0p0.gcp.tsv | 12 - backends/gcp/GRCh38.ref_map.v3p0p0.gcp.tsv | 12 + .../gcp/GRCh38.tertiary_map.v2p0p0.gcp.tsv | 12 - .../gcp/GRCh38.tertiary_map.v3p0p0.gcp.tsv | 12 + backends/hpc/GRCh38.ref_map.v2p0p0.hpc.tsv | 12 - backends/hpc/GRCh38.ref_map.v3p0p0.hpc.tsv | 12 + .../hpc/GRCh38.tertiary_map.v2p0p0.hpc.tsv | 12 - .../hpc/GRCh38.tertiary_map.v3p0p0.hpc.tsv | 12 + docs/family.md | 119 +++++--- docs/ref_map.md | 8 +- docs/singleton.md | 94 +++--- docs/tools_containers.md | 3 +- image_manifest.txt | 3 +- wdl-ci.config.json | 284 ++++++++---------- workflows/family.wdl | 25 +- workflows/joint/joint.wdl | 19 +- workflows/singleton.wdl | 25 +- workflows/upstream/upstream.wdl | 87 +++--- workflows/wdl-common | 2 +- 30 files changed, 438 insertions(+), 461 deletions(-) delete mode 100644 GRCh38.ref_map.v2p0p0.template.tsv create mode 100644 GRCh38.ref_map.v3p0p0.template.tsv delete mode 100644 GRCh38.tertiary_map.v2p0p0.template.tsv create mode 100644 GRCh38.tertiary_map.v3p0p0.template.tsv delete mode 100644 backends/aws-healthomics/GRCh38.ref_map.v2p0p0.aws.tsv create mode 
100644 backends/aws-healthomics/GRCh38.ref_map.v3p0p0.aws.tsv delete mode 100644 backends/aws-healthomics/GRCh38.tertiary_map.v2p0p0.aws.tsv create mode 100644 backends/aws-healthomics/GRCh38.tertiary_map.v3p0p0.aws.tsv delete mode 100644 backends/azure/GRCh38.ref_map.v2p0p0.azure.tsv create mode 100644 backends/azure/GRCh38.ref_map.v3p0p0.azure.tsv rename backends/azure/{GRCh38.tertiary_map.v2p0p0.azure.tsv => GRCh38.tertiary_map.v3p0p0.azure.tsv} (51%) delete mode 100644 backends/gcp/GRCh38.ref_map.v2p0p0.gcp.tsv create mode 100644 backends/gcp/GRCh38.ref_map.v3p0p0.gcp.tsv delete mode 100644 backends/gcp/GRCh38.tertiary_map.v2p0p0.gcp.tsv create mode 100644 backends/gcp/GRCh38.tertiary_map.v3p0p0.gcp.tsv delete mode 100644 backends/hpc/GRCh38.ref_map.v2p0p0.hpc.tsv create mode 100644 backends/hpc/GRCh38.ref_map.v3p0p0.hpc.tsv delete mode 100644 backends/hpc/GRCh38.tertiary_map.v2p0p0.hpc.tsv create mode 100644 backends/hpc/GRCh38.tertiary_map.v3p0p0.hpc.tsv diff --git a/GRCh38.ref_map.v2p0p0.template.tsv b/GRCh38.ref_map.v2p0p0.template.tsv deleted file mode 100644 index 3a095aaf..00000000 --- a/GRCh38.ref_map.v2p0p0.template.tsv +++ /dev/null @@ -1,12 +0,0 @@ -name GRCh38 -fasta /hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta -fasta_index /hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai -pbsv_splits /hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json -pbsv_tandem_repeat_bed /hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed -trgt_tandem_repeat_bed /hifi-wdl-resources-v2.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed -hificnv_exclude_bed /hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz -hificnv_exclude_bed_index /hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz.tbi -hificnv_expected_bed_male /hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed 
-hificnv_expected_bed_female /hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed -pharmcat_positions_vcf /hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz -pharmcat_positions_vcf_index /hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/GRCh38.ref_map.v3p0p0.template.tsv b/GRCh38.ref_map.v3p0p0.template.tsv new file mode 100644 index 00000000..2d031dea --- /dev/null +++ b/GRCh38.ref_map.v3p0p0.template.tsv @@ -0,0 +1,12 @@ +name GRCh38 +fasta /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta +fasta_index /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai +pbsv_splits /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json +pbsv_tandem_repeat_bed /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed +trgt_tandem_repeat_bed /hifi-wdl-resources-v3.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed +sawfish_exclude_bed /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz +sawfish_exclude_bed_index /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz.tbi +sawfish_expected_bed_male /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XY.bed +sawfish_expected_bed_female /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XX.bed +pharmcat_positions_vcf /hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz +pharmcat_positions_vcf_index /hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/GRCh38.tertiary_map.v2p0p0.template.tsv b/GRCh38.tertiary_map.v2p0p0.template.tsv deleted file mode 100644 index af86967d..00000000 --- a/GRCh38.tertiary_map.v2p0p0.template.tsv +++ /dev/null @@ -1,12 +0,0 @@ -slivar_js /hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v0.2.8.js -ensembl_gff /hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz 
-lof_lookup /hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt -clinvar_lookup /hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt -slivar_gnotate_files /hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip -slivar_gnotate_prefixes gnomad,colors -slivar_max_af 0.03 -slivar_max_nhomalt 4 -slivar_max_ac 4 -slivar_min_gq 5 -svpack_pop_vcfs /hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz -svpack_pop_vcf_indices /hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi \ No newline at end of file diff --git a/GRCh38.tertiary_map.v3p0p0.template.tsv b/GRCh38.tertiary_map.v3p0p0.template.tsv new file mode 100644 index 00000000..203a04bf --- /dev/null +++ b/GRCh38.tertiary_map.v3p0p0.template.tsv @@ -0,0 +1,12 @@ +slivar_js /hifi-wdl-resources-v3.0.0/slivar/slivar-functions.v0.2.8.js +ensembl_gff /hifi-wdl-resources-v3.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz +lof_lookup /hifi-wdl-resources-v3.0.0/slivar/lof_lookup.v2.1.1.txt +clinvar_lookup /hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20240624T165443.txt +slivar_gnotate_files /hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip +slivar_gnotate_prefixes gnomad,colors +slivar_max_af 0.03 +slivar_max_nhomalt 4 +slivar_max_ac 4 +slivar_min_gq 5 +svpack_pop_vcfs /hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz +svpack_pop_vcf_indices 
/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi \ No newline at end of file diff --git a/backends/aws-healthomics/GRCh38.ref_map.v2p0p0.aws.tsv b/backends/aws-healthomics/GRCh38.ref_map.v2p0p0.aws.tsv deleted file mode 100644 index d3254267..00000000 --- a/backends/aws-healthomics/GRCh38.ref_map.v2p0p0.aws.tsv +++ /dev/null @@ -1,12 +0,0 @@ -name GRCh38 -fasta s3:///hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta -fasta_index s3:///hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai -pbsv_splits s3:///hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json -pbsv_tandem_repeat_bed s3:///hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed -trgt_tandem_repeat_bed s3:///hifi-wdl-resources-v2.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed -hificnv_exclude_bed s3:///hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz -hificnv_exclude_bed_index s3:///hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz.tbi -hificnv_expected_bed_male s3:///hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed -hificnv_expected_bed_female s3:///hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed -pharmcat_positions_vcf s3:///hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz -pharmcat_positions_vcf_index s3:///hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/backends/aws-healthomics/GRCh38.ref_map.v3p0p0.aws.tsv b/backends/aws-healthomics/GRCh38.ref_map.v3p0p0.aws.tsv new file mode 100644 index 00000000..e90e7331 --- /dev/null +++ b/backends/aws-healthomics/GRCh38.ref_map.v3p0p0.aws.tsv @@ -0,0 +1,12 @@ +name GRCh38 +fasta s3:///hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta 
+fasta_index s3:///hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai +pbsv_splits s3:///hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json +pbsv_tandem_repeat_bed s3:///hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed +trgt_tandem_repeat_bed s3:///hifi-wdl-resources-v3.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed +sawfish_exclude_bed s3:///hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz +sawfish_exclude_bed_index s3:///hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz.tbi +sawfish_expected_bed_male s3:///hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XY.bed +sawfish_expected_bed_female s3:///hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XX.bed +pharmcat_positions_vcf s3:///hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz +pharmcat_positions_vcf_index s3:///hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/backends/aws-healthomics/GRCh38.tertiary_map.v2p0p0.aws.tsv b/backends/aws-healthomics/GRCh38.tertiary_map.v2p0p0.aws.tsv deleted file mode 100644 index a8770690..00000000 --- a/backends/aws-healthomics/GRCh38.tertiary_map.v2p0p0.aws.tsv +++ /dev/null @@ -1,12 +0,0 @@ -slivar_js s3:///hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v0.2.8.js -ensembl_gff s3:///hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz -lof_lookup s3:///hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt -clinvar_lookup s3:///hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt -slivar_gnotate_files s3:///hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,s3:///hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip -slivar_gnotate_prefixes gnomad,colors -slivar_max_af 0.03 -slivar_max_nhomalt 4 -slivar_max_ac 4 -slivar_min_gq 5 
-svpack_pop_vcfs s3:///hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,s3:///hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz -svpack_pop_vcf_indices s3:///hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,s3:///hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi diff --git a/backends/aws-healthomics/GRCh38.tertiary_map.v3p0p0.aws.tsv b/backends/aws-healthomics/GRCh38.tertiary_map.v3p0p0.aws.tsv new file mode 100644 index 00000000..5f5af304 --- /dev/null +++ b/backends/aws-healthomics/GRCh38.tertiary_map.v3p0p0.aws.tsv @@ -0,0 +1,12 @@ +slivar_js s3:///hifi-wdl-resources-v3.0.0/slivar/slivar-functions.v0.2.8.js +ensembl_gff s3:///hifi-wdl-resources-v3.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz +lof_lookup s3:///hifi-wdl-resources-v3.0.0/slivar/lof_lookup.v2.1.1.txt +clinvar_lookup s3:///hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20240624T165443.txt +slivar_gnotate_files s3:///hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,s3:///hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip +slivar_gnotate_prefixes gnomad,colors +slivar_max_af 0.03 +slivar_max_nhomalt 4 +slivar_max_ac 4 +slivar_min_gq 5 +svpack_pop_vcfs s3:///hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,s3:///hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz +svpack_pop_vcf_indices s3:///hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,s3:///hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi diff --git a/backends/azure/GRCh38.ref_map.v2p0p0.azure.tsv b/backends/azure/GRCh38.ref_map.v2p0p0.azure.tsv deleted file mode 100644 index 0e10c7d8..00000000 --- a/backends/azure/GRCh38.ref_map.v2p0p0.azure.tsv +++ /dev/null @@ -1,12 +0,0 @@ -name 
GRCh38 -fasta https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta -fasta_index https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai -pbsv_splits https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json -pbsv_tandem_repeat_bed https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed -trgt_tandem_repeat_bed https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed -hificnv_exclude_bed https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz -hificnv_exclude_bed_index https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz.tbi -hificnv_expected_bed_male https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed -hificnv_expected_bed_female https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed -pharmcat_positions_vcf https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz -pharmcat_positions_vcf_index https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/backends/azure/GRCh38.ref_map.v3p0p0.azure.tsv b/backends/azure/GRCh38.ref_map.v3p0p0.azure.tsv new file mode 100644 index 00000000..19183351 --- /dev/null +++ b/backends/azure/GRCh38.ref_map.v3p0p0.azure.tsv @@ -0,0 +1,12 @@ 
+name GRCh38 +fasta https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta +fasta_index https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai +pbsv_splits https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json +pbsv_tandem_repeat_bed https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed +trgt_tandem_repeat_bed https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed +sawfish_exclude_bed https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz +sawfish_exclude_bed_index https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz.tbi +sawfish_expected_bed_male https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XY.bed +sawfish_expected_bed_female https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XX.bed +pharmcat_positions_vcf https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz +pharmcat_positions_vcf_index https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/backends/azure/GRCh38.tertiary_map.v2p0p0.azure.tsv b/backends/azure/GRCh38.tertiary_map.v3p0p0.azure.tsv similarity index 51% rename from backends/azure/GRCh38.tertiary_map.v2p0p0.azure.tsv rename to 
backends/azure/GRCh38.tertiary_map.v3p0p0.azure.tsv index 3743aaed..e4fe31bb 100644 --- a/backends/azure/GRCh38.tertiary_map.v2p0p0.azure.tsv +++ b/backends/azure/GRCh38.tertiary_map.v3p0p0.azure.tsv @@ -1,12 +1,12 @@ -slivar_js /datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v0.2.8.js -ensembl_gff /datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz -lof_lookup /datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt -clinvar_lookup /datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt -slivar_gnotate_files /datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,/datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip +slivar_js /datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/slivar/slivar-functions.v0.2.8.js +ensembl_gff /datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz +lof_lookup /datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/slivar/lof_lookup.v2.1.1.txt +clinvar_lookup /datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20240624T165443.txt +slivar_gnotate_files /datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,/datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip slivar_gnotate_prefixes gnomad,colors slivar_max_af 0.03 slivar_max_nhomalt 4 slivar_max_ac 4 slivar_min_gq 5 -svpack_pop_vcfs /datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,/datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz -svpack_pop_vcf_indices 
/datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,/datasetpbrarediseases/dataset/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi +svpack_pop_vcfs /datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,/datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz +svpack_pop_vcf_indices /datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,/datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi diff --git a/backends/gcp/GRCh38.ref_map.v2p0p0.gcp.tsv b/backends/gcp/GRCh38.ref_map.v2p0p0.gcp.tsv deleted file mode 100644 index 2d63bdea..00000000 --- a/backends/gcp/GRCh38.ref_map.v2p0p0.gcp.tsv +++ /dev/null @@ -1,12 +0,0 @@ -name GRCh38 -fasta gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta -fasta_index gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai -pbsv_splits gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json -pbsv_tandem_repeat_bed gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed -trgt_tandem_repeat_bed gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed -hificnv_exclude_bed gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz -hificnv_exclude_bed_index gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz.tbi -hificnv_expected_bed_male gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed -hificnv_expected_bed_female 
gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed -pharmcat_positions_vcf gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz -pharmcat_positions_vcf_index gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/backends/gcp/GRCh38.ref_map.v3p0p0.gcp.tsv b/backends/gcp/GRCh38.ref_map.v3p0p0.gcp.tsv new file mode 100644 index 00000000..0cfed4f4 --- /dev/null +++ b/backends/gcp/GRCh38.ref_map.v3p0p0.gcp.tsv @@ -0,0 +1,12 @@ +name GRCh38 +fasta gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta +fasta_index gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai +pbsv_splits gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json +pbsv_tandem_repeat_bed gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed +trgt_tandem_repeat_bed gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed +sawfish_exclude_bed gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz +sawfish_exclude_bed_index gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz.tbi +sawfish_expected_bed_male gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XY.bed +sawfish_expected_bed_female gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XX.bed +pharmcat_positions_vcf gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz +pharmcat_positions_vcf_index gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/backends/gcp/GRCh38.tertiary_map.v2p0p0.gcp.tsv b/backends/gcp/GRCh38.tertiary_map.v2p0p0.gcp.tsv deleted file mode 100644 index 1e916b9c..00000000 --- 
a/backends/gcp/GRCh38.tertiary_map.v2p0p0.gcp.tsv +++ /dev/null @@ -1,12 +0,0 @@ -slivar_js gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v0.2.8.js -ensembl_gff gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz -lof_lookup gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt -clinvar_lookup gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt -slivar_gnotate_files gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip -slivar_gnotate_prefixes gnomad,colors -slivar_max_af 0.03 -slivar_max_nhomalt 4 -slivar_max_ac 4 -slivar_min_gq 5 -svpack_pop_vcfs gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz -svpack_pop_vcf_indices gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi diff --git a/backends/gcp/GRCh38.tertiary_map.v3p0p0.gcp.tsv b/backends/gcp/GRCh38.tertiary_map.v3p0p0.gcp.tsv new file mode 100644 index 00000000..1f71ce01 --- /dev/null +++ b/backends/gcp/GRCh38.tertiary_map.v3p0p0.gcp.tsv @@ -0,0 +1,12 @@ +slivar_js gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/slivar/slivar-functions.v0.2.8.js +ensembl_gff gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz +lof_lookup gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/slivar/lof_lookup.v2.1.1.txt +clinvar_lookup gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20240624T165443.txt +slivar_gnotate_files 
gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip +slivar_gnotate_prefixes gnomad,colors +slivar_max_af 0.03 +slivar_max_nhomalt 4 +slivar_max_ac 4 +slivar_min_gq 5 +svpack_pop_vcfs gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz +svpack_pop_vcf_indices gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi diff --git a/backends/hpc/GRCh38.ref_map.v2p0p0.hpc.tsv b/backends/hpc/GRCh38.ref_map.v2p0p0.hpc.tsv deleted file mode 100644 index 3a095aaf..00000000 --- a/backends/hpc/GRCh38.ref_map.v2p0p0.hpc.tsv +++ /dev/null @@ -1,12 +0,0 @@ -name GRCh38 -fasta /hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta -fasta_index /hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai -pbsv_splits /hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json -pbsv_tandem_repeat_bed /hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed -trgt_tandem_repeat_bed /hifi-wdl-resources-v2.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed -hificnv_exclude_bed /hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz -hificnv_exclude_bed_index /hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz.tbi -hificnv_expected_bed_male /hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed -hificnv_expected_bed_female /hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed -pharmcat_positions_vcf /hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz 
-pharmcat_positions_vcf_index /hifi-wdl-resources-v2.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/backends/hpc/GRCh38.ref_map.v3p0p0.hpc.tsv b/backends/hpc/GRCh38.ref_map.v3p0p0.hpc.tsv new file mode 100644 index 00000000..2d031dea --- /dev/null +++ b/backends/hpc/GRCh38.ref_map.v3p0p0.hpc.tsv @@ -0,0 +1,12 @@ +name GRCh38 +fasta /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta +fasta_index /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai +pbsv_splits /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json +pbsv_tandem_repeat_bed /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed +trgt_tandem_repeat_bed /hifi-wdl-resources-v3.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed +sawfish_exclude_bed /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz +sawfish_exclude_bed_index /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz.tbi +sawfish_expected_bed_male /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XY.bed +sawfish_expected_bed_female /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XX.bed +pharmcat_positions_vcf /hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz +pharmcat_positions_vcf_index /hifi-wdl-resources-v3.0.0/GRCh38/pharmcat/pharmcat_positions_2.15.4.vcf.bgz.csi diff --git a/backends/hpc/GRCh38.tertiary_map.v2p0p0.hpc.tsv b/backends/hpc/GRCh38.tertiary_map.v2p0p0.hpc.tsv deleted file mode 100644 index 3a2f37dc..00000000 --- a/backends/hpc/GRCh38.tertiary_map.v2p0p0.hpc.tsv +++ /dev/null @@ -1,12 +0,0 @@ -slivar_js /hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v0.2.8.js -ensembl_gff /hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz -lof_lookup /hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt -clinvar_lookup 
/hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt -slivar_gnotate_files /hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,/hifi-wdl-resources-v2.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip -slivar_gnotate_prefixes gnomad,colors -slivar_max_af 0.03 -slivar_max_nhomalt 4 -slivar_max_ac 4 -slivar_min_gq 5 -svpack_pop_vcfs /hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz -svpack_pop_vcf_indices /hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,/hifi-wdl-resources-v2.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi diff --git a/backends/hpc/GRCh38.tertiary_map.v3p0p0.hpc.tsv b/backends/hpc/GRCh38.tertiary_map.v3p0p0.hpc.tsv new file mode 100644 index 00000000..35ec9444 --- /dev/null +++ b/backends/hpc/GRCh38.tertiary_map.v3p0p0.hpc.tsv @@ -0,0 +1,12 @@ +slivar_js /hifi-wdl-resources-v3.0.0/slivar/slivar-functions.v0.2.8.js +ensembl_gff /hifi-wdl-resources-v3.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz +lof_lookup /hifi-wdl-resources-v3.0.0/slivar/lof_lookup.v2.1.1.txt +clinvar_lookup /hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20240624T165443.txt +slivar_gnotate_files /hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip +slivar_gnotate_prefixes gnomad,colors +slivar_max_af 0.03 +slivar_max_nhomalt 4 +slivar_max_ac 4 +slivar_min_gq 5 +svpack_pop_vcfs /hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz +svpack_pop_vcf_indices 
/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi diff --git a/docs/family.md b/docs/family.md index 7310fa26..793a00f6 100644 --- a/docs/family.md +++ b/docs/family.md @@ -9,7 +9,6 @@ - [Alignments, Coverage, and QC](#alignments-coverage-and-qc) - [Small Variants (\<50 bp)](#small-variants-50-bp) - [Structural Variants (≥50 bp)](#structural-variants-50-bp) - - [Copy Number Variants (≥100 kb)](#copy-number-variants-100-kb) - [Tandem Repeat Genotyping](#tandem-repeat-genotyping) - [Variant Phasing](#variant-phasing) - [Variant Calling in Dark Regions](#variant-calling-in-dark-regions) @@ -24,50 +23,82 @@ title: family.wdl --- flowchart TD - subgraph "`**Upstream of Phasing (per-sample)**`" + subgraph "`**Upstream of Phasing\n(per-sample)**`" subgraph "per-movie" - ubam[/"HiFi uBAM"/] --> pbmm2_align["pbmm2 align"] - pbmm2_align --> sawfish_discover["Sawfish discover"] + ubam[/"HiFi uBAM"/] + pbmm2_align["pbmm2 align"] end - pbmm2_align --> samtools_merge["samtools merge"] - samtools_merge --> mosdepth["mosdepth"] - samtools_merge --> paraphase["Paraphase"] - samtools_merge --> hificnv["HiFiCNV"] - samtools_merge --> mitorsaw["MitorSaw"] - samtools_merge --> trgt["TRGT"] - samtools_merge --> trgt_dropouts["TR coverage dropouts"] - samtools_merge --> deepvariant["DeepVariant"] - samtools_merge --> hiphase["HiPhase"] + samtools_merge["samtools merge"] + mosdepth["mosdepth"] + paraphase["Paraphase"] + mitorsaw["MitorSaw"] + trgt["TRGT"] + trgt_dropouts["TR coverage dropouts"] + deepvariant["DeepVariant"] + sawfish_discover["Sawfish discover"] end subgraph "`**Joint Calling**`" - deepvariant --> glnexus["GLnexus (joint-call small variants)"] - sawfish_discover --> sawfish_call["Sawfish call"] - glnexus --> split_glnexus["split small variant vcf by sample"] - sawfish_call --> split_sawfish["split SV vcf by sample"] + glnexus["GLnexus (joint-call 
small variants)"] + sawfish_call["Sawfish call"] + split_glnexus["split small variant vcf by sample"] + split_sawfish["split SV vcf by sample"] end - subgraph "`**Phasing and Downstream (per-sample)**`" - split_glnexus --> hiphase - trgt --> hiphase - split_sawfish --> hiphase - hiphase --> bam_stats["BAM stats"] - hiphase --> bcftools_roh["bcftools roh"] - hiphase --> bcftools_stats["bcftools stats\n(small variants)"] - hiphase --> sv_stats["SV stats"] - hiphase --> cpg_pileup["5mCpG pileup"] - hiphase --> starphase["StarPhase"] - hiphase --> pharmcat["PharmCat"] - starphase --> pharmcat + subgraph "`**Phasing and Downstream**`" + hiphase["HiPhase"] + bam_stats["BAM stats"] + bcftools_roh["bcftools roh"] + bcftools_stats["bcftools stats\n(small variants)"] + sv_stats["SV stats"] + cpg_pileup["5mCpG pileup"] + starphase["StarPhase"] + pharmcat["PharmCat"] end subgraph " " - hiphase --> merge_small_variants["bcftools merge small variants"] - hiphase --> merge_svs["bcftools merge SV"] - hiphase --> trgt_merge["trgt merge"] + merge_small_variants["bcftools merge small variants"] + merge_svs["bcftools merge SV"] + trgt_merge["trgt merge"] end subgraph "`**Tertiary Analysis**`" - merge_small_variants --> slivar_small_variants["slivar small variants"] - merge_svs --> svpack["svpack filter and annotate"] - svpack --> slivar_svpack["slivar svpack tsv"] + slivar_small_variants["slivar small variants"] + svpack["svpack filter and annotate"] + slivar_svpack["slivar svpack tsv"] end + + ubam --> pbmm2_align --> samtools_merge + samtools_merge --> mosdepth + samtools_merge --> paraphase + samtools_merge --> mitorsaw + samtools_merge --> trgt + samtools_merge --> trgt_dropouts + samtools_merge --> deepvariant + samtools_merge --> sawfish_discover + samtools_merge --> hiphase + deepvariant --> sawfish_discover + deepvariant --> glnexus + sawfish_discover --> sawfish_call + trgt --> hiphase + + glnexus --> split_glnexus + sawfish_call --> split_sawfish + split_glnexus --> hiphase 
+ split_sawfish --> hiphase + + hiphase --> bam_stats + hiphase --> bcftools_roh + hiphase --> bcftools_stats + hiphase --> sv_stats + hiphase --> cpg_pileup + hiphase --> starphase + hiphase --> pharmcat + starphase --> pharmcat + + hiphase --> merge_small_variants + hiphase --> merge_svs + hiphase --> trgt_merge + + merge_small_variants --> slivar_small_variants + merge_svs --> svpack + svpack --> slivar_svpack ``` ## Inputs @@ -172,25 +203,15 @@ The `Sample` struct contains sample specific data and metadata. The struct has t | Array\[String\] | stat_sv_BND_count | Structural variant BND count | (PASS variants) | | Array\[String\] | stat_sv_SWAP_count | Structural variant sequence swap events | (PASS variants) | | File | sv_supporting_reads | Supporting reads for structural variants | | +| Array\[File\] | sv_copynum_bedgraph | CNV copy number BEDGraph | | +| Array\[File\] | sv_depth_bw | CNV depth BigWig | | +| Array\[File\] | sv_gc_bias_corrected_depth_bw | CNV GC-bias corrected depth BigWig | | +| Array\[File\] | sv_maf_bw | CNV MAF BigWig | | | Array\[File\] | bcftools_roh_out | ROH calling | `bcftools roh` | | Array\[File\] | bcftools_roh_bed | Generated from above, without filtering | | | File? | joint_sv_vcf | Joint-called structural variant VCF | | | File? 
| joint_sv_vcf_index | | | -### Copy Number Variants (≥100 kb) - -| Type | Name | Description | Notes | -| ---- | ---- | ----------- | ----- | -| Array\[File\] | cnv_vcf | CNV VCF | | -| Array\[File\] | cnv_vcf_index | Index for CNV VCF | | -| Array\[File\] | cnv_copynum_bedgraph | CNV copy number BEDGraph | | -| Array\[File\] | cnv_depth_bw | CNV depth BigWig | | -| Array\[File\] | cnv_maf_bw | CNV MAF BigWig | | -| Array\[String\] | stat_cnv_DUP_count | Count of DUP events | (for PASS variants) | -| Array\[String\] | stat_cnv_DEL_count | Count of DEL events | (PASS variants) | -| Array\[String\] | stat_cnv_DUP_sum | Sum of DUP bp | (PASS variants) | -| Array\[String\] | stat_cnv_DEL_sum | Sum of DEL bp | (PASS variants) | - ### Mitochondrial variants and haplotypes | Type | Name | Description | Notes | diff --git a/docs/ref_map.md b/docs/ref_map.md index 0d6c5e93..c260bde8 100644 --- a/docs/ref_map.md +++ b/docs/ref_map.md @@ -8,10 +8,10 @@ | File | pbsv_splits | Regions for pbsv parallelization | [below](#pbsv_splits) | | File | pbsv_tandem_repeat_bed | Tandem Repeat BED used by PBSV to normalize SVs within TRs | [link](https://github.com/PacificBiosciences/pbsv/tree/master/annotations) | | File | trgt_tandem_repeat_bed | Tandem Repeat catalog (BED) for TRGT genotyping | [link](https://github.com/PacificBiosciences/trgt/blob/main/docs/repeat_files.md) | -| File | hificnv_exclude_bed | Regions to be excluded by HIFICNV in gzipped BED format | [link](https://github.com/PacificBiosciences/HiFiCNV/blob/main/docs/aux_data.md) | -| File | hificnv_exclude_bed_index | BED index | [link](https://github.com/PacificBiosciences/HiFiCNV/blob/main/docs/aux_data.md) | -| File | hificnv_expected_bed_male | Expected allosome copy number BED for XY samples | [link](https://github.com/PacificBiosciences/HiFiCNV/blob/main/docs/aux_data.md) | -| File | hificnv_expected_bed_female | Expected allosome copy number BED for XX samples | 
[link](https://github.com/PacificBiosciences/HiFiCNV/blob/main/docs/aux_data.md) | +| File | sawfish_exclude_bed | Regions to be excluded for Sawfish CNV calls in gzipped BED format | [link](https://github.com/PacificBiosciences/sawfish/blob/main/docs/user_guide.md#cnv-excluded-regions) | +| File | sawfish_exclude_bed_index | BED index | [link](https://github.com/PacificBiosciences/sawfish/blob/main/docs/user_guide.md#cnv-excluded-regions) | +| File | sawfish_expected_bed_male | Expected allosome copy number BED for XY samples | [link](https://github.com/PacificBiosciences/sawfish/blob/main/docs/user_guide.md#expected-copy-number) | +| File | sawfish_expected_bed_female | Expected allosome copy number BED for XX samples | [link](https://github.com/PacificBiosciences/sawfish/blob/main/docs/user_guide.md#expected-copy-number) | | File | pharmcat_positions_vcf | PharmCAT positions VCF | | | File | pharmcat_positions_vcf_index | PharmCAT positions VCF index | | diff --git a/docs/singleton.md b/docs/singleton.md index 6e1c6fc8..9b42435a 100644 --- a/docs/singleton.md +++ b/docs/singleton.md @@ -7,7 +7,6 @@ - [Alignments, Coverage, and QC](#alignments-coverage-and-qc) - [Small Variants (\<50 bp)](#small-variants-50-bp) - [Structural Variants (≥50 bp)](#structural-variants-50-bp) - - [Copy Number Variants (≥100 kb)](#copy-number-variants-100-kb) - [Tandem Repeat Genotyping](#tandem-repeat-genotyping) - [Variant Phasing](#variant-phasing) - [Variant Calling in Dark Regions](#variant-calling-in-dark-regions) @@ -24,38 +23,61 @@ title: singleton.wdl flowchart TD subgraph "`**Upstream of Phasing**`" subgraph "per-movie" - ubam[/"HiFi uBAM"/] --> pbmm2_align["pbmm2 align"] - pbmm2_align --> sawfish_discover["Sawfish discover"] + ubam[/"HiFi uBAM"/] + pbmm2_align["pbmm2 align"] end - pbmm2_align --> merge_read_stats["merge read statistics"] - samtools_merge --> mosdepth["mosdepth"] - samtools_merge --> paraphase["Paraphase"] - samtools_merge --> hificnv["HiFiCNV"] - 
samtools_merge --> mitorsaw["MitorSaw"] - samtools_merge --> trgt["TRGT"] - samtools_merge --> trgt_dropouts["TR coverage dropouts"] - samtools_merge --> deepvariant["DeepVariant"] - samtools_merge --> hiphase["HiPhase"] - sawfish_discover --> sawfish_call["Sawfish call"] + samtools_merge["samtools merge"] + mosdepth["mosdepth"] + paraphase["Paraphase"] + mitorsaw["MitorSaw"] + trgt["TRGT"] + trgt_dropouts["TR coverage dropouts"] + deepvariant["DeepVariant"] + sawfish_discover["Sawfish discover"] + sawfish_call["Sawfish call"] end subgraph "`**Phasing and Downstream**`" - deepvariant --> hiphase - trgt --> hiphase - pbsv_call --> hiphase - hiphase --> bam_stats["BAM stats"] - hiphase --> bcftools_roh["bcftools roh"] - hiphase --> bcftools_stats["bcftools stats\n(small variants)"] - hiphase --> sv_stats["SV stats"] - hiphase --> cpg_pileup["5mCpG pileup"] - hiphase --> starphase["StarPhase"] - hiphase --> pharmcat["PharmCat"] - starphase --> pharmcat + hiphase["HiPhase"] + bam_stats["BAM stats"] + bcftools_roh["bcftools roh"] + bcftools_stats["bcftools stats\n(small variants)"] + sv_stats["SV stats"] + cpg_pileup["5mCpG pileup"] + starphase["StarPhase"] + pharmcat["PharmCat"] end subgraph "`**Tertiary Analysis**`" - hiphase --> slivar_small_variants["slivar small variants"] - hiphase --> svpack["svpack filter and annotate"] - svpack --> slivar_svpack["slivar svpack tsv"] + slivar_small_variants["slivar small variants"] + svpack["svpack filter and annotate"] + slivar_svpack["slivar svpack tsv"] end + + ubam --> pbmm2_align --> samtools_merge + samtools_merge --> mosdepth + samtools_merge --> paraphase + samtools_merge --> mitorsaw + samtools_merge --> trgt + samtools_merge --> trgt_dropouts + samtools_merge --> deepvariant + samtools_merge --> sawfish_discover + samtools_merge --> hiphase + deepvariant --> sawfish_discover + deepvariant --> hiphase + sawfish_discover --> sawfish_call --> hiphase + trgt --> hiphase + + hiphase --> bam_stats + hiphase --> bcftools_roh 
+ hiphase --> bcftools_stats + hiphase --> sv_stats + hiphase --> cpg_pileup + hiphase --> starphase + hiphase --> pharmcat + starphase --> pharmcat + + hiphase --> slivar_small_variants + hiphase --> svpack + svpack --> slivar_svpack ``` ## Inputs @@ -135,6 +157,10 @@ flowchart TD | String | stat_sv_BND_count | Structural variant BND count | (PASS variants) | | String | stat_sv_SWAP_count | Structural variant sequence swap events | (PASS variants) | | File | sv_supporting_reads | Supporting reads for structural variants | | +| File | sv_copynum_bedgraph | CNV copy number BEDGraph | | +| File | sv_depth_bw | CNV depth BigWig | | +| File | sv_gc_bias_corrected_depth_bw | CNV GC-bias corrected depth BigWig | | +| File | sv_maf_bw | CNV MAF BigWig | | | File | bcftools_roh_out | ROH calling | `bcftools roh` | | File | bcftools_roh_bed | Generated from above, without filtering | | @@ -146,20 +172,6 @@ flowchart TD | File | mitorsaw_vcf_index | Index for mitochondrial variant VCF | | | File | mitorsaw_hap_stats | Mitochondrial haplotype stats | | -### Copy Number Variants (≥100 kb) - -| Type | Name | Description | Notes | -| ---- | ---- | ----------- | ----- | -| File | cnv_vcf | CNV VCF | | -| File | cnv_vcf_index | Index for CNV VCF | | -| File | cnv_copynum_bedgraph | CNV copy number BEDGraph | | -| File | cnv_depth_bw | CNV depth BigWig | | -| File | cnv_maf_bw | CNV MAF BigWig | | -| String | stat_cnv_DUP_count | Count of DUP events | (for PASS variants) | -| String | stat_cnv_DEL_count | Count of DEL events | (PASS variants) | -| String | stat_cnv_DUP_sum | Sum of DUP bp | (PASS variants) | -| String | stat_cnv_DEL_sum | Sum of DEL bp | (PASS variants) | - ### Tandem Repeat Genotyping | Type | Name | Description | Notes | diff --git a/docs/tools_containers.md b/docs/tools_containers.md index fc411616..0ac8ddef 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -13,10 +13,9 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and 
`glnexus` cont | pb_wdl_base |
  • htslib 1.20
  • bcftools 1.20
  • samtools 1.20
  • bedtools 2.31.0
  • python3.9
  • numpy 1.24.4
  • pandas 2.0.3
  • matplotlib 3.7.5
  • seaborn 0.13.2
  • pysam 0.22.1
  • vcfpy 0.13.8
  • biopython 1.83
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/6b13cc246dd44e41903d17a660bb5432cdd18dbe/docker/pb_wdl_base) | [pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87](https://quay.io/repository/pacbio/pb_wdl_base/manifest/sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87) | | pbmm2 |
  • pbmm2 1.17.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/9591749da92ca57f7283ca1c2268789c45fa341d/docker/pbmm2) | [pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859](https://quay.io/repository/pacbio/pbmm2/manifest/sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859) | | mosdepth |
  • mosdepth 0.3.9
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/fa84fbf582738c05c750e667ff43d11552ad4183/docker/mosdepth) | [mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1](https://quay.io/repository/pacbio/mosdepth/manifest/sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1) | -| sawfish |
  • sawfish 1.0.2
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/d3d64bc7dd62a74369f4b7e9b2416b991691eedb/docker/sawfish) | [sawfish@sha256:f995aaf97f27b3a4bb9b0b453566ce0b797c126e06007a4fc95ffc7912d78d8e](https://quay.io/repository/pacbio/sawfish/manifest/sha256:f995aaf97f27b3a4bb9b0b453566ce0b797c126e06007a4fc95ffc7912d78d8e) | +| sawfish |
  • sawfish 2.0.1
  • sawshark 0.2.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/40c2aef86e409f522bc8e06fbbdff30edc2110a0/docker/sawfish) | [sawfish@sha256:dc7a955175967b4a5c4be9c438243332bc22f41e8240bb3b5c84699741248a44](https://quay.io/repository/pacbio/sawfish/manifest/sha256:dc7a955175967b4a5c4be9c438243332bc22f41e8240bb3b5c84699741248a44) | | trgt |
  • trgt 3.0.0
  • `/opt/scripts/check_trgt_coverage.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3c5ee05da7043bd03bd80959c3dd025e25468070/docker/trgt) | [trgt@sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284](https://quay.io/repository/pacbio/trgt/manifest/sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284) | | hiphase |
  • hiphase 1.5.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/69039c010ada793bab4d38a9bd17a30562b9b671/docker/hiphase) | [hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482](https://quay.io/repository/pacbio/hiphase/manifest/sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482) | -| hificnv |
  • hificnv 1.0.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/a58f8b44cf8fd09c39c90e07076dbb418188084d/docker/hificnv) | [hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d](https://quay.io/repository/pacbio/hificnv/manifest/sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d) | | mitorsaw |
  • mitorsaw 0.2.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/97c698b078b413a5718bf2721e4e10daf5ae4d68/docker/mitorsaw) | [mitorsaw@sha256:87c49411ab8fc82e2bd0ea9177c206a0f1b9dc972c5b5e64c534585581e10fe0](https://quay.io/repository/pacbio/mitorsaw/manifest/sha256:87c49411ab8fc82e2bd0ea9177c206a0f1b9dc972c5b5e64c534585581e10fe0) | | paraphase |
  • paraphase 3.3.2
  • minimap 2.28
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/28c84c386e28ce0a46587e4f1bf85db824bb4634/docker/paraphase) | [paraphase@sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920](https://quay.io/repository/pacbio/paraphase/manifest/sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920) | | pbstarphase |
  • pbstarphase 1.4.1
  • Database 20250515
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/c5166b28e43f36a381450ba479e2e34a841bb922/docker/pbstarphase) | [pbstarphase@sha256:7daaad3b617a3b8b5914ab0893ee7cc545fd2025a35619211a5b8e25e4c36ac4](https://quay.io/repository/pacbio/pbstarphase/manifest/sha256:7daaad3b617a3b8b5914ab0893ee7cc545fd2025a35619211a5b8e25e4c36ac4) | diff --git a/image_manifest.txt b/image_manifest.txt index 83d49a59..6b99f6b4 100644 --- a/image_manifest.txt +++ b/image_manifest.txt @@ -1,5 +1,4 @@ quay.io/pacbio/glnexus@sha256:ce6fecf59dddc6089a8100b31c29c1e6ed50a0cf123da9f2bc589ee4b0c69c8e -quay.io/pacbio/hificnv@sha256:c4764a70c8c2028edb1cdb4352997269947c5076ddd1aeaeef6c5076c630304d quay.io/pacbio/hiphase@sha256:47fe7d42aea6b1b2e6d3c7401bc35a184464c3f647473d0525c00f3c968b40ad quay.io/pacbio/mitorsaw@sha256:87c49411ab8fc82e2bd0ea9177c206a0f1b9dc972c5b5e64c534585581e10fe0 quay.io/pacbio/mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1 @@ -8,7 +7,7 @@ quay.io/pacbio/pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f7239587 quay.io/pacbio/pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859 quay.io/pacbio/pbstarphase@sha256:7daaad3b617a3b8b5914ab0893ee7cc545fd2025a35619211a5b8e25e4c36ac4 quay.io/pacbio/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87 -quay.io/pacbio/sawfish@sha256:f995aaf97f27b3a4bb9b0b453566ce0b797c126e06007a4fc95ffc7912d78d8e +quay.io/pacbio/sawfish@sha256:dc7a955175967b4a5c4be9c438243332bc22f41e8240bb3b5c84699741248a44 quay.io/pacbio/slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa quay.io/pacbio/svpack@sha256:628e9851e425ed8044a907d33de04043d1ef02d4d2b2667cf2e9a389bb011eba quay.io/pacbio/trgt@sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284 diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 9b9faf54..176b2e24 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -555,7 +555,7 @@ }, 
"split_vcf_by_sample": { "key": "split_vcf_by_sample", - "digest": "wrmspgbtubqm4xdskkbakz6krc5iw65k", + "digest": "zfx6w6oiy3mzreyjkrxar7wigvygm3un", "tests": [ { "inputs": { @@ -612,6 +612,7 @@ "HG003.HG002-trio.GRCh38.structural_variants.chr6_10000000_20000000.vcf.gz.tbi", "HG004.HG002-trio.GRCh38.structural_variants.chr6_10000000_20000000.vcf.gz.tbi" ], + "exclude_uncalled": false, "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { @@ -928,93 +929,6 @@ } } }, - "workflows/wdl-common/wdl/tasks/hificnv.wdl": { - "key": "workflows/wdl-common/wdl/tasks/hificnv.wdl", - "name": "", - "description": "", - "tasks": { - "hificnv": { - "key": "hificnv", - "digest": "45dcdjjpmnwhymg3vfjeo7yxbyczxart", - "tests": [ - { - "inputs": { - "sample_id": "HG002", - "sex": "MALE", - "aligned_bam": "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam", - "aligned_bam_index": "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam.bai", - "vcf": "${resources_file_path}/inputs/HG002.GRCh38.small_variants.vcf.gz", - "vcf_index": "${resources_file_path}/inputs/HG002.GRCh38.small_variants.vcf.gz", - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "exclude_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz", - "exclude_bed_index": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/cnv.excluded_regions.common_50.hg38.bed.gz.tbi", - "expected_male_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed", - "expected_female_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "cnv_vcf": { - "value": "${resources_file_path}/hificnv/HG002.GRCh38.hificnv.vcf.gz", - "test_tasks": [ - "compare_file_basename", - "vcftools_validator", - "check_gzip" - ] - }, - 
"copynum_bedgraph": { - "value": "${resources_file_path}/hificnv/HG002.GRCh38.hificnv.copynum.bedgraph", - "test_tasks": [ - "compare_file_basename", - "check_tab_delimited", - "count_bed_columns" - ] - }, - "depth_bw": { - "value": "${resources_file_path}/hificnv/HG002.GRCh38.hificnv.depth.bw", - "test_tasks": [ - "compare_file_basename", - "bigwig_validator" - ] - }, - "maf_bw": { - "value": "${resources_file_path}/hificnv/HG002.GRCh38.hificnv.maf.bw", - "test_tasks": [ - "compare_file_basename", - "bigwig_validator" - ] - }, - "stat_DUP_count": { - "value": "0", - "test_tasks": [ - "compare_string" - ] - }, - "stat_DUP_sum": { - "value": "0", - "test_tasks": [ - "compare_string" - ] - }, - "stat_DEL_count": { - "value": "89", - "test_tasks": [ - "compare_string" - ] - }, - "stat_DEL_sum": { - "value": "2885534000", - "test_tasks": [ - "compare_string" - ] - } - } - } - ] - } - } - }, "workflows/wdl-common/wdl/tasks/hiphase.wdl": { "key": "workflows/wdl-common/wdl/tasks/hiphase.wdl", "name": "", @@ -2072,64 +1986,28 @@ "tasks": { "sawfish_discover": { "key": "sawfish_discover", - "digest": "bpppd3zrlqu4tpyls4fkrzq545ghyskz", + "digest": "mluhmv4fzusd5rltn3uffyrcb3w6oary", "tests": [ { "inputs": { + "sample_id": "HG002-minimal", "sex": "MALE", - "aligned_bam": "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam", - "aligned_bam_index": "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam.bai", - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "expected_male_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed", - "expected_female_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed", - "out_prefix": "HG002.GRCh38", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "discover_tar": { - "value": "${resources_file_path}/sawfish_discover/output/HG002/HG002.GRCh38.tar", - "test_tasks": [ - 
"compare_file_basename" - ] - } - } - }, - { - "inputs": { - "sex": "MALE", - "aligned_bam": "${resources_file_path}/inputs/HG003.GRCh38.chr6_10000000_20000000.bam", - "aligned_bam_index": "${resources_file_path}/inputs/HG003.GRCh38.chr6_10000000_20000000.bam.bai", + "aligned_bam": "${resources_file_path}/sawfish_discover/inputs/HG002-minimal.m84039_241001_220042_s2.hifi_reads.minimal.GRCh38.aligned.bam", + "aligned_bam_index": "${resources_file_path}/sawfish_discover/inputs/HG002-minimal.m84039_241001_220042_s2.hifi_reads.minimal.GRCh38.aligned.bam.bai", "ref_fasta": "${ref_fasta}", "ref_index": "${ref_index}", - "expected_male_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed", - "expected_female_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed", - "out_prefix": "HG003.GRCh38", + "exclude_bed": "${datasets_file_path}/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz", + "exclude_bed_index": "${datasets_file_path}/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz.tbi", + "expected_male_bed": "${datasets_file_path}/GRCh38/sawfish/expected_cn.hg38.XY.bed", + "expected_female_bed": "${datasets_file_path}/GRCh38/sawfish/expected_cn.hg38.XX.bed", + "small_variant_vcf": "${resources_file_path}/sawfish_discover/inputs/HG002-minimal.GRCh38.small_variants.vcf.gz", + "small_variant_vcf_index": "${resources_file_path}/sawfish_discover/inputs/HG002-minimal.GRCh38.small_variants.vcf.gz.tbi", + "out_prefix": "HG002-minimal", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "discover_tar": { - "value": "${resources_file_path}/sawfish_discover/output/HG003/HG003.GRCh38.tar", - "test_tasks": [ - "compare_file_basename" - ] - } - } - }, - { - "inputs": { - "aligned_bam": "${resources_file_path}/inputs/HG004.GRCh38.chr6_10000000_20000000.bam", - "aligned_bam_index": "${resources_file_path}/inputs/HG004.GRCh38.chr6_10000000_20000000.bam.bai", - "ref_fasta": 
"${ref_fasta}", - "ref_index": "${ref_index}", - "expected_male_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed", - "expected_female_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed", - "out_prefix": "HG004.GRCh38", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "discover_tar": { - "value": "${resources_file_path}/sawfish_discover/output/HG004/HG004.GRCh38.tar", + "value": "${resources_file_path}/sawfish_call/input/sawfish-2/HG002-minimal.tar", "test_tasks": [ "compare_file_basename" ] @@ -2140,28 +2018,30 @@ }, "sawfish_call": { "key": "sawfish_call", - "digest": "jic2pwxzicg2ci3jz3agp43vep6mhrh6", + "digest": "rn2lgqleychf2ppcobtco26uzf4hynpj", "tests": [ { "inputs": { + "sample_ids": [ + "HG002-minimal" + ], "discover_tars": [ - "${resources_file_path}/sawfish_call/input/HG002.GRCh38.tar" + "${resources_file_path}/sawfish_call/input/sawfish-2/HG002-minimal.tar" ], "aligned_bams": [ - "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam" + "${resources_file_path}/sawfish_discover/inputs/HG002-minimal.m84039_241001_220042_s2.hifi_reads.minimal.GRCh38.aligned.bam" ], "aligned_bam_indices": [ - "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam.bai" + "${resources_file_path}/sawfish_discover/inputs/HG002-minimal.m84039_241001_220042_s2.hifi_reads.minimal.GRCh38.aligned.bam.bai" ], "ref_fasta": "${ref_fasta}", "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "out_prefix": "HG002.GRCh38.structural_variants", + "out_prefix": "HG002-minimal.GRCh38.structural_variants", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "vcf": { - "value": "${resources_file_path}/sawfish_call/output/HG002/HG002.GRCh38.structural_variants.vcf.gz", + "value": "${resources_file_path}/sawfish_call/output/HG002-minimal/HG002-minimal.GRCh38.structural_variants.vcf.gz", "test_tasks": [ 
"compare_file_basename", "check_gzip", @@ -2169,40 +2049,81 @@ ] }, "supporting_reads": { - "value": "${resources_file_path}/sawfish_call/output/HG002/HG002.GRCh38.structural_variants.supporting_reads.json.gz", + "value": "${resources_file_path}/sawfish_call/output/HG002-minimal/HG002-minimal.GRCh38.structural_variants.supporting_reads.json.gz", "test_tasks": [ "compare_file_basename", "check_gzip" ] + }, + "copynum_bedgraph": { + "value": [ + "${resources_file_path}/sawfish_call/output/HG002-minimal/HG002-minimal.GRCh38.structural_variants.copynum.bedgraph" + ], + "test_tasks": [ + "compare_file_basename", + "check_tab_delimited", + "count_bed_columns" + ] + }, + "depth_bw": { + "value": [ + "${resources_file_path}/sawfish_call/output/HG002-minimal/HG002-minimal.GRCh38.structural_variants.depth.bw" + ], + "test_tasks": [ + "compare_file_basename", + "bigwig_validator" + ] + }, + "gc_bias_corrected_depth_bw": { + "value": [ + "${resources_file_path}/sawfish_call/output/HG002-minimal/HG002-minimal.GRCh38.structural_variants.gc_bias_corrected_depth.bw" + ], + "test_tasks": [ + "compare_file_basename", + "bigwig_validator" + ] + }, + "maf_bw": { + "value": [ + "${resources_file_path}/sawfish_call/output/HG002-minimal/HG002-minimal.GRCh38.structural_variants.maf.bw" + ], + "test_tasks": [ + "compare_file_basename", + "bigwig_validator" + ] } } }, { "inputs": { + "sample_ids": [ + "HG002-minimal", + "HG003-minimal", + "HG004-minimal" + ], "discover_tars": [ - "${resources_file_path}/sawfish_call/input/HG002.GRCh38.tar", - "${resources_file_path}/sawfish_call/input/HG003.GRCh38.tar", - "${resources_file_path}/sawfish_call/input/HG004.GRCh38.tar" + "${resources_file_path}/sawfish_call/input/sawfish-2/HG002-minimal.tar", + "${resources_file_path}/sawfish_call/input/sawfish-2/HG003-minimal.tar", + "${resources_file_path}/sawfish_call/input/sawfish-2/HG004-minimal.tar" ], "aligned_bams": [ - "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam", - 
"${resources_file_path}/inputs/HG003.GRCh38.chr6_10000000_20000000.bam", - "${resources_file_path}/inputs/HG004.GRCh38.chr6_10000000_20000000.bam" + "${resources_file_path}/sawfish_call/input/sawfish-2/HG002-minimal.m84039_241001_220042_s2.hifi_reads.minimal.GRCh38.aligned.bam", + "${resources_file_path}/sawfish_call/input/sawfish-2/HG003-minimal.m84039_241002_000337_s3.hifi_reads.minimal.GRCh38.aligned.bam", + "${resources_file_path}/sawfish_call/input/sawfish-2/HG004-minimal.m84039_241002_020632_s4.hifi_reads.minimal.GRCh38.aligned.bam" ], "aligned_bam_indices": [ - "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam.bai", - "${resources_file_path}/inputs/HG003.GRCh38.chr6_10000000_20000000.bam.bai", - "${resources_file_path}/inputs/HG004.GRCh38.chr6_10000000_20000000.bam.bai" + "${resources_file_path}/sawfish_call/input/sawfish-2/HG002-minimal.m84039_241001_220042_s2.hifi_reads.minimal.GRCh38.aligned.bam.bai", + "${resources_file_path}/sawfish_call/input/sawfish-2/HG003-minimal.m84039_241002_000337_s3.hifi_reads.minimal.GRCh38.aligned.bam.bai", + "${resources_file_path}/sawfish_call/input/sawfish-2/HG004-minimal.m84039_241002_020632_s4.hifi_reads.minimal.GRCh38.aligned.bam.bai" ], "ref_fasta": "${ref_fasta}", "ref_index": "${ref_index}", - "ref_name": "${ref_name}", "out_prefix": "HG002-trio.joint.GRCh38.structural_variants", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "vcf": { - "value": "${resources_file_path}/sawfish_call/output/HG002-trio/HG002-trio.joint.GRCh38.structural_variants.vcf.gz", + "value": "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG002-trio.joint.GRCh38.structural_variants.vcf.gz", "test_tasks": [ "compare_file_basename", "check_gzip", @@ -2210,11 +2131,56 @@ ] }, "supporting_reads": { - "value": "${resources_file_path}/sawfish_call/output/HG002-trio/HG002-trio.joint.GRCh38.structural_variants.supporting_reads.json.gz", + "value": 
"${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG002-trio.joint.GRCh38.structural_variants.supporting_reads.json.gz", "test_tasks": [ "compare_file_basename", "check_gzip" ] + }, + "copynum_bedgraph": { + "value": [ + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG002-minimal.HG002-trio.joint.GRCh38.structural_variants.copynum.bedgraph", + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG003-minimal.HG002-trio.joint.GRCh38.structural_variants.copynum.bedgraph", + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG004-minimal.HG002-trio.joint.GRCh38.structural_variants.copynum.bedgraph" + ], + "test_tasks": [ + "compare_file_basename", + "check_tab_delimited", + "count_bed_columns" + ] + }, + "depth_bw": { + "value": [ + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG002-minimal.HG002-trio.joint.GRCh38.structural_variants.depth.bw", + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG003-minimal.HG002-trio.joint.GRCh38.structural_variants.depth.bw", + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG004-minimal.HG002-trio.joint.GRCh38.structural_variants.depth.bw" + ], + "test_tasks": [ + "compare_file_basename", + "bigwig_validator" + ] + }, + "gc_bias_corrected_depth_bw": { + "value": [ + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG002-minimal.HG002-trio.joint.GRCh38.structural_variants.gc_bias_corrected_depth.bw", + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG003-minimal.HG002-trio.joint.GRCh38.structural_variants.gc_bias_corrected_depth.bw", + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG004-minimal.HG002-trio.joint.GRCh38.structural_variants.gc_bias_corrected_depth.bw" + ], + "test_tasks": [ + "compare_file_basename", + "bigwig_validator" + ] + }, + "maf_bw": { + "value": [ + 
"${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG002-minimal.HG002-trio.joint.GRCh38.structural_variants.maf.bw", + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG003-minimal.HG002-trio.joint.GRCh38.structural_variants.maf.bw", + "${resources_file_path}/sawfish_call/output/HG002-trio-minimal/HG004-minimal.HG002-trio.joint.GRCh38.structural_variants.maf.bw" + ], + "test_tasks": [ + "compare_file_basename", + "bigwig_validator" + ] } } } @@ -2237,9 +2203,9 @@ "engine_params": { "pacbio-hpc": { "resources_file_path": "/pbi/vast-collections/appslabht/cromwell_tests/humanwgs", - "datasets_file_path": "/pbi/vast-collections/appslabht/cromwell_tests/humanwgs/hifi-wdl-resources-v2.0.0", - "ref_fasta": "/pbi/vast-collections/appslabht/cromwell_tests/humanwgs/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta", - "ref_index": "/pbi/vast-collections/appslabht/cromwell_tests/humanwgs/hifi-wdl-resources-v2.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai", + "datasets_file_path": "/pbi/vast-collections/appslabht/analysis_workflow_inputs/hifi-wgs-wdl-resources/", + "ref_fasta": "/pbi/vast-collections/appslabht/analysis_workflow_inputs/hifi-wgs-wdl-resources/GRCh38/human_GRCh38_no_alt_analysis_set.fasta", + "ref_index": "/pbi/vast-collections/appslabht/analysis_workflow_inputs/hifi-wgs-wdl-resources/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai", "default_runtime_attributes": { "backend": "HPC", "preemptible_tries": 0, diff --git a/workflows/family.wdl b/workflows/family.wdl index d37ed80d..b1b6a60b 100644 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -245,10 +245,6 @@ workflow humanwgs_family { 'sv_INV_count': downstream.stat_sv_INV_count, 'sv_SWAP_count': downstream.stat_sv_SWAP_count, 'sv_BND_count': downstream.stat_sv_BND_count, - 'cnv_DUP_count': upstream.stat_cnv_DUP_count, - 'cnv_DEL_count': upstream.stat_cnv_DEL_count, - 'cnv_DUP_sum': upstream.stat_cnv_DUP_sum, - 'cnv_DEL_sum': 
upstream.stat_cnv_DEL_sum, 'trgt_genotyped_count': upstream.stat_trgt_genotyped_count, 'trgt_uncalled_count': upstream.stat_trgt_uncalled_count } @@ -315,9 +311,13 @@ workflow humanwgs_family { Array[String] stat_cpg_combined_count = downstream.stat_combined_cpg_count # sv outputs - Array[File] phased_sv_vcf = downstream.phased_sv_vcf - Array[File] phased_sv_vcf_index = downstream.phased_sv_vcf_index - File sv_supporting_reads = select_first([joint.sv_supporting_reads, upstream.sv_supporting_reads[0]]) + Array[File] phased_sv_vcf = downstream.phased_sv_vcf + Array[File] phased_sv_vcf_index = downstream.phased_sv_vcf_index + File sv_supporting_reads = select_first([joint.sv_supporting_reads, upstream.sv_supporting_reads[0]]) + Array[File] sv_copynum_bedgraph = select_first([joint.sv_copynum_bedgraph, select_all(upstream.sv_copynum_bedgraph)]) + Array[File] sv_depth_bw = select_first([joint.sv_depth_bw, select_all(upstream.sv_depth_bw)]) + Array[File] sv_gc_bias_corrected_depth_bw = select_first([joint.sv_gc_bias_corrected_depth_bw, select_all(upstream.sv_gc_bias_corrected_depth_bw)]) + Array[File] sv_maf_bw = select_first([joint.sv_maf_bw, select_all(upstream.sv_maf_bw)]) # sv stats Array[String] stat_sv_DUP_count = downstream.stat_sv_DUP_count @@ -359,17 +359,6 @@ workflow humanwgs_family { Array[File?] paraphase_realigned_bam_index = upstream.paraphase_realigned_bam_index Array[File?] 
paraphase_vcfs = upstream.paraphase_vcfs - # per sample cnv outputs - Array[File] cnv_vcf = upstream.cnv_vcf - Array[File] cnv_vcf_index = upstream.cnv_vcf_index - Array[File] cnv_copynum_bedgraph = upstream.cnv_copynum_bedgraph - Array[File] cnv_depth_bw = upstream.cnv_depth_bw - Array[File] cnv_maf_bw = upstream.cnv_maf_bw - Array[String] stat_cnv_DUP_count = upstream.stat_cnv_DUP_count - Array[String] stat_cnv_DEL_count = upstream.stat_cnv_DEL_count - Array[String] stat_cnv_DUP_sum = upstream.stat_cnv_DUP_sum - Array[String] stat_cnv_DEL_sum = upstream.stat_cnv_DEL_sum - # per sample mitorsaw outputs Array[File] mitorsaw_vcf = upstream.mitorsaw_vcf Array[File] mitorsaw_vcf_index = upstream.mitorsaw_vcf_index diff --git a/workflows/joint/joint.wdl b/workflows/joint/joint.wdl index 215800fa..e1934121 100644 --- a/workflows/joint/joint.wdl +++ b/workflows/joint/joint.wdl @@ -56,6 +56,15 @@ workflow joint { sv_supporting_reads: { name: "Supporting reads JSON" } + sv_copynum_bedgraph: { + name: "Copy number bedgraph" + } + sv_depth_bw: { + name: "Depth bedgraph" + } + sv_maf_bw: { + name: "MAF bedgraph" + } } input { @@ -80,11 +89,12 @@ workflow joint { call Sawfish.sawfish_call { input: + sample_ids = sample_ids, discover_tars = discover_tars, aligned_bams = aligned_bams, aligned_bam_indices = aligned_bam_indices, - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion out_prefix = "~{family_id}.joint.~{ref_map['name']}.structural_variants", runtime_attributes = default_runtime_attributes } @@ -103,6 +113,7 @@ workflow joint { vcf_index = sawfish_call.vcf_index, split_vcf_names = split_sv_vcf_name, split_vcf_index_names = split_sv_vcf_index_name, + exclude_uncalled = false, runtime_attributes = default_runtime_attributes } @@ -139,5 +150,9 @@ workflow joint { Array[File] split_joint_small_variant_vcfs = 
split_glnexus.split_vcfs Array[File] split_joint_small_variant_vcf_indices = split_glnexus.split_vcf_indices File sv_supporting_reads = select_first([sawfish_call.supporting_reads]) + Array[File] sv_copynum_bedgraph = sawfish_call.copynum_bedgraph + Array[File] sv_depth_bw = sawfish_call.depth_bw + Array[File] sv_gc_bias_corrected_depth_bw = sawfish_call.gc_bias_corrected_depth_bw + Array[File] sv_maf_bw = sawfish_call.maf_bw } } diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl index 40247e6a..e204cf5b 100644 --- a/workflows/singleton.wdl +++ b/workflows/singleton.wdl @@ -190,10 +190,6 @@ workflow humanwgs_singleton { 'sv_INV_count': [downstream.stat_sv_INV_count], 'sv_SWAP_count': [downstream.stat_sv_SWAP_count], 'sv_BND_count': [downstream.stat_sv_BND_count], - 'cnv_DUP_count': [upstream.stat_cnv_DUP_count], - 'cnv_DEL_count': [upstream.stat_cnv_DEL_count], - 'cnv_DUP_sum': [upstream.stat_cnv_DUP_sum], - 'cnv_DEL_sum': [upstream.stat_cnv_DEL_sum], 'trgt_genotyped_count': [upstream.stat_trgt_genotyped_count], 'trgt_uncalled_count': [upstream.stat_trgt_uncalled_count] } @@ -259,9 +255,13 @@ workflow humanwgs_singleton { String stat_cpg_combined_count = downstream.stat_combined_cpg_count # sv outputs - File phased_sv_vcf = downstream.phased_sv_vcf - File phased_sv_vcf_index = downstream.phased_sv_vcf_index - File sv_supporting_reads = select_first([upstream.sv_supporting_reads]) + File phased_sv_vcf = downstream.phased_sv_vcf + File phased_sv_vcf_index = downstream.phased_sv_vcf_index + File sv_supporting_reads = select_first([upstream.sv_supporting_reads]) + File sv_copynum_bedgraph = select_first([upstream.sv_copynum_bedgraph]) + File sv_depth_bw = select_first([upstream.sv_depth_bw]) + File sv_gc_bias_corrected_depth_bw = select_first([upstream.sv_gc_bias_corrected_depth_bw]) + File sv_maf_bw = select_first([upstream.sv_maf_bw]) # sv stats String stat_sv_DUP_count = downstream.stat_sv_DUP_count @@ -303,17 +303,6 @@ workflow humanwgs_singleton { 
File? paraphase_realigned_bam_index = upstream.paraphase_realigned_bam_index File? paraphase_vcfs = upstream.paraphase_vcfs - # per sample cnv outputs - File cnv_vcf = upstream.cnv_vcf - File cnv_vcf_index = upstream.cnv_vcf_index - File cnv_copynum_bedgraph = upstream.cnv_copynum_bedgraph - File cnv_depth_bw = upstream.cnv_depth_bw - File cnv_maf_bw = upstream.cnv_maf_bw - String stat_cnv_DUP_count = upstream.stat_cnv_DUP_count - String stat_cnv_DEL_count = upstream.stat_cnv_DEL_count - String stat_cnv_DUP_sum = upstream.stat_cnv_DUP_sum - String stat_cnv_DEL_sum = upstream.stat_cnv_DEL_sum - # per sample mitorsaw outputs File mitorsaw_vcf = upstream.mitorsaw_vcf File mitorsaw_vcf_index = upstream.mitorsaw_vcf_index diff --git a/workflows/upstream/upstream.wdl b/workflows/upstream/upstream.wdl index 3f055e50..11d933b1 100644 --- a/workflows/upstream/upstream.wdl +++ b/workflows/upstream/upstream.wdl @@ -8,7 +8,6 @@ import "../wdl-common/wdl/tasks/samtools.wdl" as Samtools import "../wdl-common/wdl/tasks/mosdepth.wdl" as Mosdepth import "../wdl-common/wdl/tasks/trgt.wdl" as Trgt import "../wdl-common/wdl/tasks/paraphase.wdl" as Paraphase -import "../wdl-common/wdl/tasks/hificnv.wdl" as Hificnv import "../wdl-common/wdl/tasks/mitorsaw.wdl" as Mitorsaw workflow upstream { @@ -106,24 +105,32 @@ workflow upstream { call DeepVariant.deepvariant { input: - sample_id = sample_id, - aligned_bams = [aligned_bam_data], - aligned_bam_indices = [aligned_bam_index], - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion - ref_name = ref_map["name"], - gpu = gpu, - default_runtime_attributes = default_runtime_attributes + sample_id = sample_id, + aligned_bams = [aligned_bam_data], + aligned_bam_indices = [aligned_bam_index], + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + ref_name = ref_map["name"], + gpu = gpu, + default_runtime_attributes = default_runtime_attributes } call 
Sawfish.sawfish_discover { input: - aligned_bam = aligned_bam_data, - aligned_bam_index = aligned_bam_index, - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion - out_prefix = "~{sample_id}.~{ref_map['name']}", - runtime_attributes = default_runtime_attributes + sample_id = sample_id, + sex = mosdepth.inferred_sex, + aligned_bam = aligned_bam_data, + aligned_bam_index = aligned_bam_index, + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + exclude_bed = ref_map["sawfish_exclude_bed"], # !FileCoercion + exclude_bed_index = ref_map["sawfish_exclude_bed_index"], # !FileCoercion + expected_male_bed = ref_map["sawfish_expected_bed_male"], # !FileCoercion # TODO: consider renaming the exclude and expected files + expected_female_bed = ref_map["sawfish_expected_bed_female"], # !FileCoercion + small_variant_vcf = deepvariant.vcf, + small_variant_vcf_index = deepvariant.vcf_index, + out_prefix = "~{sample_id}", + runtime_attributes = default_runtime_attributes } call Trgt.trgt { @@ -149,24 +156,6 @@ workflow upstream { runtime_attributes = default_runtime_attributes } - call Hificnv.hificnv { - input: - sample_id = sample_id, - sex = mosdepth.inferred_sex, - aligned_bam = aligned_bam_data, - aligned_bam_index = aligned_bam_index, - vcf = deepvariant.vcf, - vcf_index = deepvariant.vcf_index, - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion - ref_name = ref_map["name"], - exclude_bed = ref_map["hificnv_exclude_bed"], # !FileCoercion - exclude_bed_index = ref_map["hificnv_exclude_bed_index"], # !FileCoercion - expected_male_bed = ref_map["hificnv_expected_bed_male"], # !FileCoercion - expected_female_bed = ref_map["hificnv_expected_bed_female"], # !FileCoercion - runtime_attributes = default_runtime_attributes - } - call Mitorsaw.mitorsaw { input: aligned_bam = aligned_bam_data, @@ -179,7 +168,8 @@ workflow upstream { if 
(single_sample) { call Sawfish.sawfish_call { - input: + input: + sample_ids = [sample_id], discover_tars = [sawfish_discover.discover_tar], aligned_bams = [aligned_bam_data], aligned_bam_indices = [aligned_bam_index], @@ -188,6 +178,11 @@ workflow upstream { out_prefix = "~{sample_id}.~{ref_map['name']}.structural_variants", runtime_attributes = default_runtime_attributes } + + File copynum_bedgraph_output = sawfish_call.copynum_bedgraph[0] + File depth_bw_output = sawfish_call.depth_bw[0] + File gc_bias_corrected_depth_bw_output = sawfish_call.gc_bias_corrected_depth_bw[0] + File maf_bw_output = sawfish_call.maf_bw[0] } output { @@ -207,9 +202,13 @@ workflow upstream { File discover_tar = sawfish_discover.discover_tar # sawfish outputs for single sample - File? sv_vcf = sawfish_call.vcf - File? sv_vcf_index = sawfish_call.vcf_index - File? sv_supporting_reads = sawfish_call.supporting_reads + File? sv_vcf = sawfish_call.vcf + File? sv_vcf_index = sawfish_call.vcf_index + File? sv_supporting_reads = sawfish_call.supporting_reads + File? sv_copynum_bedgraph = copynum_bedgraph_output + File? sv_depth_bw = depth_bw_output + File? sv_gc_bias_corrected_depth_bw = gc_bias_corrected_depth_bw_output + File? sv_maf_bw = maf_bw_output # small variant outputs File small_variant_vcf = deepvariant.vcf @@ -231,17 +230,6 @@ workflow upstream { File? paraphase_realigned_bam_index = paraphase.bam_index File? 
paraphase_vcfs = paraphase.vcfs_tar - # per sample hificnv outputs - File cnv_vcf = hificnv.cnv_vcf - File cnv_vcf_index = hificnv.cnv_vcf_index - File cnv_copynum_bedgraph = hificnv.copynum_bedgraph - File cnv_depth_bw = hificnv.depth_bw - File cnv_maf_bw = hificnv.maf_bw - String stat_cnv_DUP_count = hificnv.stat_DUP_count - String stat_cnv_DEL_count = hificnv.stat_DEL_count - String stat_cnv_DUP_sum = hificnv.stat_DUP_sum - String stat_cnv_DEL_sum = hificnv.stat_DEL_sum - # per sample mitorsaw outputs File mitorsaw_vcf = mitorsaw.vcf File mitorsaw_vcf_index = mitorsaw.vcf_index @@ -253,8 +241,7 @@ workflow upstream { flatten(pbmm2.msg), [qc_sex], trgt.msg, - paraphase.msg, - hificnv.msg + sawfish_discover.msg ] ) } diff --git a/workflows/wdl-common b/workflows/wdl-common index 4a98066c..973fdba8 160000 --- a/workflows/wdl-common +++ b/workflows/wdl-common @@ -1 +1 @@ -Subproject commit 4a98066c5bd8e2ec06b0db3ea4ff58a803688326 +Subproject commit 973fdba809b58d91b11ef5cfcf8d68c6227509b2 From 292e64b089fa12bb4f7d5bc844edcf9714c0e0bd Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Fri, 27 Jun 2025 13:03:29 -0400 Subject: [PATCH 44/61] Update gene constraint lookup tables (#201) * Updated LoF metrics to use gnomAD v4.1 gene constraint metrics. TSVs will now be annotated with pLI, oe.lof, LOEUF, and LOEUF_decile. More details on these metrics [here](https://gnomad.broadinstitute.org/news/2024-03-gnomad-v4-0-gene-constraint/). 
--- wdl-ci.config.json | 18 +++++++++--------- workflows/tertiary/tertiary.wdl | 31 +++++++++++++++++++++++++------ 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 176b2e24..11150693 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -37,7 +37,7 @@ "tasks": { "slivar_small_variant": { "key": "slivar_small_variant", - "digest": "hk7smb3fjdzicbvjwes6oist7iwnefwm", + "digest": "eq7doe46obnrz37la5oczwie53pzsaf6", "tests": [ { "inputs": { @@ -73,7 +73,7 @@ "reference": "${ref_fasta}", "reference_index": "${ref_index}", "gff": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz", - "lof_lookup": "${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt", + "lof_lookup": "${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/lof.gnomadv4p1.lookup", "clinvar_lookup": "${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt", "slivar_js": "${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/slivar-functions.v0.2.8.js", "gnotate_files": [ @@ -105,7 +105,7 @@ }, "output_tests": { "filtered_vcf": { - "value": "${resources_file_path}/slivar_small_variant/output/HG002-trio.GRCh38.small_variants.norm.slivar.vcf.gz", + "value": "${resources_file_path}/slivar_small_variant/output/new_lof/HG002-trio.GRCh38.small_variants.norm.slivar.vcf.gz", "test_tasks": [ "compare_file_basename", "vcftools_validator", @@ -113,7 +113,7 @@ ] }, "compound_het_vcf": { - "value": "${resources_file_path}/slivar_small_variant/output/HG002-trio.GRCh38.small_variants.norm.slivar.compound_hets.vcf.gz", + "value": "${resources_file_path}/slivar_small_variant/output/new_lof/HG002-trio.GRCh38.small_variants.norm.slivar.compound_hets.vcf.gz", "test_tasks": [ "compare_file_basename", "vcftools_validator", @@ -121,7 +121,7 @@ ] }, "filtered_tsv": { - "value": 
"${resources_file_path}/slivar_small_variant/output/HG002-trio.GRCh38.small_variants.norm.slivar.tsv", + "value": "${resources_file_path}/slivar_small_variant/output/new_lof/HG002-trio.GRCh38.small_variants.norm.slivar.tsv", "test_tasks": [ "compare_file_basename", "check_tab_delimited", @@ -129,7 +129,7 @@ ] }, "compound_het_tsv": { - "value": "${resources_file_path}/slivar_small_variant/output/HG002-trio.GRCh38.small_variants.norm.slivar.compound_hets.tsv", + "value": "${resources_file_path}/slivar_small_variant/output/new_lof/HG002-trio.GRCh38.small_variants.norm.slivar.compound_hets.tsv", "test_tasks": [ "compare_file_basename", "check_tab_delimited", @@ -199,7 +199,7 @@ }, "slivar_svpack_tsv": { "key": "slivar_svpack_tsv", - "digest": "mo3z272srlp4old7xfa3cakvycayrfqg", + "digest": "tpp6xdwvkn22boyrs72t75y4fyrsjmv2", "tests": [ { "inputs": { @@ -230,14 +230,14 @@ "1" ] ], - "lof_lookup": "${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/lof_lookup.v2.1.1.txt", + "lof_lookup": "${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/lof.gnomadv4p1.lookup", "clinvar_lookup": "${resources_file_path}/hifi-wdl-resources-v2.0.0/slivar/clinvar_gene_desc.20240624T165443.txt", "phrank_lookup": "${resources_file_path}/slivar_svpack_tsv/input/HG002-trio_phrank.tsv", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { "svpack_tsv": { - "value": "${resources_file_path}/slivar_svpack_tsv/output/sawfish/HG002.HG002-trio.joint.GRCh38.structural_variants.phased.svpack.tsv", + "value": "${resources_file_path}/slivar_svpack_tsv/output/sawfish_new_lof/HG002.HG002-trio.joint.GRCh38.structural_variants.phased.svpack.tsv", "test_tasks": [ "compare_file_basename", "check_tab_delimited", diff --git a/workflows/tertiary/tertiary.wdl b/workflows/tertiary/tertiary.wdl index 7cf965fe..71241495 100644 --- a/workflows/tertiary/tertiary.wdl +++ b/workflows/tertiary/tertiary.wdl @@ -341,6 +341,11 @@ task slivar_small_variant { command <<< set -euo pipefail + cut 
-f1,2 ~{lof_lookup} > pli.lookup + cut -f1,3 ~{lof_lookup} > oe.lookup + cut -f1,4 ~{lof_lookup} > loeuf.lookup + cut -f1,5 ~{lof_lookup} > loeuf_decile.lookup + bcftools --version bcftools norm \ @@ -408,13 +413,16 @@ task slivar_small_variant { --sample-field dominant \ --sample-field recessive \ --csq-field BCSQ \ - --gene-description ~{lof_lookup} \ + --gene-description pli.lookup \ + --gene-description oe.lookup \ + --gene-description loeuf.lookup \ + --gene-description loeuf_decile.lookup \ --gene-description ~{clinvar_lookup} \ --gene-description ~{phrank_lookup} \ --ped ~{write_tsv(sample_metadata)} \ --out /dev/stdout \ ~{vcf_basename}.norm.slivar.vcf.gz \ - | sed '1 s/gene_description_1/lof/;s/gene_description_2/clinvar/;s/gene_description_3/phrank/;' \ + | sed '1 s/gene_description_1/pLI/;s/gene_description_2/oe.lof/;s/gene_description_3/LOEUF/;s/gene_description_4/LOEUF_decile/;s/gene_description_5/clinvar/;s/gene_description_6/phrank/;' \ > ~{vcf_basename}.norm.slivar.tsv slivar tsv \ @@ -422,13 +430,16 @@ task slivar_small_variant { --sample-field slivar_comphet \ --info-field slivar_comphet \ --csq-field BCSQ \ - --gene-description ~{lof_lookup} \ + --gene-description pli.lookup \ + --gene-description oe.lookup \ + --gene-description loeuf.lookup \ + --gene-description loeuf_decile.lookup \ --gene-description ~{clinvar_lookup} \ --gene-description ~{phrank_lookup} \ --ped ~{write_tsv(sample_metadata)} \ --out /dev/stdout \ ~{vcf_basename}.norm.slivar.compound_hets.vcf.gz \ - | sed '1 s/gene_description_1/lof/;s/gene_description_2/clinvar/;s/gene_description_3/phrank/;' \ + | sed '1 s/gene_description_1/pLI/;s/gene_description_2/oe.lof/;s/gene_description_3/LOEUF/;s/gene_description_4/LOEUF_decile/;s/gene_description_5/clinvar/;s/gene_description_6/phrank/;' \ > ~{vcf_basename}.norm.slivar.compound_hets.tsv >>> @@ -611,6 +622,11 @@ task slivar_svpack_tsv { command <<< set -euo pipefail + cut -f1,2 ~{lof_lookup} > pli.lookup + cut -f1,3 ~{lof_lookup} > 
oe.lookup + cut -f1,4 ~{lof_lookup} > loeuf.lookup + cut -f1,5 ~{lof_lookup} > loeuf_decile.lookup + # slivar has no version option slivar expr 2>&1 | grep -Eo 'slivar version: [0-9.]+ [0-9a-f]+' @@ -619,13 +635,16 @@ task slivar_svpack_tsv { --sample-field hetalt \ --sample-field homalt \ --csq-field BCSQ \ - --gene-description ~{lof_lookup} \ + --gene-description pli.lookup \ + --gene-description oe.lookup \ + --gene-description loeuf.lookup \ + --gene-description loeuf_decile.lookup \ --gene-description ~{clinvar_lookup} \ --gene-description ~{phrank_lookup} \ --ped ~{write_tsv(sample_metadata)} \ --out /dev/stdout \ ~{filtered_vcf} \ - | sed '1 s/gene_description_1/lof/;s/gene_description_2/clinvar/;s/gene_description_3/phrank/;' \ + | sed '1 s/gene_description_1/pLI/;s/gene_description_2/oe.lof/;s/gene_description_3/LOEUF/;s/gene_description_4/LOEUF_decile/;s/gene_description_5/clinvar/;s/gene_description_6/phrank/;' \ > ~{filtered_vcf_basename}.tsv >>> From 615036a41d088020dfe1f731b9efdc58aa681fc2 Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Fri, 27 Jun 2025 13:23:11 -0400 Subject: [PATCH 45/61] bump: mitorsaw-0.2.1 (#228) * bump: update mitorsaw to v0.2.1 --- docs/tools_containers.md | 2 +- image_manifest.txt | 2 +- wdl-ci.config.json | 2 +- workflows/wdl-common | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/tools_containers.md b/docs/tools_containers.md index 0ac8ddef..bb224a7f 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -16,7 +16,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | sawfish |
  • sawfish 2.0.1
  • sawshark 0.2.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/40c2aef86e409f522bc8e06fbbdff30edc2110a0/docker/sawfish) | [sawfish@sha256:dc7a955175967b4a5c4be9c438243332bc22f41e8240bb3b5c84699741248a44](https://quay.io/repository/pacbio/sawfish/manifest/sha256:dc7a955175967b4a5c4be9c438243332bc22f41e8240bb3b5c84699741248a44) | | trgt |
  • trgt 3.0.0
  • `/opt/scripts/check_trgt_coverage.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3c5ee05da7043bd03bd80959c3dd025e25468070/docker/trgt) | [trgt@sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284](https://quay.io/repository/pacbio/trgt/manifest/sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284) | | hiphase |
  • hiphase 1.5.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/69039c010ada793bab4d38a9bd17a30562b9b671/docker/hiphase) | [hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482](https://quay.io/repository/pacbio/hiphase/manifest/sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482) | -| mitorsaw |
  • mitorsaw 0.2.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/97c698b078b413a5718bf2721e4e10daf5ae4d68/docker/mitorsaw) | [mitorsaw@sha256:87c49411ab8fc82e2bd0ea9177c206a0f1b9dc972c5b5e64c534585581e10fe0](https://quay.io/repository/pacbio/mitorsaw/manifest/sha256:87c49411ab8fc82e2bd0ea9177c206a0f1b9dc972c5b5e64c534585581e10fe0) | +| mitorsaw |
  • mitorsaw 0.2.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/36b0935f4d63ff5d51e953263d0dc2aaf2cddbfa/docker/mitorsaw) | [mitorsaw@sha256:1509dbd7b0a815c7ceb3af52fddc93ef3544ae1858483139450fa0285f8dbe0c](https://quay.io/repository/pacbio/mitorsaw/manifest/sha256:1509dbd7b0a815c7ceb3af52fddc93ef3544ae1858483139450fa0285f8dbe0c) | | paraphase |
  • paraphase 3.3.2
  • minimap 2.28
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/28c84c386e28ce0a46587e4f1bf85db824bb4634/docker/paraphase) | [paraphase@sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920](https://quay.io/repository/pacbio/paraphase/manifest/sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920) | | pbstarphase |
  • pbstarphase 1.4.1
  • Database 20250515
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/c5166b28e43f36a381450ba479e2e34a841bb922/docker/pbstarphase) | [pbstarphase@sha256:7daaad3b617a3b8b5914ab0893ee7cc545fd2025a35619211a5b8e25e4c36ac4](https://quay.io/repository/pacbio/pbstarphase/manifest/sha256:7daaad3b617a3b8b5914ab0893ee7cc545fd2025a35619211a5b8e25e4c36ac4) | | pb-cpg-tools |
  • pb-cpg-tools 3.0.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/330b99b79f32b2d2598e812779f3c64460739e6c/docker/pb-cpg-tools) | [pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c](https://quay.io/repository/pacbio/pb-cpg-tools/manifest/sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c) | diff --git a/image_manifest.txt b/image_manifest.txt index 6b99f6b4..09ddae07 100644 --- a/image_manifest.txt +++ b/image_manifest.txt @@ -1,6 +1,6 @@ quay.io/pacbio/glnexus@sha256:ce6fecf59dddc6089a8100b31c29c1e6ed50a0cf123da9f2bc589ee4b0c69c8e quay.io/pacbio/hiphase@sha256:47fe7d42aea6b1b2e6d3c7401bc35a184464c3f647473d0525c00f3c968b40ad -quay.io/pacbio/mitorsaw@sha256:87c49411ab8fc82e2bd0ea9177c206a0f1b9dc972c5b5e64c534585581e10fe0 +quay.io/pacbio/mitorsaw@sha256:1509dbd7b0a815c7ceb3af52fddc93ef3544ae1858483139450fa0285f8dbe0c quay.io/pacbio/mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1 quay.io/pacbio/paraphase@sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920 quay.io/pacbio/pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 11150693..617fd479 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -1138,7 +1138,7 @@ "tasks": { "mitorsaw": { "key": "mitorsaw", - "digest": "jxaaawjrrt6zwbrvhkn732nzejadpjja", + "digest": "zs5hcur6jvanxdlx3kiszbrcfc3fl57v", "tests": [ { "inputs": { diff --git a/workflows/wdl-common b/workflows/wdl-common index 973fdba8..4b2efc02 160000 --- a/workflows/wdl-common +++ b/workflows/wdl-common @@ -1 +1 @@ -Subproject commit 973fdba809b58d91b11ef5cfcf8d68c6227509b2 +Subproject commit 4b2efc02ecfb98fd56d10b555822c6e864394b5c From 56c79a827e411a4850a970fc98ee6ebbc2eabb66 Mon Sep 17 00:00:00 2001 From: William Rowell Date: Fri, 27 Jun 2025 14:23:47 -0700 Subject: [PATCH 46/61] Prepare for v3 release. 
- update static resources to v3.0.0 bundle - update to CoLoRSdb v1.2.0 - update ClinVar lookup - update gene constraint lookup to gnomAD v4.1 - update reference/tertiary map templates - update input templates - update image manifest script to pull DeepVariant 1.9.0 - update image manifest - update version numbers in README and WDLs --- GRCh38.ref_map.v3p0p0.template.tsv | 4 +--- GRCh38.tertiary_map.v3p0p0.template.tsv | 4 ++-- README.md | 10 ++++---- .../GRCh38.ref_map.v3p0p0.aws.tsv | 4 +--- .../GRCh38.tertiary_map.v3p0p0.aws.tsv | 4 ++-- .../family.healthomics.inputs.json | 4 ++-- .../singleton.healthomics.inputs.json | 4 ++-- .../azure/GRCh38.ref_map.v3p0p0.azure.tsv | 4 +--- .../GRCh38.tertiary_map.v3p0p0.azure.tsv | 14 +++++------ backends/azure/family.azure.inputs.json | 4 ++-- backends/azure/singleton.azure.inputs.json | 4 ++-- backends/gcp/GRCh38.ref_map.v3p0p0.gcp.tsv | 4 +--- .../gcp/GRCh38.tertiary_map.v3p0p0.gcp.tsv | 4 ++-- backends/gcp/family.gcp.inputs.json | 4 ++-- backends/gcp/singleton.gcp.inputs.json | 4 ++-- backends/hpc/GRCh38.ref_map.v3p0p0.hpc.tsv | 4 +--- .../hpc/GRCh38.tertiary_map.v3p0p0.hpc.tsv | 4 ++-- backends/hpc/family.hpc.inputs.json | 4 ++-- backends/hpc/singleton.hpc.inputs.json | 4 ++-- docs/backend-hpc.md | 8 +++---- docs/family.md | 2 ++ docs/ref_map.md | 23 ------------------- docs/singleton.md | 2 ++ image_manifest.txt | 2 +- scripts/create_image_manifest.sh | 2 +- workflows/family.wdl | 2 +- workflows/singleton.wdl | 4 ++-- 27 files changed, 54 insertions(+), 83 deletions(-) diff --git a/GRCh38.ref_map.v3p0p0.template.tsv b/GRCh38.ref_map.v3p0p0.template.tsv index 2d031dea..0551a94a 100644 --- a/GRCh38.ref_map.v3p0p0.template.tsv +++ b/GRCh38.ref_map.v3p0p0.template.tsv @@ -1,9 +1,7 @@ name GRCh38 fasta /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta fasta_index /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai -pbsv_splits 
/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json -pbsv_tandem_repeat_bed /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed -trgt_tandem_repeat_bed /hifi-wdl-resources-v3.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed +trgt_tandem_repeat_bed /hifi-wdl-resources-v3.0.0/GRCh38/trgt/adotto_strchive_20250626.hg38.bed.gz sawfish_exclude_bed /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz sawfish_exclude_bed_index /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz.tbi sawfish_expected_bed_male /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XY.bed diff --git a/GRCh38.tertiary_map.v3p0p0.template.tsv b/GRCh38.tertiary_map.v3p0p0.template.tsv index 203a04bf..c66c7e4d 100644 --- a/GRCh38.tertiary_map.v3p0p0.template.tsv +++ b/GRCh38.tertiary_map.v3p0p0.template.tsv @@ -1,7 +1,7 @@ slivar_js /hifi-wdl-resources-v3.0.0/slivar/slivar-functions.v0.2.8.js ensembl_gff /hifi-wdl-resources-v3.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz -lof_lookup /hifi-wdl-resources-v3.0.0/slivar/lof_lookup.v2.1.1.txt -clinvar_lookup /hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20240624T165443.txt +lof_lookup /hifi-wdl-resources-v3.0.0/slivar/lof.gnomadv4p1.lookup +clinvar_lookup /hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20250618T144412.txt slivar_gnotate_files /hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip slivar_gnotate_prefixes gnomad,colors slivar_max_af 0.03 diff --git a/README.md b/README.md index 9fd07700..aaaa49e3 100644 --- a/README.md +++ b/README.md @@ -24,18 +24,18 @@ Both workflows are designed to analyze human PacBio whole genome sequencing (WGS This is an actively developed workflow with multiple versioned releases, and we make use of git submodules for common tasks that are shared by 
multiple workflows. There are two ways to ensure you are using a supported release of the workflow and ensure that the submodules are correctly initialized: -1) Download the release zips directly from a [supported release](https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/tag/v3.0.0-alpha1): +1) Download the release zips directly from a [supported release](https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/tag/v3.0.0-alpha2): ```bash - wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.0-alpha1/hifi-human-wgs-singleton.zip - wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.0-alpha1/hifi-human-wgs-family.zip + wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.0-alpha2/hifi-human-wgs-singleton.zip + wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.0-alpha2/hifi-human-wgs-family.zip ``` 2) Clone the repository and initialize the submodules: ```bash git clone \ - --depth 1 --branch v3.0.0-alpha1 \ + --depth 1 --branch v3.0.0-alpha2 \ --recursive \ https://github.com/PacificBiosciences/HiFi-human-WGS-WDL.git ``` @@ -127,7 +127,7 @@ At a high level, we have two types of inputs files: The resource bundle containing the GRCh38 reference and other files used in this workflow can be downloaded from Zenodo: -[10.5281/zenodo.14908106](https://zenodo.org/records/14908106) +[10.5281/zenodo.15750792](https://zenodo.org/records/15750792) # Tool versions and Docker images diff --git a/backends/aws-healthomics/GRCh38.ref_map.v3p0p0.aws.tsv b/backends/aws-healthomics/GRCh38.ref_map.v3p0p0.aws.tsv index e90e7331..dcecb60c 100644 --- a/backends/aws-healthomics/GRCh38.ref_map.v3p0p0.aws.tsv +++ b/backends/aws-healthomics/GRCh38.ref_map.v3p0p0.aws.tsv @@ -1,9 +1,7 @@ name GRCh38 fasta s3:///hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta fasta_index
s3:///hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai -pbsv_splits s3:///hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json -pbsv_tandem_repeat_bed s3:///hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed -trgt_tandem_repeat_bed s3:///hifi-wdl-resources-v3.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed +trgt_tandem_repeat_bed s3:///hifi-wdl-resources-v3.0.0/GRCh38/trgt/adotto_strchive_20250626.hg38.bed.gz sawfish_exclude_bed s3:///hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz sawfish_exclude_bed_index s3:///hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz.tbi sawfish_expected_bed_male s3:///hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XY.bed diff --git a/backends/aws-healthomics/GRCh38.tertiary_map.v3p0p0.aws.tsv b/backends/aws-healthomics/GRCh38.tertiary_map.v3p0p0.aws.tsv index 5f5af304..08f84222 100644 --- a/backends/aws-healthomics/GRCh38.tertiary_map.v3p0p0.aws.tsv +++ b/backends/aws-healthomics/GRCh38.tertiary_map.v3p0p0.aws.tsv @@ -1,7 +1,7 @@ slivar_js s3:///hifi-wdl-resources-v3.0.0/slivar/slivar-functions.v0.2.8.js ensembl_gff s3:///hifi-wdl-resources-v3.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz -lof_lookup s3:///hifi-wdl-resources-v3.0.0/slivar/lof_lookup.v2.1.1.txt -clinvar_lookup s3:///hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20240624T165443.txt +lof_lookup s3:///hifi-wdl-resources-v3.0.0/slivar/lof.gnomadv4p1.lookup +clinvar_lookup s3:///hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20250618T144412.txt slivar_gnotate_files s3:///hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,s3:///hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip slivar_gnotate_prefixes gnomad,colors slivar_max_af 0.03 diff --git a/backends/aws-healthomics/family.healthomics.inputs.json 
b/backends/aws-healthomics/family.healthomics.inputs.json index 8df91c16..d728bb12 100644 --- a/backends/aws-healthomics/family.healthomics.inputs.json +++ b/backends/aws-healthomics/family.healthomics.inputs.json @@ -15,8 +15,8 @@ ] }, "humanwgs_family.phenotypes": "String? (optional)", - "humanwgs_family.ref_map_file": "s3:///GRCh38.ref_map.v2p0p0.aws.tsv", - "humanwgs_family.tertiary_map_file": "s3:///GRCh38.tertiary_map.v2p0p0.aws.tsv", + "humanwgs_family.ref_map_file": "s3:///GRCh38.ref_map.v3p0p0.aws.tsv", + "humanwgs_family.tertiary_map_file": "s3:///GRCh38.tertiary_map.v3p0p0.aws.tsv", "humanwgs_family.backend": "AWS-HealthOmics", "humanwgs_family.container_registry": "String", "humanwgs_family.preemptible": true diff --git a/backends/aws-healthomics/singleton.healthomics.inputs.json b/backends/aws-healthomics/singleton.healthomics.inputs.json index c852db3e..81254e47 100644 --- a/backends/aws-healthomics/singleton.healthomics.inputs.json +++ b/backends/aws-healthomics/singleton.healthomics.inputs.json @@ -5,8 +5,8 @@ "File" ], "humanwgs_singleton.phenotypes": "String? 
(optional)", - "humanwgs_singleton.ref_map_file": "s3:///GRCh38.ref_map.v2p0p0.aws.tsv", - "humanwgs_singleton.tertiary_map_file": "s3:///GRCh38.tertiary_map.v2p0p0.aws.tsv", + "humanwgs_singleton.ref_map_file": "s3:///GRCh38.ref_map.v3p0p0.aws.tsv", + "humanwgs_singleton.tertiary_map_file": "s3:///GRCh38.tertiary_map.v3p0p0.aws.tsv", "humanwgs_singleton.backend": "AWS-HealthOmics", "humanwgs_singleton.container_registry": "String", "humanwgs_singleton.preemptible": true diff --git a/backends/azure/GRCh38.ref_map.v3p0p0.azure.tsv b/backends/azure/GRCh38.ref_map.v3p0p0.azure.tsv index 19183351..cf4863a8 100644 --- a/backends/azure/GRCh38.ref_map.v3p0p0.azure.tsv +++ b/backends/azure/GRCh38.ref_map.v3p0p0.azure.tsv @@ -1,9 +1,7 @@ name GRCh38 fasta https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta fasta_index https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai -pbsv_splits https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json -pbsv_tandem_repeat_bed https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed -trgt_tandem_repeat_bed https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed +trgt_tandem_repeat_bed https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/trgt/adotto_strchive_20250626.hg38.bed.gz sawfish_exclude_bed https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz sawfish_exclude_bed_index 
https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz.tbi sawfish_expected_bed_male https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XY.bed diff --git a/backends/azure/GRCh38.tertiary_map.v3p0p0.azure.tsv b/backends/azure/GRCh38.tertiary_map.v3p0p0.azure.tsv index e4fe31bb..08558fd8 100644 --- a/backends/azure/GRCh38.tertiary_map.v3p0p0.azure.tsv +++ b/backends/azure/GRCh38.tertiary_map.v3p0p0.azure.tsv @@ -1,12 +1,12 @@ -slivar_js /datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/slivar/slivar-functions.v0.2.8.js -ensembl_gff /datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz -lof_lookup /datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/slivar/lof_lookup.v2.1.1.txt -clinvar_lookup /datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20240624T165443.txt -slivar_gnotate_files /datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,/datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip +slivar_js https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/slivar/slivar-functions.v0.2.8.js +ensembl_gff https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz +lof_lookup https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/slivar/lof.gnomadv4p1.lookup +clinvar_lookup https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20250618T144412.txt +slivar_gnotate_files 
https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip slivar_gnotate_prefixes gnomad,colors slivar_max_af 0.03 slivar_max_nhomalt 4 slivar_max_ac 4 slivar_min_gq 5 -svpack_pop_vcfs /datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,/datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz -svpack_pop_vcf_indices /datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,/datasetpbrarediseases/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi +svpack_pop_vcfs https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz,https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz +svpack_pop_vcf_indices https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/gnomad.v4.1.sv.sites.pass.vcf.gz.tbi,https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38/sv_pop_vcfs/CoLoRSdb.GRCh38.v1.2.0.pbsv.jasmine.vcf.gz.tbi diff --git a/backends/azure/family.azure.inputs.json b/backends/azure/family.azure.inputs.json index d0f4841a..f668d98b 100644 --- a/backends/azure/family.azure.inputs.json +++ b/backends/azure/family.azure.inputs.json @@ -15,8 +15,8 @@ ] }, "humanwgs_family.phenotypes": "String? 
(optional)", - "humanwgs_family.ref_map_file": "https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38.ref_map.v2p0p0.azure.tsv", - "humanwgs_family.tertiary_map_file": "https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38.tertiary_map.v2p0p0.azure.tsv", + "humanwgs_family.ref_map_file": "https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38.ref_map.v3p0p0.azure.tsv", + "humanwgs_family.tertiary_map_file": "https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38.tertiary_map.v3p0p0.azure.tsv", "humanwgs_family.backend": "Azure", "humanwgs_family.preemptible": "Boolean" } \ No newline at end of file diff --git a/backends/azure/singleton.azure.inputs.json b/backends/azure/singleton.azure.inputs.json index 91fa25b9..14e14c2f 100644 --- a/backends/azure/singleton.azure.inputs.json +++ b/backends/azure/singleton.azure.inputs.json @@ -5,8 +5,8 @@ "File" ], "humanwgs_singleton.phenotypes": "String? 
(optional)", - "humanwgs_singleton.ref_map_file": "https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38.ref_map.v2p0p0.azure.tsv", - "humanwgs_singleton.tertiary_map_file": "https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v2.0.0/GRCh38.tertiary_map.v2p0p0.azure.tsv", + "humanwgs_singleton.ref_map_file": "https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38.ref_map.v3p0p0.azure.tsv", + "humanwgs_singleton.tertiary_map_file": "https://datasetpbrarediseases.blob.core.windows.net/dataset/hifi-wdl-resources-v3.0.0/GRCh38.tertiary_map.v3p0p0.azure.tsv", "humanwgs_singleton.backend": "Azure", "humanwgs_singleton.preemptible": "Boolean" } \ No newline at end of file diff --git a/backends/gcp/GRCh38.ref_map.v3p0p0.gcp.tsv b/backends/gcp/GRCh38.ref_map.v3p0p0.gcp.tsv index 0cfed4f4..d135ca6f 100644 --- a/backends/gcp/GRCh38.ref_map.v3p0p0.gcp.tsv +++ b/backends/gcp/GRCh38.ref_map.v3p0p0.gcp.tsv @@ -1,9 +1,7 @@ name GRCh38 fasta gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta fasta_index gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai -pbsv_splits gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json -pbsv_tandem_repeat_bed gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed -trgt_tandem_repeat_bed gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed +trgt_tandem_repeat_bed gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/trgt/adotto_strchive_20250626.hg38.bed.gz sawfish_exclude_bed gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz sawfish_exclude_bed_index gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz.tbi sawfish_expected_bed_male 
gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XY.bed diff --git a/backends/gcp/GRCh38.tertiary_map.v3p0p0.gcp.tsv b/backends/gcp/GRCh38.tertiary_map.v3p0p0.gcp.tsv index 1f71ce01..332bd89f 100644 --- a/backends/gcp/GRCh38.tertiary_map.v3p0p0.gcp.tsv +++ b/backends/gcp/GRCh38.tertiary_map.v3p0p0.gcp.tsv @@ -1,7 +1,7 @@ slivar_js gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/slivar/slivar-functions.v0.2.8.js ensembl_gff gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz -lof_lookup gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/slivar/lof_lookup.v2.1.1.txt -clinvar_lookup gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20240624T165443.txt +lof_lookup gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/slivar/lof.gnomadv4p1.lookup +clinvar_lookup gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20250618T144412.txt slivar_gnotate_files gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip slivar_gnotate_prefixes gnomad,colors slivar_max_af 0.03 diff --git a/backends/gcp/family.gcp.inputs.json b/backends/gcp/family.gcp.inputs.json index 48823812..e26e8fcd 100644 --- a/backends/gcp/family.gcp.inputs.json +++ b/backends/gcp/family.gcp.inputs.json @@ -15,8 +15,8 @@ ] }, "humanwgs_family.phenotypes": "String? 
(optional)", - "humanwgs_family.ref_map_file": "gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38.ref_map.v2p0p0.gcp.tsv", - "humanwgs_family.tertiary_map_file": "gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38.tertiary_map.v2p0p0.gcp.tsv", + "humanwgs_family.ref_map_file": "gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38.ref_map.v3p0p0.gcp.tsv", + "humanwgs_family.tertiary_map_file": "gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38.tertiary_map.v3p0p0.gcp.tsv", "humanwgs_family.backend": "GCP", "humanwgs_family.zones": "String", "humanwgs_family.preemptible": "Boolean" diff --git a/backends/gcp/singleton.gcp.inputs.json b/backends/gcp/singleton.gcp.inputs.json index fe9b8b6d..7a1de84e 100644 --- a/backends/gcp/singleton.gcp.inputs.json +++ b/backends/gcp/singleton.gcp.inputs.json @@ -5,8 +5,8 @@ "File" ], "humanwgs_singleton.phenotypes": "String? (optional)", - "humanwgs_singleton.ref_map_file": "gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38.ref_map.v2p0p0.gcp.tsv", - "humanwgs_singleton.tertiary_map_file": "gs://pacbio-wdl/hifi-wdl-resources-v2.0.0/GRCh38.tertiary_map.v2p0p0.gcp.tsv", + "humanwgs_singleton.ref_map_file": "gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38.ref_map.v3p0p0.gcp.tsv", + "humanwgs_singleton.tertiary_map_file": "gs://pacbio-wdl/hifi-wdl-resources-v3.0.0/GRCh38.tertiary_map.v3p0p0.gcp.tsv", "humanwgs_singleton.backend": "GCP", "humanwgs_singleton.zones": "String", "humanwgs_singleton.preemptible": "Boolean" diff --git a/backends/hpc/GRCh38.ref_map.v3p0p0.hpc.tsv b/backends/hpc/GRCh38.ref_map.v3p0p0.hpc.tsv index 2d031dea..0551a94a 100644 --- a/backends/hpc/GRCh38.ref_map.v3p0p0.hpc.tsv +++ b/backends/hpc/GRCh38.ref_map.v3p0p0.hpc.tsv @@ -1,9 +1,7 @@ name GRCh38 fasta /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta fasta_index /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai -pbsv_splits /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.pbsv_splits.json 
-pbsv_tandem_repeat_bed /hifi-wdl-resources-v3.0.0/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed -trgt_tandem_repeat_bed /hifi-wdl-resources-v3.0.0/GRCh38/trgt/human_GRCh38_no_alt_analysis_set.trgt.v0.3.4.bed +trgt_tandem_repeat_bed /hifi-wdl-resources-v3.0.0/GRCh38/trgt/adotto_strchive_20250626.hg38.bed.gz sawfish_exclude_bed /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz sawfish_exclude_bed_index /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/annotation_and_common_cnv.hg38.bed.gz.tbi sawfish_expected_bed_male /hifi-wdl-resources-v3.0.0/GRCh38/sawfish/expected_cn.hg38.XY.bed diff --git a/backends/hpc/GRCh38.tertiary_map.v3p0p0.hpc.tsv b/backends/hpc/GRCh38.tertiary_map.v3p0p0.hpc.tsv index 35ec9444..383428f5 100644 --- a/backends/hpc/GRCh38.tertiary_map.v3p0p0.hpc.tsv +++ b/backends/hpc/GRCh38.tertiary_map.v3p0p0.hpc.tsv @@ -1,7 +1,7 @@ slivar_js /hifi-wdl-resources-v3.0.0/slivar/slivar-functions.v0.2.8.js ensembl_gff /hifi-wdl-resources-v3.0.0/GRCh38/ensembl.GRCh38.101.reformatted.gff3.gz -lof_lookup /hifi-wdl-resources-v3.0.0/slivar/lof_lookup.v2.1.1.txt -clinvar_lookup /hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20240624T165443.txt +lof_lookup /hifi-wdl-resources-v3.0.0/slivar/lof.gnomadv4p1.lookup +clinvar_lookup /hifi-wdl-resources-v3.0.0/slivar/clinvar_gene_desc.20250618T144412.txt slivar_gnotate_files /hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/gnomad.hg38.v4.1.custom.v1.zip,/hifi-wdl-resources-v3.0.0/GRCh38/slivar_gnotate/CoLoRSdb.GRCh38.v1.2.0.deepvariant.glnexus.zip slivar_gnotate_prefixes gnomad,colors slivar_max_af 0.03 diff --git a/backends/hpc/family.hpc.inputs.json b/backends/hpc/family.hpc.inputs.json index 44a70568..31ef66b8 100644 --- a/backends/hpc/family.hpc.inputs.json +++ b/backends/hpc/family.hpc.inputs.json @@ -15,8 +15,8 @@ ] }, "humanwgs_family.phenotypes": "String? 
(optional)", - "humanwgs_family.ref_map_file": "/dataset/GRCh38.ref_map.v2p0p0.hpc.tsv", - "humanwgs_family.tertiary_map_file": "/dataset/GRCh38.tertiary_map.v2p0p0.hpc.tsv", + "humanwgs_family.ref_map_file": "/dataset/GRCh38.ref_map.v3p0p0.hpc.tsv", + "humanwgs_family.tertiary_map_file": "/dataset/GRCh38.tertiary_map.v3p0p0.hpc.tsv", "humanwgs_family.backend": "HPC", "humanwgs_family.preemptible": true } \ No newline at end of file diff --git a/backends/hpc/singleton.hpc.inputs.json b/backends/hpc/singleton.hpc.inputs.json index 885accf1..ffabbe87 100644 --- a/backends/hpc/singleton.hpc.inputs.json +++ b/backends/hpc/singleton.hpc.inputs.json @@ -5,8 +5,8 @@ "File" ], "humanwgs_singleton.phenotypes": "String? (optional)", - "humanwgs_singleton.ref_map_file": "/dataset/GRCh38.ref_map.v2p0p0.hpc.tsv", - "humanwgs_singleton.tertiary_map_file": "/dataset/GRCh38.tertiary_map.v2p0p0.hpc.tsv", + "humanwgs_singleton.ref_map_file": "/dataset/GRCh38.ref_map.v3p0p0.hpc.tsv", + "humanwgs_singleton.tertiary_map_file": "/dataset/GRCh38.tertiary_map.v3p0p0.hpc.tsv", "humanwgs_singleton.backend": "HPC", "humanwgs_singleton.preemptible": true } \ No newline at end of file diff --git a/docs/backend-hpc.md b/docs/backend-hpc.md index 5ec63247..64e4b50a 100644 --- a/docs/backend-hpc.md +++ b/docs/backend-hpc.md @@ -56,14 +56,14 @@ cromwell run workflows/singleton.wdl --input ## Reference data bundle -[10.5281/zenodo.14908106](https://zenodo.org/records/14908106) +[10.5281/zenodo.15750792](https://zenodo.org/records/15750792) -Reference data is hosted on Zenodo at [10.5281/zenodo.14908106](https://zenodo.org/record/14908106). Download the reference data bundle and extract it to a location on your HPC, then update the input template file with the path to the reference data. +Reference data is hosted on Zenodo at [10.5281/zenodo.15750792](https://zenodo.org/record/15750792). 
Download the reference data bundle and extract it to a location on your HPC, then update the input template file with the path to the reference data. ```bash ## download the reference data bundle -wget https://zenodo.org/record/14908106/files/hifi-wdl-resources-v2.1.0.tar +wget https://zenodo.org/record/15750792/files/hifi-wdl-resources-v3.0.0.tar ## extract the reference data bundle and rename as dataset -tar -xvf hifi-wdl-resources-v2.1.0.tar +tar -xvf hifi-wdl-resources-v3.0.0.tar ``` diff --git a/docs/family.md b/docs/family.md index 793a00f6..d2e532b4 100644 --- a/docs/family.md +++ b/docs/family.md @@ -149,6 +149,8 @@ The `Sample` struct contains sample specific data and metadata. The struct has t | ---- | ---- | ----------- | ----- | | String | workflow_name | Workflow name | | | String | workflow_version | Workflow version | | +| Array\[String\] | msg | Messages from the workflow | | +| File | msg_file | File containing messages from the workflow | | | Array\[String\] | sample_ids | Sample IDs | | | File | stats_file | Table of summary statistics | | | Array\[File\] | bam_stats | BAM stats | Per-read length and read-quality | diff --git a/docs/ref_map.md b/docs/ref_map.md index c260bde8..09e62be5 100644 --- a/docs/ref_map.md +++ b/docs/ref_map.md @@ -14,26 +14,3 @@ | File | sawfish_expected_bed_female | Expected allosome copy number BED for XX samples | [link](https://github.com/PacificBiosciences/sawfish/blob/main/docs/user_guide.md#expected-copy-number) | | File | pharmcat_positions_vcf | PharmCAT positions VCF | | | File | pharmcat_positions_vcf_index | PharmCAT positions VCF index | | - -## pbsv_splits - -The `pbsv_splits` file is a JSON array of arrays of strings. Each inner array contains one or more chromosome names such that each inner array is of roughly equal size in base pairs. The inner arrays are processed in parallel. For example: - -```json -[ - ... 
- [ - "chr10", - "chr11" - ], - [ - "chr12", - "chr13" - ], - [ - "chr14", - "chr15" - ], - ... -] -``` diff --git a/docs/singleton.md b/docs/singleton.md index 9b42435a..b4bcff89 100644 --- a/docs/singleton.md +++ b/docs/singleton.md @@ -106,6 +106,8 @@ flowchart TD | ---- | ---- | ----------- | ----- | | String | workflow_name | Workflow name | | | String | workflow_version | Workflow version | | +| Array\[String\] | msg | Messages from the workflow | | +| File | msg_file | File containing messages from the workflow | | | File | stats_file | Table of summary statistics | | | File | bam_stats | BAM stats | Per-read length and read-quality | | File | read_length_plot | Read length plot | | diff --git a/image_manifest.txt b/image_manifest.txt index 09ddae07..30e9d2f6 100644 --- a/image_manifest.txt +++ b/image_manifest.txt @@ -1,5 +1,5 @@ quay.io/pacbio/glnexus@sha256:ce6fecf59dddc6089a8100b31c29c1e6ed50a0cf123da9f2bc589ee4b0c69c8e -quay.io/pacbio/hiphase@sha256:47fe7d42aea6b1b2e6d3c7401bc35a184464c3f647473d0525c00f3c968b40ad +quay.io/pacbio/hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482 quay.io/pacbio/mitorsaw@sha256:1509dbd7b0a815c7ceb3af52fddc93ef3544ae1858483139450fa0285f8dbe0c quay.io/pacbio/mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1 quay.io/pacbio/paraphase@sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920 diff --git a/scripts/create_image_manifest.sh b/scripts/create_image_manifest.sh index 66f23975..b367e455 100644 --- a/scripts/create_image_manifest.sh +++ b/scripts/create_image_manifest.sh @@ -10,7 +10,7 @@ grep '@sha' -h -r workflows/ \ | sort --unique \ > ./image_manifest.txt -deepvariant_version=1.8.0 +deepvariant_version=1.9.0 echo "google/deepvariant:${deepvariant_version}" >> ./image_manifest.txt echo "google/deepvariant:${deepvariant_version}-gpu" >> ./image_manifest.txt diff --git a/workflows/family.wdl b/workflows/family.wdl index b1b6a60b..4d70de12 100644 
--- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -399,6 +399,6 @@ workflow humanwgs_family { # workflow metadata String workflow_name = "humanwgs_family" - String workflow_version = "v3.0.0-alpha1" + if defined(debug_version) then "~{"-" + debug_version}" else "" + String workflow_version = "v3.0.0-alpha2" + if defined(debug_version) then "~{"-" + debug_version}" else "" } } \ No newline at end of file diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl index e204cf5b..691e3e36 100644 --- a/workflows/singleton.wdl +++ b/workflows/singleton.wdl @@ -334,7 +334,7 @@ workflow humanwgs_singleton { ) # workflow metadata - String workflow_name = "humanwgs_family" - String workflow_version = "v3.0.0-alpha1" + if defined(debug_version) then "~{"-" + debug_version}" else "" + String workflow_name = "humanwgs_singleton" + String workflow_version = "v3.0.0-alpha2" + if defined(debug_version) then "~{"-" + debug_version}" else "" } } From d4303a04441d0c36ce1ee42d911d4df7f041428e Mon Sep 17 00:00:00 2001 From: William Rowell Date: Fri, 27 Jun 2025 14:24:29 -0700 Subject: [PATCH 47/61] Remove v1 from dockstore.yml --- .dockstore.yml | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/.dockstore.yml b/.dockstore.yml index 5395b64b..611f5a6a 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -1,20 +1,6 @@ version: 1.2 workflows: - - name: HiFi-human-WGS-WDL - subclass: WDL - primaryDescriptorPath: /workflows/main.wdl - readMePath: /README.md - authors: - - orcid: 0000-0001-5921-2022 # Juniper Lake - - orcid: 0000-0001-7628-5645 # Gregory Concepcion - - orcid: 0000-0003-1183-0432 # Aaron Wenger - - orcid: 0000-0002-7422-1194 # William Rowell - - orcid: 0000-0002-5507-0896 # Heather Ward - - orcid: 0009-0001-0205-4614 # Karen Fang - latestTagAsDefault: False - filters: - tags: [ /v1\..*dockstore/ ] - name: HiFi-human-WGS-WDL-singleton subclass: WDL primaryDescriptorPath: /workflows/singleton.wdl From 2a75d4ad4f2316853833734e916dc2dcd26d8e8c Mon Sep 17 
00:00:00 2001 From: William Rowell Date: Sat, 28 Jun 2025 12:56:56 -0700 Subject: [PATCH 48/61] fix: Modify coverage_targets to accept gzipped TRGT catalog. --- workflows/wdl-common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/wdl-common b/workflows/wdl-common index 4b2efc02..0c6b25a4 160000 --- a/workflows/wdl-common +++ b/workflows/wdl-common @@ -1 +1 @@ -Subproject commit 4b2efc02ecfb98fd56d10b555822c6e864394b5c +Subproject commit 0c6b25a43c4fe511295036be22c97ef39d425a90 From 760c822978ea6162a4e38a1198b3e2b756367ace Mon Sep 17 00:00:00 2001 From: William Rowell Date: Mon, 7 Jul 2025 17:25:04 -0700 Subject: [PATCH 49/61] Update to v3.0.0 --- .gitignore | 4 +++- README.md | 8 ++++---- docs/backends.md | 2 +- workflows/upstream/upstream.wdl | 12 ++++++------ workflows/wdl-common | 2 +- 5 files changed, 15 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index ec1f41d8..076e2057 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,6 @@ tests miniwdl_singularity_cache miniwdl_download_cache miniwdl_call_cache -miniwdl_test_output \ No newline at end of file +miniwdl_test_output + +.venv \ No newline at end of file diff --git a/README.md b/README.md index aaaa49e3..468f472c 100644 --- a/README.md +++ b/README.md @@ -24,18 +24,18 @@ Both workflows are designed to analyze human PacBio whole genome sequencing (WGS This is an actively developed workflow with multiple versioned releases, and we make use of git submodules for common tasks that are shared by multiple workflows. 
There are two ways to ensure you are using a supported release of the workflow and ensure that the submodules are correctly initialized: -1) Download the release zips directly from a [supported release](https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/tag/v3.0.0-alpha2): +1) Download the release zips directly from a [supported release](https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/tag/v3.0.0): ```bash - wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.0-alpha2/hifi-human-wgs-singleton.zip - wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.0-alpha2/hifi-human-wgs-family.zip + wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.0/hifi-human-wgs-singleton.zip + wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.0/hifi-human-wgs-family.zip ``` 2) Clone the repository and initialize the submodules: ```bash git clone \ - --depth 1 --branch v3.0.0-alpha2 \ + --depth 1 --branch v3.0.0 \ --recursive \ https://github.com/PacificBiosciences/HiFi-human-WGS-WDL.git ``` diff --git a/docs/backends.md b/docs/backends.md index 1819695a..393f3a36 100644 --- a/docs/backends.md +++ b/docs/backends.md @@ -1,3 +1,3 @@ - [hpc](./backend-hpc.md) - [azure](./backend-azure.md) -- [gcp](./backend-gcp.md) \ No newline at end of file +- [gcp](./backend-gcp.md) diff --git a/workflows/upstream/upstream.wdl b/workflows/upstream/upstream.wdl index 11d933b1..577f046b 100644 --- a/workflows/upstream/upstream.wdl +++ b/workflows/upstream/upstream.wdl @@ -67,8 +67,8 @@ workflow upstream { sample_id = sample_id, bam = hifi_read_bam, max_reads_per_chunk = max_reads_per_alignment_chunk, - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion ref_name = ref_map["name"], 
default_runtime_attributes = default_runtime_attributes } @@ -125,7 +125,7 @@ workflow upstream { ref_index = ref_map["fasta_index"], # !FileCoercion exclude_bed = ref_map["sawfish_exclude_bed"], # !FileCoercion exclude_bed_index = ref_map["sawfish_exclude_bed_index"], # !FileCoercion - expected_male_bed = ref_map["sawfish_expected_bed_male"], # !FileCoercion # TODO: consider renaming the exclude and expected files + expected_male_bed = ref_map["sawfish_expected_bed_male"], # !FileCoercion expected_female_bed = ref_map["sawfish_expected_bed_female"], # !FileCoercion small_variant_vcf = deepvariant.vcf, small_variant_vcf_index = deepvariant.vcf_index, @@ -139,9 +139,9 @@ workflow upstream { sex = mosdepth.inferred_sex, aligned_bam = aligned_bam_data, aligned_bam_index = aligned_bam_index, - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion - trgt_bed = ref_map["trgt_tandem_repeat_bed"], # !FileCoercion + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + trgt_bed = ref_map["trgt_tandem_repeat_bed"], # !FileCoercion out_prefix = "~{sample_id}.~{ref_map['name']}", runtime_attributes = default_runtime_attributes } diff --git a/workflows/wdl-common b/workflows/wdl-common index 0c6b25a4..dd64e2a3 160000 --- a/workflows/wdl-common +++ b/workflows/wdl-common @@ -1 +1 @@ -Subproject commit 0c6b25a43c4fe511295036be22c97ef39d425a90 +Subproject commit dd64e2a3f20b4a0912cf4f2d1b82ed2b5a1c8fb9 From fe987837d9593a2e932d7def11340cc71ff06e7a Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Tue, 3 Jun 2025 14:16:53 -0400 Subject: [PATCH 50/61] v2.1.3 * fix: Increase memory for samtools merge to 16GB. * updated version number * fixed singleton workflow name. 
--- workflows/family.wdl | 2 +- workflows/singleton.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/family.wdl b/workflows/family.wdl index 4d70de12..7ad2da0d 100644 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -399,6 +399,6 @@ workflow humanwgs_family { # workflow metadata String workflow_name = "humanwgs_family" - String workflow_version = "v3.0.0-alpha2" + if defined(debug_version) then "~{"-" + debug_version}" else "" + String workflow_version = "v3.0.0" + if defined(debug_version) then "~{"-" + debug_version}" else "" } } \ No newline at end of file diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl index 691e3e36..65837531 100644 --- a/workflows/singleton.wdl +++ b/workflows/singleton.wdl @@ -335,6 +335,6 @@ workflow humanwgs_singleton { # workflow metadata String workflow_name = "humanwgs_singleton" - String workflow_version = "v3.0.0-alpha2" + if defined(debug_version) then "~{"-" + debug_version}" else "" + String workflow_version = "v3.0.0" + if defined(debug_version) then "~{"-" + debug_version}" else "" } } From 82513c7b128015da2ce1c407186589eceae76234 Mon Sep 17 00:00:00 2001 From: William Rowell Date: Mon, 7 Jul 2025 17:35:52 -0700 Subject: [PATCH 51/61] clean up version numbers --- workflows/family.wdl | 2 +- workflows/singleton.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/family.wdl b/workflows/family.wdl index 4d70de12..7ad2da0d 100644 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -399,6 +399,6 @@ workflow humanwgs_family { # workflow metadata String workflow_name = "humanwgs_family" - String workflow_version = "v3.0.0-alpha2" + if defined(debug_version) then "~{"-" + debug_version}" else "" + String workflow_version = "v3.0.0" + if defined(debug_version) then "~{"-" + debug_version}" else "" } } \ No newline at end of file diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl index 691e3e36..65837531 100644 --- 
a/workflows/singleton.wdl +++ b/workflows/singleton.wdl @@ -335,6 +335,6 @@ workflow humanwgs_singleton { # workflow metadata String workflow_name = "humanwgs_singleton" - String workflow_version = "v3.0.0-alpha2" + if defined(debug_version) then "~{"-" + debug_version}" else "" + String workflow_version = "v3.0.0" + if defined(debug_version) then "~{"-" + debug_version}" else "" } } From a85c6020e72b2b08f33209992563c3c6dbe4ae5e Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Fri, 11 Jul 2025 12:41:51 -0400 Subject: [PATCH 52/61] fix: Add pbtk to the image manifest. (#230) * fix: Add pbtk to the image manifest. --- image_manifest.txt | 1 + wdl-ci.config.json | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/image_manifest.txt b/image_manifest.txt index 30e9d2f6..938937c3 100644 --- a/image_manifest.txt +++ b/image_manifest.txt @@ -6,6 +6,7 @@ quay.io/pacbio/paraphase@sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01a quay.io/pacbio/pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c quay.io/pacbio/pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859 quay.io/pacbio/pbstarphase@sha256:7daaad3b617a3b8b5914ab0893ee7cc545fd2025a35619211a5b8e25e4c36ac4 +quay.io/pacbio/pbtk@sha256:67cd438ed9f343f90f058108170ddbff8fb1d9b5c193f4016be42b737ee2e73c quay.io/pacbio/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87 quay.io/pacbio/sawfish@sha256:dc7a955175967b4a5c4be9c438243332bc22f41e8240bb3b5c84699741248a44 quay.io/pacbio/slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 617fd479..8d269fd3 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -1433,7 +1433,7 @@ }, "coverage_dropouts": { "key": "coverage_dropouts", - "digest": "oecyhm3k4zvuuwkcd5x3jll37pwaewwk", + "digest": "rtq5s4pftxqs4d7xkpz3dyyct3a2seyx", "tests": [ { "inputs": { @@ -1774,7 +1774,7 @@ }, 
"split_input_bam": { "key": "split_input_bam", - "digest": "b7xnykjq4l2ecpvku2gxuswdnrnk3xka", + "digest": "uzdcqs4d3i62rs62hwoa5pe65uv7l2cm", "tests": [ { "inputs": { From 5eae69c765e44598aacaea7d5650c894fd6202e4 Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Fri, 11 Jul 2025 16:57:54 -0400 Subject: [PATCH 53/61] fix: GLnexus inputs should be gVCFs, not VCFs (#231) (#232) --- workflows/family.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/family.wdl b/workflows/family.wdl index 7ad2da0d..a47a5ab6 100644 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -145,8 +145,8 @@ workflow humanwgs_family { input: family_id = family.family_id, sample_ids = sample_id, - gvcfs = upstream.small_variant_vcf, - gvcf_indices = upstream.small_variant_vcf_index, + gvcfs = upstream.small_variant_gvcf, + gvcf_indices = upstream.small_variant_gvcf_index, discover_tars = upstream.discover_tar, aligned_bams = upstream.out_bam, aligned_bam_indices = upstream.out_bam_index, From 24c703dcc6ce8d808cd71cc97f24c3a532f02f07 Mon Sep 17 00:00:00 2001 From: William Rowell Date: Fri, 11 Jul 2025 14:02:28 -0700 Subject: [PATCH 54/61] docs: update version numbers for v3.0.1 release --- README.md | 8 ++++---- workflows/family.wdl | 2 +- workflows/singleton.wdl | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 468f472c..f59cc4ac 100644 --- a/README.md +++ b/README.md @@ -24,18 +24,18 @@ Both workflows are designed to analyze human PacBio whole genome sequencing (WGS This is an actively developed workflow with multiple versioned releases, and we make use of git submodules for common tasks that are shared by multiple workflows. 
There are two ways to ensure you are using a supported release of the workflow and ensure that the submodules are correctly initialized: -1) Download the release zips directly from a [supported release](https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/tag/v3.0.0): +1) Download the release zips directly from a [supported release](https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/tag/v3.0.1): ```bash - wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.0/hifi-human-wgs-singleton.zip - wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.0/hifi-human-wgs-family.zip + wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.1/hifi-human-wgs-singleton.zip + wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.1/hifi-human-wgs-family.zip ``` 2) Clone the repository and initialize the submodules: ```bash git clone \ - --depth 1 --branch v3.0.0 \ + --depth 1 --branch v3.0.1 \ --recursive \ https://github.com/PacificBiosciences/HiFi-human-WGS-WDL.git ``` diff --git a/workflows/family.wdl b/workflows/family.wdl index a47a5ab6..a4fe8676 100644 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -399,6 +399,6 @@ workflow humanwgs_family { # workflow metadata String workflow_name = "humanwgs_family" - String workflow_version = "v3.0.0" + if defined(debug_version) then "~{"-" + debug_version}" else "" + String workflow_version = "v3.0.1" + if defined(debug_version) then "~{"-" + debug_version}" else "" } } \ No newline at end of file diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl index 65837531..6aca8331 100644 --- a/workflows/singleton.wdl +++ b/workflows/singleton.wdl @@ -335,6 +335,6 @@ workflow humanwgs_singleton { # workflow metadata String workflow_name = "humanwgs_singleton" - String workflow_version = "v3.0.0" + if defined(debug_version) then "~{"-" + debug_version}" else "" + String 
workflow_version = "v3.0.1" + if defined(debug_version) then "~{"-" + debug_version}" else "" } } From 4d01f004735b79718005841e5a0432ef56e5bd8a Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Thu, 17 Jul 2025 13:36:40 -0400 Subject: [PATCH 55/61] feat: Script to populate miniwdl_singularity_cache with image_manifest.txt. (#236) --- docs/backend-hpc.md | 2 + scripts/populate_miniwdl_singularity_cache.sh | 47 +++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 scripts/populate_miniwdl_singularity_cache.sh diff --git a/docs/backend-hpc.md b/docs/backend-hpc.md index 64e4b50a..9f562765 100644 --- a/docs/backend-hpc.md +++ b/docs/backend-hpc.md @@ -48,6 +48,8 @@ See [the inputs section of the singleton README](./singleton.md#inputs) for more miniwdl run workflows/singleton.wdl --input ``` +If your compute nodes cannot contact the internet, you can use the script at [`./scripts/populate_miniwdl_singularity_cache.sh`](../scripts/populate_miniwdl_singularity_cache.sh) with the image manifest at [`./image_manifest.txt`](../image_manifest.txt) to populate the miniwdl singularity cache with the required images from a login node with internet access. + #### Running via Cromwell ```bash diff --git a/scripts/populate_miniwdl_singularity_cache.sh b/scripts/populate_miniwdl_singularity_cache.sh new file mode 100644 index 00000000..3121c1bb --- /dev/null +++ b/scripts/populate_miniwdl_singularity_cache.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +set -eo pipefail + +USAGE="Given a manifest file with docker images, this script populates the Singularity cache with those images. 
+Usage: $0 <image_manifest_file> <miniwdl_singularity_cache_dir>" + + +# Check if the first argument is -h or --help +if [ "$1" = "-h" ] || [ "$1" = "--help" ]; then + echo -e "${USAGE}" + exit 0 +fi +# Check if at least two arguments are provided +if [ $# -lt 2 ]; then + echo -e "${USAGE}" + exit 1 +fi + +image_manifest_file=$1 +miniwdl_singularity_cache_dir=$2 +# Check if the image manifest file exists and is readable, and if the cache directory exists and is writable +[ -r "${image_manifest_file}" ] || (echo "${image_manifest_file} is not readable." >&2 && exit 1) +if [ ! -d "${miniwdl_singularity_cache_dir}" ]; then + echo "${miniwdl_singularity_cache_dir} does not exist. Creating it now..." + mkdir -p "${miniwdl_singularity_cache_dir}" || (echo "Could not create ${miniwdl_singularity_cache_dir}/." >&2 && exit 1) +fi +[ -w "${miniwdl_singularity_cache_dir}" ] || (echo "${miniwdl_singularity_cache_dir}/ is not writable" >&2 && exit 1) +singularity --version || (echo "singularity is not in path. Please install Singularity to use this script." >&2 && exit 1) + +# manifest file should contain one image per line, with no empty lines +# image lines should be in the format: <image>:<tag> or <image>@sha256:<digest> +# e.g., google/deepvariant:1.9.0 or quay.io/pacbio/some_image@sha256:abc123... +while read -r image; do + if [[ -n "${image}" ]]; then + image_url="docker://${image}" + # miniwdl singularity backend replaces ':' and '/' with '_' in the SIF file name + sif_path="${image_url//:/_}" + sif_path="${sif_path//\//_}" + sif_path="${miniwdl_singularity_cache_dir}/${sif_path}.sif" + if [ -f "${sif_path}" ]; then + echo "Singularity image already exists: ${sif_path}" >&2 + else + echo "Pulling Singularity image: ${image_url}" + singularity pull "${sif_path}" "${image_url}" + fi + fi +done < "${image_manifest_file}" From 1d1764a0043b826d27f7c886e363caf6b005cefc Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Thu, 17 Jul 2025 13:39:30 -0400 Subject: [PATCH 56/61] fix: Add example_tfrecord_tars to disk_size for deepvariant_postprocess_variants.
(#235) * fix: Add example_tfrecord_tars to disk_size for deepvariant_postprocess_variants. --- wdl-ci.config.json | 2 +- workflows/wdl-common | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 8d269fd3..3d803be3 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -1686,7 +1686,7 @@ }, "deepvariant_postprocess_variants": { "key": "deepvariant_postprocess_variants", - "digest": "bpmzthuph6iudzhtwxu2uw5myotxykva", + "digest": "7j2ndq2oubqpprkj3xqp7z32eszuqb44", "tests": [ { "inputs": { diff --git a/workflows/wdl-common b/workflows/wdl-common index dd64e2a3..393b16ba 160000 --- a/workflows/wdl-common +++ b/workflows/wdl-common @@ -1 +1 @@ -Subproject commit dd64e2a3f20b4a0912cf4f2d1b82ed2b5a1c8fb9 +Subproject commit 393b16bacca59a0f503759ad7c2c7b54a63e8e27 From 29de62af3bc38deefe069faa4f316e0829353e1c Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Thu, 24 Jul 2025 17:21:11 -0400 Subject: [PATCH 57/61] bump: Update Sawfish to 2.0.3 and Sawshark to 0.3.0 (#239) * bump: update sawfish to 2.0.3 and sawshark to 0.3.0 --- docs/tools_containers.md | 2 +- image_manifest.txt | 2 +- wdl-ci.config.json | 4 ++-- workflows/wdl-common | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/tools_containers.md b/docs/tools_containers.md index bb224a7f..2673debc 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -13,7 +13,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | pb_wdl_base |
  • htslib 1.20
  • bcftools 1.20
  • samtools 1.20
  • bedtools 2.31.0
  • python3.9
  • numpy 1.24.24
  • pandas 2.0.3
  • matplotlib 3.7.5
  • seaborn 0.13.2
  • pysam 0.22.1
  • vcfpy 0.13.8
  • biopython 1.83
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/6b13cc246dd44e41903d17a660bb5432cdd18dbe/docker/pb_wdl_base) | [sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87](https://quay.io/repository/pacbio/pb_wdl_base/manifest/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87) | | pbmm2 |
  • pbmm2 1.17.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/9591749da92ca57f7283ca1c2268789c45fa341d/docker/pbmm2) | [pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859](https://quay.io/repository/pacbio/pbmm2/manifest/sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039dca3d3725859) | | mosdepth |
  • mosdepth 0.3.9
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/fa84fbf582738c05c750e667ff43d11552ad4183/docker/mosdepth) | [mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1](https://quay.io/repository/pacbio/mosdepth/manifest/sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1) | -| sawfish |
  • sawfish 2.0.1
  • sawshark 0.2.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/40c2aef86e409f522bc8e06fbbdff30edc2110a0/docker/sawfish) | [sawfish@sha256:dc7a955175967b4a5c4be9c438243332bc22f41e8240bb3b5c84699741248a44](https://quay.io/repository/pacbio/sawfish/manifest/sha256:dc7a955175967b4a5c4be9c438243332bc22f41e8240bb3b5c84699741248a44) | +| sawfish |
  • sawfish 2.0.3
  • sawshark 0.3.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/124a1d97513ddf6caf2b4605832cccd904def609/docker/sawfish) | [sawfish@sha256:561b6a232dd89a2d186b19d6ad439c74c460078348dbf96ae49ca0ea6eab0281](https://quay.io/repository/pacbio/sawfish/manifest/sha256:561b6a232dd89a2d186b19d6ad439c74c460078348dbf96ae49ca0ea6eab0281) | | trgt |
  • trgt 3.0.0
  • `/opt/scripts/check_trgt_coverage.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3c5ee05da7043bd03bd80959c3dd025e25468070/docker/trgt) | [trgt@sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284](https://quay.io/repository/pacbio/trgt/manifest/sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284) | | hiphase |
  • hiphase 1.5.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/69039c010ada793bab4d38a9bd17a30562b9b671/docker/hiphase) | [hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482](https://quay.io/repository/pacbio/hiphase/manifest/sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482) | | mitorsaw |
  • mitorsaw 0.2.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/36b0935f4d63ff5d51e953263d0dc2aaf2cddbfa/docker/mitorsaw) | [mitorsaw@sha256:1509dbd7b0a815c7ceb3af52fddc93ef3544ae1858483139450fa0285f8dbe0c](https://quay.io/repository/pacbio/mitorsaw/manifest/sha256:1509dbd7b0a815c7ceb3af52fddc93ef3544ae1858483139450fa0285f8dbe0c) | diff --git a/image_manifest.txt b/image_manifest.txt index 938937c3..f25edf7f 100644 --- a/image_manifest.txt +++ b/image_manifest.txt @@ -8,7 +8,7 @@ quay.io/pacbio/pbmm2@sha256:5f3f4d1f5dbea5cd4c388ee26b2fecbbb7dbcef449343633e039 quay.io/pacbio/pbstarphase@sha256:7daaad3b617a3b8b5914ab0893ee7cc545fd2025a35619211a5b8e25e4c36ac4 quay.io/pacbio/pbtk@sha256:67cd438ed9f343f90f058108170ddbff8fb1d9b5c193f4016be42b737ee2e73c quay.io/pacbio/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87 -quay.io/pacbio/sawfish@sha256:dc7a955175967b4a5c4be9c438243332bc22f41e8240bb3b5c84699741248a44 +quay.io/pacbio/sawfish@sha256:561b6a232dd89a2d186b19d6ad439c74c460078348dbf96ae49ca0ea6eab0281 quay.io/pacbio/slivar@sha256:f71a27f756e2d69ec30949cbea97c54abbafde757562a98ef965f21a28aa8eaa quay.io/pacbio/svpack@sha256:628e9851e425ed8044a907d33de04043d1ef02d4d2b2667cf2e9a389bb011eba quay.io/pacbio/trgt@sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284 diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 3d803be3..380ffbba 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -1986,7 +1986,7 @@ "tasks": { "sawfish_discover": { "key": "sawfish_discover", - "digest": "mluhmv4fzusd5rltn3uffyrcb3w6oary", + "digest": "w6ibz5424mjp5xgg3t3cd3gjklbjbsj3", "tests": [ { "inputs": { @@ -2018,7 +2018,7 @@ }, "sawfish_call": { "key": "sawfish_call", - "digest": "rn2lgqleychf2ppcobtco26uzf4hynpj", + "digest": "zm7w4vzg4ncahd3464imevnoihzvkn6q", "tests": [ { "inputs": { diff --git a/workflows/wdl-common b/workflows/wdl-common index 393b16ba..2723832d 160000 --- a/workflows/wdl-common +++ 
b/workflows/wdl-common @@ -1 +1 @@ -Subproject commit 393b16bacca59a0f503759ad7c2c7b54a63e8e27 +Subproject commit 2723832d0c2124b695ebcac583bd636086b19a4c From 47dd59ea5da76cc1ba0a10c7b70aabf4b28d0248 Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Fri, 25 Jul 2025 16:31:44 -0400 Subject: [PATCH 58/61] bump: update mitorsaw to 0.2.2 (#240) * bump: update mitorsaw to 0.2.2 --- docs/tools_containers.md | 2 +- image_manifest.txt | 2 +- wdl-ci.config.json | 2 +- workflows/wdl-common | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/tools_containers.md b/docs/tools_containers.md index 2673debc..21675bae 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -16,7 +16,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | sawfish |
  • sawfish 2.0.3
  • sawshark 0.3.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/124a1d97513ddf6caf2b4605832cccd904def609/docker/sawfish) | [sawfish@sha256:561b6a232dd89a2d186b19d6ad439c74c460078348dbf96ae49ca0ea6eab0281](https://quay.io/repository/pacbio/sawfish/manifest/sha256:561b6a232dd89a2d186b19d6ad439c74c460078348dbf96ae49ca0ea6eab0281) | | trgt |
  • trgt 3.0.0
  • `/opt/scripts/check_trgt_coverage.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3c5ee05da7043bd03bd80959c3dd025e25468070/docker/trgt) | [trgt@sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284](https://quay.io/repository/pacbio/trgt/manifest/sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284) | | hiphase |
  • hiphase 1.5.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/69039c010ada793bab4d38a9bd17a30562b9b671/docker/hiphase) | [hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482](https://quay.io/repository/pacbio/hiphase/manifest/sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482) | -| mitorsaw |
  • mitorsaw 0.2.1
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/36b0935f4d63ff5d51e953263d0dc2aaf2cddbfa/docker/mitorsaw) | [mitorsaw@sha256:1509dbd7b0a815c7ceb3af52fddc93ef3544ae1858483139450fa0285f8dbe0c](https://quay.io/repository/pacbio/mitorsaw/manifest/sha256:1509dbd7b0a815c7ceb3af52fddc93ef3544ae1858483139450fa0285f8dbe0c) | +| mitorsaw |
  • mitorsaw 0.2.2
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3939015459a5432e3724cfa1bae3681c8f45a914/docker/mitorsaw) | [mitorsaw@sha256:d6137dfb1a0c82af77837a5c6222287fde57f3fb09691fd52135f572bae3d866](https://quay.io/repository/pacbio/mitorsaw/manifest/sha256:d6137dfb1a0c82af77837a5c6222287fde57f3fb09691fd52135f572bae3d866) | | paraphase |
  • paraphase 3.3.2
  • minimap 2.28
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/28c84c386e28ce0a46587e4f1bf85db824bb4634/docker/paraphase) | [paraphase@sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920](https://quay.io/repository/pacbio/paraphase/manifest/sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920) | | pbstarphase |
  • pbstarphase 1.4.1
  • Database 20250515
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/c5166b28e43f36a381450ba479e2e34a841bb922/docker/pbstarphase) | [pbstarphase@sha256:7daaad3b617a3b8b5914ab0893ee7cc545fd2025a35619211a5b8e25e4c36ac4](https://quay.io/repository/pacbio/pbstarphase/manifest/sha256:7daaad3b617a3b8b5914ab0893ee7cc545fd2025a35619211a5b8e25e4c36ac4) | | pb-cpg-tools |
  • pb-cpg-tools 3.0.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/330b99b79f32b2d2598e812779f3c64460739e6c/docker/pb-cpg-tools) | [pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c](https://quay.io/repository/pacbio/pb-cpg-tools/manifest/sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c) | diff --git a/image_manifest.txt b/image_manifest.txt index f25edf7f..de88e49a 100644 --- a/image_manifest.txt +++ b/image_manifest.txt @@ -1,6 +1,6 @@ quay.io/pacbio/glnexus@sha256:ce6fecf59dddc6089a8100b31c29c1e6ed50a0cf123da9f2bc589ee4b0c69c8e quay.io/pacbio/hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482 -quay.io/pacbio/mitorsaw@sha256:1509dbd7b0a815c7ceb3af52fddc93ef3544ae1858483139450fa0285f8dbe0c +quay.io/pacbio/mitorsaw@sha256:d6137dfb1a0c82af77837a5c6222287fde57f3fb09691fd52135f572bae3d866 quay.io/pacbio/mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1 quay.io/pacbio/paraphase@sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920 quay.io/pacbio/pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 380ffbba..b968b35d 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -1138,7 +1138,7 @@ "tasks": { "mitorsaw": { "key": "mitorsaw", - "digest": "zs5hcur6jvanxdlx3kiszbrcfc3fl57v", + "digest": "3uo2zfths27axcxni4vy2i5py7vryech", "tests": [ { "inputs": { diff --git a/workflows/wdl-common b/workflows/wdl-common index 2723832d..616f2b6e 160000 --- a/workflows/wdl-common +++ b/workflows/wdl-common @@ -1 +1 @@ -Subproject commit 2723832d0c2124b695ebcac583bd636086b19a4c +Subproject commit 616f2b6e7f450eed7a3875f6ea25b17c22c8b1a6 From cc34fcd82b93f435eda5ab07fcda856c4b806adf Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Fri, 25 Jul 2025 16:46:54 -0400 Subject: [PATCH 59/61] docs: Update Zenodo badge URL. 
(#241) Co-authored-by: Helena --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f59cc4ac..a905f52d 100644 --- a/README.md +++ b/README.md @@ -127,7 +127,7 @@ At a high level, we have two types of inputs files: The resource bundle containing the GRCh38 reference and other files used in this workflow can be downloaded from Zenodo: -[10.5281/zenodo.15750792](https://zenodo.org/records/14908106) +[10.5281/zenodo.15750792](https://zenodo.org/records/15750792) # Tool versions and Docker images From f656706a7d44bd5bb0389a551752def3b1f2bce8 Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Mon, 28 Jul 2025 13:40:17 -0400 Subject: [PATCH 60/61] bump: Update mitorsaw to v0.2.3 (#242) * bump: Update mitorsaw to v0.2.3 --- docs/tools_containers.md | 2 +- image_manifest.txt | 2 +- wdl-ci.config.json | 2 +- workflows/wdl-common | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/tools_containers.md b/docs/tools_containers.md index 21675bae..e672426d 100644 --- a/docs/tools_containers.md +++ b/docs/tools_containers.md @@ -16,7 +16,7 @@ We directly use `deepvariant`, `deepvariant-gpu`, `pharmcat`, and `glnexus` cont | sawfish |
  • sawfish 2.0.3
  • sawshark 0.3.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/124a1d97513ddf6caf2b4605832cccd904def609/docker/sawfish) | [sawfish@sha256:561b6a232dd89a2d186b19d6ad439c74c460078348dbf96ae49ca0ea6eab0281](https://quay.io/repository/pacbio/sawfish/manifest/sha256:561b6a232dd89a2d186b19d6ad439c74c460078348dbf96ae49ca0ea6eab0281) | | trgt |
  • trgt 3.0.0
  • `/opt/scripts/check_trgt_coverage.py` 0.1.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3c5ee05da7043bd03bd80959c3dd025e25468070/docker/trgt) | [trgt@sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284](https://quay.io/repository/pacbio/trgt/manifest/sha256:301fd3f8c0174213e82dbf942e6f2259aab31a66a7dc3355a3dfc8fcd4286284) | | hiphase |
  • hiphase 1.5.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/69039c010ada793bab4d38a9bd17a30562b9b671/docker/hiphase) | [hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482](https://quay.io/repository/pacbio/hiphase/manifest/sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482) | -| mitorsaw |
  • mitorsaw 0.2.2
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3939015459a5432e3724cfa1bae3681c8f45a914/docker/mitorsaw) | [mitorsaw@sha256:d6137dfb1a0c82af77837a5c6222287fde57f3fb09691fd52135f572bae3d866](https://quay.io/repository/pacbio/mitorsaw/manifest/sha256:d6137dfb1a0c82af77837a5c6222287fde57f3fb09691fd52135f572bae3d866) | +| mitorsaw |
  • mitorsaw 0.2.3
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/dd6d0b7c3953211ee0ea074283f42329998aeff7/docker/mitorsaw) | [mitorsaw@sha256:4a1eac52a6ae80b7ccfb0ad3809f5f34a69c7ed859541e097e36d73623e8ad0e](https://quay.io/repository/pacbio/mitorsaw/manifest/sha256:4a1eac52a6ae80b7ccfb0ad3809f5f34a69c7ed859541e097e36d73623e8ad0e) | | paraphase |
  • paraphase 3.3.2
  • minimap 2.28
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/28c84c386e28ce0a46587e4f1bf85db824bb4634/docker/paraphase) | [paraphase@sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920](https://quay.io/repository/pacbio/paraphase/manifest/sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920) | | pbstarphase |
  • pbstarphase 1.4.1
  • Database 20250515
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/c5166b28e43f36a381450ba479e2e34a841bb922/docker/pbstarphase) | [pbstarphase@sha256:7daaad3b617a3b8b5914ab0893ee7cc545fd2025a35619211a5b8e25e4c36ac4](https://quay.io/repository/pacbio/pbstarphase/manifest/sha256:7daaad3b617a3b8b5914ab0893ee7cc545fd2025a35619211a5b8e25e4c36ac4) | | pb-cpg-tools |
  • pb-cpg-tools 3.0.0
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/330b99b79f32b2d2598e812779f3c64460739e6c/docker/pb-cpg-tools) | [pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c](https://quay.io/repository/pacbio/pb-cpg-tools/manifest/sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c) | diff --git a/image_manifest.txt b/image_manifest.txt index de88e49a..5e199050 100644 --- a/image_manifest.txt +++ b/image_manifest.txt @@ -1,6 +1,6 @@ quay.io/pacbio/glnexus@sha256:ce6fecf59dddc6089a8100b31c29c1e6ed50a0cf123da9f2bc589ee4b0c69c8e quay.io/pacbio/hiphase@sha256:353b4ffdae4281bdd5daf5a73ea3bb26ea742ef2c36e9980cb1f1ed524a07482 -quay.io/pacbio/mitorsaw@sha256:d6137dfb1a0c82af77837a5c6222287fde57f3fb09691fd52135f572bae3d866 +quay.io/pacbio/mitorsaw@sha256:4a1eac52a6ae80b7ccfb0ad3809f5f34a69c7ed859541e097e36d73623e8ad0e quay.io/pacbio/mosdepth@sha256:63f7a5d1a4a17b71e66d755d3301a951e50f6b63777d34dab3ee9e182fd7acb1 quay.io/pacbio/paraphase@sha256:e2f904111a43e8f055681112294e0f05ff2839d9801fc01ac39a17c841016920 quay.io/pacbio/pb-cpg-tools@sha256:afd5468a423fe089f1437d525fdc19c704296f723958739a6fe226caa01fba1c diff --git a/wdl-ci.config.json b/wdl-ci.config.json index b968b35d..277565e6 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -1138,7 +1138,7 @@ "tasks": { "mitorsaw": { "key": "mitorsaw", - "digest": "3uo2zfths27axcxni4vy2i5py7vryech", + "digest": "vzdinv7ullc2eobkrznr47mtjchvzuv4", "tests": [ { "inputs": { diff --git a/workflows/wdl-common b/workflows/wdl-common index 616f2b6e..2ae390d4 160000 --- a/workflows/wdl-common +++ b/workflows/wdl-common @@ -1 +1 @@ -Subproject commit 616f2b6e7f450eed7a3875f6ea25b17c22c8b1a6 +Subproject commit 2ae390d4ed6b80dd2a2ef10c960832ffa8c7d1d3 From 3f9b5bfb68f7907ad33e020e33b348bc07730165 Mon Sep 17 00:00:00 2001 From: William Rowell Date: Mon, 28 Jul 2025 14:40:23 -0700 Subject: [PATCH 61/61] docs: bump workflow version string --- README.md | 8 ++++---- 
workflows/family.wdl | 2 +- workflows/singleton.wdl | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a905f52d..0e7ed824 100644 --- a/README.md +++ b/README.md @@ -24,18 +24,18 @@ Both workflows are designed to analyze human PacBio whole genome sequencing (WGS This is an actively developed workflow with multiple versioned releases, and we make use of git submodules for common tasks that are shared by multiple workflows. There are two ways to ensure you are using a supported release of the workflow and ensure that the submodules are correctly initialized: -1) Download the release zips directly from a [supported release](https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/tag/v3.0.1): +1) Download the release zips directly from a [supported release](https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/tag/v3.0.2): ```bash - wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.1/hifi-human-wgs-singleton.zip - wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.1/hifi-human-wgs-family.zip + wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.2/hifi-human-wgs-singleton.zip + wget https://github.com/PacificBiosciences/HiFi-human-WGS-WDL/releases/download/v3.0.2/hifi-human-wgs-family.zip ``` 2) Clone the repository and initialize the submodules: ```bash git clone \ - --depth 1 --branch v3.0.1 \ + --depth 1 --branch v3.0.2 \ --recursive \ https://github.com/PacificBiosciences/HiFi-human-WGS-WDL.git ``` diff --git a/workflows/family.wdl b/workflows/family.wdl index a4fe8676..e7e3932e 100644 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -399,6 +399,6 @@ workflow humanwgs_family { # workflow metadata String workflow_name = "humanwgs_family" - String workflow_version = "v3.0.1" + if defined(debug_version) then "~{"-" + debug_version}" else "" + String workflow_version = "v3.0.2" + if 
defined(debug_version) then "~{"-" + debug_version}" else "" } } \ No newline at end of file diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl index 6aca8331..2191606b 100644 --- a/workflows/singleton.wdl +++ b/workflows/singleton.wdl @@ -335,6 +335,6 @@ workflow humanwgs_singleton { # workflow metadata String workflow_name = "humanwgs_singleton" - String workflow_version = "v3.0.1" + if defined(debug_version) then "~{"-" + debug_version}" else "" + String workflow_version = "v3.0.2" + if defined(debug_version) then "~{"-" + debug_version}" else "" } }