@@ -7,11 +7,12 @@ import "../../TechAgnostic/Utility/SplitBamByReadgroup.wdl" as Major
77import "../../../tasks/Utility/Utils.wdl"
88import "../../../tasks/Utility/BAMutils.wdl" as BU
99import "../../../tasks/Utility/ONTUtils.wdl" as OU
10+ import "../../../tasks/Utility/GeneralUtils.wdl" as GU
1011
1112workflow RemoveDuplicateFromMergedONTBamAndSplitByReadgroup {
1213
1314 meta {
14- desciption : "Remove duplicate records from an ONT alinged BAM, drop alignment information, and split by the bam by read groups."
15+ description : "Remove duplicate records from an ONT aligned BAM, drop alignment information, and split the bam by read groups."
1516 }
1617 parameter_meta {
1718 fix_bam_header : "Sometimes, the bam given to us contains a specific error mode. We fix it here."
@@ -36,6 +37,10 @@ workflow RemoveDuplicateFromMergedONTBamAndSplitByReadgroup {
3637 Map [String , String ]? rgid_2_ubam_emptyness = WORKHORSE .rgid_2_ubam_emptyness
3738 Boolean rgid_2_bam_are_aligned = WORKHORSE .rgid_2_bam_are_aligned
3839
40+ Map [String , Int ] mol_counts = {'Original' : CountGCCMolecules .number ,
41+ 'Deduplicated' : CountDedupMolecules .sum ,
42+ 'SplitSum' : CountFinalMolecules .sum }
43+
3944 String last_processing_date = WORKHORSE .last_processing_date
4045 }
4146
@@ -47,6 +52,7 @@ workflow RemoveDuplicateFromMergedONTBamAndSplitByReadgroup {
4752 if ('coordinate' != GatherBamMetadata .sort_order ) {
4853 call Utils .StopWorkflow { input : reason = "Input bam isn't coordinate-sorted, but rather sorted by ~{GatherBamMetadata .sort_order }" }
4954 }
55+ call BU .CountMolecules as CountGCCMolecules { input : bam = input_bam , localize_bam = true }
5056
5157 # reality of life: submitted files sometimes need their headers fixed
5258 if (fix_bam_header ) {
@@ -56,6 +62,7 @@ workflow RemoveDuplicateFromMergedONTBamAndSplitByReadgroup {
5662 call FixAndReset .DeduplicateAndResetONTAlignedBam as Dedup { input :
5763 aligned_bam = select_first ([FixParticularBamHeaderIssue .fixed , input_bam ]), aligned_bai = input_bai , scatter_scheme = scatter_scheme
5864 }
65+ call GU .AddIntegers as CountDedupMolecules { input : integers = Dedup .deduplicated_molecule_counts_per_shard }
5966
6067 File ok_input_bam = Dedup .result
6168
@@ -73,7 +80,9 @@ workflow RemoveDuplicateFromMergedONTBamAndSplitByReadgroup {
7380 gcs_out_root_dir = gcs_out_root_dir ,
7481 debug_mode = false
7582 }
76-
83+ call GU .CoerceMapToArrayOfPairs { input : input_map = WORKHORSE .rgid_2_bam }
84+ call GU .Unzip { input : apss = CoerceMapToArrayOfPairs .output_pairs }
85+ call GU .AddIntegers as CountFinalMolecules { input : integers = Unzip .res .right }
7786 call OU .GetBasecallModel { input : bam = ok_input_bam }
7887}
7988
0 commit comments