Standardized stat names, placing the aggregator function on the right (e.g. read_count, depth_mean); added stat_gap_compressed_identity_median.

williamrowell · williamrowell · commit db3bb4880614 · 2025-09-04T13:45:52.000-07:00
diff --git a/docs/bam_statistics.md b/docs/bam_statistics.md
@@ -24,14 +24,14 @@ A histogram of read qualities, using only records marked `prim` or `unmapped`. T
 
 A histogram of mapping qualities and gap-compressed identities, respectively.
 
-## `stat_num_reads`, `stat_read_length_mean`, `stat_read_length_median`, `stat_read_length_n50`, `stat_read_quality_mean`, `stat_read_quality_median`
+## `stat_read_count`, `stat_read_length_mean`, `stat_read_length_median`, `stat_read_length_n50`, `stat_read_quality_mean`, `stat_read_quality_median`
 
 Statistics computed using only records marked `prim` or `unmapped`.
 
-## `stat_mapped_read_count`, `stat_mapped_percent`
+## `stat_mapped_read_count`, `stat_mapped_read_percent`
 
 Count of primary alignments, and primary alignments as a percentage of total reads.
 
-## `stat_mean_gap_compressed_identity`
+## `stat_gap_compressed_identity_mean`, `stat_gap_compressed_identity_median`
 
-Mean gap-compressed identity of primary and supplementary alignments.
+Mean and median gap-compressed identity of primary and supplementary alignments.
diff --git a/docs/family.md b/docs/family.md
@@ -180,17 +180,18 @@ The `Sample` struct contains sample specific data and metadata. The struct has t
 | Array\[File\] | mosdepth_depth_distribution_plot |  |  |
 | Array\[File\] | mapq_distribution_plot | Distribution of mapping quality per alignment | |
 | Array\[File\] | mg_distribution_plot | Distribution of gap-compressed identity score per alignment | |
-| Array\[String\] | stat_num_reads | Number of reads |  |
+| Array\[String\] | stat_read_count | Number of reads |  |
 | Array\[String\] | stat_read_length_mean | Mean read length |  |
 | Array\[String\] | stat_read_length_median | Median read length |  |
 | Array\[String\] | stat_read_length_n50 | Read length N50 |  |
 | Array\[String\] | stat_read_quality_mean | Mean read quality |  |
 | Array\[String\] | stat_read_quality_median | Median read quality |  |
 | Array\[String\] | stat_mapped_read_count | Count of reads mapped to reference |  |
-| Array\[String\] | stat_mapped_percent | Percent of reads mapped to reference |  |
-| Array\[String\] | stat_mean_gap_compressed_identity | Mean gap-compressed identity |  |
+| Array\[String\] | stat_mapped_read_percent | Percent of reads mapped to reference |  |
+| Array\[String\] | stat_gap_compressed_identity_mean | Mean gap-compressed identity |  |
+| Array\[String\] | stat_gap_compressed_identity_median | Median gap-compressed identity |  |
 | Array\[String\] | inferred_sex | Inferred sex | Sex is inferred based on relative depth of chrY alignments. |
-| Array\[String\] | stat_mean_depth | Mean depth | |
+| Array\[String\] | stat_depth_mean | Mean depth | |
 
 ### Small Variants (<50 bp)
 
diff --git a/docs/singleton.md b/docs/singleton.md
@@ -136,17 +136,18 @@ flowchart TD
 | File | mosdepth_depth_distribution_plot |  |  |
 | File | mapq_distribution_plot | Distribution of mapping quality per alignment | |
 | File | mg_distribution_plot | Distribution of gap-compressed identity score per alignment | |
-| String | stat_num_reads | Number of reads |  |
+| String | stat_read_count | Number of reads |  |
 | String | stat_read_length_mean | Mean read length |  |
 | String | stat_read_length_median | Median read length |  |
 | String | stat_read_length_n50 | Read length N50 |  |
 | String | stat_read_quality_mean | Mean read quality |  |
 | String | stat_read_quality_median | Median read quality |  |
 | String | stat_mapped_read_count | Count of reads mapped to reference |  |
-| String | stat_mapped_percent | Percent of reads mapped to reference |  |
-| String | stat_mean_gap_compressed_identity | Mean gap-compressed identity |  |
+| String | stat_mapped_read_percent | Percent of reads mapped to reference |  |
+| String | stat_gap_compressed_identity_mean | Mean gap-compressed identity |  |
+| String | stat_gap_compressed_identity_median | Median gap-compressed identity |  |
 | String | inferred_sex | Inferred sex | Sex is inferred based on relative depth of chrY alignments. |
-| String | stat_mean_depth | Mean depth | |
+| String | stat_depth_mean | Mean depth | |
 
 ### Small Variants (<50 bp)
 
diff --git a/wdl-ci.config.json b/wdl-ci.config.json
@@ -377,7 +377,7 @@
                     "png_validator"
                   ]
                 },
-                "stat_num_reads": {
+                "stat_read_count": {
                   "value": "27398",
                   "test_tasks": [
                     "compare_string"
@@ -419,13 +419,19 @@
                     "compare_string"
                   ]
                 },
-                "stat_mapped_percent": {
+                "stat_mapped_read_percent": {
                   "value": "100.0",
                   "test_tasks": [
                     "compare_string"
                   ]
                 },
-                "stat_mean_gap_compressed_identity": {
+                "stat_gap_compressed_identity_mean": {
+                  "value": "99.77",
+                  "test_tasks": [
+                    "compare_string"
+                  ]
+                },
+                "stat_gap_compressed_identity_median": {
                   "value": "99.77",
                   "test_tasks": [
                     "compare_string"
@@ -1225,7 +1231,7 @@
                     "png_validator"
                   ]
                 },
-                "stat_mean_depth": {
+                "stat_depth_mean": {
                   "value": "0.07",
                   "test_tasks": [
                     "compare_string"
@@ -1274,7 +1280,7 @@
                     "png_validator"
                   ]
                 },
-                "stat_mean_depth": {
+                "stat_depth_mean": {
                   "value": "0.07",
                   "test_tasks": [
                     "compare_string"
diff --git a/workflows/downstream/downstream.wdl b/workflows/downstream/downstream.wdl
@@ -216,21 +216,22 @@ workflow downstream {
     String stat_phase_block_ng50          = hiphase.stat_phase_block_ng50
 
     # bam stats
-    File   bam_statistics                    = bam_stats.bam_statistics
-    File   read_length_plot                  = bam_stats.read_length_plot
-    File?  read_quality_plot                 = bam_stats.read_quality_plot
-    File   mapq_distribution_plot            = bam_stats.mapq_distribution_plot
-    File   mg_distribution_plot              = bam_stats.mg_distribution_plot
-    String stat_num_reads                    = bam_stats.stat_num_reads
-    String stat_read_length_mean             = bam_stats.stat_read_length_mean
-    String stat_read_length_median           = bam_stats.stat_read_length_median
-    String stat_read_length_n50              = bam_stats.stat_read_length_n50
-    String stat_read_quality_mean            = bam_stats.stat_read_quality_mean
-    String stat_read_quality_median          = bam_stats.stat_read_quality_median
-    String stat_mapped_read_count            = bam_stats.stat_mapped_read_count
-    String stat_mapped_percent               = bam_stats.stat_mapped_percent
-    String stat_mean_gap_compressed_identity = bam_stats.stat_mean_gap_compressed_identity
-    File   trgt_coverage_dropouts            = coverage_dropouts.dropouts
+    File   bam_statistics                      = bam_stats.bam_statistics
+    File   read_length_plot                    = bam_stats.read_length_plot
+    File?  read_quality_plot                   = bam_stats.read_quality_plot
+    File   mapq_distribution_plot              = bam_stats.mapq_distribution_plot
+    File   mg_distribution_plot                = bam_stats.mg_distribution_plot
+    String stat_read_count                     = bam_stats.stat_read_count
+    String stat_read_length_mean               = bam_stats.stat_read_length_mean
+    String stat_read_length_median             = bam_stats.stat_read_length_median
+    String stat_read_length_n50                = bam_stats.stat_read_length_n50
+    String stat_read_quality_mean              = bam_stats.stat_read_quality_mean
+    String stat_read_quality_median            = bam_stats.stat_read_quality_median
+    String stat_mapped_read_count              = bam_stats.stat_mapped_read_count
+    String stat_mapped_read_percent            = bam_stats.stat_mapped_read_percent
+    String stat_gap_compressed_identity_mean   = bam_stats.stat_gap_compressed_identity_mean
+    String stat_gap_compressed_identity_median = bam_stats.stat_gap_compressed_identity_median
+    File   trgt_coverage_dropouts              = coverage_dropouts.dropouts
 
     # small variant stats
     File   small_variant_stats     = bcftools_stats_roh_small_variants.stats
diff --git a/workflows/family.wdl b/workflows/family.wdl
@@ -236,16 +236,17 @@ workflow humanwgs_family {
 
     Map[String, Array[String]] stats = {
     'sample_id': sample_id,
-    'num_reads': downstream.stat_num_reads,
+    'read_count': downstream.stat_read_count,
     'read_length_mean': downstream.stat_read_length_mean,
     'read_length_median': downstream.stat_read_length_median,
     'read_length_n50': downstream.stat_read_length_n50,
     'read_quality_mean': downstream.stat_read_quality_mean,
     'read_quality_median': downstream.stat_read_quality_median,
     'mapped_read_count': downstream.stat_mapped_read_count,
-    'mapped_percent': downstream.stat_mapped_percent,
-    'mean_gap_compressed_identity': downstream.stat_mean_gap_compressed_identity,
-    'mean_depth': upstream.stat_mean_depth,
+    'mapped_read_percent': downstream.stat_mapped_read_percent,
+    'gap_compressed_identity_mean': downstream.stat_gap_compressed_identity_mean,
+    'gap_compressed_identity_median': downstream.stat_gap_compressed_identity_median,
+    'depth_mean': upstream.stat_depth_mean,
     'inferred_sex': upstream.inferred_sex,
     'stat_phased_basepairs': downstream.stat_phased_basepairs,
     'phase_block_ng50': downstream.stat_phase_block_ng50,
@@ -284,20 +285,21 @@ workflow humanwgs_family {
     File  msg_file           = consolidate_stats.messages
 
     # bam stats
-    Array[File]   bam_statistics                    = downstream.bam_statistics
-    Array[File]   read_length_plot                  = downstream.read_length_plot
-    Array[File?]  read_quality_plot                 = downstream.read_quality_plot
-    Array[File]   mapq_distribution_plot            = downstream.mapq_distribution_plot
-    Array[File]   mg_distribution_plot              = downstream.mg_distribution_plot
-    Array[String] stat_num_reads                    = downstream.stat_num_reads
-    Array[String] stat_read_length_mean             = downstream.stat_read_length_mean
-    Array[String] stat_read_length_median           = downstream.stat_read_length_median
-    Array[String] stat_read_length_n50              = downstream.stat_read_length_n50
-    Array[String] stat_read_quality_mean            = downstream.stat_read_quality_mean
-    Array[String] stat_read_quality_median          = downstream.stat_read_quality_median
-    Array[String] stat_mapped_read_count            = downstream.stat_mapped_read_count
-    Array[String] stat_mapped_percent               = downstream.stat_mapped_percent
-    Array[String] stat_mean_gap_compressed_identity = downstream.stat_mean_gap_compressed_identity
+    Array[File]   bam_statistics                      = downstream.bam_statistics
+    Array[File]   read_length_plot                    = downstream.read_length_plot
+    Array[File?]  read_quality_plot                   = downstream.read_quality_plot
+    Array[File]   mapq_distribution_plot              = downstream.mapq_distribution_plot
+    Array[File]   mg_distribution_plot                = downstream.mg_distribution_plot
+    Array[String] stat_read_count                     = downstream.stat_read_count
+    Array[String] stat_read_length_mean               = downstream.stat_read_length_mean
+    Array[String] stat_read_length_median             = downstream.stat_read_length_median
+    Array[String] stat_read_length_n50                = downstream.stat_read_length_n50
+    Array[String] stat_read_quality_mean              = downstream.stat_read_quality_mean
+    Array[String] stat_read_quality_median            = downstream.stat_read_quality_median
+    Array[String] stat_mapped_read_count              = downstream.stat_mapped_read_count
+    Array[String] stat_mapped_read_percent            = downstream.stat_mapped_read_percent
+    Array[String] stat_gap_compressed_identity_mean   = downstream.stat_gap_compressed_identity_mean
+    Array[String] stat_gap_compressed_identity_median = downstream.stat_gap_compressed_identity_median
 
     # merged, haplotagged alignments
     Array[File]   merged_haplotagged_bam       = downstream.merged_haplotagged_bam
@@ -308,7 +310,7 @@ workflow humanwgs_family {
     Array[File]   mosdepth_region_bed              = upstream.mosdepth_region_bed
     Array[File]   mosdepth_region_bed_index        = upstream.mosdepth_region_bed_index
     Array[File]   mosdepth_depth_distribution_plot = upstream.mosdepth_depth_distribution_plot
-    Array[String] stat_mean_depth                  = upstream.stat_mean_depth
+    Array[String] stat_depth_mean                  = upstream.stat_depth_mean
     Array[String] inferred_sex                     = upstream.inferred_sex
 
     # phasing stats
diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl
@@ -186,16 +186,17 @@ workflow humanwgs_singleton {
 
   Map[String, Array[String]] stats = {
     'sample_id': [sample_id],
-    'num_reads': [downstream.stat_num_reads],
+    'read_count': [downstream.stat_read_count],
     'read_length_mean': [downstream.stat_read_length_mean],
     'read_length_median': [downstream.stat_read_length_median],
     'read_length_n50': [downstream.stat_read_length_n50],
     'read_quality_mean': [downstream.stat_read_quality_mean],
     'read_quality_median': [downstream.stat_read_quality_median],
     'mapped_read_count': [downstream.stat_mapped_read_count],
-    'mapped_percent': [downstream.stat_mapped_percent],
-    'mean_gap_compressed_identity': [downstream.stat_mean_gap_compressed_identity],
-    'mean_depth': [upstream.stat_mean_depth],
+    'mapped_read_percent': [downstream.stat_mapped_read_percent],
+    'gap_compressed_identity_mean': [downstream.stat_gap_compressed_identity_mean],
+    'gap_compressed_identity_median': [downstream.stat_gap_compressed_identity_median],
+    'depth_mean': [upstream.stat_depth_mean],
     'inferred_sex': [upstream.inferred_sex],
     'stat_phased_basepairs': [downstream.stat_phased_basepairs],
     'phase_block_ng50': [downstream.stat_phase_block_ng50],
@@ -233,20 +234,21 @@ workflow humanwgs_singleton {
     File msg_file   = consolidate_stats.messages
 
     # bam stats
-    File   bam_statistics                    = downstream.bam_statistics
-    File   read_length_plot                  = downstream.read_length_plot
-    File?  read_quality_plot                 = downstream.read_quality_plot
-    File   mapq_distribution_plot            = downstream.mapq_distribution_plot
-    File   mg_distribution_plot              = downstream.mg_distribution_plot
-    String stat_num_reads                    = downstream.stat_num_reads
-    String stat_read_length_mean             = downstream.stat_read_length_mean
-    String stat_read_length_median           = downstream.stat_read_length_median
-    String stat_read_length_n50              = downstream.stat_read_length_n50
-    String stat_read_quality_mean            = downstream.stat_read_quality_mean
-    String stat_read_quality_median          = downstream.stat_read_quality_median
-    String stat_mapped_read_count            = downstream.stat_mapped_read_count
-    String stat_mapped_percent               = downstream.stat_mapped_percent
-    String stat_mean_gap_compressed_identity = downstream.stat_mean_gap_compressed_identity
+    File   bam_statistics                      = downstream.bam_statistics
+    File   read_length_plot                    = downstream.read_length_plot
+    File?  read_quality_plot                   = downstream.read_quality_plot
+    File   mapq_distribution_plot              = downstream.mapq_distribution_plot
+    File   mg_distribution_plot                = downstream.mg_distribution_plot
+    String stat_read_count                     = downstream.stat_read_count
+    String stat_read_length_mean               = downstream.stat_read_length_mean
+    String stat_read_length_median             = downstream.stat_read_length_median
+    String stat_read_length_n50                = downstream.stat_read_length_n50
+    String stat_read_quality_mean              = downstream.stat_read_quality_mean
+    String stat_read_quality_median            = downstream.stat_read_quality_median
+    String stat_mapped_read_count              = downstream.stat_mapped_read_count
+    String stat_mapped_read_percent            = downstream.stat_mapped_read_percent
+    String stat_gap_compressed_identity_mean   = downstream.stat_gap_compressed_identity_mean
+    String stat_gap_compressed_identity_median = downstream.stat_gap_compressed_identity_median
 
     # merged, haplotagged alignments
     File   merged_haplotagged_bam       = downstream.merged_haplotagged_bam
@@ -257,7 +259,7 @@ workflow humanwgs_singleton {
     File   mosdepth_region_bed              = upstream.mosdepth_region_bed
     File   mosdepth_region_bed_index        = upstream.mosdepth_region_bed_index
     File   mosdepth_depth_distribution_plot = upstream.mosdepth_depth_distribution_plot
-    String stat_mean_depth                  = upstream.stat_mean_depth
+    String stat_depth_mean                  = upstream.stat_depth_mean
     String inferred_sex                     = upstream.inferred_sex
 
     # phasing stats
diff --git a/workflows/upstream/upstream.wdl b/workflows/upstream/upstream.wdl
@@ -272,7 +272,7 @@ workflow upstream {
     File   mosdepth_region_bed_index        = mosdepth.region_bed_index
     File   mosdepth_depth_distribution_plot = mosdepth.depth_distribution_plot
     String inferred_sex                     = mosdepth.inferred_sex
-    String stat_mean_depth                  = mosdepth.stat_mean_depth
+    String stat_depth_mean                  = mosdepth.stat_depth_mean
 
     # per sample sv signatures
     File discover_tar = sawfish_discover.discover_tar
diff --git a/workflows/wdl-common b/workflows/wdl-common
@@ -1 +1 @@
-Subproject commit 7e31fe701e8944ad9e491bf1073dd85d880938bb
+Subproject commit 61951a01546d50533593abfa083edefcf869c1d6