Skip to content

Commit 9d91d1f

Browse files
committed
Enhance rank_and_assign_bins and rank_array_element_metrics with prefix support
- Added a `prefix` parameter to `rank_and_assign_bins` to allow customization of the output field names for ranks and bins.
- Updated the `rank_array_element_metrics` function to accept a `rank_field_prefix` parameter, passing it through to `rank_and_assign_bins` for consistent naming in rank structs.
- Adjusted documentation to reflect the new parameters and their default values.
1 parent a50dcc6 commit 9d91d1f

File tree

1 file changed

+18
-9
lines changed

1 file changed

+18
-9
lines changed

gnomad/utils/constraint.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2705,6 +2705,7 @@ def add_gencode_transcript_annotations(
27052705
def rank_and_assign_bins(
27062706
value_expr: hl.expr.Float64Expression,
27072707
bin_granularities: Optional[Dict[str, int]] = None,
2708+
prefix: str = "",
27082709
) -> hl.StructExpression:
27092710
"""Rank rows by a numeric expression and assign bin labels.
27102711
@@ -2714,19 +2715,21 @@ def rank_and_assign_bins(
27142715
bins are assigned by comparing values against pre-computed boundary values.
27152716
27162717
Rows are ordered ascending by ``value_expr``. Each row is assigned a
2717-
0-based ``rank`` and a ``bin_{name}`` field for every entry in
2718-
``bin_granularities``, computed as
2718+
0-based ``{prefix}rank`` and a ``{prefix}bin_{name}`` field for every
2719+
entry in ``bin_granularities``, computed as
27192720
``hl.int(rank * multiplier / n_rows)``.
27202721
27212722
Used by :func:`rank_array_element_metrics` to rank metrics within array
27222723
elements.
27232724
27242725
:param value_expr: Numeric expression to rank by (ascending).
27252726
:param bin_granularities: Mapping of bin name to multiplier. Each entry
2726-
produces a ``bin_{name}`` field. Default is
2727+
produces a ``{prefix}bin_{name}`` field. Default is
27272728
``{"percentile": 100, "decile": 10, "sextile": 6}``.
2728-
:return: Struct with ``rank`` and ``bin_{name}`` fields for each entry in
2729-
``bin_granularities``.
2729+
:param prefix: String prepended to ``rank`` and ``bin_{name}`` field
2730+
names. Default is ``""`` (no prefix).
2731+
:return: Struct with ``{prefix}rank`` and ``{prefix}bin_{name}`` fields
2732+
for each entry in ``bin_granularities``.
27302733
"""
27312734
if bin_granularities is None:
27322735
bin_granularities = {"percentile": 100, "decile": 10, "sextile": 6}
@@ -2737,9 +2740,9 @@ def rank_and_assign_bins(
27372740
ranked_ht = ht.select(_=value_expr).order_by("_").add_index("rank")
27382741
ranked_ht = ranked_ht.select(
27392742
*source_key,
2740-
"rank",
2743+
**{f"{prefix}rank": ranked_ht.rank},
27412744
**{
2742-
f"bin_{name}": hl.int(ranked_ht.rank * multiplier / n_rows)
2745+
f"{prefix}bin_{name}": hl.int(ranked_ht.rank * multiplier / n_rows)
27432746
for name, multiplier in bin_granularities.items()
27442747
},
27452748
).cache()
@@ -2888,6 +2891,7 @@ def rank_array_element_metrics(
28882891
],
28892892
filter_fn: Optional[Callable[[hl.Table], hl.expr.BooleanExpression]] = None,
28902893
bin_granularities: Optional[Dict[str, int]] = None,
2894+
rank_field_prefix: str = "",
28912895
) -> hl.Table:
28922896
"""
28932897
Rank metrics within array elements and annotate rank structs back.
@@ -2917,6 +2921,9 @@ def rank_array_element_metrics(
29172921
rows are ranked.
29182922
:param bin_granularities: Bin granularities passed to
29192923
:func:`rank_and_assign_bins`.
2924+
:param rank_field_prefix: Prefix for the ``rank`` and ``bin_{name}``
2925+
sub-fields within each rank struct. Passed through to
2926+
:func:`rank_and_assign_bins`. Default is ``""`` (no prefix).
29202927
:return: Table with ``{metric_name}_rank`` structs added to each array
29212928
element. The table is returned with its original key restored.
29222929
"""
@@ -2930,7 +2937,7 @@ def rank_array_element_metrics(
29302937
_rank_values=subset_ht[array_field].map(
29312938
lambda elem: hl.struct(**element_value_fn(elem))
29322939
)
2933-
).naive_coalesce(100)
2940+
)
29342941

29352942
# Determine element count and metric names from a sample row.
29362943
sample = subset_ht.take(1)[0]._rank_values
@@ -2943,7 +2950,9 @@ def rank_array_element_metrics(
29432950
hl.struct(
29442951
**{
29452952
name: rank_and_assign_bins(
2946-
subset_ht._rank_values[i][name], bin_granularities
2953+
subset_ht._rank_values[i][name],
2954+
bin_granularities,
2955+
prefix=rank_field_prefix,
29472956
)
29482957
for name in metric_names
29492958
}

0 commit comments

Comments
 (0)