Skip to content

Commit a50dcc6

Browse files
committed
Refactor rank_array_element_metrics to restore original keys and improve rank annotation handling
- Updated the `rank_array_element_metrics` function to return the table with its original key restored after ranking.
- Enhanced the rank annotation process to use `or_missing` for unranked rows, ensuring correct typing without manual struct construction.
- Simplified the rank lookup and annotation logic for better readability and maintainability.
1 parent 74d2de5 commit a50dcc6

File tree

1 file changed

+20
-17
lines changed

1 file changed

+20
-17
lines changed

gnomad/utils/constraint.py

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2918,9 +2918,9 @@ def rank_array_element_metrics(
29182918
:param bin_granularities: Bin granularities passed to
29192919
:func:`rank_and_assign_bins`.
29202920
:return: Table with ``{metric_name}_rank`` structs added to each array
2921-
element. The table is returned keyed by an internal ``_rank_idx``
2922-
integer index.
2921+
element. The table is returned with its original key restored.
29232922
"""
2923+
original_key = list(ht.key)
29242924
ht = ht.add_index("_rank_idx").key_by("_rank_idx").cache()
29252925

29262926
subset_ht = ht.filter(filter_fn(ht)) if filter_fn is not None else ht
@@ -2952,22 +2952,25 @@ def rank_array_element_metrics(
29522952
]
29532953
).cache()
29542954

2955-
# Join ranks back to the original table.
2956-
rank_lookup = subset_ht[ht._rank_idx]
2955+
# Join ranks back to the original table. Use or_missing so that
2956+
# unranked rows get correctly-typed missing rank annotations without
2957+
# needing a manually-constructed missing struct for if_else.
2958+
ht = ht.annotate(_ranks=subset_ht[ht._rank_idx]._rank_values)
29572959
ht = ht.annotate(
29582960
**{
2959-
array_field: hl.if_else(
2960-
hl.is_defined(rank_lookup._rank_values),
2961-
hl.map(
2962-
lambda elem, ranks: elem.annotate(
2963-
**{f"{name}_rank": ranks[name] for name in metric_names}
2964-
),
2965-
ht[array_field],
2966-
rank_lookup._rank_values,
2967-
),
2968-
ht[array_field],
2969-
)
2961+
array_field: [
2962+
ht[array_field][i].annotate(
2963+
**{
2964+
f"{name}_rank": hl.or_missing(
2965+
hl.is_defined(ht._ranks),
2966+
ht._ranks[i][name],
2967+
)
2968+
for name in metric_names
2969+
}
2970+
)
2971+
for i in range(n_elements)
2972+
]
29702973
}
2971-
)
2974+
).drop("_ranks")
29722975

2973-
return ht
2976+
return ht.key_by(*original_key).drop("_rank_idx")

0 commit comments

Comments
 (0)