Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@
when the input sample has zero rows, instead of relying on an assertion.
- This aligns runtime behavior with documented exceptions and avoids
optimization-dependent assert behavior.
- **Stabilized `prop_above_and_below()` return paths**
- `prop_above_and_below()` now builds concatenated outputs only from present
Series objects and returns `None` when both `below` and `above` are `None`,
avoiding ambiguous concat inputs while preserving existing behavior for valid
threshold sets.

## Tests

Expand All @@ -79,6 +84,8 @@
- Added tests for `link_transform()` and `calc_dev()` to validate behavior for extreme probabilities and finite 10-fold deviance summaries.
- **Expanded ASCII plot dispatcher edge-case coverage**
- Added tests for `ascii_plot_dist` with `comparative=False` to verify direct dispatch to `ascii_plot_hist` and mixed categorical+numeric routing in a single call.
- **Expanded `prop_above_and_below()` edge-case coverage**
- Added focused tests for empty threshold iterables, mixed `None` threshold groups in dict mode, and explicit all-`None` threshold handling across return formats.

# 0.16.0 (2026-02-09)

Expand Down
49 changes: 36 additions & 13 deletions balance/stats_and_plots/weights_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from __future__ import annotations

import logging
from typing import Any
from typing import Any, Literal, overload, TypedDict

import numpy as np
import numpy.typing as npt
Expand All @@ -17,6 +17,11 @@
logger: logging.Logger = logging.getLogger(__package__)


class PropAboveBelowResult(TypedDict):
    """Dict-shaped result of ``prop_above_and_below`` when ``return_as_series=False``.

    Each entry holds a ``pd.Series`` for its threshold group, or ``None`` when
    that group was omitted by the caller.
    """

    # Series for the "below" thresholds; None when that group is omitted.
    below: pd.Series | None
    # Series for the "above" thresholds; None when that group is omitted.
    above: pd.Series | None


##########################################
# Weights diagnostics - functions for analyzing weights
# These functions provide statistical measures for evaluating
Expand Down Expand Up @@ -199,12 +204,33 @@ def nonparametric_skew(
return (w.mean() - w.median()) / w.std()


@overload
def prop_above_and_below(
    w: list[Any] | pd.Series | npt.NDArray | pd.DataFrame,
    below: tuple[float, ...] | list[float] | None = (1 / 10, 1 / 5, 1 / 3, 1 / 2, 1),
    above: tuple[float, ...] | list[float] | None = (1, 2, 3, 5, 10),
    return_as_series: Literal[True] = True,
) -> pd.Series | None:
    """Typing-only overload: series mode (the default) yields ``pd.Series | None``."""
    ...


@overload
def prop_above_and_below(
    w: list[Any] | pd.Series | npt.NDArray | pd.DataFrame,
    below: tuple[float, ...] | list[float] | None = (1 / 10, 1 / 5, 1 / 3, 1 / 2, 1),
    above: tuple[float, ...] | list[float] | None = (1, 2, 3, 5, 10),
    *,
    return_as_series: Literal[False],
) -> PropAboveBelowResult | None:
    """Typing-only overload: ``return_as_series=False`` yields ``PropAboveBelowResult | None``."""
    ...


def prop_above_and_below(
w: list[Any] | pd.Series | npt.NDArray | pd.DataFrame,
below: tuple[float, ...] | list[float] | None = (1 / 10, 1 / 5, 1 / 3, 1 / 2, 1),
above: tuple[float, ...] | list[float] | None = (1, 2, 3, 5, 10),
return_as_series: bool = True,
) -> pd.Series | dict[Any, Any] | None:
) -> pd.Series | PropAboveBelowResult | None:
# TODO (p2): look more in the literature (are there references for using this vs another, or none at all?)
# update the doc with insights, once done.
"""
Expand All @@ -224,14 +250,15 @@ def prop_above_and_below(
DataFrame, only the first column is used.
below (tuple[float, ...] | list[float] | None, optional):
values to check which proportion of normalized weights are *below* them.
Using None returns None.
Using None omits below-threshold calculations.
Defaults to (1/10, 1/5, 1/3, 1/2, 1).
above (tuple[float, ...] | list[float] | None, optional):
values to check which proportion of normalized weights are *above* (or equal) to them.
Using None returns None.
Using None omits above-threshold calculations.
Defaults to (1, 2, 3, 5, 10).
return_as_series (bool, optional): If true returns one pd.Series of values.
If False will return a dict with two pd.Series (one for below and one for above).
If False returns ``PropAboveBelowResult`` with ``below``/``above`` entries
containing a ``pd.Series`` or ``None`` for omitted groups.
Defaults to True.

Returns:
Expand Down Expand Up @@ -317,16 +344,12 @@ def prop_above_and_below(

# decide if to return one series or a dict
if return_as_series:
out = pd.concat(
[ # pyre-ignore[6]: pd.concat supports Series.
prop_below_series,
prop_above_series,
]
)
pieces = [s for s in (prop_below_series, prop_above_series) if s is not None]
out = pd.concat(pieces) if pieces else None
else:
out = {"below": prop_below_series, "above": prop_above_series}
out = PropAboveBelowResult(below=prop_below_series, above=prop_above_series)

return out # pyre-ignore[7]: TODO: see if we can fix this pyre
return out


def weighted_median_breakdown_point(
Expand Down
52 changes: 52 additions & 0 deletions tests/test_stats_and_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,58 @@ def test_prop_above_and_below(self) -> None:
}
self.assertEqual({k: v.to_list() for k, v in result_dict.items()}, expected)

def test_prop_above_and_below_edge_cases(self) -> None:
    """Cover edge combinations for thresholds and return formats.

    Scenarios exercised:
      * empty ``below``/``above`` iterables in series mode;
      * exactly one threshold group set to ``None`` in dict mode;
      * both threshold groups set to ``None`` in series mode and dict mode.
    """
    from balance.stats_and_plots.weights_stats import prop_above_and_below

    weights = pd.Series((1.0, 2.0, 3.0, 4.0))

    # Empty threshold iterables should return an empty Series in series mode.
    result_empty = prop_above_and_below(weights, below=(), above=())
    self.assertIsNotNone(result_empty)
    result_empty = _assert_type(result_empty, pd.Series)
    self.assertEqual(result_empty.to_list(), [])
    self.assertEqual(result_empty.index.to_list(), [])

    # Dict mode should preserve None for omitted threshold groups.
    result_dict_only_above = prop_above_and_below(
        weights,
        below=None,
        above=(1, 2),
        return_as_series=False,
    )
    self.assertIsNotNone(result_dict_only_above)
    # NOTE(review): _assert_type is called without an expected type here,
    # unlike the series-mode check above — confirm its default accepts the
    # dict-shaped result.
    result_dict_only_above = _assert_type(result_dict_only_above)
    self.assertIsNone(result_dict_only_above["below"])
    self.assertEqual(
        result_dict_only_above["above"].index.to_list(),
        ["prop(w >= 1)", "prop(w >= 2)"],
    )

    result_dict_only_below = prop_above_and_below(
        weights,
        below=(0.5, 1),
        above=None,
        return_as_series=False,
    )
    self.assertIsNotNone(result_dict_only_below)
    result_dict_only_below = _assert_type(result_dict_only_below)
    self.assertEqual(
        result_dict_only_below["below"].index.to_list(),
        ["prop(w < 0.5)", "prop(w < 1)"],
    )
    self.assertIsNone(result_dict_only_below["above"])

    # If both groups are omitted, function should return None in all modes.
    # Series mode (the default) is asserted explicitly so the "all modes"
    # claim is actually covered, not just the dict mode.
    self.assertIsNone(
        prop_above_and_below(
            weights,
            below=None,
            above=None,
        )
    )
    self.assertIsNone(
        prop_above_and_below(
            weights,
            below=None,
            above=None,
            return_as_series=False,
        )
    )

def test_weights_diagnostics_accept_list_and_ndarray_input(self) -> None:
"""Ensure diagnostics are equivalent across list/ndarray/Series inputs."""
from balance.stats_and_plots.weights_stats import (
Expand Down
Loading