Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/evidently/legacy/calculations/stattests/jensenshannon.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def _jensenshannon(
feature_type: ColumnType,
threshold: float,
n_bins: int = 30,
base: Optional[float] = None,
base: Optional[float] = 2.0,
) -> Tuple[float, bool]:
"""Compute the Jensen-Shannon distance between two arrays
Args:
Expand All @@ -50,7 +50,8 @@ def _jensenshannon(
feature_type: feature type
threshold: all values above this threshold mean data drift
n_bins: number of bins
base: the base of the logarithm used to compute the output
base: the base of the logarithm used to compute the output.
Defaults to 2.0 so that the distance is bounded between 0 and 1.
Returns:
jensenshannon: calculated Jensen-Shannon distance
test_result: whether the drift is detected
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ def __call__(self, data: SpartStatTestData, feature_type: ColumnType, threshold:
ref = data.reference_data
column_name = data.column_name
reference_percents, current_percents = get_binned_data(ref, cur, column_name, feature_type, False)
jensenshannon_value = distance.jensenshannon(reference_percents, current_percents, base=None)
jensenshannon_value = distance.jensenshannon(reference_percents, current_percents, base=2.0)
return jensenshannon_value, jensenshannon_value >= threshold