Skip to content

Commit edc5f80

Browse files
committed
Remove unneeded jsondiff output parsing
Why these changes are being introduced:
Now that the DeepDiff library provides an explicit list of modified "root" fields -- i.e. TIMDEX fields -- as a built-in property, we no longer need any additional logic to parse the diff and surface which fields were modified.

How this addresses that need:
* This removes the helper function generate_field_diff_bools_for_record() and any tests related to it.

Side effects of this change:
* None

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/TIMX-373
1 parent f181958 commit edc5f80

File tree

2 files changed

+0
-48
lines changed

2 files changed

+0
-48
lines changed

abdiff/core/calc_ab_metrics.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -104,32 +104,6 @@ def create_record_diff_matrix_dataset(
104104
return metrics_dataset
105105

106106

107-
def generate_field_diff_bools_for_record(diff_data: dict) -> dict:
108-
"""Function to return dictionary of fields that have a diff.
109-
110-
Determining if a field had a diff is as straight-forward as looking to see if it shows
111-
up in the parsed diff JSON. The fields may be at the root of the diff, or they could
112-
be nested under "$insert" or "$delete" nodes in the diff.
113-
114-
If a field from the original A/B records is not in the diff at all, then it did not
115-
have changes, and therefore will not receive a 1 here to indicate a diff.
116-
"""
117-
fields_with_diffs = {}
118-
119-
for key in diff_data:
120-
121-
# identify modified fields nested in $insert or $delete blocks
122-
if key in ("$insert", "$delete"):
123-
for subfield in diff_data[key]:
124-
fields_with_diffs[subfield] = 1
125-
126-
# identify modified fields at root of diff
127-
else:
128-
fields_with_diffs[key] = 1
129-
130-
return fields_with_diffs
131-
132-
133107
def calculate_metrics_data(field_matrix_parquet: str) -> dict:
134108
"""Create a dictionary of metrics via DuckDB queries."""
135109
summary: dict = {}

tests/test_calc_ab_metrics.py

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,32 +12,10 @@
1212
calc_ab_metrics,
1313
calculate_metrics_data,
1414
create_record_diff_matrix_dataset,
15-
generate_field_diff_bools_for_record,
1615
)
1716
from abdiff.core.utils import load_dataset, read_run_json
1817

1918

20-
def test_record_field_diffs_no_diffs():
21-
diff_data = {}
22-
assert generate_field_diff_bools_for_record(diff_data) == {}
23-
24-
25-
def test_record_field_diffs_one_diff():
26-
diff_data = {"color": "green"}
27-
assert generate_field_diff_bools_for_record(diff_data) == {"color": 1}
28-
29-
30-
def test_record_field_diffs_diff_from_inserts_and_deletes_counted_only_once():
31-
diff_data = {
32-
"$insert": {"fruits": "strawberry"},
33-
"$delete": {"vegetables": "onion"},
34-
}
35-
assert generate_field_diff_bools_for_record(diff_data) == {
36-
"fruits": 1,
37-
"vegetables": 1,
38-
}
39-
40-
4119
def test_sparse_matrix_dataset_created_success(run_directory, diffs_dataset_directory):
4220
diff_matrix_dataset_filepath = create_record_diff_matrix_dataset(
4321
run_directory, diffs_dataset_directory

0 commit comments

Comments
 (0)