Skip to content

Commit 74c05b7

Browse files
authored
improve test coverage for data_version_diff function tests (#56)
1 parent 6a252f5 commit 74c05b7

3 files changed

Lines changed: 27 additions & 10 deletions

File tree

README.md

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -70,18 +70,19 @@ from csvplus.load_optimized_csv import load_optimized_csv
7070
from csvplus.data_correction import resolve_string_value
7171
from csvplus.generate_report import summary_report
7272

73-
df_v1 = load_optimized_csv("large_dataset.csv")
74-
df_v2 = load_optimized_csv("large_dataset2.csv")
75-
76-
diff = data_version_diff(df_v1, df_v2)
77-
# Inspect the returned dictionary
78-
print(diff["columns_added"])
79-
print(diff["row_count_change"])
73+
# --- test data type change in csvplus.data_version_diff ---
74+
df1 = pd.DataFrame({"a": [1,2,3]})
75+
df2 = pd.DataFrame({"a": ["1","2","3"]})
76+
diff = data_version_diff(df1, df2)
77+
print(diff)
8078
# Optionally display a formatted summary
8179
display_data_version_diff(diff)
8280

83-
resolve_string_value(df_v1, "company_name", ["Google", "Microsoft"], 80)
84-
summary_report(df_v1)
81+
# --- csvplus.data_correction ---
82+
df_v1 = load_optimized_csv("large_dataset.csv")
83+
df_v2 = load_optimized_csv("large_dataset2.csv")
84+
resolve_string_value(df_v1, "company_name", ["Google", "Microsoft"], 80)
85+
summary_report(df_v1)
8586
```
8687

8788
### Running Tests

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ dependencies:
66
- rapidfuzz
77
- pytest
88
- numpy
9+
- scipy
910
- pip
1011
- pip:
1112
- faker

tests/unit/test_data_version_diff.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def test_empty_dataframes():
130130
"""
131131
Ensure the function handles empty DataFrames without error.
132132
"""
133-
133+
134134
df_old = pd.DataFrame()
135135
df_new = pd.DataFrame()
136136

@@ -140,3 +140,18 @@ def test_empty_dataframes():
140140
assert result["row_count_change"]["new_row_count"] == 0
141141
assert result["columns_added"] == []
142142
assert result["columns_removed"] == []
143+
144+
def test_display_data_version_diff(capsys):
145+
"""
146+
Test that the display function runs without error and prints the summary header.
147+
"""
148+
df_old = pd.DataFrame({"a": [1, None]})
149+
df_new = pd.DataFrame({"a": [2, None]})
150+
result = data_version_diff(df_old, df_new)
151+
152+
# Call the display function
153+
from csvplus.data_version_diff import display_data_version_diff
154+
display_data_version_diff(result)
155+
156+
captured = capsys.readouterr()
157+
assert "DATA VERSION CHANGE SUMMARY" in captured.out

0 commit comments

Comments
 (0)