-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_dataset_attribution.py
More file actions
120 lines (93 loc) · 3.76 KB
/
test_dataset_attribution.py
File metadata and controls
120 lines (93 loc) · 3.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
"""Test dataset_attribution function against R cancensus."""
import os
import sys
import pytest
from pathlib import Path
# Add pycancensus to path
sys.path.insert(0, str(Path(__file__).parent.parent))
import pycancensus as pc
# Import R bridge for cross-validation (optional - only available locally)
try:
from tests.cross_validation.utils.r_python_bridge import RPythonBridge
R_AVAILABLE = True
except ImportError:
R_AVAILABLE = False
RPythonBridge = None
class TestDatasetAttribution:
    """Tests for ``pc.dataset_attribution``.

    The first four tests exercise the Python implementation on its own.
    ``test_r_equivalence`` additionally drives the R ``cancensus`` package
    through ``RPythonBridge`` and is skipped unless both the bridge imported
    successfully and the ``RUN_R_TESTS`` environment variable is set.
    """

    def test_single_dataset(self):
        """A single dataset yields a non-empty list of strings naming its year."""
        result = pc.dataset_attribution(['CA16'])

        assert isinstance(result, list)
        assert len(result) > 0
        assert all(isinstance(attr, str) for attr in result)
        assert any('2016' in attr for attr in result)

    def test_multiple_datasets_same_type(self):
        """Two census datasets produce one attribution line naming both years."""
        result = pc.dataset_attribution(['CA06', 'CA16'])

        assert isinstance(result, list)
        assert len(result) > 0

        # Should have merged years: at least one line mentions 2006 and 2016.
        has_merged = any(
            '2006' in attr and '2016' in attr for attr in result
        )
        assert has_merged, "Expected merged attribution with both years"

    def test_invalid_dataset(self):
        """An unknown dataset code raises ``ValueError``."""
        with pytest.raises(ValueError, match="No valid datasets found"):
            pc.dataset_attribution(['INVALID'])

    def test_case_insensitive(self):
        """Lower- and upper-case dataset codes give identical results."""
        result1 = pc.dataset_attribution(['ca16'])
        result2 = pc.dataset_attribution(['CA16'])

        assert result1 == result2

    # Require the bridge import to have succeeded as well as the opt-in
    # environment variable; otherwise RPythonBridge is None and calling it
    # would fail with a TypeError instead of skipping cleanly.
    @pytest.mark.skipif(
        not (R_AVAILABLE and os.environ.get("RUN_R_TESTS")),
        reason="R tests not enabled or R bridge unavailable",
    )
    def test_r_equivalence(self):
        """Run the same queries through R cancensus and the Python port.

        NOTE(review): the R output is executed but not compared against the
        Python result; only the Python side is asserted on. The shape of
        ``run_r_code(..., return_type="raw")`` is not visible from this file,
        so a real comparison still needs to be added - TODO confirm format.
        """
        bridge = RPythonBridge()

        try:
            # Test single dataset
            bridge.run_r_code("""
                library(cancensus)
                dataset_attribution('CA16')
            """, return_type="raw")

            py_result = pc.dataset_attribution(['CA16'])

            # Both should return similar attribution text
            assert len(py_result) > 0
            assert isinstance(py_result[0], str)

            # Test multiple datasets
            bridge.run_r_code("""
                library(cancensus)
                dataset_attribution(c('CA06', 'CA16'))
            """, return_type="raw")

            py_result_multi = pc.dataset_attribution(['CA06', 'CA16'])

            # Should have merged attributions
            assert len(py_result_multi) > 0
        finally:
            # Always release the bridge, even when an assertion fails.
            bridge.cleanup()
if __name__ == "__main__":
    # Manual runner: executes the tests directly, without pytest collection.
    test = TestDatasetAttribution()

    # (announcement, test method, confirmation) for each basic test, in order.
    basic_checks = [
        (
            "Testing single dataset...",
            test.test_single_dataset,
            "✅ Single dataset test passed",
        ),
        (
            "\nTesting multiple datasets...",
            test.test_multiple_datasets_same_type,
            "✅ Multiple datasets test passed",
        ),
        (
            "\nTesting invalid dataset...",
            test.test_invalid_dataset,
            "✅ Invalid dataset test passed",
        ),
        (
            "\nTesting case insensitivity...",
            test.test_case_insensitive,
            "✅ Case insensitivity test passed",
        ),
    ]
    for announce, check, confirm in basic_checks:
        print(announce)
        check()
        print(confirm)

    # Run R equivalence test if requested. Calling the method directly does
    # not go through pytest's skip handling, so the bridge availability is
    # checked here too; without it RPythonBridge is None and the call fails.
    if os.environ.get("RUN_R_TESTS"):
        if R_AVAILABLE:
            print("\nTesting R equivalence...")
            test.test_r_equivalence()
            print("✅ R equivalence test passed")
        else:
            print("\n⚠️ Skipping R equivalence test: R bridge unavailable")

    print("\n🎉 All tests passed!")