Skip to content

Commit e3d38ac

Browse files
dshkol and claude committed
style: Format test files with black
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent a896921 commit e3d38ac

9 files changed

Lines changed: 642 additions & 537 deletions

tests/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
# Test package for pycancensus
1+
# Test package for pycancensus

tests/integration/test_cancensus_compatibility.py

Lines changed: 81 additions & 85 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919

2020
class TestCancensusCompatibility:
2121
"""Test pycancensus compatibility with cancensus R library results."""
22-
22+
2323
@pytest.fixture(autouse=True)
2424
def setup_test_env(self):
2525
"""Setup test environment."""
@@ -29,63 +29,64 @@ def setup_test_env(self):
2929
pc.set_api_key(test_api_key)
3030
else:
3131
pytest.skip("CANCENSUS_API_KEY not set")
32-
32+
3333
# Setup temporary cache directory
3434
self.temp_cache = tempfile.mkdtemp()
3535
pc.set_cache_path(self.temp_cache)
36-
36+
3737
yield
38-
38+
3939
# Cleanup
4040
if os.path.exists(self.temp_cache):
4141
shutil.rmtree(self.temp_cache)
42-
42+
4343
def test_get_census_basic_functionality(self):
4444
"""Test basic get_census functionality matches expected patterns."""
4545
# This test uses a small, fast query
4646
data = pc.get_census(
4747
dataset="CA21",
4848
regions={"PR": "59"}, # British Columbia
4949
vectors=["v_CA21_1"], # Total population
50-
level="PR"
50+
level="PR",
5151
)
52-
52+
5353
# Verify structure matches expected cancensus format
5454
assert isinstance(data, pd.DataFrame)
5555
assert "GeoUID" in data.columns
5656
assert "Type" in data.columns
5757
assert "Region Name" in data.columns
5858
assert "v_CA21_1" in data.columns
59-
59+
6060
# Verify data types are appropriate
6161
assert pd.api.types.is_integer_dtype(data["v_CA21_1"])
6262
assert isinstance(data["GeoUID"].iloc[0], str)
63-
63+
6464
# Verify basic data integrity
6565
assert len(data) > 0
6666
assert not data["v_CA21_1"].isna().all()
67-
67+
6868
def test_get_census_with_geometry(self):
6969
"""Test geometry retrieval functionality."""
7070
data = pc.get_census(
7171
dataset="CA21",
7272
regions={"CSD": "5915022"}, # Vancouver
7373
vectors=["v_CA21_1"],
7474
level="CSD",
75-
geo_format="geopandas"
75+
geo_format="geopandas",
7676
)
77-
77+
7878
# Verify geometry is included
79-
assert hasattr(data, 'geometry')
79+
assert hasattr(data, "geometry")
8080
assert not data.geometry.isna().all()
81-
81+
8282
# Verify it's a proper GeoDataFrame
8383
try:
8484
import geopandas as gpd
85+
8586
assert isinstance(data, gpd.GeoDataFrame)
8687
except ImportError:
8788
pytest.skip("geopandas not available")
88-
89+
8990
def test_list_functions_compatibility(self):
9091
"""Test list functions return expected formats."""
9192
# Test datasets
@@ -94,34 +95,34 @@ def test_list_functions_compatibility(self):
9495
assert "dataset" in datasets.columns
9596
assert "description" in datasets.columns
9697
assert len(datasets) > 0
97-
98+
9899
# Test vectors
99100
vectors = pc.list_census_vectors("CA21")
100101
assert isinstance(vectors, pd.DataFrame)
101102
assert "vector" in vectors.columns
102103
assert "type" in vectors.columns
103104
assert "label" in vectors.columns
104105
assert len(vectors) > 0
105-
106+
106107
# Test regions
107108
regions = pc.list_census_regions("CA21")
108109
assert isinstance(regions, pd.DataFrame)
109110
assert "region" in regions.columns
110111
assert "name" in regions.columns
111112
assert len(regions) > 0
112-
113+
113114
def test_search_functions(self):
114115
"""Test search functionality."""
115116
# Search vectors
116117
vectors = pc.search_census_vectors("CA21", "population")
117118
assert isinstance(vectors, pd.DataFrame)
118119
assert len(vectors) > 0
119-
120+
120121
# Search regions
121122
regions = pc.search_census_regions("CA21", "Vancouver")
122123
assert isinstance(regions, pd.DataFrame)
123124
assert len(regions) > 0
124-
125+
125126
def test_error_handling(self):
126127
"""Test error handling matches expected patterns."""
127128
# Invalid dataset
@@ -130,103 +131,101 @@ def test_error_handling(self):
130131
dataset="INVALID",
131132
regions={"PR": "59"},
132133
vectors=["v_CA21_1"],
133-
level="PR"
134+
level="PR",
134135
)
135-
136+
136137
# Invalid region
137138
with pytest.raises((ValueError, Exception)):
138139
pc.get_census(
139140
dataset="CA21",
140141
regions={"PR": "99999"}, # Non-existent region
141142
vectors=["v_CA21_1"],
142-
level="PR"
143+
level="PR",
143144
)
144-
145+
145146
# Invalid vector
146147
with pytest.raises((ValueError, Exception)):
147148
pc.get_census(
148149
dataset="CA21",
149150
regions={"PR": "59"},
150151
vectors=["v_INVALID_999"],
151-
level="PR"
152+
level="PR",
152153
)
153-
154+
154155
def test_caching_functionality(self):
155156
"""Test caching works as expected."""
156157
# Clear cache first
157158
pc.clear_cache()
158-
159+
159160
# Make request
160161
data1 = pc.get_census(
161-
dataset="CA21",
162-
regions={"PR": "59"},
163-
vectors=["v_CA21_1"],
164-
level="PR"
162+
dataset="CA21", regions={"PR": "59"}, vectors=["v_CA21_1"], level="PR"
165163
)
166-
164+
167165
# Check cache was created
168166
cache_list = pc.list_cache()
169167
assert len(cache_list) > 0
170-
168+
171169
# Make same request again
172170
data2 = pc.get_census(
173-
dataset="CA21",
174-
regions={"PR": "59"},
175-
vectors=["v_CA21_1"],
176-
level="PR"
171+
dataset="CA21", regions={"PR": "59"}, vectors=["v_CA21_1"], level="PR"
177172
)
178-
173+
179174
# Data should be identical
180175
pd.testing.assert_frame_equal(data1, data2)
181-
176+
182177
def test_data_type_consistency(self):
183178
"""Test that data types are consistent with cancensus expectations."""
184179
data = pc.get_census(
185180
dataset="CA21",
186181
regions={"CSD": "5915022"}, # Vancouver
187182
vectors=["v_CA21_1", "v_CA21_8", "v_CA21_434"], # Mix of data types
188-
level="CSD"
183+
level="CSD",
189184
)
190-
185+
191186
# Population counts should be integers or floats
192187
assert pd.api.types.is_numeric_dtype(data["v_CA21_1"])
193-
188+
194189
# Geographic identifiers should be strings
195-
assert pd.api.types.is_string_dtype(data["GeoUID"]) or pd.api.types.is_object_dtype(data["GeoUID"])
196-
190+
assert pd.api.types.is_string_dtype(
191+
data["GeoUID"]
192+
) or pd.api.types.is_object_dtype(data["GeoUID"])
193+
197194
# Check for proper handling of NA values
198195
# Should not have string representations of NA
199196
for col in data.columns:
200-
if data[col].dtype == 'object' or pd.api.types.is_string_dtype(data[col]):
201-
assert not any(data[col].astype(str).str.contains(r'^(x|X|F|\.\.\.)$', na=False))
202-
197+
if data[col].dtype == "object" or pd.api.types.is_string_dtype(data[col]):
198+
assert not any(
199+
data[col].astype(str).str.contains(r"^(x|X|F|\.\.\.)$", na=False)
200+
)
201+
203202
def test_multiple_regions_functionality(self):
204203
"""Test handling of multiple regions."""
205204
data = pc.get_census(
206205
dataset="CA21",
207206
regions={"CSD": ["5915022", "3520005"]}, # Vancouver, Toronto
208207
vectors=["v_CA21_1"],
209-
level="CSD"
208+
level="CSD",
210209
)
211-
210+
212211
assert len(data) == 2 # Should have data for both cities
213212
assert "5915022" in data["GeoUID"].values
214213
assert "3520005" in data["GeoUID"].values
215-
214+
216215
def test_hierarchical_region_retrieval(self):
217216
"""Test retrieving data at different hierarchical levels."""
218217
# Get CT data for Vancouver CMA
219218
data = pc.get_census(
220219
dataset="CA21",
221220
regions={"CMA": "59933"}, # Vancouver CMA
222221
vectors=["v_CA21_1"],
223-
level="CT"
222+
level="CT",
224223
)
225-
224+
226225
# Should have multiple census tracts
227226
assert len(data) > 100 # Vancouver CMA has many CTs
228227
assert all(data["GeoUID"].str.len() == 10) # CT GeoUIDs are 10 digits
229-
228+
230229
@pytest.mark.slow
231230
def test_large_dataset_handling(self):
232231
"""Test handling of larger datasets."""
@@ -235,9 +234,9 @@ def test_large_dataset_handling(self):
235234
dataset="CA21",
236235
regions={"PR": "59"}, # BC
237236
vectors=["v_CA21_1", "v_CA21_2", "v_CA21_3"],
238-
level="DA" # Dissemination Areas - lots of records
237+
level="DA", # Dissemination Areas - lots of records
239238
)
240-
239+
241240
# Should handle large datasets without issues
242241
assert len(data) > 10000 # BC has many DAs
243242
assert not data.empty
@@ -246,7 +245,7 @@ def test_large_dataset_handling(self):
246245

247246
class TestDataQuality:
248247
"""Test data quality and consistency."""
249-
248+
250249
@pytest.fixture(autouse=True)
251250
def setup_test_env(self):
252251
"""Setup test environment."""
@@ -255,62 +254,59 @@ def setup_test_env(self):
255254
pc.set_api_key(test_api_key)
256255
else:
257256
pytest.skip("CANCENSUS_API_KEY not set")
258-
257+
259258
def test_data_consistency_across_levels(self):
260259
"""Test that data is consistent across geographic levels."""
261260
# Get CMA-level data
262261
cma_data = pc.get_census(
263262
dataset="CA21",
264263
regions={"CMA": "59933"}, # Vancouver CMA
265264
vectors=["v_CA21_1"], # Total population
266-
level="CMA"
265+
level="CMA",
267266
)
268-
267+
269268
# Get CSD-level data for same region
270269
csd_data = pc.get_census(
271-
dataset="CA21",
272-
regions={"CMA": "59933"},
273-
vectors=["v_CA21_1"],
274-
level="CSD"
270+
dataset="CA21", regions={"CMA": "59933"}, vectors=["v_CA21_1"], level="CSD"
275271
)
276-
272+
277273
# Sum of CSDs should approximately equal CMA total
278274
cma_pop = cma_data["v_CA21_1"].iloc[0]
279275
csd_pop_sum = csd_data["v_CA21_1"].sum()
280-
276+
281277
# Allow for small rounding differences
282-
assert abs(cma_pop - csd_pop_sum) / cma_pop < 0.01, (
283-
f"Population inconsistency: CMA={cma_pop}, CSD sum={csd_pop_sum}"
284-
)
285-
278+
assert (
279+
abs(cma_pop - csd_pop_sum) / cma_pop < 0.01
280+
), f"Population inconsistency: CMA={cma_pop}, CSD sum={csd_pop_sum}"
281+
286282
def test_geographic_identifier_format(self):
287283
"""Test that geographic identifiers follow expected formats."""
288284
test_cases = [
289-
("PR", "CA21", 2), # Province: 2 digits
290-
("CMA", "CA21", 3), # CMA: 3 digits
291-
("CD", "CA21", 4), # CD: 4 digits
292-
("CSD", "CA21", 7), # CSD: 7 digits
293-
("CT", "CA21", 10), # CT: 10 digits
285+
("PR", "CA21", 2), # Province: 2 digits
286+
("CMA", "CA21", 3), # CMA: 3 digits
287+
("CD", "CA21", 4), # CD: 4 digits
288+
("CSD", "CA21", 7), # CSD: 7 digits
289+
("CT", "CA21", 10), # CT: 10 digits
294290
]
295-
291+
296292
for level, dataset, expected_length in test_cases:
297293
data = pc.get_census(
298294
dataset=dataset,
299295
regions={"PR": "59"}, # BC
300296
vectors=["v_CA21_1"],
301-
level=level
297+
level=level,
302298
)
303-
299+
304300
# Check GeoUID format
305-
assert all(data["GeoUID"].str.len() == expected_length), (
306-
f"GeoUID length mismatch for {level}: expected {expected_length}"
307-
)
308-
301+
assert all(
302+
data["GeoUID"].str.len() == expected_length
303+
), f"GeoUID length mismatch for {level}: expected {expected_length}"
304+
309305
# Check GeoUID is numeric
310-
assert all(data["GeoUID"].str.isdigit()), (
311-
f"Non-numeric GeoUID found for {level}"
312-
)
306+
assert all(
307+
data["GeoUID"].str.isdigit()
308+
), f"Non-numeric GeoUID found for {level}"
313309

314310

315311
if __name__ == "__main__":
316-
pytest.main([__file__, "-v"])
312+
pytest.main([__file__, "-v"])

0 commit comments

Comments (0)