Skip to content

Commit 373411e

Browse files
dshkol and claude committed
refactor: Move imports to top and centralize API URLs
This commit makes two code organization improvements:

1. Move all inline imports to the top of their respective files
   - Follows Python convention (PEP 8)
   - Improves static analysis and IDE support
   - Makes dependencies visible at a glance

2. Centralize API URLs in settings.py
   - CENSUSMAPPER_API_URL for api/v1 endpoints
   - CENSUSMAPPER_DATA_URL for data_sets endpoints
   - Single source of truth eliminates drift risk

Files modified:
- settings.py: Add URL constants
- core.py: Move json, io, hashlib imports; use URL constant
- vectors.py: Move io, warnings imports; use URL constant
- regions.py: Move io import; use URL constant
- datasets.py: Move re import; use URL constant
- hierarchy.py: Move re import to top
- intersect_geometry.py: Reorganize imports; use URL constant
- resilience.py: Move atexit import to top

All existing tests pass unchanged, confirming no behavioral impact.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 758b18b commit 373411e

8 files changed

Lines changed: 53 additions & 58 deletions

File tree

pycancensus/core.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,17 @@
22
Core functionality for accessing Canadian Census data through the CensusMapper API.
33
"""
44

5-
import os
6-
import requests
5+
import hashlib
6+
import io
7+
import json
8+
import warnings
9+
from typing import Dict, List, Optional, Union
10+
711
import pandas as pd
812
import geopandas as gpd
9-
from typing import Dict, List, Optional, Union
10-
import warnings
13+
import requests
1114

12-
from .settings import get_api_key, get_cache_path
15+
from .settings import get_api_key, get_cache_path, CENSUSMAPPER_API_URL
1316
from .cache import get_cached_data, cache_data
1417
from .utils import validate_dataset, validate_level, process_regions
1518
from .progress import show_request_preview, create_progress_for_request
@@ -122,10 +125,9 @@ def get_census(
122125
return cached_data
123126

124127
# Build API request exactly like the R package
125-
base_url = "https://censusmapper.ca/api/v1/"
128+
base_url = f"{CENSUSMAPPER_API_URL}/"
126129

127130
# Format parameters exactly like the R package
128-
import json
129131

130132
# Convert regions to JSON format exactly like R package: jsonlite::toJSON(lapply(regions, as.character))
131133
# R package ALWAYS puts region values in arrays - this was the key missing piece!
@@ -303,8 +305,6 @@ def get_census(
303305

304306
def _generate_cache_key(dataset, regions, vectors, level, geo_format):
305307
"""Generate a cache key for the given parameters."""
306-
import hashlib
307-
308308
# Create a string representation of the parameters
309309
params_str = f"{dataset}_{regions}_{vectors}_{level}_{geo_format}"
310310

@@ -355,15 +355,13 @@ def _extract_vector_metadata(df, vectors, labels):
355355

356356
# Store metadata as dict to avoid pandas attrs comparison bug
357357
# Convert DataFrame to list of dicts for storage
358-
df.attrs["census_vectors"] = metadata_df.to_dict(orient='records')
358+
df.attrs["census_vectors"] = metadata_df.to_dict(orient="records")
359359

360360
return df
361361

362362

363363
def _process_csv_response(csv_text, vectors, labels):
364364
"""Process CSV API response into a pandas DataFrame."""
365-
import io
366-
367365
# Read all columns as strings initially (like R package)
368366
df = pd.read_csv(io.StringIO(csv_text), dtype=str, encoding="utf-8")
369367

pycancensus/datasets.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22
Functions for working with census datasets.
33
"""
44

5-
import requests
5+
import re
6+
from typing import List, Optional
7+
68
import pandas as pd
7-
from typing import Optional
9+
import requests
810

9-
from .settings import get_api_key
11+
from .settings import get_api_key, CENSUSMAPPER_API_URL
1012
from .cache import get_cached_data, cache_data
1113

1214

@@ -58,14 +60,15 @@ def list_census_datasets(
5860
return cached_data
5961

6062
# Query API
61-
base_url = "https://censusmapper.ca/api/v1"
6263
params = {"api_key": api_key, "format": "json"}
6364

6465
try:
6566
if not quiet:
6667
print("Querying CensusMapper API for available datasets...")
6768

68-
response = requests.get(f"{base_url}/list_datasets", params=params, timeout=30)
69+
response = requests.get(
70+
f"{CENSUSMAPPER_API_URL}/list_datasets", params=params, timeout=30
71+
)
6972
response.raise_for_status()
7073

7174
data = response.json()
@@ -161,8 +164,6 @@ def dataset_attribution(datasets):
161164
>>> for attr in attributions:
162165
... print(attr)
163166
"""
164-
import re
165-
166167
# Get all datasets info
167168
datasets_df = list_census_datasets(quiet=True)
168169

pycancensus/hierarchy.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
"""Vector hierarchy navigation functions for pycancensus."""
22

3+
import re
4+
import warnings
5+
from typing import Dict, List, Optional, Union
6+
37
import pandas as pd
48
import requests
5-
from typing import List, Dict, Optional, Union
6-
import warnings
79

810
from .settings import get_api_key
911
from .utils import validate_dataset
@@ -242,8 +244,6 @@ def find_census_vectors(
242244
)
243245
elif search_type == "regex":
244246
# Regex search
245-
import re
246-
247247
try:
248248
pattern = re.compile(query, re.IGNORECASE)
249249
mask = all_vectors["label"].str.contains(pattern, na=False) | all_vectors[
@@ -287,8 +287,6 @@ def _infer_parent_vector(vector: str, all_vectors: pd.DataFrame) -> Optional[Dic
287287
This is a fallback when explicit parent_vector column is not available.
288288
"""
289289
# Extract the numeric part of the vector ID
290-
import re
291-
292290
match = re.match(r"(v_[A-Z0-9]+_)(\d+)", vector)
293291
if not match:
294292
return None
@@ -321,8 +319,6 @@ def _infer_child_vectors(vector: str, all_vectors: pd.DataFrame) -> List[Dict]:
321319
322320
This is a fallback when explicit parent_vector column is not available.
323321
"""
324-
import re
325-
326322
match = re.match(r"(v_[A-Z0-9]+_)(\d+)", vector)
327323
if not match:
328324
return []

pycancensus/intersect_geometry.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,17 @@
22
Functions for finding census regions that intersect with geometries.
33
"""
44

5-
import json
65
import hashlib
7-
import requests
6+
import json
7+
import warnings
8+
from typing import Any, Dict, List, Optional, Union
9+
810
import geopandas as gpd
9-
from shapely.geometry import Point, Polygon, MultiPolygon
11+
import requests
12+
from shapely.geometry import MultiPolygon, Point, Polygon
1013
from shapely.ops import unary_union
11-
from typing import Union, List, Dict, Any, Optional
12-
import warnings
1314

14-
from .settings import get_api_key
15+
from .settings import get_api_key, CENSUSMAPPER_API_URL
1516
from .cache import get_cached_data, cache_data
1617
from .utils import validate_dataset
1718

@@ -176,8 +177,6 @@ def _query_intersecting_geometries_api(
176177
dataset: str, level: str, geojson_str: str, area: float, api_key: str, quiet: bool
177178
) -> Any:
178179
"""Query the CensusMapper API for intersecting geometries."""
179-
base_url = "https://censusmapper.ca/api/v1/"
180-
181180
# Prepare request data
182181
request_data = {
183182
"dataset": dataset,
@@ -192,7 +191,7 @@ def _query_intersecting_geometries_api(
192191

193192
try:
194193
response = requests.post(
195-
f"{base_url}intersecting_geographies",
194+
f"{CENSUSMAPPER_API_URL}/intersecting_geographies",
196195
json=request_data,
197196
headers={"Accept": "application/json"},
198197
timeout=60,

pycancensus/regions.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22
Functions for working with census regions.
33
"""
44

5-
import requests
6-
import pandas as pd
5+
import io
76
from typing import Optional
87

9-
from .settings import get_api_key
8+
import pandas as pd
9+
import requests
10+
11+
from .settings import get_api_key, CENSUSMAPPER_DATA_URL
1012
from .utils import validate_dataset
1113
from .cache import get_cached_data, cache_data
1214

@@ -69,8 +71,7 @@ def list_census_regions(
6971
return cached_data
7072

7173
# Query API using the correct endpoint (same as R cancensus)
72-
# R cancensus uses: https://censusmapper.ca/data_sets/{dataset}/place_names.csv
73-
url = f"https://censusmapper.ca/data_sets/{dataset}/place_names.csv"
74+
url = f"{CENSUSMAPPER_DATA_URL}/{dataset}/place_names.csv"
7475

7576
try:
7677
if not quiet:
@@ -81,8 +82,6 @@ def list_census_regions(
8182
response.raise_for_status()
8283

8384
# Parse CSV response
84-
import io
85-
8685
df = pd.read_csv(io.StringIO(response.text))
8786

8887
# Map column names to match expected output format

pycancensus/resilience.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
"""Production-grade resilience features for pycancensus API calls."""
22

3-
import time
4-
import requests
3+
import atexit
54
import logging
6-
from typing import Dict, Any, Optional, Callable
7-
from functools import wraps
85
import random
6+
import time
7+
from functools import wraps
8+
from typing import Any, Callable, Dict, Optional
9+
10+
import requests
911

1012
logger = logging.getLogger(__name__)
1113

@@ -329,6 +331,4 @@ def close_session():
329331

330332

331333
# Cleanup on module exit
332-
import atexit
333-
334334
atexit.register(close_session)

pycancensus/settings.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@
1212
_API_KEY = None
1313
_CACHE_PATH = None
1414

15+
# API configuration
16+
CENSUSMAPPER_API_URL = "https://censusmapper.ca/api/v1"
17+
CENSUSMAPPER_DATA_URL = "https://censusmapper.ca/data_sets"
18+
1519

1620
# Config file location
1721
def _get_config_path() -> Path:

pycancensus/vectors.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@
22
Functions for working with census vectors (variables).
33
"""
44

5-
import requests
6-
import pandas as pd
5+
import io
6+
import warnings
77
from typing import Optional
88

9-
from .settings import get_api_key
9+
import pandas as pd
10+
import requests
11+
12+
from .settings import get_api_key, CENSUSMAPPER_API_URL
1013
from .utils import validate_dataset
1114
from .cache import get_cached_data, cache_data
1215

@@ -45,8 +48,6 @@ def label_vectors(x):
4548
>>> labels = pc.label_vectors(data)
4649
>>> print(labels)
4750
"""
48-
import warnings
49-
5051
if hasattr(x, "attrs") and "census_vectors" in x.attrs:
5152
# Convert stored dict back to DataFrame
5253
metadata = x.attrs["census_vectors"]
@@ -125,7 +126,6 @@ def list_census_vectors(
125126
return cached_data
126127

127128
# Query API using the correct endpoint (discovered via diagnostics)
128-
base_url = "https://censusmapper.ca/api/v1"
129129
params = {"dataset": dataset, "api_key": api_key}
130130

131131
try:
@@ -134,13 +134,11 @@ def list_census_vectors(
134134

135135
# Use the working CSV endpoint instead of the non-working JSON endpoint
136136
response = requests.get(
137-
f"{base_url}/vector_info.csv", params=params, timeout=30
137+
f"{CENSUSMAPPER_API_URL}/vector_info.csv", params=params, timeout=30
138138
)
139139
response.raise_for_status()
140140

141141
# Parse CSV response
142-
import io
143-
144142
df = pd.read_csv(io.StringIO(response.text))
145143

146144
# Rename columns to match expected format

0 commit comments

Comments
 (0)