refactor: Move imports to top and centralize API URLs

dshkol · claude · dshkol · commit 373411eaf8e2 · 2026-01-14T22:45:10.000-08:00
This commit makes two code organization improvements:

1. Move all inline imports to the top of their respective files
   - Follows Python convention (PEP 8)
   - Improves static analysis and IDE support
   - Makes dependencies visible at a glance

2. Centralize API URLs in settings.py
   - CENSUSMAPPER_API_URL for api/v1 endpoints
   - CENSUSMAPPER_DATA_URL for data_sets endpoints
   - Single source of truth eliminates drift risk

Files modified:
- settings.py: Add URL constants
- core.py: Move json, io, hashlib imports; drop the os import; normalize quotes in to_dict(orient="records"); use URL constant
- vectors.py: Move io, warnings imports; use URL constant
- regions.py: Move io import; use URL constant
- datasets.py: Move re import; use URL constant
- hierarchy.py: Move re import to top
- intersect_geometry.py: Reorganize imports; use URL constant
- resilience.py: Move atexit import to top

All existing tests pass unchanged, indicating no behavioral impact.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/pycancensus/core.py b/pycancensus/core.py
@@ -2,14 +2,17 @@
 Core functionality for accessing Canadian Census data through the CensusMapper API.
 """
 
-import os
-import requests
+import hashlib
+import io
+import json
+import warnings
+from typing import Dict, List, Optional, Union
+
 import pandas as pd
 import geopandas as gpd
-from typing import Dict, List, Optional, Union
-import warnings
+import requests
 
-from .settings import get_api_key, get_cache_path
+from .settings import get_api_key, get_cache_path, CENSUSMAPPER_API_URL
 from .cache import get_cached_data, cache_data
 from .utils import validate_dataset, validate_level, process_regions
 from .progress import show_request_preview, create_progress_for_request
@@ -122,10 +125,9 @@ def get_census(
             return cached_data
 
     # Build API request exactly like the R package
-    base_url = "https://censusmapper.ca/api/v1/"
+    base_url = f"{CENSUSMAPPER_API_URL}/"
 
     # Format parameters exactly like the R package
-    import json
 
     # Convert regions to JSON format exactly like R package: jsonlite::toJSON(lapply(regions, as.character))
     # R package ALWAYS puts region values in arrays - this was the key missing piece!
@@ -303,8 +305,6 @@ def get_census(
 
 def _generate_cache_key(dataset, regions, vectors, level, geo_format):
     """Generate a cache key for the given parameters."""
-    import hashlib
-
     # Create a string representation of the parameters
     params_str = f"{dataset}_{regions}_{vectors}_{level}_{geo_format}"
 
@@ -355,15 +355,13 @@ def _extract_vector_metadata(df, vectors, labels):
 
         # Store metadata as dict to avoid pandas attrs comparison bug
         # Convert DataFrame to list of dicts for storage
-        df.attrs["census_vectors"] = metadata_df.to_dict(orient='records')
+        df.attrs["census_vectors"] = metadata_df.to_dict(orient="records")
 
     return df
 
 
 def _process_csv_response(csv_text, vectors, labels):
     """Process CSV API response into a pandas DataFrame."""
-    import io
-
     # Read all columns as strings initially (like R package)
     df = pd.read_csv(io.StringIO(csv_text), dtype=str, encoding="utf-8")
 
diff --git a/pycancensus/datasets.py b/pycancensus/datasets.py
@@ -2,11 +2,13 @@
 Functions for working with census datasets.
 """
 
-import requests
+import re
+from typing import List, Optional
+
 import pandas as pd
-from typing import Optional
+import requests
 
-from .settings import get_api_key
+from .settings import get_api_key, CENSUSMAPPER_API_URL
 from .cache import get_cached_data, cache_data
 
 
@@ -58,14 +60,15 @@ def list_census_datasets(
             return cached_data
 
     # Query API
-    base_url = "https://censusmapper.ca/api/v1"
     params = {"api_key": api_key, "format": "json"}
 
     try:
         if not quiet:
             print("Querying CensusMapper API for available datasets...")
 
-        response = requests.get(f"{base_url}/list_datasets", params=params, timeout=30)
+        response = requests.get(
+            f"{CENSUSMAPPER_API_URL}/list_datasets", params=params, timeout=30
+        )
         response.raise_for_status()
 
         data = response.json()
@@ -161,8 +164,6 @@ def dataset_attribution(datasets):
     >>> for attr in attributions:
     ...     print(attr)
     """
-    import re
-
     # Get all datasets info
     datasets_df = list_census_datasets(quiet=True)
 
diff --git a/pycancensus/hierarchy.py b/pycancensus/hierarchy.py
@@ -1,9 +1,11 @@
 """Vector hierarchy navigation functions for pycancensus."""
 
+import re
+import warnings
+from typing import Dict, List, Optional, Union
+
 import pandas as pd
 import requests
-from typing import List, Dict, Optional, Union
-import warnings
 
 from .settings import get_api_key
 from .utils import validate_dataset
@@ -242,8 +244,6 @@ def find_census_vectors(
         )
     elif search_type == "regex":
         # Regex search
-        import re
-
         try:
             pattern = re.compile(query, re.IGNORECASE)
             mask = all_vectors["label"].str.contains(pattern, na=False) | all_vectors[
@@ -287,8 +287,6 @@ def _infer_parent_vector(vector: str, all_vectors: pd.DataFrame) -> Optional[Dic
     This is a fallback when explicit parent_vector column is not available.
     """
     # Extract the numeric part of the vector ID
-    import re
-
     match = re.match(r"(v_[A-Z0-9]+_)(\d+)", vector)
     if not match:
         return None
@@ -321,8 +319,6 @@ def _infer_child_vectors(vector: str, all_vectors: pd.DataFrame) -> List[Dict]:
 
     This is a fallback when explicit parent_vector column is not available.
     """
-    import re
-
     match = re.match(r"(v_[A-Z0-9]+_)(\d+)", vector)
     if not match:
         return []
diff --git a/pycancensus/intersect_geometry.py b/pycancensus/intersect_geometry.py
@@ -2,16 +2,17 @@
 Functions for finding census regions that intersect with geometries.
 """
 
-import json
 import hashlib
-import requests
+import json
+import warnings
+from typing import Any, Dict, List, Optional, Union
+
 import geopandas as gpd
-from shapely.geometry import Point, Polygon, MultiPolygon
+import requests
+from shapely.geometry import MultiPolygon, Point, Polygon
 from shapely.ops import unary_union
-from typing import Union, List, Dict, Any, Optional
-import warnings
 
-from .settings import get_api_key
+from .settings import get_api_key, CENSUSMAPPER_API_URL
 from .cache import get_cached_data, cache_data
 from .utils import validate_dataset
 
@@ -176,8 +177,6 @@ def _query_intersecting_geometries_api(
     dataset: str, level: str, geojson_str: str, area: float, api_key: str, quiet: bool
 ) -> Any:
     """Query the CensusMapper API for intersecting geometries."""
-    base_url = "https://censusmapper.ca/api/v1/"
-
     # Prepare request data
     request_data = {
         "dataset": dataset,
@@ -192,7 +191,7 @@ def _query_intersecting_geometries_api(
 
     try:
         response = requests.post(
-            f"{base_url}intersecting_geographies",
+            f"{CENSUSMAPPER_API_URL}/intersecting_geographies",
             json=request_data,
             headers={"Accept": "application/json"},
             timeout=60,
diff --git a/pycancensus/regions.py b/pycancensus/regions.py
@@ -2,11 +2,13 @@
 Functions for working with census regions.
 """
 
-import requests
-import pandas as pd
+import io
 from typing import Optional
 
-from .settings import get_api_key
+import pandas as pd
+import requests
+
+from .settings import get_api_key, CENSUSMAPPER_DATA_URL
 from .utils import validate_dataset
 from .cache import get_cached_data, cache_data
 
@@ -69,8 +71,7 @@ def list_census_regions(
             return cached_data
 
     # Query API using the correct endpoint (same as R cancensus)
-    # R cancensus uses: https://censusmapper.ca/data_sets/{dataset}/place_names.csv
-    url = f"https://censusmapper.ca/data_sets/{dataset}/place_names.csv"
+    url = f"{CENSUSMAPPER_DATA_URL}/{dataset}/place_names.csv"
 
     try:
         if not quiet:
@@ -81,8 +82,6 @@ def list_census_regions(
         response.raise_for_status()
 
         # Parse CSV response
-        import io
-
         df = pd.read_csv(io.StringIO(response.text))
 
         # Map column names to match expected output format
diff --git a/pycancensus/resilience.py b/pycancensus/resilience.py
@@ -1,11 +1,13 @@
 """Production-grade resilience features for pycancensus API calls."""
 
-import time
-import requests
+import atexit
 import logging
-from typing import Dict, Any, Optional, Callable
-from functools import wraps
 import random
+import time
+from functools import wraps
+from typing import Any, Callable, Dict, Optional
+
+import requests
 
 logger = logging.getLogger(__name__)
 
@@ -329,6 +331,4 @@ def close_session():
 
 
 # Cleanup on module exit
-import atexit
-
 atexit.register(close_session)
diff --git a/pycancensus/settings.py b/pycancensus/settings.py
@@ -12,6 +12,10 @@
 _API_KEY = None
 _CACHE_PATH = None
 
+# API configuration
+CENSUSMAPPER_API_URL = "https://censusmapper.ca/api/v1"
+CENSUSMAPPER_DATA_URL = "https://censusmapper.ca/data_sets"
+
 
 # Config file location
 def _get_config_path() -> Path:
diff --git a/pycancensus/vectors.py b/pycancensus/vectors.py
@@ -2,11 +2,14 @@
 Functions for working with census vectors (variables).
 """
 
-import requests
-import pandas as pd
+import io
+import warnings
 from typing import Optional
 
-from .settings import get_api_key
+import pandas as pd
+import requests
+
+from .settings import get_api_key, CENSUSMAPPER_API_URL
 from .utils import validate_dataset
 from .cache import get_cached_data, cache_data
 
@@ -45,8 +48,6 @@ def label_vectors(x):
     >>> labels = pc.label_vectors(data)
     >>> print(labels)
     """
-    import warnings
-
     if hasattr(x, "attrs") and "census_vectors" in x.attrs:
         # Convert stored dict back to DataFrame
         metadata = x.attrs["census_vectors"]
@@ -125,7 +126,6 @@ def list_census_vectors(
             return cached_data
 
     # Query API using the correct endpoint (discovered via diagnostics)
-    base_url = "https://censusmapper.ca/api/v1"
     params = {"dataset": dataset, "api_key": api_key}
 
     try:
@@ -134,13 +134,11 @@ def list_census_vectors(
 
         # Use the working CSV endpoint instead of the non-working JSON endpoint
         response = requests.get(
-            f"{base_url}/vector_info.csv", params=params, timeout=30
+            f"{CENSUSMAPPER_API_URL}/vector_info.csv", params=params, timeout=30
         )
         response.raise_for_status()
 
         # Parse CSV response
-        import io
-
         df = pd.read_csv(io.StringIO(response.text))
 
         # Rename columns to match expected format