Skip to content

Commit b79bc5b

Browse files
dr5hn and claude committed
feat(postcodes/VI): import 6 USVI ZIPs (#1039)
Mirrors VI-mapped ZIPs from US.json into the separate USVI country namespace, each FK'd to the nearest USVI city by centroid distance.

Why
---
USVI uses US ZIPs in the 008xx range, but CSC represents USVI as its own country (iso2=VI, country_id=242) with 3 island states (Saint Thomas / Saint John / Saint Croix). Without this mirror, postcode lookups for VI return empty.

Coverage
--------
- 6 ZIPs / 100% state FK
- All 3 USVI states represented (ST: 00802, SC: 4 codes, SJ: 00830)

State FK strategy
-----------------
Centroid-distance matching against cities/VI.json (20 USVI localities), then FK to that city's state_id (one of 3 islands).

License
-------
Original source: US Census ZCTA (CC-0). Each row: source: "us-census-via-vi-mirror"

Validation
----------
- python3 -m py_compile passes
- 100% regex match (^008\d{2}(?:-\d{4})?$)
- 100% state_id valid + state.country_id == 242 + state_code agrees
- No auto-managed fields (id, created_at, updated_at, flag)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 085bfd5 commit b79bc5b

2 files changed

Lines changed: 275 additions & 0 deletions

File tree

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
#!/usr/bin/env python3
2+
"""US Virgin Islands -> contributions/postcodes/VI.json importer for issue #1039.
3+
4+
Source data
5+
-----------
6+
USVI uses US ZIP codes in the 008xx range. The US Census ZCTA file
7+
(already shipped to contributions/postcodes/US.json under
8+
state_code='VI') contains 6 USVI-mapped postcodes with WGS-84
9+
lat/lng centroids.
10+
11+
CSC represents USVI as its own country (iso2=VI, country_id=242)
12+
with 3 states (Saint Thomas / Saint John / Saint Croix). This
13+
importer mirrors the same codes into VI.json under the VI country
14+
namespace and FK'd to the nearest USVI city by centroid distance.
15+
16+
What this script does
17+
---------------------
18+
1. Reads existing US.json filtered to state_code='VI' (6 codes).
19+
2. Loads contributions/cities/VI.json (20 USVI cities).
20+
3. For each VI ZIP, finds the nearest USVI city by haversine
21+
distance, uses that city's state_id (which corresponds to
22+
one of the 3 USVI islands).
23+
4. Writes contributions/postcodes/VI.json with country_id=242.
24+
25+
License & attribution
26+
---------------------
27+
- Original source: US Census ZCTA Gazetteer (CC-0, public domain)
28+
- Each row: ``source: "us-census-via-vi-mirror"``
29+
30+
Usage
31+
-----
32+
python3 bin/scripts/sync/import_us_virgin_islands_postcodes.py
33+
"""
34+
35+
from __future__ import annotations
36+
37+
import argparse
38+
import json
39+
import math
40+
import re
41+
import sys
42+
from pathlib import Path
43+
from typing import Dict, List
44+
45+
46+
def haversine_km(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Return the great-circle distance in kilometres between two points.

    Args:
        lat1: Latitude of the first point, in decimal degrees.
        lon1: Longitude of the first point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.

    Returns:
        The haversine distance on a sphere of radius 6371 km.
    """
    R = 6371.0
    p1 = math.radians(lat1)
    p2 = math.radians(lat2)
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = (
        math.sin(dlat / 2) ** 2
        + math.cos(p1) * math.cos(p2) * math.sin(dlon / 2) ** 2
    )
    # Rounding can leave ``a`` a hair outside [0, 1] (e.g. sin^2 + cos^2 for
    # antipodal points), which would make asin() raise a math domain error.
    a = min(1.0, max(0.0, a))
    return 2 * R * math.asin(math.sqrt(a))
57+
58+
59+
def _load_json(path: Path):
    """Parse the UTF-8 JSON document at ``path``, closing the file afterwards."""
    with path.open(encoding="utf-8") as f:
        return json.load(f)


def _dedup_key(row: dict) -> tuple:
    """Identity of a postcode row: its code plus the lower-cased locality name."""
    return (row["code"], (row.get("locality_name") or "").lower())


def _sort_key(row: dict) -> tuple:
    """Output ordering; a missing or null locality sorts as the empty string."""
    return (row["code"], row.get("locality_name") or "")


def _nearest_city(lat: float, lon: float, cities_with_geo: list):
    """Return the city dict closest to (lat, lon), or None if the list is empty.

    ``cities_with_geo`` holds ``(lat, lon, city)`` tuples; ties keep the first
    city encountered.
    """
    nearest = None
    best_d = float("inf")
    for clat, clon, city in cities_with_geo:
        d = haversine_km(lat, lon, clat, clon)
        if d < best_d:
            best_d = d
            nearest = city
    return nearest


def main() -> int:
    """Mirror the VI-tagged rows of US.json into contributions/postcodes/VI.json.

    Each row whose ``state_code`` is ``"VI"`` is validated against the VI
    country's ``postal_code_regex``, matched to the nearest city from
    contributions/cities/VI.json, and emitted with that city's state (when the
    state belongs to the VI country). Rows are de-duplicated on
    (code, lower-cased locality) and merged into any existing VI.json.

    With ``--dry-run`` the summary is printed but nothing is written.

    Returns:
        0 on success (including dry runs), 2 when countries.json has no entry
        with ``iso2 == "VI"``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    # The script lives three directories below the project root
    # (bin/scripts/sync/), hence parents[3].
    project_root = Path(__file__).resolve().parents[3]

    countries = _load_json(project_root / "contributions/countries/countries.json")
    vi_country = next((c for c in countries if c.get("iso2") == "VI"), None)
    if vi_country is None:
        print("ERROR: VI not in countries.json", file=sys.stderr)
        return 2
    # Fall back to match-anything when the country carries no regex.
    regex = re.compile(vi_country.get("postal_code_regex") or ".*")

    us_data = _load_json(project_root / "contributions/postcodes/US.json")
    vi_zips = [r for r in us_data if r.get("state_code") == "VI"]
    print(f"VI-mapped ZIPs in US.json: {len(vi_zips)}")

    vi_cities = _load_json(project_root / "contributions/cities/VI.json")
    vi_cities_with_geo = []
    for c in vi_cities:
        try:
            lat = float(c.get("latitude") or 0)
            lon = float(c.get("longitude") or 0)
        except (ValueError, TypeError):
            continue
        # (0, 0) is treated as "no coordinates" and excluded from matching.
        if lat or lon:
            vi_cities_with_geo.append((lat, lon, c))
    print(f"VI cities with geo: {len(vi_cities_with_geo)}")

    states = _load_json(project_root / "contributions/states/states.json")
    vi_states = {s["id"]: s for s in states if s.get("country_id") == vi_country["id"]}
    print(
        f"Country: USVI (id={vi_country['id']}); "
        f"states indexed: {len(vi_states)}"
    )

    seen: set = set()
    records: List[dict] = []
    skipped_bad_regex = 0
    skipped_no_state = 0
    matched_state = 0

    for r in vi_zips:
        code = r.get("code")
        # A missing or non-string code cannot satisfy the regex either.
        if not isinstance(code, str) or not regex.match(code):
            skipped_bad_regex += 1
            continue

        try:
            lat = float(r["latitude"])
            lon = float(r["longitude"])
        except (ValueError, TypeError, KeyError):
            lat = lon = None

        nearest_city = None
        if lat is not None and lon is not None:
            nearest_city = _nearest_city(lat, lon, vi_cities_with_geo)

        state = None
        locality = None
        if nearest_city:
            state = vi_states.get(nearest_city.get("state_id"))
            locality = nearest_city.get("name")

        # De-duplicate before counting so the tallies describe emitted rows
        # only and the percentage below cannot exceed 100.
        key = (code, (locality or "").lower())
        if key in seen:
            continue
        seen.add(key)

        # NOTE: rows without a state are still emitted (minus the state
        # fields); the counter only feeds the summary line.
        if state is None:
            skipped_no_state += 1
        else:
            matched_state += 1

        record: Dict[str, object] = {
            "code": code,
            "country_id": int(vi_country["id"]),
            "country_code": "VI",
        }
        if state is not None:
            record["state_id"] = int(state["id"])
            record["state_code"] = state.get("iso2")
        if locality:
            record["locality_name"] = locality
        if lat is not None and lon is not None:
            record["latitude"] = f"{lat:.6f}"
            record["longitude"] = f"{lon:.6f}"
        record["type"] = "full"
        record["source"] = "us-census-via-vi-mirror"
        records.append(record)

    print(f"Skipped (regex fail): {skipped_bad_regex:,}")
    print(f"Skipped (no state FK): {skipped_no_state:,}")
    print(f"Records emitted: {len(records):,}")
    pct = matched_state * 100 // max(1, len(records))
    print(f" with state: {matched_state:,} ({pct}%)")

    if args.dry_run:
        return 0

    target = project_root / "contributions/postcodes/VI.json"
    target.parent.mkdir(parents=True, exist_ok=True)
    if target.exists():
        # Keep every existing row; append only rows whose key is new.
        existing = _load_json(target)
        existing_seen = {_dedup_key(r) for r in existing}
        merged = list(existing)
        for r in records:
            key = _dedup_key(r)
            if key not in existing_seen:
                merged.append(r)
                existing_seen.add(key)
    else:
        merged = list(records)
    merged.sort(key=_sort_key)

    with target.open("w", encoding="utf-8") as f:
        json.dump(merged, f, ensure_ascii=False, indent=2)
        f.write("\n")
    size_kb = target.stat().st_size / 1024
    print(
        f"\n[OK] Wrote {target.relative_to(project_root)} "
        f"({len(merged):,} rows, {size_kb:.0f} KB)"
    )
    return 0
198+
199+
200+
if __name__ == "__main__":
201+
raise SystemExit(main())

contributions/postcodes/VI.json

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
[
2+
{
3+
"code": "00802",
4+
"country_id": 242,
5+
"country_code": "VI",
6+
"state_id": 5072,
7+
"state_code": "ST",
8+
"locality_name": "Charlotte Amalie",
9+
"latitude": "18.342939",
10+
"longitude": "-64.925102",
11+
"type": "full",
12+
"source": "us-census-via-vi-mirror"
13+
},
14+
{
15+
"code": "00820",
16+
"country_id": 242,
17+
"country_code": "VI",
18+
"state_id": 5074,
19+
"state_code": "SC",
20+
"locality_name": "Christiansted",
21+
"latitude": "17.736627",
22+
"longitude": "-64.708215",
23+
"type": "full",
24+
"source": "us-census-via-vi-mirror"
25+
},
26+
{
27+
"code": "00830",
28+
"country_id": 242,
29+
"country_code": "VI",
30+
"state_id": 5073,
31+
"state_code": "SJ",
32+
"locality_name": "Coral Bay",
33+
"latitude": "18.338559",
34+
"longitude": "-64.736530",
35+
"type": "full",
36+
"source": "us-census-via-vi-mirror"
37+
},
38+
{
39+
"code": "00840",
40+
"country_id": 242,
41+
"country_code": "VI",
42+
"state_id": 5074,
43+
"state_code": "SC",
44+
"locality_name": "Northcentral",
45+
"latitude": "17.724710",
46+
"longitude": "-64.848522",
47+
"type": "full",
48+
"source": "us-census-via-vi-mirror"
49+
},
50+
{
51+
"code": "00850",
52+
"country_id": 242,
53+
"country_code": "VI",
54+
"state_id": 5074,
55+
"state_code": "SC",
56+
"locality_name": "Southcentral",
57+
"latitude": "17.726822",
58+
"longitude": "-64.792245",
59+
"type": "full",
60+
"source": "us-census-via-vi-mirror"
61+
},
62+
{
63+
"code": "00851",
64+
"country_id": 242,
65+
"country_code": "VI",
66+
"state_id": 5074,
67+
"state_code": "SC",
68+
"locality_name": "Sion Farm",
69+
"latitude": "17.747525",
70+
"longitude": "-64.787439",
71+
"type": "full",
72+
"source": "us-census-via-vi-mirror"
73+
}
74+
]

0 commit comments

Comments
 (0)