This repository was archived by the owner on May 12, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 44
Expand file tree
/
Copy pathzip_code_lookup.py
More file actions
115 lines (95 loc) · 3.94 KB
/
Copy pathzip_code_lookup.py
File metadata and controls
115 lines (95 loc) · 3.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import requests
import csv
import sys
import os
import pandas as pd
import re
from io import StringIO
# Output file path for enriched ZIP code data.
# This is a relative path, so it is resolved against the current working
# directory at the time the file is opened, not against this script's location.
OUTPUT_CSV = "../../prime-router/metadata/tables/local/zip-code-data.csv"
# URL to fetch national county FIPS lookup data from the U.S. Census Bureau
# (comma-separated text; see fetch_census_county_lookup for the column names).
COUNTY_URL = "https://www2.census.gov/geo/docs/reference/codes/files/national_county.txt"
def fetch_census_county_lookup():
    """
    Download the national county lookup file from the Census Bureau.

    The response body is parsed as comma-separated text with every field
    kept as a string, so FIPS codes retain their leading zeros.

    Returns:
        pandas.DataFrame with the columns ``state_abbr``, ``state_fips``,
        ``county_fips``, ``county`` and ``FIPS Class Code``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection failure or timeout.
        ValueError: if the downloaded data does not have exactly five columns.
    """
    print("Downloading Census county lookup..")
    # NOTE(security): certificate verification is disabled, as in the original
    # code, so the download is not protected against tampering. Confirm whether
    # this is still required (e.g. for a TLS-intercepting proxy) and remove
    # verify=False if not.
    response = requests.get(COUNTY_URL, verify=False, timeout=60)
    # Fail loudly on 4xx/5xx instead of parsing an error page as CSV.
    response.raise_for_status()
    # The Census file is published without a header row. header=None keeps the
    # first record as data; the default (header=0) would consume it as column
    # names and silently drop one county from the lookup.
    df = pd.read_csv(StringIO(response.text), delimiter=",", dtype=str, header=None)
    # Assigning the names afterwards raises ValueError on a column-count
    # mismatch, which guards against an unexpected file layout.
    df.columns = ["state_abbr", "state_fips", "county_fips", "county", "FIPS Class Code"]
    print(df)
    return df
def get_zip_info(zip_code, headers):
    """
    Query the HUD USPS API for information about a ZIP code.

    Args:
        zip_code: ZIP code string inserted into the query URL.
        headers: HTTP headers sent with the request (the caller supplies the
            ``Authorization`` bearer token here).

    Returns:
        pandas.DataFrame with one row per usable result entry and the columns
        ``zip_code``, ``city``, ``county_fips``, ``state_fips`` and ``state``.
        The DataFrame is empty (but still has those columns) when the API
        returns no results or the request fails, so callers can always test
        ``.empty``. If an error occurs part-way through, the rows built so far
        are returned.
    """
    columns = ["zip_code", "city", "county_fips", "state_fips", "state"]
    url = f"https://www.huduser.gov/hudapi/public/usps?type=2&query={zip_code}"
    rows = []
    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        # "or []" also covers a payload where "results" is present but null.
        results = response.json().get("data", {}).get("results", []) or []
        if not results:
            print(f"No data for ZIP {zip_code}")
        for item in results:
            geoid = item.get("geoid") or ""
            # geoid is sliced as state FIPS (2 chars) + county FIPS (3 chars);
            # skip entries that cannot supply both instead of aborting the
            # remaining results for this ZIP.
            if len(geoid) < 5:
                print(f"Skipping result without usable geoid for ZIP {zip_code}")
                continue
            rows.append({
                "zip_code": zip_code,
                "city": (item.get("city") or "").title(),
                "county_fips": geoid[2:5],  # Characters 3-5 are the county FIPS
                "state_fips": geoid[:2],    # Characters 1-2 are the state FIPS
                "state": item.get("state"),
            })
    except Exception as e:
        # Deliberately broad: one failing ZIP must not stop a batch run.
        print(f"Error processing ZIP {zip_code}: {e}")
    return pd.DataFrame(rows, columns=columns)
def main():
    """
    Enrich a list of ZIP codes with county data and append it to OUTPUT_CSV.

    Command line: ``<input_file> <USPS_API_token>``. The input file holds one
    ZIP code per line; all non-digit characters on a line are discarded. Each
    ZIP is looked up through get_zip_info, joined to the Census county table
    on state and county FIPS codes, and the de-duplicated rows are appended
    to OUTPUT_CSV.

    Exits with status 1 when arguments are missing or the input file does
    not exist.
    """
    # Validate command-line arguments before doing any network work
    if len(sys.argv) < 3:
        script_name = os.path.basename(sys.argv[0]) if sys.argv and sys.argv[0] else "zip_code_lookup.py"
        print(f"Usage: python {script_name} <input_file> <USPS_API_token>")
        sys.exit(1)
    input_file = sys.argv[1]
    token = sys.argv[2]

    # Ensure the input file exists
    if not os.path.exists(input_file):
        print(f"File not found: {input_file}")
        sys.exit(1)

    # Download county lookup data from the Census
    gaz = fetch_census_county_lookup()

    # Set up authorization header for HUD API
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {token}"
    }

    output_columns = ['state_fips', 'state', 'state_abbr', 'zip_code', 'county', "city"]
    # The output is opened in append mode, so only emit the header when the
    # file is new or empty; otherwise every run would leave another header
    # line in the middle of the data.
    needs_header = not os.path.exists(OUTPUT_CSV) or os.path.getsize(OUTPUT_CSV) == 0

    # Open the input file (list of ZIP codes) and output CSV for writing
    with open(input_file, "r") as infile, open(OUTPUT_CSV, "a", newline="") as outfile:
        writer = csv.writer(outfile, delimiter=',')
        if needs_header:
            writer.writerow(output_columns)

        # Process each ZIP code in the input file
        for line in infile:
            zip_code = re.sub(r'\D', '', line)  # Remove all non-digit characters
            print("zip_code " + zip_code)
            # After stripping, only an empty string can fail isdigit()
            if not zip_code.isdigit():
                print("NOT zip_code")
                continue

            print(f"Looking up ZIP: {zip_code}")
            info = get_zip_info(zip_code, headers)
            # Accept None as well as an empty frame for "nothing found"
            if info is None or info.empty:
                print(f"Skipped ZIP {zip_code}")
                continue

            # Merge ZIP info with Census county data using FIPS codes
            merged = pd.merge(gaz, info, on=["state_fips", "county_fips"], how="inner")
            # Select and deduplicate the relevant columns
            new_rows = merged[output_columns].drop_duplicates()
            print(merged.values)
            writer.writerows(new_rows.values.tolist())
# Entry point: run main() only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()