Skip to content
This repository was archived by the owner on May 12, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
a4b23a5
Bump io.swagger:swagger-annotations in /prime-router
dependabot[bot] Jun 8, 2025
a9a15d7
Bump org.json:json from 20250107 to 20250517 in /prime-router
dependabot[bot] Jun 8, 2025
eb37fa4
Bump org.springdoc:springdoc-openapi-starter-webflux-ui in /auth
dependabot[bot] Jun 8, 2025
4542812
Bump org.postgresql:postgresql from 42.7.4 to 42.7.7 in /prime-router
dependabot[bot] Jun 12, 2025
ae0ecc0
Added SOP for handling missing zip codes
victor-chaparro Jun 18, 2025
d07a2d6
Added SOP for handling missing zip codes
victor-chaparro Jun 18, 2025
c991142
Bump com.anyascii:anyascii from 0.3.2 to 0.3.3 in /prime-router
dependabot[bot] Jul 1, 2025
1465779
Bump the bouncycastle group in /prime-router with 3 updates
dependabot[bot] Sep 1, 2025
cb1fb7d
Merge branch 'main' into dependabot/gradle/auth/org.springdoc-springd…
wcutshall Sep 12, 2025
6d104d7
Merge pull request #18097 from CDCgov/dependabot/gradle/auth/org.spri…
wcutshall Sep 12, 2025
de4931d
Merge branch 'main' into dependabot/gradle/prime-router/io.swagger-sw…
wcutshall Sep 12, 2025
892a041
Merge pull request #18134 from CDCgov/dependabot/gradle/prime-router/…
wcutshall Sep 12, 2025
18995e7
Merge branch 'main' into dependabot/gradle/prime-router/org.json-json…
wcutshall Sep 12, 2025
054e2f7
Merge pull request #18135 from CDCgov/dependabot/gradle/prime-router/…
wcutshall Sep 12, 2025
5d23ce7
Merge branch 'main' into dependabot/gradle/prime-router/bouncycastle-…
wcutshall Sep 12, 2025
b748be1
Merge pull request #18212 from CDCgov/dependabot/gradle/prime-router/…
wcutshall Sep 12, 2025
be91860
Merge branch 'main' into dependabot/gradle/prime-router/org.postgresq…
wcutshall Sep 12, 2025
dc8ced3
Merge branch 'main' into dependabot/gradle/prime-router/com.anyascii-…
wcutshall Sep 12, 2025
d1274e9
Merge pull request #18276 from CDCgov/dependabot/gradle/prime-router/…
wcutshall Sep 12, 2025
54cc68c
Merge branch 'main' into dependabot/gradle/prime-router/org.postgresq…
wcutshall Sep 15, 2025
dfe3bca
Merge pull request #18242 from CDCgov/dependabot/gradle/prime-router/…
wcutshall Sep 15, 2025
a61716d
Merge branch 'main' into om/zipcodelookup
wcutshall Sep 15, 2025
3add823
Merge pull request #18253 from CDCgov/om/zipcodelookup
wcutshall Sep 15, 2025
d60eb91
Merge branch 'production' into deployment/2025-09-16
wcutshall Sep 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion auth/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ dependencies {
runtimeOnly("com.okta.sdk:okta-sdk-impl:22.0.1")

// Swagger
implementation("org.springdoc:springdoc-openapi-starter-webflux-ui:2.8.6")
implementation("org.springdoc:springdoc-openapi-starter-webflux-ui:2.8.8")

testImplementation("org.springframework.boot:spring-boot-starter-test")
testImplementation("org.springframework.security:spring-security-test")
Expand Down
12 changes: 6 additions & 6 deletions prime-router/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -925,7 +925,7 @@ dependencies {
implementation("org.thymeleaf:thymeleaf:3.1.3.RELEASE")
implementation("com.sendgrid:sendgrid-java:4.10.3")
implementation("com.okta.jwt:okta-jwt-verifier:0.5.7")
implementation("org.json:json:20250107")
implementation("org.json:json:20250517")
// DO NOT INCREMENT SSHJ to a newer version without first thoroughly testing it locally.
implementation("com.hierynomus:sshj:0.39.0")
implementation("com.jcraft:jsch:0.1.55")
Expand All @@ -942,9 +942,9 @@ dependencies {
implementation("org.commonmark:commonmark:0.24.0")
implementation("com.google.guava:guava:33.4.8-jre")
implementation("com.helger.as2:as2-lib:5.1.5")
implementation("org.bouncycastle:bcprov-jdk15to18:1.80")
implementation("org.bouncycastle:bcprov-jdk18on:1.80")
implementation("org.bouncycastle:bcmail-jdk15to18:1.80")
implementation("org.bouncycastle:bcprov-jdk15to18:1.81")
implementation("org.bouncycastle:bcprov-jdk18on:1.81")
implementation("org.bouncycastle:bcmail-jdk15to18:1.81")

implementation("commons-net:commons-net:3.11.1")
implementation("com.cronutils:cron-utils:9.2.1")
Expand All @@ -963,9 +963,9 @@ dependencies {
implementation("org.apache.poi:poi-ooxml:5.4.1")
implementation("org.apache.commons:commons-compress:1.27.1")
implementation("commons-io:commons-io:2.19.0")
implementation("com.anyascii:anyascii:0.3.2")
implementation("com.anyascii:anyascii:0.3.3")
// https://mvnrepository.com/artifact/io.swagger/swagger-annotations
implementation("io.swagger:swagger-annotations:1.6.15")
implementation("io.swagger:swagger-annotations:1.6.16")
implementation("io.swagger.core.v3:swagger-jaxrs2:2.2.30")
// https://mvnrepository.com/artifact/javax.ws.rs/javax.ws.rs-api
implementation("javax.ws.rs:javax.ws.rs-api:2.1.1")
Expand Down
126 changes: 126 additions & 0 deletions util/zipcode/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@

# **Standard Operating Procedure (SOP): Updating Missing ZIP Codes in the Lookup Table**

## **Purpose**

This document describes the procedure for identifying, retrieving, and updating missing ZIP code entries in the `zip-code-data.csv` file and applying those updates to the ReportStream ZIP code lookup table.

---

## **Prerequisites**

- A valid HUD API token to retrieve ZIP code data

- Obtain from: [HUD API Registration](https://www.huduser.gov/hudapi/public/register?comingfrom=1)

- Select: **USPS ZIP CODE CROSSWALK** as the dataset

- Production OKTA access to run the `update-lookup-table` command

- Access to edit and commit changes to the repository

- Python installed


---

## **Background**

Missing ZIP codes in the lookup table can lead to transformation failures and routing issues in ReportStream. This SOP ensures all ZIP codes have complete data including state, FIPS code, county, and city.

---

## **Step-by-Step Instructions**

### **Step 1: Identify Missing ZIP Codes**

1. Open the [UP Message Monitoring Dashboard](https://portal.azure.com/#@cdc.onmicrosoft.com/dashboard/arm/subscriptions/7d1e3999-6577-4cd5-b296-f518e5c8e677/resourcegroups/prime-data-hub-test/providers/microsoft.portal/dashboards/9a35cfea-cebd-4c9e-9a63-32c5d510d528).

2. Review the **[Prod] Zip Code Lookup Failures (30 Days)** tile for recent failures.

3. (Optional) Run the following Kusto query in Azure Monitor to extract ZIPs directly:

```kusto
traces
| where timestamp > ago(30d)
and message has "getStateFromZipCode()"
and isnotempty(operation_Name)
| extend cd_json = parse_json(customDimensions)
| project ZipCode = split(message, ":")[-1]
| distinct tostring(ZipCode)
```

4. Export the results to a CSV file for input into the script.

---

### **Step 2: Lookup ZIP Code Details**

#### One-time Setup

```bash
python3 -m venv env
source env/bin/activate
pip install requests pandas
```

#### Run Lookup Script

```bash
python3 zip_code_lookup.py <input_file.csv> <HUD_API_token>
```
- This script uses the HUD USPS Crosswalk API to retrieve details (state, city, county name, and FIPS codes) for each missing ZIP code.
- The script appends the retrieved rows to your local copy of [`zip-code-data.csv`](../../prime-router/metadata/tables/local/zip-code-data.csv).

---

### **Step 3: Update the Lookup Table in Staging**

1. Log in to the Prime CLI in staging:

```bash
./prime login --env staging
```

2. Update the lookup table with the new data:

```bash
./prime lookuptables create -n zip-code-data -i metadata/tables/local/zip-code-data.csv -a --env staging
```

3. Confirm the changes and verify the output for any issues.

---

### **Step 4: Promote to Production**

Once validated in staging:

1. Log in to production:

```bash
./prime login --env prod
```

2. Repeat the table update in production:

```bash
./prime lookuptables create -n zip-code-data -i metadata/tables/local/zip-code-data.csv -a --env prod
```

3. Verify completion and success.


---

### **Step 5: Commit and Push Changes to Git**

1. Create a pull request to commit the updated lookup table to version control:

```bash
git add metadata/tables/local/zip-code-data.csv
git commit -m "Add missing ZIP codes to lookup table"
git push origin <your-branch>
```

2. Follow standard team procedures for code review and merge.
115 changes: 115 additions & 0 deletions util/zipcode/zip_code_lookup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import requests
import csv
import sys
import os
import pandas as pd
import re
from io import StringIO

# Directory that contains this script. Paths below are anchored here so the
# script behaves the same regardless of the caller's current working directory.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# Output file path for enriched ZIP code data (the lookup table CSV that lives
# under prime-router, two directory levels above this script).
OUTPUT_CSV = os.path.normpath(
    os.path.join(
        _SCRIPT_DIR,
        "..",
        "..",
        "prime-router",
        "metadata",
        "tables",
        "local",
        "zip-code-data.csv",
    )
)

# URL to fetch national county FIPS lookup data from the U.S. Census Bureau
COUNTY_URL = "https://www2.census.gov/geo/docs/reference/codes/files/national_county.txt"

def fetch_census_county_lookup(verify=False, timeout=30):
    """
    Download the national county lookup file from the Census Bureau.

    Args:
        verify: Passed through to ``requests.get``. Defaults to ``False`` to
            preserve the script's existing behavior.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A pandas DataFrame (all values as strings) with the columns
        ``state_abbr``, ``state_fips``, ``county_fips``, ``county`` and
        ``FIPS Class Code``.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    print("Downloading Census county lookup..")
    # NOTE(review): TLS verification is disabled by default, so the downloaded
    # data is not authenticated. Presumably this works around a certificate
    # chain problem with the Census host -- confirm, and prefer verify=True
    # (or a CA bundle via REQUESTS_CA_BUNDLE) if the workaround is not needed.
    response = requests.get(COUNTY_URL, verify=verify, timeout=timeout)
    # Fail loudly instead of trying to parse an HTML error page as CSV.
    response.raise_for_status()

    # header=None: the first line is treated as data rather than being
    # swallowed as a header row, so the first county in the file is kept.
    # NOTE(review): assumes national_county.txt ships without a header line --
    # confirm against the downloaded file.
    df = pd.read_csv(
        StringIO(response.text),
        delimiter=",",
        dtype=str,  # keep leading zeros in FIPS codes
        header=None,
        names=["state_abbr", "state_fips", "county_fips", "county", "FIPS Class Code"],
    )
    print(f"Loaded {len(df)} county records")
    return df

def get_zip_info(zip_code, headers):
    """
    Query the HUD USPS API for information about a ZIP code.

    Args:
        zip_code: ZIP code to look up, as a string of digits.
        headers: HTTP headers for the request (including the Authorization
            bearer token).

    Returns:
        A pandas DataFrame with the columns ``zip_code``, ``city``,
        ``county_fips``, ``state_fips`` and ``state``; one row per result
        returned by the API. The DataFrame is empty (never ``None``) when the
        request fails, the response cannot be parsed, or no usable results
        are returned, so callers can always test ``.empty``.
    """
    columns = ["zip_code", "city", "county_fips", "state_fips", "state"]
    url = "https://www.huduser.gov/hudapi/public/usps"
    params = {"type": 2, "query": zip_code}

    try:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Error processing ZIP {zip_code}: {e}")
        return pd.DataFrame(columns=columns)

    # Walk the payload defensively: "data" or "results" may be absent or null.
    data = payload.get("data") if isinstance(payload, dict) else None
    results = data.get("results") if isinstance(data, dict) else None
    if not results:
        print(f"No data for ZIP {zip_code}")
        return pd.DataFrame(columns=columns)

    rows = []
    for item in results:
        geoid = item.get("geoid") or ""
        if len(geoid) < 5:
            # Without a full geoid the state/county FIPS cannot be derived.
            print(f"Skipping result without a usable geoid for ZIP {zip_code}")
            continue
        rows.append({
            "zip_code": zip_code,
            "city": (item.get("city") or "").title(),
            "county_fips": geoid[2:5],  # Characters 3-5 are the county FIPS
            "state_fips": geoid[:2],    # Characters 1-2 are the state FIPS
            "state": item.get("state"),
        })

    return pd.DataFrame(rows, columns=columns)

def main():
    """
    Look up every ZIP code listed in an input file and append the results to
    the ZIP code lookup CSV.

    Command line: ``python zip_code_lookup.py <input_file> <HUD_API_token>``

    Each line of the input file is reduced to its digits; 5-digit values (or
    the first five digits of a 9-digit ZIP+4) are queried against the HUD API,
    joined with the Census county table on state/county FIPS, and the
    resulting rows are appended to ``OUTPUT_CSV``. Exits with status 1 on bad
    arguments or a missing input file.
    """
    # Validate command-line arguments before doing any network work
    if len(sys.argv) < 3:
        print("Usage: python zip_code_lookup.py <input_file> <HUD_API_token>")
        sys.exit(1)

    input_file = sys.argv[1]
    token = sys.argv[2]

    # Ensure the input file exists
    if not os.path.exists(input_file):
        print(f"File not found: {input_file}")
        sys.exit(1)

    # Download county lookup data from the Census
    gaz = fetch_census_county_lookup()

    # Set up authorization header for HUD API
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {token}"
    }

    # NOTE(review): 'state' is taken from the HUD response and 'state_abbr'
    # from the Census file; confirm both hold what the existing lookup table
    # expects in those columns.
    output_columns = ['state_fips', 'state', 'state_abbr', 'zip_code', 'county', "city"]

    # The output is opened in append mode, so only write the header row when
    # the file is new or empty; otherwise a header line would land in the
    # middle of the existing table.
    write_header = not os.path.exists(OUTPUT_CSV) or os.path.getsize(OUTPUT_CSV) == 0

    # If the existing file does not end with a line break, the first appended
    # row would be glued onto its last line.
    needs_newline = False
    if not write_header:
        with open(OUTPUT_CSV, "rb") as existing:
            existing.seek(-1, os.SEEK_END)
            needs_newline = existing.read(1) not in (b"\n", b"\r")

    seen = set()  # ZIP codes already processed in this run

    # Open the input file (list of ZIP codes) and output CSV for writing
    with open(input_file, "r") as infile, open(OUTPUT_CSV, "a", newline="") as outfile:
        if needs_newline:
            outfile.write("\n")
        writer = csv.writer(outfile, delimiter=',')
        if write_header:
            writer.writerow(output_columns)

        # Process each ZIP code in the input file
        for line in infile:
            zip_code = re.sub(r'\D', '', line)  # Remove all non-digit characters
            if len(zip_code) == 9:
                zip_code = zip_code[:5]  # ZIP+4: keep the 5-digit ZIP
            print("zip_code " + zip_code)

            # Header lines, blanks and malformed values are not looked up
            if len(zip_code) != 5:
                print("NOT zip_code")
                continue

            if zip_code in seen:
                continue
            seen.add(zip_code)

            print(f"Looking up ZIP: {zip_code}")
            info = get_zip_info(zip_code, headers)

            if info is None or info.empty:
                print(f"Skipped ZIP {zip_code}")
                continue

            # Merge ZIP info with Census county data using FIPS codes
            merged = pd.merge(gaz, info, on=["state_fips", "county_fips"], how="inner")

            # Select and deduplicate the relevant columns
            new_rows = merged[output_columns].drop_duplicates()
            if new_rows.empty:
                print(f"No matching county found for ZIP {zip_code}")
                continue

            print(merged.values)
            writer.writerows(new_rows.values.tolist())


# Entry point
if __name__ == "__main__":
    main()
Loading