Skip to content

Commit 2112c2b

Browse files
committed
Fix: Standardize date_local format and integrate Envista in AQI consolidation
- Convert Envista date_local from ISO timestamp to YYYY-MM-DD in transform_env.py
- Update calculate_aqi.py to use timezone-naive datetime for date comparison
- Update consolidate_aqi_daily.py to read both AQS and Envista transformed files
- Ensure consistent date format throughout the pipeline when consolidating data
1 parent 22f59a4 commit 2112c2b

1 file changed

Lines changed: 17 additions & 7 deletions

File tree

src/stage/consolidate_aqi_daily.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,22 +67,32 @@ def consolidate_aqi_daily_for_year(year: str, transform_dir: Path, categories_df
6767
Returns:
6868
Consolidated DataFrame with one row per site per date
6969
"""
70-
# Read the transformed data for this year
71-
input_file = transform_dir / f"aqi_aqs_daily_{year}.csv"
72-
if not input_file.exists():
73-
print(f"⚠️ No transformed AQI file found for year {year}: {input_file}")
70+
# Read all transformed data files for this year (both AQS and Envista)
71+
import glob
72+
pattern = str(transform_dir / f"*aqi*{year}.csv")
73+
input_files = glob.glob(pattern)
74+
75+
if not input_files:
76+
print(f"⚠️ No transformed AQI files found for year {year} matching pattern: {pattern}")
7477
return pd.DataFrame()
7578

79+
print(f" Found {len(input_files)} file(s) for year {year}")
80+
81+
# Read and concatenate all files
82+
dfs = []
7683
try:
77-
df = pd.read_csv(input_file)
84+
for input_file in input_files:
85+
df_temp = pd.read_csv(input_file)
86+
dfs.append(df_temp)
87+
df = pd.concat(dfs, ignore_index=True)
7888
except Exception as e:
79-
print(f"❌ Error reading {input_file}: {e}")
89+
print(f"❌ Error reading files for year {year}: {e}")
8090
return pd.DataFrame()
8191

8292
if df.empty:
8393
print(f"⚠️ Empty AQI file for year {year}")
8494
return pd.DataFrame()
85-
95+
8696
# Filter only valid records (validity_indicator == 'Y')
8797
df = df[df['validity_indicator'] == 'Y'].copy()
8898

0 commit comments

Comments
 (0)