|
5 | 5 | from FLiESANN import process_FLiESANN_table, load_ECOv002_calval_FLiESANN_inputs |
6 | 6 | from .ECOv002_static_tower_BESS_inputs import load_ECOv002_static_tower_BESS_inputs |
7 | 7 | from .process_BESS_table import process_BESS_table |
| 8 | +from .retrieve_BESS_JPL_GEOS5FP_inputs import retrieve_BESS_JPL_GEOS5FP_inputs |
8 | 9 |
|
9 | 10 | import logging |
| 11 | +import warnings |
| 12 | +import os |
| 13 | + |
| 14 | +# Suppress TensorFlow warnings |
| 15 | +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' |
| 16 | +os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0' |
| 17 | + |
| 18 | +# Suppress pandas warnings |
| 19 | +warnings.filterwarnings('ignore', category=UserWarning) |
10 | 20 |
|
11 | 21 | logger = logging.getLogger(__name__) |
12 | 22 |
|
| 23 | +# Configure GEOS5FP logging to be visible |
| 24 | +geos5fp_logger = logging.getLogger('GEOS5FP') |
| 25 | +geos5fp_logger.setLevel(logging.INFO) |
| 26 | +if not geos5fp_logger.handlers: |
| 27 | + handler = logging.StreamHandler() |
| 28 | + handler.setLevel(logging.INFO) |
| 29 | + formatter = logging.Formatter('[%(asctime)s %(levelname)s] %(message)s', datefmt='%Y-%m-%d %H:%M:%S') |
| 30 | + handler.setFormatter(formatter) |
| 31 | + geos5fp_logger.addHandler(handler) |
| 32 | + |
13 | 33 | def generate_input_dataset(): |
14 | 34 | logger.info("Generating BESS-JPL input dataset from ECOv002 cal/val FLiESANN inputs") |
15 | 35 | # calval_df = load_calval_table() |
16 | 36 | inputs_df = load_ECOv002_calval_FLiESANN_inputs() |
17 | 37 |
|
18 | 38 | # Ensure `time_UTC` is in datetime format |
19 | | - inputs_df['time_UTC'] = pd.to_datetime(inputs_df['time_UTC']) |
| 39 | + inputs_df['time_UTC'] = pd.to_datetime(inputs_df['time_UTC'], errors='coerce') |
20 | 40 |
|
21 | 41 | # Create a `date_UTC` column by extracting the date from `time_UTC` |
22 | 42 | inputs_df['date_UTC'] = inputs_df['time_UTC'].dt.date |
@@ -56,25 +76,168 @@ def extract_scalar(x): |
56 | 76 | how='left', |
57 | 77 | suffixes=('', '_static') |
58 | 78 | ) |
| 79 | + |
| 80 | + |
59 | 81 |
|
60 | 82 | # Remove duplicate columns from the merge (keep non-static versions) |
61 | 83 | duplicate_cols = [col for col in inputs_df.columns if col.endswith('_static')] |
62 | 84 | inputs_df = inputs_df.drop(columns=duplicate_cols) |
63 | 85 |
|
64 | | - # Process with BESS-JPL model |
| 86 | + # Extract required parameters from inputs_df for retrieve_BESS_inputs |
| 87 | + ST_C = np.array(inputs_df.ST_C).astype(np.float64) |
| 88 | + NDVI = np.array(inputs_df.NDVI).astype(np.float64) |
| 89 | + NDVI = np.where(NDVI > 0.06, NDVI, np.nan).astype(np.float64) |
| 90 | + albedo = np.array(inputs_df.albedo).astype(np.float64) |
| 91 | + |
| 92 | + # Extract time and geometry |
| 93 | + from rasters import Point |
| 94 | + from solar_apparent_time import calculate_solar_day_of_year, calculate_solar_hour_of_day |
| 95 | + from geopandas import GeoSeries |
| 96 | + from shapely.geometry import Point as ShapelyPoint |
| 97 | + |
| 98 | + # Handle geometry construction |
| 99 | + if "geometry" in inputs_df: |
| 100 | + if isinstance(inputs_df.geometry.iloc[0], str): |
| 101 | + def parse_geom(s): |
| 102 | + s = s.strip() |
| 103 | + if s.startswith("POINT"): |
| 104 | + coords = s.replace("POINT", "").replace("(", "").replace(")", "").strip().split() |
| 105 | + return Point(float(coords[0]), float(coords[1])) |
| 106 | + elif "," in s: |
| 107 | + coords = [float(c) for c in s.split(",")] |
| 108 | + return Point(coords[0], coords[1]) |
| 109 | + else: |
| 110 | + coords = [float(c) for c in s.split()] |
| 111 | + return Point(coords[0], coords[1]) |
| 112 | + inputs_df = inputs_df.copy() |
| 113 | + inputs_df['geometry'] = inputs_df['geometry'].apply(parse_geom) |
| 114 | + geometry = [Point(pt.x, pt.y) for pt in inputs_df.geometry] |
| 115 | + elif "lat" in inputs_df and "lon" in inputs_df: |
| 116 | + lat = np.array(inputs_df.lat).astype(np.float64) |
| 117 | + lon = np.array(inputs_df.lon).astype(np.float64) |
| 118 | + geometry = [Point(lon[i], lat[i]) for i in range(len(lat))] |
| 119 | + else: |
| 120 | + raise KeyError("Input DataFrame must contain either 'geometry' or both 'lat' and 'lon' columns.") |
| 121 | + |
| 122 | + # Extract time |
| 123 | + time_UTC_list = pd.to_datetime(inputs_df.time_UTC).tolist() |
| 124 | + |
| 125 | + # Calculate solar time |
| 126 | + day_of_year_list = [] |
| 127 | + hour_of_day_list = [] |
| 128 | + |
| 129 | + for i, (time_utc, geom) in enumerate(zip(time_UTC_list, geometry)): |
| 130 | + shapely_point = ShapelyPoint(geom.x, geom.y) |
| 131 | + geoseries = GeoSeries([shapely_point]) |
| 132 | + doy = calculate_solar_day_of_year(time_UTC=time_utc, geometry=geoseries) |
| 133 | + hod = calculate_solar_hour_of_day(time_UTC=time_utc, geometry=geoseries) |
| 134 | + doy_scalar = doy[0] if hasattr(doy, '__getitem__') else doy |
| 135 | + hod_scalar = hod[0] if hasattr(hod, '__getitem__') else hod |
| 136 | + day_of_year_list.append(doy_scalar) |
| 137 | + hour_of_day_list.append(hod_scalar) |
| 138 | + |
| 139 | + day_of_year = np.array(day_of_year_list) |
| 140 | + hour_of_day = np.array(hour_of_day_list) |
| 141 | + |
| 142 | + # Keep geometry as list of Points - do NOT convert to MultiPoint |
| 143 | + # This allows proper matching of each point with its corresponding time |
| 144 | + time_UTC = time_UTC_list |
| 145 | + |
| 146 | + # Extract optional inputs if present |
| 147 | + Ta_C = np.array(inputs_df.Ta_C).astype(np.float64) if "Ta_C" in inputs_df else (np.array(inputs_df.Ta).astype(np.float64) if "Ta" in inputs_df else None) |
| 148 | + RH = np.array(inputs_df.RH).astype(np.float64) if "RH" in inputs_df else None |
| 149 | + elevation_m = np.array(inputs_df.elevation_m).astype(np.float64) if "elevation_m" in inputs_df else (np.array(inputs_df.elevation_km).astype(np.float64) * 1000 if "elevation_km" in inputs_df else None) |
| 150 | + COT = np.array(inputs_df.COT).astype(np.float64) if "COT" in inputs_df else None |
| 151 | + AOT = np.array(inputs_df.AOT).astype(np.float64) if "AOT" in inputs_df else None |
| 152 | + vapor_gccm = np.array(inputs_df.vapor_gccm).astype(np.float64) if "vapor_gccm" in inputs_df else None |
| 153 | + ozone_cm = np.array(inputs_df.ozone_cm).astype(np.float64) if "ozone_cm" in inputs_df else None |
| 154 | + PAR_albedo = np.array(inputs_df.PAR_albedo).astype(np.float64) if "PAR_albedo" in inputs_df else None |
| 155 | + NIR_albedo = np.array(inputs_df.NIR_albedo).astype(np.float64) if "NIR_albedo" in inputs_df else None |
| 156 | + Ca = np.array(inputs_df.Ca).astype(np.float64) if "Ca" in inputs_df else None |
| 157 | + wind_speed_mps = np.array(inputs_df.wind_speed_mps).astype(np.float64) if "wind_speed_mps" in inputs_df else None |
| 158 | + NDVI_minimum = np.array(inputs_df.NDVI_minimum).astype(np.float64) if "NDVI_minimum" in inputs_df else None |
| 159 | + NDVI_maximum = np.array(inputs_df.NDVI_maximum).astype(np.float64) if "NDVI_maximum" in inputs_df else None |
| 160 | + C4_fraction = np.array(inputs_df.C4_fraction).astype(np.float64) if "C4_fraction" in inputs_df else None |
| 161 | + carbon_uptake_efficiency = np.array(inputs_df.carbon_uptake_efficiency).astype(np.float64) if "carbon_uptake_efficiency" in inputs_df else None |
| 162 | + kn = np.array(inputs_df.kn).astype(np.float64) if "kn" in inputs_df else None |
| 163 | + peakVCmax_C3 = np.array(inputs_df.peakVCmax_C3).astype(np.float64) if "peakVCmax_C3" in inputs_df else None |
| 164 | + peakVCmax_C4 = np.array(inputs_df.peakVCmax_C4).astype(np.float64) if "peakVCmax_C4" in inputs_df else None |
| 165 | + ball_berry_slope_C3 = np.array(inputs_df.ball_berry_slope_C3).astype(np.float64) if "ball_berry_slope_C3" in inputs_df else None |
| 166 | + ball_berry_slope_C4 = np.array(inputs_df.ball_berry_slope_C4).astype(np.float64) if "ball_berry_slope_C4" in inputs_df else None |
| 167 | + ball_berry_intercept_C3 = np.array(inputs_df.ball_berry_intercept_C3).astype(np.float64) if "ball_berry_intercept_C3" in inputs_df else None |
| 168 | + KG_climate = np.array(inputs_df.KG_climate) if "KG_climate" in inputs_df else None |
| 169 | + CI = np.array(inputs_df.CI).astype(np.float64) if "CI" in inputs_df else None |
| 170 | + canopy_height_meters = np.array(inputs_df.canopy_height_meters).astype(np.float64) if "canopy_height_meters" in inputs_df else None |
| 171 | + |
| 172 | + logger.info("Retrieving GEOS-5 FP meteorological inputs") |
| 173 | + logger.info(f"Calling retrieve_BESS_JPL_GEOS5FP_inputs with {len(time_UTC)} time points and {len(geometry)} geometry points") |
| 174 | + |
| 175 | + # Retrieve only GEOS-5 FP meteorological inputs (vegetation params already in inputs_df) |
| 176 | + # Pass geometry as list of Points to match each time with its corresponding location |
| 177 | + GEOS5FP_inputs_dict = retrieve_BESS_JPL_GEOS5FP_inputs( |
| 178 | + time_UTC=time_UTC, |
| 179 | + geometry=geometry, |
| 180 | + albedo=albedo, |
| 181 | + Ta_C=Ta_C, |
| 182 | + RH=RH, |
| 183 | + COT=COT, |
| 184 | + AOT=AOT, |
| 185 | + vapor_gccm=vapor_gccm, |
| 186 | + ozone_cm=ozone_cm, |
| 187 | + PAR_albedo=PAR_albedo, |
| 188 | + NIR_albedo=NIR_albedo, |
| 189 | + Ca=Ca, |
| 190 | + wind_speed_mps=wind_speed_mps |
| 191 | + ) |
| 192 | + |
| 193 | + logger.info("Completed retrieving GEOS-5 FP meteorological inputs") |
| 194 | + |
| 195 | + # Create complete inputs dataframe by starting with original inputs_df and updating with retrieved values |
| 196 | + complete_inputs_df = inputs_df.copy() |
| 197 | + |
| 198 | + # Add primary inputs |
| 199 | + complete_inputs_df['ST_C'] = ST_C |
| 200 | + complete_inputs_df['NDVI'] = NDVI |
| 201 | + complete_inputs_df['albedo'] = albedo |
| 202 | + complete_inputs_df['time_UTC'] = time_UTC_list |
| 203 | + complete_inputs_df['day_of_year'] = day_of_year |
| 204 | + complete_inputs_df['hour_of_day'] = hour_of_day |
| 205 | + |
| 206 | + # Add geometry as lat/lon if not already present |
| 207 | + if 'lat' not in complete_inputs_df: |
| 208 | + complete_inputs_df['lat'] = [pt.y for pt in geometry] |
| 209 | + if 'lon' not in complete_inputs_df: |
| 210 | + complete_inputs_df['lon'] = [pt.x for pt in geometry] |
| 211 | + |
| 212 | + # Add all retrieved GEOS5FP inputs to complete_inputs_df |
| 213 | + for key, value in GEOS5FP_inputs_dict.items(): |
| 214 | + if hasattr(value, '__len__') and not isinstance(value, str): |
| 215 | + try: |
| 216 | + complete_inputs_df[key] = value |
| 217 | + except (ValueError, TypeError) as e: |
| 218 | + logger.warning(f"Skipping assignment of key '{key}' to inputs DataFrame: {e}") |
| 219 | + continue |
| 220 | + elif isinstance(value, (int, float, np.number)): |
| 221 | + complete_inputs_df[key] = value |
| 222 | + |
| 223 | + logger.info("Processing BESS model to generate outputs") |
| 224 | + |
| 225 | + # Process with BESS-JPL model to get outputs |
65 | 226 | outputs_df = process_BESS_table(inputs_df) |
66 | 227 |
|
67 | 228 | inputs_filename = join(abspath(dirname(__file__)), "ECOv002-cal-val-BESS-JPL-inputs.csv") |
68 | 229 | outputs_filename = join(abspath(dirname(__file__)), "ECOv002-cal-val-BESS-JPL-outputs.csv") |
69 | 230 |
|
70 | | - # Save the input dataset to a CSV file |
71 | | - inputs_df.to_csv(inputs_filename, index=False) |
| 231 | + # Save the complete input dataset to a CSV file |
| 232 | + complete_inputs_df.to_csv(inputs_filename, index=False) |
72 | 233 |
|
73 | 234 | # Save the processed results to a CSV file |
74 | 235 | outputs_df.to_csv(outputs_filename, index=False) |
75 | 236 |
|
76 | 237 | logger.info(f"Processed {len(outputs_df)} records from the full cal/val dataset") |
77 | | - logger.info(f"input dataset: {inputs_filename}") |
78 | | - logger.info(f"output dataset: {outputs_filename}") |
| 238 | + logger.info(f"Complete input dataset saved to: {inputs_filename}") |
| 239 | + logger.info(f" - Contains {len(complete_inputs_df.columns)} input columns") |
| 240 | + logger.info(f"Output dataset saved to: {outputs_filename}") |
| 241 | + logger.info(f" - Contains {len(outputs_df.columns)} total columns") |
79 | 242 |
|
80 | 243 | return outputs_df |
0 commit comments