|
| 1 | + |
| 2 | +R version 4.4.2 (2024-10-31) -- "Pile of Leaves" |
| 3 | +Copyright (C) 2024 The R Foundation for Statistical Computing |
| 4 | +Platform: x86_64-pc-linux-gnu |
| 5 | + |
| 6 | +R is free software and comes with ABSOLUTELY NO WARRANTY. |
| 7 | +You are welcome to redistribute it under certain conditions. |
| 8 | +Type 'license()' or 'licence()' for distribution details. |
| 9 | + |
| 10 | +R is a collaborative project with many contributors. |
| 11 | +Type 'contributors()' for more information and |
| 12 | +'citation()' on how to cite R or R packages in publications. |
| 13 | + |
| 14 | +Type 'demo()' for some demos, 'help()' for on-line help, or |
| 15 | +'help.start()' for an HTML browser interface to help. |
| 16 | +Type 'q()' to quit R. |
| 17 | + |
| 18 | +- Project '~/research/weather-data-collector-spain' loaded. [renv 1.1.4] |
| 19 | +> #!/usr/bin/env Rscript |
| 20 | +> |
| 21 | +> # Simple forecast data collection based on proven working patterns |
| 22 | +> library(jsonlite) |
| 23 | +> library(httr) # Use httr like in the working script |
| 24 | +> library(curl) |
| 25 | +Using libcurl 8.7.1 with OpenSSL/3.2.2 |
| 26 | + |
| 27 | +Attaching package: ‘curl’ |
| 28 | + |
| 29 | +The following object is masked from ‘package:httr’: |
| 30 | + |
| 31 | + handle_reset |
| 32 | + |
| 33 | +> library(dplyr) |
| 34 | + |
| 35 | +Attaching package: ‘dplyr’ |
| 36 | + |
| 37 | +The following objects are masked from ‘package:stats’: |
| 38 | + |
| 39 | + filter, lag |
| 40 | + |
| 41 | +The following objects are masked from ‘package:base’: |
| 42 | + |
| 43 | + intersect, setdiff, setequal, union |
| 44 | + |
| 45 | +> library(data.table) |
| 46 | + |
| 47 | +Attaching package: ‘data.table’ |
| 48 | + |
| 49 | +The following objects are masked from ‘package:dplyr’: |
| 50 | + |
| 51 | + between, first, last |
| 52 | + |
| 53 | +> library(lubridate) |
| 54 | + |
| 55 | +Attaching package: ‘lubridate’ |
| 56 | + |
| 57 | +The following objects are masked from ‘package:data.table’: |
| 58 | + |
| 59 | + hour, isoweek, mday, minute, month, quarter, second, wday, week, |
| 60 | + yday, year |
| 61 | + |
| 62 | +The following objects are masked from ‘package:base’: |
| 63 | + |
| 64 | + date, intersect, setdiff, union |
| 65 | + |
| 66 | +> |
| 67 | +> # Load API keys |
| 68 | +> source("auth/keys.R") |
| 69 | +> |
| 70 | +> cat("=== AEMET FORECAST DATA COLLECTION (SIMPLE v2) ===\n") |
| 71 | +=== AEMET FORECAST DATA COLLECTION (SIMPLE v2) === |
| 72 | +> cat("Started at:", format(Sys.time()), "\n") |
| 73 | +Started at: 2025-08-22 13:02:40 |
| 74 | +> |
| 75 | +> # Function to get municipality forecast using working pattern |
| 76 | +> get_municipality_forecast_v2 = function(municipio_code, municipio_name = NULL) { |
| 77 | ++ tryCatch({ |
| 78 | ++ cat("Processing", municipio_code, "\n") |
| 79 | ++ |
| 80 | ++ # Initialize curl handle with current API key |
| 81 | ++ h = new_handle() |
| 82 | ++ handle_setheaders(h, 'api_key' = get_current_api_key()) |
| 83 | ++ |
| 84 | ++ # Request forecast data URL |
| 85 | ++ response1 = curl_fetch_memory( |
| 86 | ++ paste0('https://opendata.aemet.es/opendata/api/prediccion/especifica/municipio/diaria/', municipio_code), |
| 87 | ++ handle = h |
| 88 | ++ ) |
| 89 | ++ |
| 90 | ++ if(response1$status_code == 429) { |
| 91 | ++ cat("Rate limit - rotating key...\n") |
| 92 | ++ rotate_api_key() |
| 93 | ++ handle_setheaders(h, 'api_key' = get_current_api_key()) |
| 94 | ++ Sys.sleep(3) |
| 95 | ++ |
| 96 | ++ response1 = curl_fetch_memory( |
| 97 | ++ paste0('https://opendata.aemet.es/opendata/api/prediccion/especifica/municipio/diaria/', municipio_code), |
| 98 | ++ handle = h |
| 99 | ++ ) |
| 100 | ++ } |
| 101 | ++ |
| 102 | ++ if(response1$status_code != 200) { |
| 103 | ++ cat("API request failed:", response1$status_code, "\n") |
| 104 | ++ return(NULL) |
| 105 | ++ } |
| 106 | ++ |
| 107 | ++ # Parse response to get data URL |
| 108 | ++ response_content = fromJSON(rawToChar(response1$content)) |
| 109 | ++ |
| 110 | ++ if(!"datos" %in% names(response_content)) { |
| 111 | ++ cat("No data URL in response\n") |
| 112 | ++ return(NULL) |
| 113 | ++ } |
| 114 | ++ |
| 115 | ++ # Fetch actual forecast data |
| 116 | ++ Sys.sleep(1) |
| 117 | ++ response2 = curl_fetch_memory(response_content$datos) |
| 118 | ++ |
| 119 | ++ if(response2$status_code != 200) { |
| 120 | ++ cat("Data request failed:", response2$status_code, "\n") |
| 121 | ++ return(NULL) |
| 122 | ++ } |
| 123 | ++ |
| 124 | ++ # Parse forecast data using your working approach |
| 125 | ++ this_string = rawToChar(response2$content) |
| 126 | ++ Encoding(this_string) = "latin1" |
| 127 | ++ forecast_data = fromJSON(this_string) |
| 128 | ++ |
| 129 | ++ # Extract municipality info |
| 130 | ++ municipio_nombre = forecast_data$nombre |
| 131 | ++ provincia = forecast_data$provincia |
| 132 | ++ elaborado = forecast_data$elaborado |
| 133 | ++ |
| 134 | ++ # Process all 7 days at once (wdia[[1]] contains vectors for all days) |
| 135 | ++ wdia = forecast_data$prediccion$dia |
| 136 | ++ |
| 137 | ++ # Extract vectors for all 7 days using your proven pattern |
| 138 | ++ fechas = as.Date(wdia[[1]]$fecha) # Direct conversion using as.Date |
| 139 | ++ temp_max = wdia[[1]]$temperatura$maxima |
| 140 | ++ temp_min = wdia[[1]]$temperatura$minima |
| 141 | ++ temp_avg = rowMeans(cbind(temp_max, temp_min), na.rm = TRUE) |
| 142 | ++ |
| 143 | ++ # Extract additional variables following your pattern |
| 144 | ++ humid_max = if("humedadRelativa" %in% names(wdia[[1]])) { |
| 145 | ++ wdia[[1]]$humedadRelativa$maxima |
| 146 | ++ } else rep(NA, length(fechas)) |
| 147 | ++ |
| 148 | ++ humid_min = if("humedadRelativa" %in% names(wdia[[1]])) { |
| 149 | ++ wdia[[1]]$humedadRelativa$minima |
| 150 | ++ } else rep(NA, length(fechas)) |
| 151 | ++ |
| 152 | ++ # Wind data (following your unlist/lapply pattern) |
| 153 | ++ wind_speed = if("viento" %in% names(wdia[[1]])) { |
| 154 | ++ unlist(lapply(wdia[[1]]$viento, function(x) { |
| 155 | ++ if(is.list(x) && "velocidad" %in% names(x)) { |
| 156 | ++ mean(x$velocidad, na.rm = TRUE) |
| 157 | ++ } else NA |
| 158 | ++ })) |
| 159 | ++ } else rep(NA, length(fechas)) |
| 160 | ++ |
| 161 | ++ cat("Extracted", length(fechas), "forecast days\n") |
| 162 | ++ cat("First day - Date:", as.character(fechas[1]), "Temp max:", temp_max[1], "Temp min:", temp_min[1], "Temp avg:", temp_avg[1], "\n") |
| 163 | ++ |
| 164 | ++ # Create result data frame with all 7 days |
| 165 | ++ result = data.frame( |
| 166 | ++ municipio_id = municipio_code, |
| 167 | ++ municipio_nombre = forecast_data$nombre, |
| 168 | ++ provincia = forecast_data$provincia, |
| 169 | ++ elaborado = forecast_data$elaborado, |
| 170 | ++ fecha = fechas, |
| 171 | ++ temp_max = temp_max, |
| 172 | ++ temp_min = temp_min, |
| 173 | ++ temp_avg = temp_avg, |
| 174 | ++ humid_max = humid_max, |
| 175 | ++ humid_min = humid_min, |
| 176 | ++ wind_speed = wind_speed, |
| 177 | ++ stringsAsFactors = FALSE |
| 178 | ++ ) |
| 179 | ++ |
| 180 | ++ return(result) |
| 181 | ++ |
| 182 | ++ }, error = function(e) { |
| 183 | ++ cat("✗ ERROR:", e$message, "\n") |
| 184 | ++ return(NULL) |
| 185 | ++ }) |
| 186 | ++ } |
| 187 | +> |
| 188 | +> # Load municipality data |
| 189 | +> cat("Loading municipality codes...\n") |
| 190 | +Loading municipality codes... |
| 191 | +> municipalities_data = fread("data/input/municipalities.csv.gz") |
| 192 | +> cat("Loaded", nrow(municipalities_data), "municipalities\n") |
| 193 | +Loaded 8129 municipalities |
| 194 | +> |
| 195 | +> # Use small sample for testing |
| 196 | +> SAMPLE_SIZE = 2 |
| 197 | +> working_municipalities = head(municipalities_data$CUMUN, SAMPLE_SIZE) |
| 198 | +> names(working_municipalities) = head(municipalities_data$NAMEUNIT, SAMPLE_SIZE) |
| 199 | +> |
| 200 | +> cat("Testing with", SAMPLE_SIZE, "municipalities\n\n") |
| 201 | +Testing with 2 municipalities |
| 202 | + |
| 203 | +> |
| 204 | +> # Collect forecasts |
| 205 | +> all_forecasts = list() |
| 206 | +> successful_collections = 0 |
| 207 | +> |
| 208 | +> for(i in seq_along(working_municipalities)) { |
| 209 | ++ city = names(working_municipalities)[i] |
| 210 | ++ code = working_municipalities[i] |
| 211 | ++ |
| 212 | ++ cat("Municipality", i, "of", length(working_municipalities), ":", city, "(", code, ")\n") |
| 213 | ++ |
| 214 | ++ if(i > 1) { |
| 215 | ++ cat("Waiting 15 seconds...\n") |
| 216 | ++ Sys.sleep(15) # Longer delay to avoid rate limits |
| 217 | ++ } |
| 218 | ++ |
| 219 | ++ forecast_data = get_municipality_forecast_v2(code, city) |
| 220 | ++ |
| 221 | ++ if(!is.null(forecast_data)) { |
| 222 | ++ all_forecasts[[code]] = forecast_data |
| 223 | ++ successful_collections = successful_collections + 1 |
| 224 | ++ } |
| 225 | ++ |
| 226 | ++ cat("\n") |
| 227 | ++ } |
| 228 | +Municipality 1 of 2 : Solórzano ( 39084 ) |
| 229 | +Processing 39084 |
| 230 | +Extracted 7 forecast days |
| 231 | +First day - Date: 2025-08-22 Temp max: 23 Temp min: 13 Temp avg: 18 |
| 232 | + |
| 233 | +Municipality 2 of 2 : Pino del Oro ( 49157 ) |
| 234 | +Waiting 15 seconds... |
| 235 | +Processing 49157 |
| 236 | +Extracted 7 forecast days |
| 237 | +First day - Date: 2025-08-22 Temp max: 29 Temp min: 12 Temp avg: 20.5 |
| 238 | + |
| 239 | +Warning messages: |
| 240 | +1: In data.frame(municipio_id = municipio_code, municipio_nombre = forecast_data$nombre, : |
| 241 | + row names were found from a short variable and have been discarded |
| 242 | +2: In data.frame(municipio_id = municipio_code, municipio_nombre = forecast_data$nombre, : |
| 243 | + row names were found from a short variable and have been discarded |
| 244 | +> |
| 245 | +> cat("=== RESULTS ===\n") |
| 246 | +=== RESULTS === |
| 247 | +> cat("Municipalities attempted:", length(working_municipalities), "\n") |
| 248 | +Municipalities attempted: 2 |
| 249 | +> cat("Successful collections:", successful_collections, "\n") |
| 250 | +Successful collections: 2 |
| 251 | +> |
| 252 | +> if(length(all_forecasts) > 0) { |
| 253 | ++ final_data = do.call(rbind, all_forecasts) |
| 254 | ++ |
| 255 | ++ # Add collection timestamp |
| 256 | ++ final_data$collected_at = Sys.time() |
| 257 | ++ |
| 258 | ++ cat("Total forecast records:", nrow(final_data), "\n") |
| 259 | ++ cat("Date range:", as.character(min(final_data$fecha)), "to", as.character(max(final_data$fecha)), "\n") |
| 260 | ++ cat("Sample data:\n") |
| 261 | ++ print(head(final_data, 3)) |
| 262 | ++ |
| 263 | ++ # Ensure output directory exists |
| 264 | ++ dir.create("data/output", recursive = TRUE, showWarnings = FALSE) |
| 265 | ++ |
| 266 | ++ # Save the data |
| 267 | ++ output_file = paste0("data/output/municipal_forecasts_", Sys.Date(), ".csv") |
| 268 | ++ write.csv(final_data, output_file, row.names = FALSE) |
| 269 | ++ cat("Data saved to:", output_file, "\n") |
| 270 | ++ } else { |
| 271 | ++ cat("No data collected\n") |
| 272 | ++ } |
| 273 | +Total forecast records: 14 |
| 274 | +Date range: 2025-08-22 to 2025-08-28 |
| 275 | +Sample data: |
| 276 | + municipio_id municipio_nombre provincia elaborado fecha |
| 277 | +1 39084 Solórzano Cantabria 2025-08-22T09:43:07 2025-08-22 |
| 278 | +2 39084 Solórzano Cantabria 2025-08-22T09:43:07 2025-08-23 |
| 279 | +3 39084 Solórzano Cantabria 2025-08-22T09:43:07 2025-08-24 |
| 280 | + temp_max temp_min temp_avg humid_max humid_min wind_speed collected_at |
| 281 | +1 23 13 18.0 100 70 2.857143 2025-08-22 13:02:58 |
| 282 | +2 25 12 18.5 95 65 16.428571 2025-08-22 13:02:58 |
| 283 | +3 27 15 21.0 95 70 10.000000 2025-08-22 13:02:58 |
| 284 | +Data saved to: data/output/municipal_forecasts_2025-08-22.csv |
| 285 | +> |
| 286 | +> cat("Completed at:", format(Sys.time()), "\n") |
| 287 | +Completed at: 2025-08-22 13:02:58 |
| 288 | +> |
| 289 | +> proc.time() |
| 290 | + user system elapsed |
| 291 | + 4.740 1.237 34.262 |
0 commit comments