|
| 1 | + |
| 2 | +R version 4.4.2 (2024-10-31) -- "Pile of Leaves" |
| 3 | +Copyright (C) 2024 The R Foundation for Statistical Computing |
| 4 | +Platform: x86_64-pc-linux-gnu |
| 5 | + |
| 6 | +R is free software and comes with ABSOLUTELY NO WARRANTY. |
| 7 | +You are welcome to redistribute it under certain conditions. |
| 8 | +Type 'license()' or 'licence()' for distribution details. |
| 9 | + |
| 10 | +R is a collaborative project with many contributors. |
| 11 | +Type 'contributors()' for more information and |
| 12 | +'citation()' on how to cite R or R packages in publications. |
| 13 | + |
| 14 | +Type 'demo()' for some demos, 'help()' for on-line help, or |
| 15 | +'help.start()' for an HTML browser interface to help. |
| 16 | +Type 'q()' to quit R. |
| 17 | + |
| 18 | +- Project '~/research/weather-data-collector-spain' loaded. [renv 1.1.4] |
| 19 | +> # get_latest_data_expanded.R |
| 20 | +> # ---------------------- |
| 21 | +> # Purpose: Download and update the latest observation data from AEMET stations across Spain. |
| 22 | +> # |
| 23 | +> # This script fetches weather observations from the AEMET OpenData API using the 7 core variables |
| 24 | +> # that are compatible across current observations, historical data, and forecast endpoints. |
| 25 | +> # |
| 26 | +> # Core Variables (Safe for all endpoints): |
| 27 | +> # - ta: Air temperature (°C) |
| 28 | +> # - tamax: Maximum temperature (°C) |
| 29 | +> # - tamin: Minimum temperature (°C) |
| 30 | +> # - hr: Relative humidity (%) |
| 31 | +> # - prec: Precipitation (mm) |
| 32 | +> # - vv: Wind speed (km/h) |
| 33 | +> # - pres: Atmospheric pressure (hPa) |
| 34 | +> # |
| 35 | +> # Main Steps: |
| 36 | +> # 1. Load dependencies and API key. |
| 37 | +> # 2. Define functions to request and process data from the AEMET API, with error handling and retries. |
| 38 | +> # 3. Download the latest data and reshape it for storage. |
| 39 | +> # 4. Append new data to the local CSV file, ensuring no duplicates. |
| 40 | +> # |
| 41 | +> # Usage: |
| 42 | +> # - Requires a valid API key in 'auth/keys.R' as 'my_api_key'. |
| 43 | +> # - Run as an R script. Output is written to 'data/spain_weather_expanded.csv.gz'. |
| 44 | +> # |
| 45 | +> # Dependencies: tidyverse, lubridate, curl, jsonlite, data.table, R.utils |
| 46 | +> # |
| 47 | +> # Author: John Palmer |
| 48 | +> # Date: 2025-08-20 (Updated for 7-variable expansion) |
| 49 | +> |
| 50 | +> # Title #### |
| 51 | +> # For downloading latest observation data from AEMET stations all over Spain. This needs to be run at least every 12 hours, but better to run it every 2 because of API limits, failures etc. |
| 52 | +> |
| 53 | +> rm(list=ls()) |
| 54 | +> |
| 55 | +> |
| 56 | +> # Dependencies #### |
| 57 | +> library(tidyverse) |
| 58 | +── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ── |
| 59 | +✔ dplyr 1.1.4 ✔ readr 2.1.5 |
| 60 | +✔ forcats 1.0.0 ✔ stringr 1.5.1 |
| 61 | +✔ ggplot2 3.5.2 ✔ tibble 3.3.0 |
| 62 | +✔ lubridate 1.9.4 ✔ tidyr 1.3.1 |
| 63 | +✔ purrr 1.1.0 |
| 64 | +── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── |
| 65 | +✖ dplyr::filter() masks stats::filter() |
| 66 | +✖ dplyr::lag() masks stats::lag() |
| 67 | +ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors |
| 68 | +> library(lubridate) |
| 69 | +> library(curl) |
| 70 | +Using libcurl 8.7.1 with OpenSSL/3.2.2 |
| 71 | + |
| 72 | +Attaching package: ‘curl’ |
| 73 | + |
| 74 | +The following object is masked from ‘package:readr’: |
| 75 | + |
| 76 | + parse_date |
| 77 | + |
| 78 | +> library(jsonlite) |
| 79 | + |
| 80 | +Attaching package: ‘jsonlite’ |
| 81 | + |
| 82 | +The following object is masked from ‘package:purrr’: |
| 83 | + |
| 84 | + flatten |
| 85 | + |
| 86 | +> library(data.table) |
| 87 | + |
| 88 | +Attaching package: ‘data.table’ |
| 89 | + |
| 90 | +The following objects are masked from ‘package:lubridate’: |
| 91 | + |
| 92 | + hour, isoweek, mday, minute, month, quarter, second, wday, week, |
| 93 | + yday, year |
| 94 | + |
| 95 | +The following objects are masked from ‘package:dplyr’: |
| 96 | + |
| 97 | + between, first, last |
| 98 | + |
| 99 | +The following object is masked from ‘package:purrr’: |
| 100 | + |
| 101 | + transpose |
| 102 | + |
| 103 | +> library(R.utils) |
| 104 | +Loading required package: R.oo |
| 105 | +Loading required package: R.methodsS3 |
| 106 | +R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help. |
| 107 | +R.oo v1.27.1 (2025-05-02 21:00:05 UTC) successfully loaded. See ?R.oo for help. |
| 108 | + |
| 109 | +Attaching package: ‘R.oo’ |
| 110 | + |
| 111 | +The following object is masked from ‘package:R.methodsS3’: |
| 112 | + |
| 113 | + throw |
| 114 | + |
| 115 | +The following objects are masked from ‘package:methods’: |
| 116 | + |
| 117 | + getClasses, getMethods |
| 118 | + |
| 119 | +The following objects are masked from ‘package:base’: |
| 120 | + |
| 121 | + attach, detach, load, save |
| 122 | + |
| 123 | +R.utils v2.13.0 (2025-02-24 21:20:02 UTC) successfully loaded. See ?R.utils for help. |
| 124 | + |
| 125 | +Attaching package: ‘R.utils’ |
| 126 | + |
| 127 | +The following object is masked from ‘package:jsonlite’: |
| 128 | + |
| 129 | + validate |
| 130 | + |
| 131 | +The following object is masked from ‘package:tidyr’: |
| 132 | + |
| 133 | + extract |
| 134 | + |
| 135 | +The following object is masked from ‘package:utils’: |
| 136 | + |
| 137 | + timestamp |
| 138 | + |
| 139 | +The following objects are masked from ‘package:base’: |
| 140 | + |
| 141 | + cat, commandArgs, getOption, isOpen, nullfile, parse, use, warnings |
| 142 | + |
| 143 | +> |
| 144 | +> # Set locale to UTF-8 for proper encoding handling |
| 145 | +> Sys.setlocale("LC_ALL", "en_US.UTF-8") |
| 146 | +[1] "" |
| 147 | +Warning message: |
| 148 | +In Sys.setlocale("LC_ALL", "en_US.UTF-8") : |
| 149 | + OS reports request to set locale to "en_US.UTF-8" cannot be honored |
| 150 | +> |
| 151 | +> # If you want to prevent concurrent runs of this script, set PREVENT_CONCURRENT_RUNS to TRUE. |
| 152 | +> PREVENT_CONCURRENT_RUNS = FALSE |
| 153 | +> |
if (PREVENT_CONCURRENT_RUNS) {
  # Prevent concurrent runs by creating a lockfile.
  lockfile <- "tmp/get_latest_data_expanded.lock"
  # BUG FIX: on.exit() only registers cleanup inside a function call; at
  # script top level it is not guaranteed to fire (NOTE(review): confirm on
  # the target R version), so a crashed run would leave the lock behind and
  # block every future run forever. Treat locks older than 6 hours as stale
  # (the script is meant to run at least every 12 h, ideally every 2 h).
  if (file.exists(lockfile)) {
    lock_age <- difftime(Sys.time(), file.mtime(lockfile), units = "hours")
    if (is.na(lock_age) || lock_age < 6) {
      cat("Another run is in progress. Exiting.\n")
      quit(save = "no", status = 0)
    }
    # Stale lock from a dead run: remove it and proceed.
    unlink(lockfile)
  }
  # Create the lock directory and the lockfile itself.
  dir.create("tmp", showWarnings = FALSE)
  file.create(lockfile)
  # Best-effort cleanup for normal exits; crashed runs are covered by the
  # stale-lock check above.
  on.exit(unlink(lockfile), add = TRUE)
}
| 169 | +> |
| 170 | +> |
| 171 | +> # Load API keys |
| 172 | +> source("auth/keys.R") |
| 173 | +> |
# aemet_api_request: Fetch the latest weather observations from the AEMET
# OpenData API and return them as a tibble restricted to the 7 core
# variables compatible across all endpoints, plus station id and timestamp.
#
# Args:
#   handle: A curl handle carrying the 'api_key' header. Defaults to the
#           globally configured handle `h`, so existing no-argument callers
#           keep working unchanged.
#
# Returns: A tibble with columns fint (observation timestamp), idema
#   (station id), ta, tamax, tamin, hr, prec, vv, pres.
aemet_api_request <- function(handle = h) {
  # First request returns a JSON envelope whose $datos field is the URL of
  # the actual data payload. (The original wrapped this constant URL in a
  # redundant paste0(), removed here.)
  req <- curl_fetch_memory('https://opendata.aemet.es/opendata/api/observacion/convencional/todas', handle = handle)
  wurl <- fromJSON(rawToChar(req$content))$datos
  # Second request fetches the payload itself.
  req <- curl_fetch_memory(wurl)
  this_string <- rawToChar(req$content)
  # AEMET serves this payload as latin1; declare the encoding before JSON
  # parsing so accented station text survives intact.
  Encoding(this_string) <- "latin1"
  wdia <- fromJSON(this_string) %>%
    as_tibble() %>%
    dplyr::select(fint, idema, ta, tamax, tamin, hr, prec, vv, pres)
  return(wdia)
}
| 187 | +> |
# get_data: Wrapper around aemet_api_request() with error handling and
# bounded retry logic.
#
# BUG FIX: the original error handler called get_data() recursively and then
# executed return(NULL), so the data from a *successful* retry was silently
# discarded and the caller always saw NULL after any transient error; the
# recursion was also unbounded. Rewritten as a bounded retry loop that
# returns the retry's result.
#
# Args:
#   max_retries:  Maximum number of attempts before giving up (default 5).
#                 Existing callers use get_data() with no arguments.
#   wait_seconds: Pause between attempts (default 50 s, matching the
#                 original's Sys.sleep(50)); the API rate-limits, so waiting
#                 improves the odds of the next attempt.
#
# Returns: A tibble of observations on success, or NULL if every attempt
#   failed (callers already check for NULL).
get_data <- function(max_retries = 5, wait_seconds = 50) {
  for (attempt in seq_len(max_retries)) {
    result <- tryCatch(
      expr = aemet_api_request(),
      error = function(e) {
        # Log the condition and signal failure for this attempt.
        print(e)
        NULL
      },
      warning = function(w) {
        print(w)
        NULL
      }
    )
    if (!is.null(result)) {
      return(result)
    }
    if (attempt < max_retries) {
      Sys.sleep(wait_seconds)
    }
  }
  # All attempts failed; callers handle NULL explicitly.
  NULL
}
| 216 | +> |
# Make sure the data directory is present before anything tries to write.
if (!dir.exists("data")) {
  dir.create("data")
}

# Configure the cURL handle used by aemet_api_request(); the API key is
# sent via the 'api_key' request header.
h <- new_handle()
handle_setheaders(h, 'api_key' = my_api_key)

# Fetch the latest observations; on a NULL result, pause a minute and give
# the whole fetch one more chance before giving up.
wdia <- get_data()
if (is.null(wdia)) {
  Sys.sleep(60)
  wdia <- get_data()
}
| 233 | +> |
# If data was successfully retrieved, reshape it to long format, merge with
# the existing dataset, deduplicate, and write back out.
if (!is.null(wdia) && nrow(wdia) > 0) {
  # Pivot the 7 core variables into (measure, value) pairs, drop missing
  # readings, and parse the observation timestamp.
  latest_weather = wdia %>%
    pivot_longer(cols = c(ta, tamax, tamin, hr, prec, vv, pres),
                 names_to = "measure",
                 values_to = "value") %>%
    filter(!is.na(value)) %>%
    mutate(fint = as_datetime(fint)) %>%
    as.data.table()

  print(paste0("Downloaded ", nrow(latest_weather), " new rows of data with 7 core variables."))

  # Read and write the SAME file: that is what makes the append/deduplicate
  # cycle work across runs.
  weather_file = "data/spain_weather_expanded.csv.gz"

  # Load previous weather data, if any.
  if (file.exists(weather_file)) {
    previous_weather = fread(weather_file)
  } else {
    previous_weather = data.table()
    print("Creating new expanded weather dataset file.")
  }

  # Combine and deduplicate, newest observations first.
  spain_weather = bind_rows(latest_weather, previous_weather) %>%
    distinct() %>%
    arrange(desc(fint))

  # Save updated data. BUG FIX: the original wrote to
  # "data/output/hourly_station_ongoing.csv.gz" while reading from
  # "data/spain_weather_expanded.csv.gz", so prior data was never appended or
  # deduplicated against, and data/output/ was never created (only data/ is
  # ensured above). The script header also documents
  # data/spain_weather_expanded.csv.gz as the output file.
  fwrite(as.data.table(spain_weather), weather_file)

  print(paste0("Total dataset now contains ", nrow(spain_weather), " rows."))
} else {
  print("No new data retrieved. Nothing saved.")
}
| 267 | +[1] "Downloaded 60289 new rows of data with 7 core variables." |
| 268 | +[1] "Creating new expanded weather dataset file." |
| 269 | +[1] "Total dataset now contains 60289 rows." |
| 270 | +> |
| 271 | +> |
| 272 | +> |
| 273 | +> proc.time() |
| 274 | + user system elapsed |
| 275 | + 6.138 1.350 21.998 |
0 commit comments