Skip to content

Commit bf74e2a

Browse files
committed
logs from cluster runs
1 parent e7ef686 commit bf74e2a

27 files changed

+1746
-0
lines changed
Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
2+
R version 4.4.2 (2024-10-31) -- "Pile of Leaves"
3+
Copyright (C) 2024 The R Foundation for Statistical Computing
4+
Platform: x86_64-pc-linux-gnu
5+
6+
R is free software and comes with ABSOLUTELY NO WARRANTY.
7+
You are welcome to redistribute it under certain conditions.
8+
Type 'license()' or 'licence()' for distribution details.
9+
10+
R is a collaborative project with many contributors.
11+
Type 'contributors()' for more information and
12+
'citation()' on how to cite R or R packages in publications.
13+
14+
Type 'demo()' for some demos, 'help()' for on-line help, or
15+
'help.start()' for an HTML browser interface to help.
16+
Type 'q()' to quit R.
17+
18+
- Project '~/research/weather-data-collector-spain' loaded. [renv 1.1.4]
19+
> # get_latest_data_expanded.R
20+
> # ----------------------
21+
> # Purpose: Download and update the latest observation data from AEMET stations across Spain.
22+
> #
23+
> # This script fetches weather observations from the AEMET OpenData API using the 7 core variables
24+
> # that are compatible across current observations, historical data, and forecast endpoints.
25+
> #
26+
> # Core Variables (Safe for all endpoints):
27+
> # - ta: Air temperature (°C)
28+
> # - tamax: Maximum temperature (°C)
29+
> # - tamin: Minimum temperature (°C)
30+
> # - hr: Relative humidity (%)
31+
> # - prec: Precipitation (mm)
32+
> # - vv: Wind speed (km/h)
33+
> # - pres: Atmospheric pressure (hPa)
34+
> #
35+
> # Main Steps:
36+
> # 1. Load dependencies and API key.
37+
> # 2. Define functions to request and process data from the AEMET API, with error handling and retries.
38+
> # 3. Download the latest data and reshape it for storage.
39+
> # 4. Append new data to the local CSV file, ensuring no duplicates.
40+
> #
41+
> # Usage:
42+
> # - Requires a valid API key in 'auth/keys.R' as 'my_api_key'.
43+
> # - Run as an R script. Output is written to 'data/spain_weather_expanded.csv.gz'.
44+
> #
45+
> # Dependencies: tidyverse, lubridate, curl, jsonlite, data.table, R.utils
46+
> #
47+
> # Author: John Palmer
48+
> # Date: 2025-08-20 (Updated for 7-variable expansion)
49+
>
50+
> # Title ####
51+
> # For downloading latest observation data from AEMET stations all over Spain. This needs to be run at least every 12 hours, but better to run it every 2 because of API limits, failures etc.
52+
>
53+
> rm(list=ls())
54+
>
55+
>
56+
> # Dependencies ####
57+
> library(tidyverse)
58+
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
59+
✔ dplyr 1.1.4 ✔ readr 2.1.5
60+
✔ forcats 1.0.0 ✔ stringr 1.5.1
61+
✔ ggplot2 3.5.2 ✔ tibble 3.3.0
62+
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
63+
✔ purrr 1.1.0
64+
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
65+
✖ dplyr::filter() masks stats::filter()
66+
✖ dplyr::lag() masks stats::lag()
67+
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
68+
> library(lubridate)
69+
> library(curl)
70+
Using libcurl 8.7.1 with OpenSSL/3.2.2
71+
72+
Attaching package: ‘curl’
73+
74+
The following object is masked from ‘package:readr’:
75+
76+
parse_date
77+
78+
> library(jsonlite)
79+
80+
Attaching package: ‘jsonlite’
81+
82+
The following object is masked from ‘package:purrr’:
83+
84+
flatten
85+
86+
> library(data.table)
87+
88+
Attaching package: ‘data.table’
89+
90+
The following objects are masked from ‘package:lubridate’:
91+
92+
hour, isoweek, mday, minute, month, quarter, second, wday, week,
93+
yday, year
94+
95+
The following objects are masked from ‘package:dplyr’:
96+
97+
between, first, last
98+
99+
The following object is masked from ‘package:purrr’:
100+
101+
transpose
102+
103+
> library(R.utils)
104+
Loading required package: R.oo
105+
Loading required package: R.methodsS3
106+
R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help.
107+
R.oo v1.27.1 (2025-05-02 21:00:05 UTC) successfully loaded. See ?R.oo for help.
108+
109+
Attaching package: ‘R.oo’
110+
111+
The following object is masked from ‘package:R.methodsS3’:
112+
113+
throw
114+
115+
The following objects are masked from ‘package:methods’:
116+
117+
getClasses, getMethods
118+
119+
The following objects are masked from ‘package:base’:
120+
121+
attach, detach, load, save
122+
123+
R.utils v2.13.0 (2025-02-24 21:20:02 UTC) successfully loaded. See ?R.utils for help.
124+
125+
Attaching package: ‘R.utils’
126+
127+
The following object is masked from ‘package:jsonlite’:
128+
129+
validate
130+
131+
The following object is masked from ‘package:tidyr’:
132+
133+
extract
134+
135+
The following object is masked from ‘package:utils’:
136+
137+
timestamp
138+
139+
The following objects are masked from ‘package:base’:
140+
141+
cat, commandArgs, getOption, isOpen, nullfile, parse, use, warnings
142+
143+
>
144+
> # Set locale to UTF-8 for proper encoding handling
145+
> Sys.setlocale("LC_ALL", "en_US.UTF-8")
146+
[1] ""
147+
Warning message:
148+
In Sys.setlocale("LC_ALL", "en_US.UTF-8") :
149+
OS reports request to set locale to "en_US.UTF-8" cannot be honored
150+
>
151+
> # If you want to prevent concurrent runs of this script, set PREVENT_CONCURRENT_RUNS to TRUE.
152+
> PREVENT_CONCURRENT_RUNS = FALSE
153+
>
154+
> if(PREVENT_CONCURRENT_RUNS) {
155+
+ # Prevent concurrent runs by creating a lockfile
156+
+ # Lockfile management
157+
+ lockfile <- "tmp/get_latest_data_expanded.lock"
158+
+ # Check if lockfile exists
159+
+ if (file.exists(lockfile)) {
160+
+ cat("Another run is in progress. Exiting.\n")
161+
+ quit(save = "no", status = 0)
162+
+ }
163+
+ # Create a temporary directory and lockfile
164+
+ dir.create("tmp", showWarnings = FALSE)
165+
+ file.create(lockfile)
166+
+ # Ensure lockfile is removed on exit
167+
+ on.exit(unlink(lockfile), add = TRUE)
168+
+ }
169+
>
170+
>
171+
> # Load API keys
172+
> source("auth/keys.R")
173+
>
174+
> # aemet_api_request: Fetches latest weather observation data from AEMET API and returns as tibble.
175+
> # Only selects the 7 core variables that are compatible across all endpoints.
176+
> aemet_api_request = function(){
177+
+ req = curl_fetch_memory(paste0('https://opendata.aemet.es/opendata/api/observacion/convencional/todas'), handle=h)
178+
+ wurl = fromJSON(rawToChar(req$content))$datos
179+
+ req = curl_fetch_memory(wurl)
180+
+ this_string = rawToChar(req$content)
181+
+ Encoding(this_string) = "latin1"
182+
+ wdia = fromJSON(this_string) %>%
183+
+ as_tibble() %>%
184+
+ dplyr::select(fint, idema, ta, tamax, tamin, hr, prec, vv, pres)
185+
+ return(wdia)
186+
+ }
187+
>
188+
> # get_data: Wrapper for aemet_api_request with error handling and retry logic.
189+
> get_data = function(){
190+
+ tryCatch(
191+
+ expr = {
192+
+ return(aemet_api_request())
193+
+ },
194+
+ error = function(e){
195+
+ # (Optional)
196+
+ # Do this if an error is caught...
197+
+ print(e)
198+
+ # waiting and then...
199+
+ Sys.sleep(50)
200+
+ # try again:
201+
+ wdia = get_data()
202+
+ return(NULL)
203+
+ },
204+
+ warning = function(w){
205+
+ print(w)
206+
+ # (Optional)
207+
+ # Do this if a warning is caught...
208+
+ return(NULL)
209+
+ },
210+
+ finally = {
211+
+ # (Optional)
212+
+ # Do this at the end before quitting the tryCatch structure...
213+
+ }
214+
+ )
215+
+ }
216+
>
217+
> # Ensure data directory exists
218+
> if(!dir.exists("data")) {
219+
+ dir.create("data")
220+
+ }
221+
>
222+
> # Set up cURL handle with API key
223+
> h <- new_handle()
224+
> handle_setheaders(h, 'api_key' = my_api_key)
225+
>
226+
> # Download latest data with retry logic
227+
> wdia = get_data()
228+
> if(is.null(wdia)){
229+
+ # If data retrieval failed, wait and try again
230+
+ Sys.sleep(60)
231+
+ wdia = get_data()
232+
+ }
233+
>
234+
> # If data was successfully retrieved, process and save
235+
> if(!is.null(wdia) && nrow(wdia) > 0){
236+
+ # Reshape and clean latest weather data - use all 7 core variables
237+
+ latest_weather = wdia %>%
238+
+ pivot_longer(cols = c(ta, tamax, tamin, hr, prec, vv, pres),
239+
+ names_to = "measure",
240+
+ values_to = "value") %>%
241+
+ filter(!is.na(value)) %>%
242+
+ mutate(fint = as_datetime(fint)) %>%
243+
+ as.data.table()
244+
+
245+
+ print(paste0("Downloaded ", nrow(latest_weather), " new rows of data with 7 core variables."))
246+
+
247+
+ # Load previous weather data
248+
+ if(file.exists("data/spain_weather_expanded.csv.gz")) {
249+
+ previous_weather = fread("data/spain_weather_expanded.csv.gz")
250+
+ } else {
251+
+ previous_weather = data.table()
252+
+ print("Creating new expanded weather dataset file.")
253+
+ }
254+
+
255+
+ # Combine and deduplicate
256+
+ spain_weather = bind_rows(latest_weather, previous_weather) %>%
257+
+ distinct() %>%
258+
+ arrange(desc(fint))
259+
+
260+
+ # Save updated data
261+
+ fwrite(as.data.table(spain_weather), "data/output/hourly_station_ongoing.csv.gz")
262+
+
263+
+ print(paste0("Total dataset now contains ", nrow(spain_weather), " rows."))
264+
+ } else{
265+
+ print("No new data retrieved. Nothing saved.")
266+
+ }
267+
[1] "Downloaded 60289 new rows of data with 7 core variables."
268+
[1] "Creating new expanded weather dataset file."
269+
[1] "Total dataset now contains 60289 rows."
270+
>
271+
>
272+
>
273+
> proc.time()
274+
user system elapsed
275+
6.138 1.350 21.998

0 commit comments

Comments
 (0)