@@ -45,6 +45,10 @@ library(data.table)
4545library(curl )
4646library(jsonlite )
4747
48+ # Set output data file path
49+ output_data_file_path = " data/output/daily_station_historical.csv.gz"
50+
51+
4852# If you want to prevent concurrent runs of this script, set PREVENT_CONCURRENT_RUNS to TRUE.
4953PREVENT_CONCURRENT_RUNS = FALSE
5054
@@ -76,15 +80,15 @@ start_date = as_date("2013-07-01")
7680
7781# Set up curl handle with API key for authentication and increased timeout
7882h <- new_handle()
79- handle_setheaders(h , ' api_key' = my_api_key )
83+ handle_setheaders(h , ' api_key' = get_current_api_key() )
8084handle_setopt(h , timeout = 60 , connecttimeout = 30 ) # Increase timeout values
8185
8286# Generate sequence of all dates to check (from start_date to 4 days before today)
8387all_dates = seq.Date(from = start_date , to = today()- 4 , by = " day" )
8488
8589# Load existing historical weather data
86- if (file.exists(" data/output/daily_station_historical.csv.gz " )){
87- stored_weather_daily = fread(" data/output/daily_station_historical.csv.gz " )
90+ if (file.exists(output_data_file_path )){
91+ stored_weather_daily = fread(output_data_file_path )
8892} else {stored_weather_daily = NULL }
8993
9094
@@ -118,6 +122,22 @@ lapply(seq(1, length(these_dates), chunksize), function(j){
118122 # Request historical daily climatological data for specific date
119123 req = curl_fetch_memory(paste0(' https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/' , start_date , ' T00%3A00%3A00UTC/fechafin/' , start_date , ' T23%3A59%3A59UTC/todasestaciones' ), handle = h )
120124
125+ if (req $ status_code == 429 ) {
126+ cat(" Rate limit - rotating key...\n " )
127+ rotate_api_key()
128+ handle_setheaders(h , ' api_key' = get_current_api_key())
129+ Sys.sleep(3 )
130+ req = curl_fetch_memory(paste0(' https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/' , start_date , ' T00%3A00%3A00UTC/fechafin/' , start_date , ' T23%3A59%3A59UTC/todasestaciones' ), handle = h )
131+
132+ }
133+
134+ if (req $ status_code != 200 ) {
135+ cat(" API request failed:" , req $ status_code , " \n " )
136+ return (NULL )
137+ }
138+
139+
140+
121141 wurl = fromJSON(rawToChar(req $ content ))$ datos
122142
123143 req = curl_fetch_memory(wurl )
@@ -162,7 +182,9 @@ lapply(seq(1, length(these_dates), chunksize), function(j){
162182 },
163183 error = function (e ){
164184 cat(" ERROR on date" , as.character(start_date ), " :" , e $ message , " \n " )
165- Sys.sleep(60 ) # Longer sleep on error
185+ rotate_api_key()
186+ handle_setheaders(h , ' api_key' = get_current_api_key())
187+ Sys.sleep(3 )
166188 return (NULL )
167189 },
168190 warning = function (w ){
@@ -179,8 +201,8 @@ lapply(seq(1, length(these_dates), chunksize), function(j){
179201
180202 print(paste0(" Just grabbed " , nrow(weather_daily ), " new records" ))
181203
182- if (file.exists(" data/output/daily_station_historical.csv.gz " )){
183- stored_weather_daily = fread(" data/output/daily_station_historical.csv.gz " )
204+ if (file.exists(output_data_file_path )){
205+ stored_weather_daily = fread(output_data_file_path )
184206
185207 print(paste0(" We already had " , nrow(stored_weather_daily ), " records stored" ))
186208
@@ -191,8 +213,8 @@ lapply(seq(1, length(these_dates), chunksize), function(j){
191213
192214 fwrite(weather_daily , " data/output/daily_station_historical.csv.gz" )
193215
194- print(" pausing 60 seconds" )
195- Sys.sleep(60 ) # Increased pause between chunks
216+ # print("pausing 60 seconds")
217+ # Sys.sleep(60) # Increased pause between chunks
196218
197219 })
198220
0 commit comments