Skip to content

Commit ffed638

Browse files
committed
fixed all scripts for getting data
1 parent bf74e2a commit ffed638

File tree

3 files changed

+49
-18
lines changed

3 files changed

+49
-18
lines changed

code/get_forecast_data.R

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,11 @@ library(lubridate)
1111
# Load API keys
1212
source("auth/keys.R")
1313

14-
cat("=== AEMET FORECAST DATA COLLECTION (SIMPLE v2) ===\n")
14+
# Set testing mode to TRUE and specify N_TEST_MUNICIPALITIES to get forecast for only selected municipalities
15+
TESTING_MODE = TRUE
16+
N_TEST_MUNICIPALITIES = 2
17+
18+
cat("=== AEMET FORECAST DATA COLLECTION ===\n")
1519
cat("Started at:", format(Sys.time()), "\n")
1620

1721
# Function to get municipality forecast using working pattern
@@ -132,12 +136,13 @@ cat("Loading municipality codes...\n")
132136
municipalities_data = fread("data/input/municipalities.csv.gz")
133137
cat("Loaded", nrow(municipalities_data), "municipalities\n")
134138

135-
# Use small sample for testing
136-
SAMPLE_SIZE = 2
137-
working_municipalities = head(municipalities_data$CUMUN, SAMPLE_SIZE)
138-
names(working_municipalities) = head(municipalities_data$NAMEUNIT, SAMPLE_SIZE)
139+
working_municipalities = municipalities_data$CUMUN
140+
names(working_municipalities) = municipalities_data$NAMEUNIT
139141

140-
cat("Testing with", SAMPLE_SIZE, "municipalities\n\n")
142+
if(TESTING_MODE){
143+
working_municipalities = head(working_municipalities, N_TEST_MUNICIPALITIES)
144+
cat("Testing with", N_TEST_MUNICIPALITIES, "municipalities\n\n")
145+
}
141146

142147
# Collect forecasts
143148
all_forecasts = list()

code/get_historical_data.R

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ library(data.table)
4545
library(curl)
4646
library(jsonlite)
4747

48+
# Set output data file path
49+
output_data_file_path = "data/output/daily_station_historical.csv.gz"
50+
51+
4852
# If you want to prevent concurrent runs of this script, set PREVENT_CONCURRENT_RUNS to TRUE.
4953
PREVENT_CONCURRENT_RUNS = FALSE
5054

@@ -76,15 +80,15 @@ start_date = as_date("2013-07-01")
7680

7781
# Set up curl handle with API key for authentication and increased timeout
7882
h <- new_handle()
79-
handle_setheaders(h, 'api_key' = my_api_key)
83+
handle_setheaders(h, 'api_key' = get_current_api_key())
8084
handle_setopt(h, timeout = 60, connecttimeout = 30) # Increase timeout values
8185

8286
# Generate sequence of all dates to check (from start_date to 4 days before today)
8387
all_dates = seq.Date(from = start_date, to=today()-4, by = "day")
8488

8589
# Load existing historical weather data
86-
if(file.exists("data/output/daily_station_historical.csv.gz")){
87-
stored_weather_daily = fread("data/output/daily_station_historical.csv.gz")
90+
if(file.exists(output_data_file_path)){
91+
stored_weather_daily = fread(output_data_file_path)
8892
} else{stored_weather_daily = NULL}
8993

9094

@@ -118,6 +122,22 @@ lapply(seq(1, length(these_dates), chunksize), function(j){
118122
# Request historical daily climatological data for specific date
119123
req = curl_fetch_memory(paste0('https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/', start_date, 'T00%3A00%3A00UTC/fechafin/', start_date, 'T23%3A59%3A59UTC/todasestaciones'), handle=h)
120124

125+
if(req$status_code == 429) {
126+
cat("Rate limit - rotating key...\n")
127+
rotate_api_key()
128+
handle_setheaders(h, 'api_key' = get_current_api_key())
129+
Sys.sleep(3)
130+
req = curl_fetch_memory(paste0('https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/', start_date, 'T00%3A00%3A00UTC/fechafin/', start_date, 'T23%3A59%3A59UTC/todasestaciones'), handle=h)
131+
132+
}
133+
134+
if(req$status_code != 200) {
135+
cat("API request failed:", req$status_code, "\n")
136+
return(NULL)
137+
}
138+
139+
140+
121141
wurl = fromJSON(rawToChar(req$content))$datos
122142

123143
req = curl_fetch_memory(wurl)
@@ -162,7 +182,9 @@ lapply(seq(1, length(these_dates), chunksize), function(j){
162182
},
163183
error = function(e){
164184
cat("ERROR on date", as.character(start_date), ":", e$message, "\n")
165-
Sys.sleep(60) # Longer sleep on error
185+
rotate_api_key()
186+
handle_setheaders(h, 'api_key' = get_current_api_key())
187+
Sys.sleep(3)
166188
return(NULL)
167189
},
168190
warning = function(w){
@@ -179,8 +201,8 @@ lapply(seq(1, length(these_dates), chunksize), function(j){
179201

180202
print(paste0("Just grabbed ", nrow(weather_daily), " new records"))
181203

182-
if(file.exists("data/output/daily_station_historical.csv.gz")){
183-
stored_weather_daily = fread("data/output/daily_station_historical.csv.gz")
204+
if(file.exists(output_data_file_path)){
205+
stored_weather_daily = fread(output_data_file_path)
184206

185207
print(paste0("We already had ", nrow(stored_weather_daily), " records stored"))
186208

@@ -191,8 +213,8 @@ lapply(seq(1, length(these_dates), chunksize), function(j){
191213

192214
fwrite(weather_daily, "data/output/daily_station_historical.csv.gz")
193215

194-
print("pausing 60 seconds")
195-
Sys.sleep(60) # Increased pause between chunks
216+
# print("pausing 60 seconds")
217+
# Sys.sleep(60) # Increased pause between chunks
196218

197219
})
198220

code/get_latest_data.R

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ library(R.utils)
4646
# Set locale to UTF-8 for proper encoding handling
4747
Sys.setlocale("LC_ALL", "en_US.UTF-8")
4848

49+
# Set output data file path
50+
output_data_file_path = "data/output/hourly_station_ongoing.csv.gz"
51+
4952
# If you want to prevent concurrent runs of this script, set PREVENT_CONCURRENT_RUNS to TRUE.
5053
PREVENT_CONCURRENT_RUNS = FALSE
5154

@@ -143,20 +146,21 @@ if(!is.null(wdia) && nrow(wdia) > 0){
143146
print(paste0("Downloaded ", nrow(latest_weather), " new rows of data with 7 core variables."))
144147

145148
# Load previous weather data
146-
if(file.exists("data/spain_weather_expanded.csv.gz")) {
147-
previous_weather = fread("data/spain_weather_expanded.csv.gz")
149+
if(file.exists(output_data_file_path)) {
150+
previous_weather = fread(output_data_file_path)
151+
print(paste0("Previous dataset file has ", nrow(previous_weather), " rows."))
148152
} else {
149153
previous_weather = data.table()
150154
print("Creating new expanded weather dataset file.")
151155
}
152156

153157
# Combine and deduplicate
154-
spain_weather = bind_rows(latest_weather, previous_weather) %>%
158+
spain_weather = bind_rows(latest_weather, previous_weather) %>% filter(!is.na(value)) %>%
155159
distinct() %>%
156160
arrange(desc(fint))
157161

158162
# Save updated data
159-
fwrite(as.data.table(spain_weather), "data/output/hourly_station_ongoing.csv.gz")
163+
fwrite(spain_weather, output_data_file_path)
160164

161165
print(paste0("Total dataset now contains ", nrow(spain_weather), " rows."))
162166
} else{

0 commit comments

Comments (0)