Skip to content

Commit 5eb05ac

Browse files
committed
Merge remote-tracking branch 'origin/main'
2 parents 7530734 + b733f12 commit 5eb05ac

File tree

4 files changed

+523
-0
lines changed

4 files changed

+523
-0
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Identity added: /home/j.palmer/.ssh/id_rsa (/home/j.palmer/.ssh/id_rsa)
2+
Identity added: /home/j.palmer/.ssh/id_rsa (/home/j.palmer/.ssh/id_rsa)
3+
Identity added: /home/j.palmer/.ssh/id_rsa (/home/j.palmer/.ssh/id_rsa)
4+
Identity added: /home/j.palmer/.ssh/id_rsa (/home/j.palmer/.ssh/id_rsa)
5+
Identity added: /home/j.palmer/.ssh/id_rsa (/home/j.palmer/.ssh/id_rsa)
6+
Identity added: /home/j.palmer/.ssh/id_rsa (/home/j.palmer/.ssh/id_rsa)
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
Agent pid 438823
2+
Agent pid 438839
3+
Status updated for municipal-forecast-priority: running
4+
Starting priority municipal data generation: Thu Aug 21 20:44:14 CEST 2025
5+
Collecting municipal forecasts for immediate model use...
6+
Agent pid 438905
7+
Status updated for municipal-forecast-priority: running
8+
Agent pid 438986
9+
Status updated for municipal-forecast-priority: running
10+
✅ Forecast collection successful
11+
Generating municipal data backwards from present...
12+
Agent pid 439017
13+
Status updated for municipal-forecast-priority: running
14+
Agent pid 439092
15+
Status updated for municipal-forecast-priority: completed
16+
✅ Priority municipal data generation completed: Thu Aug 21 20:45:55 CEST 2025
17+
Models can now use: data/output/daily_municipal_extended.csv.gz
Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,288 @@
1+
2+
R version 4.4.2 (2024-10-31) -- "Pile of Leaves"
3+
Copyright (C) 2024 The R Foundation for Statistical Computing
4+
Platform: x86_64-pc-linux-gnu
5+
6+
R is free software and comes with ABSOLUTELY NO WARRANTY.
7+
You are welcome to redistribute it under certain conditions.
8+
Type 'license()' or 'licence()' for distribution details.
9+
10+
R is a collaborative project with many contributors.
11+
Type 'contributors()' for more information and
12+
'citation()' on how to cite R or R packages in publications.
13+
14+
Type 'demo()' for some demos, 'help()' for on-line help, or
15+
'help.start()' for an HTML browser interface to help.
16+
Type 'q()' to quit R.
17+
18+
- Project '~/research/weather-data-collector-spain' loaded. [renv 1.1.4]
19+
> #!/usr/bin/env Rscript
20+
>
21+
> # Simple forecast data collection based on proven working patterns
22+
> library(jsonlite)
23+
> library(httr) # Use httr like in the working script
24+
> library(curl)
25+
Using libcurl 8.7.1 with OpenSSL/3.2.2
26+
27+
Attaching package: ‘curl’
28+
29+
The following object is masked from ‘package:httr’:
30+
31+
handle_reset
32+
33+
> library(dplyr)
34+
35+
Attaching package: ‘dplyr’
36+
37+
The following objects are masked from ‘package:stats’:
38+
39+
filter, lag
40+
41+
The following objects are masked from ‘package:base’:
42+
43+
intersect, setdiff, setequal, union
44+
45+
> library(data.table)
46+
47+
Attaching package: ‘data.table’
48+
49+
The following objects are masked from ‘package:dplyr’:
50+
51+
between, first, last
52+
53+
> library(lubridate)
54+
55+
Attaching package: ‘lubridate’
56+
57+
The following objects are masked from ‘package:data.table’:
58+
59+
hour, isoweek, mday, minute, month, quarter, second, wday, week,
60+
yday, year
61+
62+
The following objects are masked from ‘package:base’:
63+
64+
date, intersect, setdiff, union
65+
66+
>
67+
> # Load API keys
68+
> source("auth/keys.R")
69+
>
70+
> cat("=== AEMET FORECAST DATA COLLECTION (SIMPLE v2) ===\n")
71+
=== AEMET FORECAST DATA COLLECTION (SIMPLE v2) ===
72+
> cat("Started at:", format(Sys.time()), "\n")
73+
Started at: 2025-08-21 20:44:30
74+
>
75+
> # Function to get municipality forecast using working pattern
76+
> get_municipality_forecast_v2 = function(municipio_code, municipio_name = NULL) {
77+
+ tryCatch({
78+
+ cat("Processing", municipio_code, "\n")
79+
+
80+
+ # Initialize curl handle with current API key
81+
+ h = new_handle()
82+
+ handle_setheaders(h, 'api_key' = get_current_api_key())
83+
+
84+
+ # Request forecast data URL
85+
+ response1 = curl_fetch_memory(
86+
+ paste0('https://opendata.aemet.es/opendata/api/prediccion/especifica/municipio/diaria/', municipio_code),
87+
+ handle = h
88+
+ )
89+
+
90+
+ if(response1$status_code == 429) {
91+
+ cat("Rate limit - rotating key...\n")
92+
+ rotate_api_key()
93+
+ handle_setheaders(h, 'api_key' = get_current_api_key())
94+
+ Sys.sleep(3)
95+
+
96+
+ response1 = curl_fetch_memory(
97+
+ paste0('https://opendata.aemet.es/opendata/api/prediccion/especifica/municipio/diaria/', municipio_code),
98+
+ handle = h
99+
+ )
100+
+ }
101+
+
102+
+ if(response1$status_code != 200) {
103+
+ cat("API request failed:", response1$status_code, "\n")
104+
+ return(NULL)
105+
+ }
106+
+
107+
+ # Parse response to get data URL
108+
+ response_content = fromJSON(rawToChar(response1$content))
109+
+
110+
+ if(!"datos" %in% names(response_content)) {
111+
+ cat("No data URL in response\n")
112+
+ return(NULL)
113+
+ }
114+
+
115+
+ # Fetch actual forecast data
116+
+ Sys.sleep(1)
117+
+ response2 = curl_fetch_memory(response_content$datos)
118+
+
119+
+ if(response2$status_code != 200) {
120+
+ cat("Data request failed:", response2$status_code, "\n")
121+
+ return(NULL)
122+
+ }
123+
+
124+
+ # Parse forecast data using your working approach
125+
+ this_string = rawToChar(response2$content)
126+
+ Encoding(this_string) = "latin1"
127+
+ forecast_data = fromJSON(this_string)
128+
+
129+
+ # Extract municipality info
130+
+ municipio_nombre = forecast_data$nombre
131+
+ provincia = forecast_data$provincia
132+
+ elaborado = forecast_data$elaborado
133+
+
134+
+ # Process all 7 days at once (wdia[[1]] contains vectors for all days)
135+
+ wdia = forecast_data$prediccion$dia
136+
+
137+
+ # Extract vectors for all 7 days using your proven pattern
138+
+ fechas = as.Date(wdia[[1]]$fecha) # Direct conversion using as.Date
139+
+ temp_max = wdia[[1]]$temperatura$maxima
140+
+ temp_min = wdia[[1]]$temperatura$minima
141+
+ temp_avg = rowMeans(cbind(temp_max, temp_min), na.rm = TRUE)
142+
+
143+
+ # Extract additional variables following your pattern
144+
+ humid_max = if("humedadRelativa" %in% names(wdia[[1]])) {
145+
+ wdia[[1]]$humedadRelativa$maxima
146+
+ } else rep(NA, length(fechas))
147+
+
148+
+ humid_min = if("humedadRelativa" %in% names(wdia[[1]])) {
149+
+ wdia[[1]]$humedadRelativa$minima
150+
+ } else rep(NA, length(fechas))
151+
+
152+
+ # Wind data (following your unlist/lapply pattern)
153+
+ wind_speed = if("viento" %in% names(wdia[[1]])) {
154+
+ unlist(lapply(wdia[[1]]$viento, function(x) {
155+
+ if(is.list(x) && "velocidad" %in% names(x)) {
156+
+ mean(x$velocidad, na.rm = TRUE)
157+
+ } else NA
158+
+ }))
159+
+ } else rep(NA, length(fechas))
160+
+
161+
+ cat("Extracted", length(fechas), "forecast days\n")
162+
+ cat("First day - Date:", as.character(fechas[1]), "Temp max:", temp_max[1], "Temp min:", temp_min[1], "Temp avg:", temp_avg[1], "\n")
163+
+
164+
+ # Create result data frame with all 7 days
165+
+ result = data.frame(
166+
+ municipio_id = municipio_code,
167+
+ municipio_nombre = forecast_data$nombre,
168+
+ provincia = forecast_data$provincia,
169+
+ elaborado = forecast_data$elaborado,
170+
+ fecha = fechas,
171+
+ temp_max = temp_max,
172+
+ temp_min = temp_min,
173+
+ temp_avg = temp_avg,
174+
+ humid_max = humid_max,
175+
+ humid_min = humid_min,
176+
+ wind_speed = wind_speed,
177+
+ stringsAsFactors = FALSE
178+
+ )
179+
+
180+
+ return(result)
181+
+
182+
+ }, error = function(e) {
183+
+ cat("✗ ERROR:", e$message, "\n")
184+
+ return(NULL)
185+
+ })
186+
+ }
187+
>
188+
> # Load municipality data
189+
> cat("Loading municipality codes...\n")
190+
Loading municipality codes...
191+
> municipalities_data = fread("data/input/municipalities.csv.gz")
192+
> cat("Loaded", nrow(municipalities_data), "municipalities\n")
193+
Loaded 8129 municipalities
194+
>
195+
> # Use small sample for testing
196+
> SAMPLE_SIZE = 2
197+
> working_municipalities = head(municipalities_data$CUMUN, SAMPLE_SIZE)
198+
> names(working_municipalities) = head(municipalities_data$NAMEUNIT, SAMPLE_SIZE)
199+
>
200+
> cat("Testing with", SAMPLE_SIZE, "municipalities\n\n")
201+
Testing with 2 municipalities
202+
203+
>
204+
> # Collect forecasts
205+
> all_forecasts = list()
206+
> successful_collections = 0
207+
>
208+
> for(i in seq_along(working_municipalities)) {
209+
+ city = names(working_municipalities)[i]
210+
+ code = working_municipalities[i]
211+
+
212+
+ cat("Municipality", i, "of", length(working_municipalities), ":", city, "(", code, ")\n")
213+
+
214+
+ if(i > 1) {
215+
+ cat("Waiting 15 seconds...\n")
216+
+ Sys.sleep(15) # Longer delay to avoid rate limits
217+
+ }
218+
+
219+
+ forecast_data = get_municipality_forecast_v2(code, city)
220+
+
221+
+ if(!is.null(forecast_data)) {
222+
+ all_forecasts[[code]] = forecast_data
223+
+ successful_collections = successful_collections + 1
224+
+ }
225+
+
226+
+ cat("\n")
227+
+ }
228+
Municipality 1 of 2 : Solórzano ( 39084 )
229+
Processing 39084
230+
Extracted 7 forecast days
231+
First day - Date: 2025-08-21 Temp max: 22 Temp min: 16 Temp avg: 19
232+
233+
Municipality 2 of 2 : Pino del Oro ( 49157 )
234+
Waiting 15 seconds...
235+
Processing 49157
236+
No data URL in response
237+
238+
Warning message:
239+
In data.frame(municipio_id = municipio_code, municipio_nombre = forecast_data$nombre, :
240+
row names were found from a short variable and have been discarded
241+
>
242+
> cat("=== RESULTS ===\n")
243+
=== RESULTS ===
244+
> cat("Municipalities attempted:", length(working_municipalities), "\n")
245+
Municipalities attempted: 2
246+
> cat("Successful collections:", successful_collections, "\n")
247+
Successful collections: 1
248+
>
249+
> if(length(all_forecasts) > 0) {
250+
+ final_data = do.call(rbind, all_forecasts)
251+
+
252+
+ # Add collection timestamp
253+
+ final_data$collected_at = Sys.time()
254+
+
255+
+ cat("Total forecast records:", nrow(final_data), "\n")
256+
+ cat("Date range:", as.character(min(final_data$fecha)), "to", as.character(max(final_data$fecha)), "\n")
257+
+ cat("Sample data:\n")
258+
+ print(head(final_data, 3))
259+
+
260+
+ # Ensure output directory exists
261+
+ dir.create("data/output", recursive = TRUE, showWarnings = FALSE)
262+
+
263+
+ # Save the data
264+
+ output_file = paste0("data/output/municipal_forecasts_", Sys.Date(), ".csv")
265+
+ write.csv(final_data, output_file, row.names = FALSE)
266+
+ cat("Data saved to:", output_file, "\n")
267+
+ } else {
268+
+ cat("No data collected\n")
269+
+ }
270+
Total forecast records: 7
271+
Date range: 2025-08-21 to 2025-08-27
272+
Sample data:
273+
municipio_id municipio_nombre provincia elaborado fecha
274+
1 39084 Solórzano Cantabria 2025-08-21T18:32:14 2025-08-21
275+
2 39084 Solórzano Cantabria 2025-08-21T18:32:14 2025-08-22
276+
3 39084 Solórzano Cantabria 2025-08-21T18:32:14 2025-08-23
277+
temp_max temp_min temp_avg humid_max humid_min wind_speed collected_at
278+
1 22 16 19.0 100 80 4.285714 2025-08-21 20:45:34
279+
2 22 14 18.0 100 65 2.142857 2025-08-21 20:45:34
280+
3 25 12 18.5 100 65 20.000000 2025-08-21 20:45:34
281+
Data saved to: data/output/municipal_forecasts_2025-08-21.csv
282+
>
283+
> cat("Completed at:", format(Sys.time()), "\n")
284+
Completed at: 2025-08-21 20:45:34
285+
>
286+
> proc.time()
287+
user system elapsed
288+
4.509 1.139 78.780

0 commit comments

Comments
 (0)