Skip to content

Commit 4978b9f

Browse files
committed
logs from failed run
1 parent 94583fa commit 4978b9f

12 files changed

+3256
-0
lines changed
Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
2+
R version 4.4.2 (2024-10-31) -- "Pile of Leaves"
3+
Copyright (C) 2024 The R Foundation for Statistical Computing
4+
Platform: x86_64-pc-linux-gnu
5+
6+
R is free software and comes with ABSOLUTELY NO WARRANTY.
7+
You are welcome to redistribute it under certain conditions.
8+
Type 'license()' or 'licence()' for distribution details.
9+
10+
R is a collaborative project with many contributors.
11+
Type 'contributors()' for more information and
12+
'citation()' on how to cite R or R packages in publications.
13+
14+
Type 'demo()' for some demos, 'help()' for on-line help, or
15+
'help.start()' for an HTML browser interface to help.
16+
Type 'q()' to quit R.
17+
18+
- Project '~/research/weather-data-collector-spain' loaded. [renv 1.1.4]
19+
- The project is out-of-sync -- use `renv::status()` for details.
20+
> #!/usr/bin/env Rscript
21+
>
22+
> # Simple forecast data collection based on proven working patterns
23+
> library(jsonlite)
24+
> library(httr) # Use httr like in the working script
25+
> library(curl)
26+
Using libcurl 8.7.1 with OpenSSL/3.2.2
27+
28+
Attaching package: ‘curl’
29+
30+
The following object is masked from ‘package:httr’:
31+
32+
handle_reset
33+
34+
> library(dplyr)
35+
36+
Attaching package: ‘dplyr’
37+
38+
The following objects are masked from ‘package:stats’:
39+
40+
filter, lag
41+
42+
The following objects are masked from ‘package:base’:
43+
44+
intersect, setdiff, setequal, union
45+
46+
> library(data.table)
47+
48+
Attaching package: ‘data.table’
49+
50+
The following objects are masked from ‘package:dplyr’:
51+
52+
between, first, last
53+
54+
> library(lubridate)
55+
56+
Attaching package: ‘lubridate’
57+
58+
The following objects are masked from ‘package:data.table’:
59+
60+
hour, isoweek, mday, minute, month, quarter, second, wday, week,
61+
yday, year
62+
63+
The following objects are masked from ‘package:base’:
64+
65+
date, intersect, setdiff, union
66+
67+
>
68+
> # Load API keys
69+
> source("auth/keys.R")
70+
>
71+
> cat("=== AEMET FORECAST DATA COLLECTION (SIMPLE v2) ===\n")
72+
=== AEMET FORECAST DATA COLLECTION (SIMPLE v2) ===
73+
> cat("Started at:", format(Sys.time()), "\n")
74+
Started at: 2025-08-22 01:31:06
75+
>
76+
> # Function to get municipality forecast using working pattern
77+
> get_municipality_forecast_v2 = function(municipio_code, municipio_name = NULL) {
78+
+ tryCatch({
79+
+ cat("Processing", municipio_code, "\n")
80+
+
81+
+ # Initialize curl handle with current API key
82+
+ h = new_handle()
83+
+ handle_setheaders(h, 'api_key' = get_current_api_key())
84+
+
85+
+ # Request forecast data URL
86+
+ response1 = curl_fetch_memory(
87+
+ paste0('https://opendata.aemet.es/opendata/api/prediccion/especifica/municipio/diaria/', municipio_code),
88+
+ handle = h
89+
+ )
90+
+
91+
+ if(response1$status_code == 429) {
92+
+ cat("Rate limit - rotating key...\n")
93+
+ rotate_api_key()
94+
+ handle_setheaders(h, 'api_key' = get_current_api_key())
95+
+ Sys.sleep(3)
96+
+
97+
+ response1 = curl_fetch_memory(
98+
+ paste0('https://opendata.aemet.es/opendata/api/prediccion/especifica/municipio/diaria/', municipio_code),
99+
+ handle = h
100+
+ )
101+
+ }
102+
+
103+
+ if(response1$status_code != 200) {
104+
+ cat("API request failed:", response1$status_code, "\n")
105+
+ return(NULL)
106+
+ }
107+
+
108+
+ # Parse response to get data URL
109+
+ response_content = fromJSON(rawToChar(response1$content))
110+
+
111+
+ if(!"datos" %in% names(response_content)) {
112+
+ cat("No data URL in response\n")
113+
+ return(NULL)
114+
+ }
115+
+
116+
+ # Fetch actual forecast data
117+
+ Sys.sleep(1)
118+
+ response2 = curl_fetch_memory(response_content$datos)
119+
+
120+
+ if(response2$status_code != 200) {
121+
+ cat("Data request failed:", response2$status_code, "\n")
122+
+ return(NULL)
123+
+ }
124+
+
125+
+ # Parse forecast data using your working approach
126+
+ this_string = rawToChar(response2$content)
127+
+ Encoding(this_string) = "latin1"
128+
+ forecast_data = fromJSON(this_string)
129+
+
130+
+ # Extract municipality info
131+
+ municipio_nombre = forecast_data$nombre
132+
+ provincia = forecast_data$provincia
133+
+ elaborado = forecast_data$elaborado
134+
+
135+
+ # Process all 7 days at once (wdia[[1]] contains vectors for all days)
136+
+ wdia = forecast_data$prediccion$dia
137+
+
138+
+ # Extract vectors for all 7 days using your proven pattern
139+
+ fechas = as.Date(wdia[[1]]$fecha) # Direct conversion using as.Date
140+
+ temp_max = wdia[[1]]$temperatura$maxima
141+
+ temp_min = wdia[[1]]$temperatura$minima
142+
+ temp_avg = rowMeans(cbind(temp_max, temp_min), na.rm = TRUE)
143+
+
144+
+ # Extract additional variables following your pattern
145+
+ humid_max = if("humedadRelativa" %in% names(wdia[[1]])) {
146+
+ wdia[[1]]$humedadRelativa$maxima
147+
+ } else rep(NA, length(fechas))
148+
+
149+
+ humid_min = if("humedadRelativa" %in% names(wdia[[1]])) {
150+
+ wdia[[1]]$humedadRelativa$minima
151+
+ } else rep(NA, length(fechas))
152+
+
153+
+ # Wind data (following your unlist/lapply pattern)
154+
+ wind_speed = if("viento" %in% names(wdia[[1]])) {
155+
+ unlist(lapply(wdia[[1]]$viento, function(x) {
156+
+ if(is.list(x) && "velocidad" %in% names(x)) {
157+
+ mean(x$velocidad, na.rm = TRUE)
158+
+ } else NA
159+
+ }))
160+
+ } else rep(NA, length(fechas))
161+
+
162+
+ cat("Extracted", length(fechas), "forecast days\n")
163+
+ cat("First day - Date:", as.character(fechas[1]), "Temp max:", temp_max[1], "Temp min:", temp_min[1], "Temp avg:", temp_avg[1], "\n")
164+
+
165+
+ # Create result data frame with all 7 days
166+
+ result = data.frame(
167+
+ municipio_id = municipio_code,
168+
+ municipio_nombre = forecast_data$nombre,
169+
+ provincia = forecast_data$provincia,
170+
+ elaborado = forecast_data$elaborado,
171+
+ fecha = fechas,
172+
+ temp_max = temp_max,
173+
+ temp_min = temp_min,
174+
+ temp_avg = temp_avg,
175+
+ humid_max = humid_max,
176+
+ humid_min = humid_min,
177+
+ wind_speed = wind_speed,
178+
+ stringsAsFactors = FALSE
179+
+ )
180+
+
181+
+ return(result)
182+
+
183+
+ }, error = function(e) {
184+
+ cat("✗ ERROR:", e$message, "\n")
185+
+ return(NULL)
186+
+ })
187+
+ }
188+
>
189+
> # Load municipality data
190+
> cat("Loading municipality codes...\n")
191+
Loading municipality codes...
192+
> municipalities_data = fread("data/input/municipalities.csv.gz")
193+
> cat("Loaded", nrow(municipalities_data), "municipalities\n")
194+
Loaded 8129 municipalities
195+
>
196+
> # Use small sample for testing
197+
> SAMPLE_SIZE = 2
198+
> working_municipalities = head(municipalities_data$CUMUN, SAMPLE_SIZE)
199+
> names(working_municipalities) = head(municipalities_data$NAMEUNIT, SAMPLE_SIZE)
200+
>
201+
> cat("Testing with", SAMPLE_SIZE, "municipalities\n\n")
202+
Testing with 2 municipalities
203+
204+
>
205+
> # Collect forecasts
206+
> all_forecasts = list()
207+
> successful_collections = 0
208+
>
209+
> for(i in seq_along(working_municipalities)) {
210+
+ city = names(working_municipalities)[i]
211+
+ code = working_municipalities[i]
212+
+
213+
+ cat("Municipality", i, "of", length(working_municipalities), ":", city, "(", code, ")\n")
214+
+
215+
+ if(i > 1) {
216+
+ cat("Waiting 15 seconds...\n")
217+
+ Sys.sleep(15) # Longer delay to avoid rate limits
218+
+ }
219+
+
220+
+ forecast_data = get_municipality_forecast_v2(code, city)
221+
+
222+
+ if(!is.null(forecast_data)) {
223+
+ all_forecasts[[code]] = forecast_data
224+
+ successful_collections = successful_collections + 1
225+
+ }
226+
+
227+
+ cat("\n")
228+
+ }
229+
Municipality 1 of 2 : Solórzano ( 39084 )
230+
Processing 39084
231+
Extracted 7 forecast days
232+
First day - Date: 2025-08-21 Temp max: 22 Temp min: 16 Temp avg: 19
233+
234+
Municipality 2 of 2 : Pino del Oro ( 49157 )
235+
Waiting 15 seconds...
236+
Processing 49157
237+
Extracted 7 forecast days
238+
First day - Date: 2025-08-21 Temp max: 28 Temp min: 12 Temp avg: 20
239+
240+
Warning messages:
241+
1: In data.frame(municipio_id = municipio_code, municipio_nombre = forecast_data$nombre, :
242+
row names were found from a short variable and have been discarded
243+
2: In data.frame(municipio_id = municipio_code, municipio_nombre = forecast_data$nombre, :
244+
row names were found from a short variable and have been discarded
245+
>
246+
> cat("=== RESULTS ===\n")
247+
=== RESULTS ===
248+
> cat("Municipalities attempted:", length(working_municipalities), "\n")
249+
Municipalities attempted: 2
250+
> cat("Successful collections:", successful_collections, "\n")
251+
Successful collections: 2
252+
>
253+
> if(length(all_forecasts) > 0) {
254+
+ final_data = do.call(rbind, all_forecasts)
255+
+
256+
+ # Add collection timestamp
257+
+ final_data$collected_at = Sys.time()
258+
+
259+
+ cat("Total forecast records:", nrow(final_data), "\n")
260+
+ cat("Date range:", as.character(min(final_data$fecha)), "to", as.character(max(final_data$fecha)), "\n")
261+
+ cat("Sample data:\n")
262+
+ print(head(final_data, 3))
263+
+
264+
+ # Ensure output directory exists
265+
+ dir.create("data/output", recursive = TRUE, showWarnings = FALSE)
266+
+
267+
+ # Save the data
268+
+ output_file = paste0("data/output/municipal_forecasts_", Sys.Date(), ".csv")
269+
+ write.csv(final_data, output_file, row.names = FALSE)
270+
+ cat("Data saved to:", output_file, "\n")
271+
+ } else {
272+
+ cat("No data collected\n")
273+
+ }
274+
Total forecast records: 14
275+
Date range: 2025-08-21 to 2025-08-27
276+
Sample data:
277+
municipio_id municipio_nombre provincia elaborado fecha
278+
1 39084 Solórzano Cantabria 2025-08-21T21:07:12 2025-08-21
279+
2 39084 Solórzano Cantabria 2025-08-21T21:07:12 2025-08-22
280+
3 39084 Solórzano Cantabria 2025-08-21T21:07:12 2025-08-23
281+
temp_max temp_min temp_avg humid_max humid_min wind_speed collected_at
282+
1 22 16 19.0 100 80 1.428571 2025-08-22 01:31:24
283+
2 22 14 18.0 100 65 3.571429 2025-08-22 01:31:24
284+
3 25 12 18.5 95 60 20.000000 2025-08-22 01:31:24
285+
Data saved to: data/output/municipal_forecasts_2025-08-22.csv
286+
>
287+
> cat("Completed at:", format(Sys.time()), "\n")
288+
Completed at: 2025-08-22 01:31:24
289+
>
290+
> proc.time()
291+
user system elapsed
292+
4.779 1.195 33.940

0 commit comments

Comments
 (0)