Skip to content

Commit 95b7285

Browse files
committed
Adds logs from cluster runs and fixes module and test flag issues causing errors
1 parent ffed638 commit 95b7285

File tree

101 files changed

+19599
-4
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

101 files changed

+19599
-4
lines changed

code/get_forecast_data.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ library(lubridate)
1212
source("auth/keys.R")
1313

1414
# Set testing mode to TRUE and specify N_TEST_MUNICIPALITIES to get forecasts for only selected municipalities
15-
TESTING_MODE = TRUE
15+
TESTING_MODE = FALSE
1616
N_TEST_MUNICIPALITIES = 2
1717

1818
cat("=== AEMET FORECAST DATA COLLECTION ===\n")

generate_all_datasets.sh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,15 @@ set -e # Exit on any error
2525
# Set working directory to script location
2626
cd "$(dirname "$0")"
2727

28+
# Load modules if running in SLURM environment (modules might not be inherited by sub-shells)
29+
if [ -n "$SLURM_JOB_ID" ]; then
30+
module load GDAL/3.10.0-foss-2024a
31+
module load R/4.4.2-gfbf-2024a
32+
module load LibTIFF/4.6.0-GCCcore-13.3.0
33+
module load cURL/8.7.1-GCCcore-13.3.0
34+
module load OpenSSL/3
35+
fi
36+
2837
# Create necessary directories
2938
mkdir -p logs
3039
mkdir -p data/output
@@ -37,6 +46,18 @@ echo "=== GENERATING ALL REQUIRED DATASETS ==="
3746
echo "Started at: $(date)"
3847
echo "Log files will be saved with prefix: ${LOG_PREFIX}"
3948

49+
# Check for test mode
50+
if [[ "$1" == "--test-only" ]]; then
51+
echo "TEST MODE: Only checking R availability"
52+
if command -v R &> /dev/null; then
53+
echo "✅ R is available"
54+
R --version | head -1
55+
else
56+
echo "❌ R is not available"
57+
fi
58+
exit 0
59+
fi
60+
4061
# Check if R is available
4162
if ! command -v R &> /dev/null; then
4263
echo "ERROR: R is not available. Please install R or load the R module."
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
2+
R version 4.4.2 (2024-10-31) -- "Pile of Leaves"
3+
Copyright (C) 2024 The R Foundation for Statistical Computing
4+
Platform: x86_64-pc-linux-gnu
5+
6+
R is free software and comes with ABSOLUTELY NO WARRANTY.
7+
You are welcome to redistribute it under certain conditions.
8+
Type 'license()' or 'licence()' for distribution details.
9+
10+
R is a collaborative project with many contributors.
11+
Type 'contributors()' for more information and
12+
'citation()' on how to cite R or R packages in publications.
13+
14+
Type 'demo()' for some demos, 'help()' for on-line help, or
15+
'help.start()' for an HTML browser interface to help.
16+
Type 'q()' to quit R.
17+
18+
- Project '~/research/weather-data-collector-spain' loaded. [renv 1.1.4]
19+
> #!/usr/bin/env Rscript
20+
>
21+
> # aggregate_daily_station_data.R
22+
> # -------------------------------
23+
> # Purpose: Create daily aggregated weather data by station from hourly observations
24+
> #
25+
> # This script processes the hourly expanded weather data to create daily summaries
26+
> # by station. It combines historical daily data with aggregated current observations
27+
> # to provide a complete time series from 2013 to present.
28+
> #
29+
> # Output: Daily means, minimums, maximums, and totals by weather station
30+
> #
31+
> # Data Sources:
32+
> # 1. Historical daily data (2013 to T-4 days) from AEMET climatological endpoint
33+
> # 2. Current hourly data (T-4 days to present) aggregated to daily values
34+
> #
35+
> # Author: John Palmer
36+
> # Date: 2025-08-20
37+
>
38+
> rm(list=ls())
39+
>
40+
> # Dependencies ####
41+
> library(tidyverse)
42+
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
43+
✔ dplyr 1.1.4 ✔ readr 2.1.5
44+
✔ forcats 1.0.0 ✔ stringr 1.5.1
45+
✔ ggplot2 3.5.2 ✔ tibble 3.3.0
46+
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
47+
✔ purrr 1.1.0
48+
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
49+
✖ dplyr::filter() masks stats::filter()
50+
✖ dplyr::lag() masks stats::lag()
51+
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
52+
> library(lubridate)
53+
> library(data.table)
54+
55+
Attaching package: ‘data.table’
56+
57+
The following objects are masked from ‘package:lubridate’:
58+
59+
hour, isoweek, mday, minute, month, quarter, second, wday, week,
60+
yday, year
61+
62+
The following objects are masked from ‘package:dplyr’:
63+
64+
between, first, last
65+
66+
The following object is masked from ‘package:purrr’:
67+
68+
transpose
69+
70+
>
71+
> cat("=== DAILY STATION DATA AGGREGATION ===\n")
72+
=== DAILY STATION DATA AGGREGATION ===
73+
>
74+
> # Check if expanded hourly data exists
75+
> if(!file.exists("data/output/hourly_station_ongoing.csv.gz")) {
76+
+ cat("ERROR: Hourly weather data not found. Run get_latest_data.R first.\n")
77+
+ quit(save="no", status=1)
78+
+ }
79+
>
80+
> # Load expanded hourly data
81+
> cat("Loading hourly weather data...\n")
82+
Loading hourly weather data...
83+
> hourly_data = fread("data/output/hourly_station_ongoing.csv.gz")
84+
> hourly_data$fint = as_datetime(hourly_data$fint)
85+
> hourly_data$date = as.Date(hourly_data$fint)
86+
>
87+
> cat("Loaded", nrow(hourly_data), "hourly observation records.\n")
88+
Loaded 107633 hourly observation records.
89+
> cat("Date range:", min(hourly_data$date, na.rm=TRUE), "to", max(hourly_data$date, na.rm=TRUE), "\n")
90+
Date range: 20323 to 20324
91+
>
92+
> # Load historical daily data if it exists
93+
> historical_daily = NULL
94+
> if(file.exists("data/output/daily_station_historical.csv.gz")) {
95+
+ cat("Loading historical daily data...\n")
96+
+ historical_daily = fread("data/output/daily_station_historical.csv.gz")
97+
+
98+
+ # Standardize historical data format
99+
+ if("fecha" %in% names(historical_daily)) {
100+
+ historical_daily$date = as.Date(historical_daily$fecha)
101+
+ }
102+
+
103+
+ # Select compatible variables and reshape to match hourly format
104+
+ historical_compatible = historical_daily %>%
105+
+ filter(!is.na(date)) %>%
106+
+ select(any_of(c("date", "idema", "ta", "tamax", "tamin", "hr", "prec", "vv", "p"))) %>%
107+
+ pivot_longer(cols = c(-date, -idema), names_to = "measure", values_to = "value") %>%
108+
+ filter(!is.na(value)) %>%
109+
+ mutate(source = "historical_daily") %>%
110+
+ as.data.table()
111+
+
112+
+ cat("Loaded", nrow(historical_compatible), "historical daily records.\n")
113+
+ cat("Historical date range:", min(historical_compatible$date, na.rm=TRUE), "to", max(historical_compatible$date, na.rm=TRUE), "\n")
114+
+ } else {
115+
+ cat("No historical daily data found. Using only current observations.\n")
116+
+ historical_compatible = data.table()
117+
+ }
118+
Loading historical daily data...
119+
Error in `pivot_longer()`:
120+
! Can't select columns that don't exist.
121+
✖ Column `idema` doesn't exist.
122+
Backtrace:
123+
124+
1. ├─... %>% as.data.table()
125+
2. ├─data.table::as.data.table(.)
126+
3. ├─dplyr::mutate(., source = "historical_daily")
127+
4. ├─dplyr::filter(., !is.na(value))
128+
5. ├─tidyr::pivot_longer(...)
129+
6. ├─tidyr:::pivot_longer.data.frame(., cols = c(-date, -idema), names_to = "measure", values_to = "value")
130+
7. │ └─tidyr::build_longer_spec(...)
131+
8. │ └─tidyselect::eval_select(...)
132+
9. │ └─tidyselect:::eval_select_impl(...)
133+
10. │ ├─tidyselect:::with_subscript_errors(...)
134+
11. │ │ └─base::withCallingHandlers(...)
135+
12. │ └─tidyselect:::vars_select_eval(...)
136+
13. │ └─tidyselect:::walk_data_tree(expr, data_mask, context_mask)
137+
14. │ └─tidyselect:::eval_c(expr, data_mask, context_mask)
138+
15. │ └─tidyselect:::reduce_sels(node, data_mask, context_mask, init = init)
139+
16. │ └─tidyselect:::walk_data_tree(new, data_mask, context_mask)
140+
17. │ └─tidyselect:::as_indices_sel_impl(...)
141+
18. │ └─tidyselect:::as_indices_impl(...)
142+
19. │ └─tidyselect:::chr_as_locations(x, vars, call = call, arg = arg)
143+
20. │ └─vctrs::vec_as_location(...)
144+
21. └─vctrs (local) `<fn>`()
145+
22. └─vctrs:::stop_subscript_oob(...)
146+
23. └─vctrs:::stop_subscript(...)
147+
24. └─rlang::abort(...)
148+
Execution halted

0 commit comments

Comments
 (0)