|
| 1 | + |
| 2 | +R version 4.4.2 (2024-10-31) -- "Pile of Leaves" |
| 3 | +Copyright (C) 2024 The R Foundation for Statistical Computing |
| 4 | +Platform: x86_64-pc-linux-gnu |
| 5 | + |
| 6 | +R is free software and comes with ABSOLUTELY NO WARRANTY. |
| 7 | +You are welcome to redistribute it under certain conditions. |
| 8 | +Type 'license()' or 'licence()' for distribution details. |
| 9 | + |
| 10 | +R is a collaborative project with many contributors. |
| 11 | +Type 'contributors()' for more information and |
| 12 | +'citation()' on how to cite R or R packages in publications. |
| 13 | + |
| 14 | +Type 'demo()' for some demos, 'help()' for on-line help, or |
| 15 | +'help.start()' for an HTML browser interface to help. |
| 16 | +Type 'q()' to quit R. |
| 17 | + |
| 18 | +- Project '~/research/weather-data-collector-spain' loaded. [renv 1.1.4] |
| 19 | +> #!/usr/bin/env Rscript |
| 20 | +> |
| 21 | +> # aggregate_daily_station_data.R |
| 22 | +> # ------------------------------- |
| 23 | +> # Purpose: Create daily aggregated weather data by station from hourly observations |
| 24 | +> # |
| 25 | +> # This script processes the hourly expanded weather data to create daily summaries |
| 26 | +> # by station. It combines historical daily data with aggregated current observations |
| 27 | +> # to provide a complete time series from 2013 to present. |
| 28 | +> # |
| 29 | +> # Output: Daily means, minimums, maximums, and totals by weather station |
| 30 | +> # |
| 31 | +> # Data Sources: |
| 32 | +> # 1. Historical daily data (2013 to T-4 days) from AEMET climatological endpoint |
| 33 | +> # 2. Current hourly data (T-4 days to present) aggregated to daily values |
| 34 | +> # |
| 35 | +> # Author: John Palmer |
| 36 | +> # Date: 2025-08-20 |
| 37 | +> |
| 38 | +> rm(list=ls()) |
| 39 | +> |
| 40 | +> # Dependencies #### |
| 41 | +> library(tidyverse) |
| 42 | +── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ── |
| 43 | +✔ dplyr 1.1.4 ✔ readr 2.1.5 |
| 44 | +✔ forcats 1.0.0 ✔ stringr 1.5.1 |
| 45 | +✔ ggplot2 3.5.2 ✔ tibble 3.3.0 |
| 46 | +✔ lubridate 1.9.4 ✔ tidyr 1.3.1 |
| 47 | +✔ purrr 1.1.0 |
| 48 | +── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── |
| 49 | +✖ dplyr::filter() masks stats::filter() |
| 50 | +✖ dplyr::lag() masks stats::lag() |
| 51 | +ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors |
| 52 | +> library(lubridate) |
| 53 | +> library(data.table) |
| 54 | + |
| 55 | +Attaching package: ‘data.table’ |
| 56 | + |
| 57 | +The following objects are masked from ‘package:lubridate’: |
| 58 | + |
| 59 | + hour, isoweek, mday, minute, month, quarter, second, wday, week, |
| 60 | + yday, year |
| 61 | + |
| 62 | +The following objects are masked from ‘package:dplyr’: |
| 63 | + |
| 64 | + between, first, last |
| 65 | + |
| 66 | +The following object is masked from ‘package:purrr’: |
| 67 | + |
| 68 | + transpose |
| 69 | + |
| 70 | +> |
| 71 | +> cat("=== DAILY STATION DATA AGGREGATION ===\n") |
| 72 | +=== DAILY STATION DATA AGGREGATION === |
| 73 | +> |
| 74 | +> # Check if expanded hourly data exists |
| 75 | +> if(!file.exists("data/output/hourly_station_ongoing.csv.gz")) { |
| 76 | ++ cat("ERROR: Hourly weather data not found. Run get_latest_data.R first.\n") |
| 77 | ++ quit(save="no", status=1) |
| 78 | ++ } |
| 79 | +> |
| 80 | +> # Load expanded hourly data |
| 81 | +> cat("Loading hourly weather data...\n") |
| 82 | +Loading hourly weather data... |
| 83 | +> hourly_data = fread("data/output/hourly_station_ongoing.csv.gz") |
| 84 | +> hourly_data$fint = as_datetime(hourly_data$fint) |
| 85 | +> hourly_data$date = as.Date(hourly_data$fint) |
| 86 | +> |
| 87 | +> cat("Loaded", nrow(hourly_data), "hourly observation records.\n") |
| 88 | +Loaded 107633 hourly observation records. |
| 89 | +> cat("Date range:", min(hourly_data$date, na.rm=TRUE), "to", max(hourly_data$date, na.rm=TRUE), "\n") |
| 90 | +Date range: 20323 to 20324 |
| 91 | +> |
| 92 | +> # Load historical daily data if it exists |
| 93 | +> historical_daily = NULL |
| 94 | +> if(file.exists("data/output/daily_station_historical.csv.gz")) { |
| 95 | ++ cat("Loading historical daily data...\n") |
| 96 | ++ historical_daily = fread("data/output/daily_station_historical.csv.gz") |
| 97 | ++ |
| 98 | ++ # Standardize historical data format |
| 99 | ++ if("fecha" %in% names(historical_daily)) { |
| 100 | ++ historical_daily$date = as.Date(historical_daily$fecha) |
| 101 | ++ } |
| 102 | ++ |
| 103 | ++ # Select compatible variables and reshape to match hourly format |
| 104 | ++ historical_compatible = historical_daily %>% |
| 105 | ++ filter(!is.na(date)) %>% |
| 106 | ++ select(any_of(c("date", "idema", "ta", "tamax", "tamin", "hr", "prec", "vv", "p"))) %>% |
| 107 | ++ pivot_longer(cols = c(-date, -idema), names_to = "measure", values_to = "value") %>% |
| 108 | ++ filter(!is.na(value)) %>% |
| 109 | ++ mutate(source = "historical_daily") %>% |
| 110 | ++ as.data.table() |
| 111 | ++ |
| 112 | ++ cat("Loaded", nrow(historical_compatible), "historical daily records.\n") |
| 113 | ++ cat("Historical date range:", min(historical_compatible$date, na.rm=TRUE), "to", max(historical_compatible$date, na.rm=TRUE), "\n") |
| 114 | ++ } else { |
| 115 | ++ cat("No historical daily data found. Using only current observations.\n") |
| 116 | ++ historical_compatible = data.table() |
| 117 | ++ } |
| 118 | +Loading historical daily data... |
| 119 | +Error in `pivot_longer()`: |
| 120 | +! Can't select columns that don't exist. |
| 121 | +✖ Column `idema` doesn't exist. |
| 122 | +Backtrace: |
| 123 | + ▆ |
| 124 | + 1. ├─... %>% as.data.table() |
| 125 | + 2. ├─data.table::as.data.table(.) |
| 126 | + 3. ├─dplyr::mutate(., source = "historical_daily") |
| 127 | + 4. ├─dplyr::filter(., !is.na(value)) |
| 128 | + 5. ├─tidyr::pivot_longer(...) |
| 129 | + 6. ├─tidyr:::pivot_longer.data.frame(., cols = c(-date, -idema), names_to = "measure", values_to = "value") |
| 130 | + 7. │ └─tidyr::build_longer_spec(...) |
| 131 | + 8. │ └─tidyselect::eval_select(...) |
| 132 | + 9. │ └─tidyselect:::eval_select_impl(...) |
| 133 | + 10. │ ├─tidyselect:::with_subscript_errors(...) |
| 134 | + 11. │ │ └─base::withCallingHandlers(...) |
| 135 | + 12. │ └─tidyselect:::vars_select_eval(...) |
| 136 | + 13. │ └─tidyselect:::walk_data_tree(expr, data_mask, context_mask) |
| 137 | + 14. │ └─tidyselect:::eval_c(expr, data_mask, context_mask) |
| 138 | + 15. │ └─tidyselect:::reduce_sels(node, data_mask, context_mask, init = init) |
| 139 | + 16. │ └─tidyselect:::walk_data_tree(new, data_mask, context_mask) |
| 140 | + 17. │ └─tidyselect:::as_indices_sel_impl(...) |
| 141 | + 18. │ └─tidyselect:::as_indices_impl(...) |
| 142 | + 19. │ └─tidyselect:::chr_as_locations(x, vars, call = call, arg = arg) |
| 143 | + 20. │ └─vctrs::vec_as_location(...) |
| 144 | + 21. └─vctrs (local) `<fn>`() |
| 145 | + 22. └─vctrs:::stop_subscript_oob(...) |
| 146 | + 23. └─vctrs:::stop_subscript(...) |
| 147 | + 24. └─rlang::abort(...) |
| 148 | +Execution halted |
0 commit comments