|
| 1 | + |
| 2 | +R version 4.4.2 (2024-10-31) -- "Pile of Leaves" |
| 3 | +Copyright (C) 2024 The R Foundation for Statistical Computing |
| 4 | +Platform: x86_64-pc-linux-gnu |
| 5 | + |
| 6 | +R is free software and comes with ABSOLUTELY NO WARRANTY. |
| 7 | +You are welcome to redistribute it under certain conditions. |
| 8 | +Type 'license()' or 'licence()' for distribution details. |
| 9 | + |
| 10 | +R is a collaborative project with many contributors. |
| 11 | +Type 'contributors()' for more information and |
| 12 | +'citation()' on how to cite R or R packages in publications. |
| 13 | + |
| 14 | +Type 'demo()' for some demos, 'help()' for on-line help, or |
| 15 | +'help.start()' for an HTML browser interface to help. |
| 16 | +Type 'q()' to quit R. |
| 17 | + |
| 18 | +- Project '~/research/weather-data-collector-spain' loaded. [renv 1.1.4] |
| 19 | +> # get_historical_data_expanded.R |
| 20 | +> # ---------------------- |
| 21 | +> # Purpose: Download and update historical daily weather data for Spain from the AEMET OpenData API. |
| 22 | +> # |
| 23 | +> # This script checks for missing dates in the local historical weather dataset and downloads any missing data in chunks. |
| 24 | +> # Data is fetched from the AEMET API, processed, and appended to the local CSV file. |
| 25 | +> # |
| 26 | +> # Concurrency Control: |
| 27 | +> # - Set PREVENT_CONCURRENT_RUNS = TRUE to enable lockfile-based run prevention |
| 28 | +> # - Set PREVENT_CONCURRENT_RUNS = FALSE (default) to allow multiple concurrent runs |
| 29 | +> # |
| 30 | +> # Main Steps: |
| 31 | +> # 1. Load dependencies and API key. |
| 32 | +> # 2. Determine which dates are missing from the local dataset. |
| 33 | +> # 3. Download missing data in chunks, handling API rate limits and errors. |
| 34 | +> # 4. Append new data to the historical dataset. |
| 35 | +> # |
| 36 | +> # Usage: |
| 37 | +> # - Requires a valid API key in 'auth/keys.R' as 'my_api_key'. |
| 38 | +> # - Run as an R script. Output is written to 'data/spain_weather_daily_historical.csv.gz'. |
| 39 | +> # |
| 40 | +> # Dependencies: tidyverse, lubridate, data.table, curl, jsonlite, RSocrata |
| 41 | +> # |
| 42 | +> # Author: [Your Name] |
| 43 | +> # Date: [YYYY-MM-DD] |
| 44 | +> |
| 45 | +> # Title #### |
| 46 | +> # For downloading and preparing historical weather data. |
| 47 | +> |
| 48 | +> rm(list=ls()) |
| 49 | +> |
| 50 | +> ####Dependencies#### |
| 51 | +> library(tidyverse) |
| 52 | +── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ── |
| 53 | +✔ dplyr 1.1.4 ✔ readr 2.1.5 |
| 54 | +✔ forcats 1.0.0 ✔ stringr 1.5.1 |
| 55 | +✔ ggplot2 3.5.2 ✔ tibble 3.3.0 |
| 56 | +✔ lubridate 1.9.4 ✔ tidyr 1.3.1 |
| 57 | +✔ purrr 1.1.0 |
| 58 | +── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── |
| 59 | +✖ dplyr::filter() masks stats::filter() |
| 60 | +✖ dplyr::lag() masks stats::lag() |
| 61 | +ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors |
| 62 | +> library(lubridate) |
| 63 | +> library(data.table) |
| 64 | + |
| 65 | +Attaching package: ‘data.table’ |
| 66 | + |
| 67 | +The following objects are masked from ‘package:lubridate’: |
| 68 | + |
| 69 | + hour, isoweek, mday, minute, month, quarter, second, wday, week, |
| 70 | + yday, year |
| 71 | + |
| 72 | +The following objects are masked from ‘package:dplyr’: |
| 73 | + |
| 74 | + between, first, last |
| 75 | + |
| 76 | +The following object is masked from ‘package:purrr’: |
| 77 | + |
| 78 | + transpose |
| 79 | + |
| 80 | +> library(curl) |
| 81 | +Using libcurl 8.7.1 with OpenSSL/3.2.2 |
| 82 | + |
| 83 | +Attaching package: ‘curl’ |
| 84 | + |
| 85 | +The following object is masked from ‘package:readr’: |
| 86 | + |
| 87 | + parse_date |
| 88 | + |
| 89 | +> library(jsonlite) |
| 90 | + |
| 91 | +Attaching package: ‘jsonlite’ |
| 92 | + |
| 93 | +The following object is masked from ‘package:purrr’: |
| 94 | + |
| 95 | + flatten |
| 96 | + |
| 97 | +> library(RSocrata) |
| 98 | +> |
| 99 | +> # If you want to prevent concurrent runs of this script, set PREVENT_CONCURRENT_RUNS to TRUE. |
| 100 | +> PREVENT_CONCURRENT_RUNS = FALSE |
| 101 | +> |
| 102 | +> if(PREVENT_CONCURRENT_RUNS) { |
| 103 | ++ # Prevent concurrent runs by creating a lockfile |
| 104 | ++ # Lockfile management |
| 105 | ++ lockfile <- "tmp/get_historical_data_expanded.lock" |
| 106 | ++ # Check if lockfile exists |
| 107 | ++ if (file.exists(lockfile)) { |
| 108 | ++ cat("Another run is in progress. Exiting.\n") |
| 109 | ++ quit(save = "no", status = 0) |
| 110 | ++ } |
| 111 | ++ # Create a temporary directory and lockfile |
| 112 | ++ dir.create("tmp", showWarnings = FALSE) |
| 113 | ++ file.create(lockfile) |
| 114 | ++ # Ensure lockfile is removed on exit |
| 115 | ++ on.exit(unlink(lockfile), add = TRUE) |
| 116 | ++ } |
| 117 | +> |
| 118 | +> # Load API keys |
| 119 | +> source("auth/keys.R") |
| 120 | +> |
| 121 | +> # SETTING DATES #### |
| 122 | +> # Set the start date for historical data collection |
| 123 | +> start_date = as_date("2013-07-01") |
| 124 | +> |
| 125 | +> # Set up curl handle with API key for authentication |
| 126 | +> h <- new_handle() |
| 127 | +> handle_setheaders(h, 'api_key' = my_api_key) |
| 128 | +> |
| 129 | +> # Generate sequence of all dates to check (from start_date to 4 days before today) |
| 130 | +> all_dates = seq.Date(from = start_date, to=today()-4, by = "day") |
| 131 | +> |
| 132 | +> # Load existing historical weather data |
| 133 | +> stored_weather_daily = fread("data/spain_weather_daily_historical.csv.gz") |
| 134 | +Error in fread("data/spain_weather_daily_historical.csv.gz") : |
| 135 | + File 'data/spain_weather_daily_historical.csv.gz' does not exist or is non-readable. getwd()=='/home/j.palmer/research/weather-data-collector-spain' |
| 136 | +Calls: fread -> stopf -> raise_condition -> signal |
| 137 | +Execution halted |
0 commit comments