Skip to content

Commit b312bd2

Browse files
committed
Add crontab schedule and scripts for weather data collection
Time limit: Increased from 6 to 12 hours for full runs. Historical data: Made optional with the SKIP_HISTORICAL variable. Created specialized scripts: hourly (1 hr), daily (2 hr), weekly full (12 hr).
1 parent ac1efa8 commit b312bd2

File tree

4 files changed

+165
-9
lines changed

4 files changed

+165
-9
lines changed

crontab_recommended.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
# Weather Data Collection Crontab Schedule
# ========================================
#
# Field order: minute hour day-of-month month day-of-week command.
# Every entry first changes into the project directory and then submits a
# Slurm batch script with sbatch; the `&&` ensures nothing is submitted if
# the directory is missing.

# Hourly collection (every hour at 5 minutes past)
# Collects latest 12-hour observation data to build continuous database
5 * * * * cd ~/research/weather-data-collector-spain && sbatch update_weather_hourly.sh

# Daily collection (6:30 AM daily)
# Collects forecasts and generates aggregated datasets
30 6 * * * cd ~/research/weather-data-collector-spain && sbatch update_weather_daily.sh

# Weekly historical update (Sundays at 2 AM)
# Updates historical database (can take 6+ hours)
# --dependency=singleton makes Slurm hold this job until any earlier job with
# the same job name from the same user has finished. Without it, this run and
# the monthly run below execute concurrently whenever the 1st of the month
# falls on a Sunday (01:00 and 02:00 submissions of the same 12-hour script).
0 2 * * 0 cd ~/research/weather-data-collector-spain && sbatch --dependency=singleton update_weather.sh

# Monthly deep historical collection (1st of month at 1 AM)
# Full historical data collection with extra time
# SKIP_HISTORICAL=false is also the default used by update_weather.sh, so it
# is set here only to make the intent explicit; sbatch passes the submitting
# environment on to the job by default.
0 1 1 * * cd ~/research/weather-data-collector-spain && SKIP_HISTORICAL=false sbatch --dependency=singleton update_weather.sh

update_weather.sh

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#SBATCH --ntasks=1
66
#SBATCH --cpus-per-task=2
77
#SBATCH --mem=8G
8-
#SBATCH --time=06:00:00
8+
#SBATCH --time=12:00:00
99
#SBATCH --mail-type=BEGIN,END,FAIL
1010
1111
#SBATCH --output=logs/weather_collection_%j.out
@@ -76,15 +76,19 @@ else
7676
$STATUS_SCRIPT "weather-hourly" "failed" $(($(date +%s) - START_TIME)) 50
7777
fi
7878

79-
# Priority 3: Historical data update
80-
echo "Updating historical data..."
81-
$STATUS_SCRIPT "weather-historical" "running" $(($(date +%s) - START_TIME)) 70
82-
R CMD BATCH --no-save --no-restore code/get_historical_data.R logs/get_historical_data_$(date +%Y%m%d_%H%M%S).out
83-
84-
if [ $? -eq 0 ]; then
85-
$STATUS_SCRIPT "weather-historical" "completed" $(($(date +%s) - START_TIME)) 100
79+
# Priority 3: Historical data update (optional for regular runs)
80+
if [ "${SKIP_HISTORICAL:-false}" != "true" ]; then
81+
echo "Updating historical data..."
82+
$STATUS_SCRIPT "weather-historical" "running" $(($(date +%s) - START_TIME)) 70
83+
R CMD BATCH --no-save --no-restore code/get_historical_data.R logs/get_historical_data_$(date +%Y%m%d_%H%M%S).out
84+
85+
if [ $? -eq 0 ]; then
86+
$STATUS_SCRIPT "weather-historical" "completed" $(($(date +%s) - START_TIME)) 100
87+
else
88+
$STATUS_SCRIPT "weather-historical" "failed" $(($(date +%s) - START_TIME)) 70
89+
fi
8690
else
87-
$STATUS_SCRIPT "weather-historical" "failed" $(($(date +%s) - START_TIME)) 70
91+
echo "Skipping historical data update (SKIP_HISTORICAL=true)"
8892
fi
8993

9094
# Priority 4: Generate aggregated datasets

update_weather_daily.sh

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
#!/bin/bash
#SBATCH --job-name=weather-daily
#SBATCH --partition=ceab
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=2
#SBATCH --mem=6G
#SBATCH --time=02:00:00
#SBATCH --mail-type=FAIL

#SBATCH --output=logs/weather_daily_%j.out
#SBATCH --error=logs/weather_daily_%j.err

# Daily weather collection job.
#
# Runs three steps in order: municipal forecasts, hourly observations, and
# aggregated dataset generation. Each step is attempted even if an earlier
# one failed (best effort), and progress is reported through
# scripts/update_weather_status.sh as: <name> <state> <elapsed-seconds> <progress>.
#
# Exit status: 0 when every step succeeded, 1 when any step failed or the
# project directory is unavailable. A non-zero exit is what lets Slurm mark
# the job as failed and send the --mail-type=FAIL notification.

# Load required modules
module load LibTIFF/4.6.0-GCCcore-13.3.0
module load R/4.4.2-gfbf-2024a
module load cURL/8.7.1-GCCcore-13.3.0
module load OpenSSL/3

# Set working directory; abort rather than run the collectors somewhere else.
cd ~/research/weather-data-collector-spain || {
  echo "Cannot change to project directory" >&2
  exit 1
}

# Create logs and output directories if they don't exist
mkdir -p logs
mkdir -p data/output

# Initialize status reporting
JOB_NAME="weather-daily"
STATUS_SCRIPT="./scripts/update_weather_status.sh"
START_TIME=$(date +%s)
FAILED_STEPS=0

# Report a status line with the seconds elapsed since START_TIME.
# Arguments: $1 - status name, $2 - state, $3 - progress value
report_status() {
  "$STATUS_SCRIPT" "$1" "$2" "$(( $(date +%s) - START_TIME ))" "$3"
}

# Timestamp used to give every R log file a unique name.
log_stamp() {
  date +%Y%m%d_%H%M%S
}

# Report job started
"$STATUS_SCRIPT" "$JOB_NAME" "running" 0 5

echo "Starting daily weather data collection: $(date)"

# Municipal forecasts
echo "Collecting municipal forecasts..."
report_status "weather-forecast" "running" 25
if R CMD BATCH --no-save --no-restore code/get_forecast_data.R "logs/get_forecast_data_$(log_stamp).out"; then
  report_status "weather-forecast" "completed" 100
else
  report_status "weather-forecast" "failed" 30
  FAILED_STEPS=$((FAILED_STEPS + 1))
fi

# Hourly observations
echo "Collecting hourly observations..."
report_status "$JOB_NAME" "running" 50
if R CMD BATCH --no-save --no-restore code/get_latest_data.R "logs/get_latest_data_$(log_stamp).out"; then
  report_status "weather-hourly" "completed" 100
else
  report_status "weather-hourly" "failed" 50
  FAILED_STEPS=$((FAILED_STEPS + 1))
fi

# Generate aggregated datasets
echo "Generating aggregated datasets..."
report_status "weather-aggregation" "running" 90
if ./generate_all_datasets.sh; then
  report_status "weather-aggregation" "completed" 100
  echo "✅ All datasets generated successfully"
else
  report_status "weather-aggregation" "failed" 90
  echo "❌ Dataset generation failed"
  FAILED_STEPS=$((FAILED_STEPS + 1))
fi

echo "Daily weather data collection completed: $(date)"

# Final status update: only claim success when every step succeeded.
FINAL_DURATION=$(( $(date +%s) - START_TIME ))
if [ "$FAILED_STEPS" -eq 0 ]; then
  "$STATUS_SCRIPT" "$JOB_NAME" "completed" "$FINAL_DURATION" 100
else
  echo "Daily weather data collection finished with $FAILED_STEPS failed step(s)" >&2
  # NOTE(review): progress 100 here means "all steps were attempted";
  # confirm the value the status consumer expects for a failed run.
  "$STATUS_SCRIPT" "$JOB_NAME" "failed" "$FINAL_DURATION" 100
  exit 1
fi

update_weather_hourly.sh

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
#!/bin/bash
#SBATCH --job-name=weather-hourly
#SBATCH --partition=ceab
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=4G
#SBATCH --time=01:00:00
#SBATCH --mail-type=FAIL

#SBATCH --output=logs/weather_hourly_%j.out
#SBATCH --error=logs/weather_hourly_%j.err

# Hourly weather collection job.
#
# Runs code/get_latest_data.R once and reports progress through
# scripts/update_weather_status.sh as: <name> <state> <elapsed-seconds> <progress>.
#
# Exit status: 0 when the collection succeeded, 1 when it failed or the
# project directory is unavailable. A non-zero exit is what lets Slurm mark
# the job as failed and send the --mail-type=FAIL notification.

# Load required modules
module load LibTIFF/4.6.0-GCCcore-13.3.0
module load R/4.4.2-gfbf-2024a
module load cURL/8.7.1-GCCcore-13.3.0
module load OpenSSL/3

# Set working directory; abort rather than run the collector somewhere else.
cd ~/research/weather-data-collector-spain || {
  echo "Cannot change to project directory" >&2
  exit 1
}

# Create logs and output directories if they don't exist
mkdir -p logs
mkdir -p data/output

# Initialize status reporting
JOB_NAME="weather-hourly-quick"
STATUS_SCRIPT="./scripts/update_weather_status.sh"
START_TIME=$(date +%s)

# Report a status line for this job with the seconds elapsed since START_TIME.
# Arguments: $1 - state, $2 - progress value
report_status() {
  "$STATUS_SCRIPT" "$JOB_NAME" "$1" "$(( $(date +%s) - START_TIME ))" "$2"
}

# Report job started
"$STATUS_SCRIPT" "$JOB_NAME" "running" 0 5

echo "Starting hourly weather data collection: $(date)"

# Only collect current hourly observations
echo "Collecting hourly observations..."
report_status "running" 50
if R CMD BATCH --no-save --no-restore code/get_latest_data.R "logs/get_latest_data_$(date +%Y%m%d_%H%M%S).out"; then
  COLLECTION_OK=1
  report_status "completed" 100
  echo "✅ Hourly data collection successful"
else
  COLLECTION_OK=0
  report_status "failed" 50
  echo "❌ Hourly data collection failed"
fi

echo "Hourly weather data collection completed: $(date)"

# Final status update: must not overwrite a failure with "completed".
FINAL_DURATION=$(( $(date +%s) - START_TIME ))
if [ "$COLLECTION_OK" -eq 1 ]; then
  "$STATUS_SCRIPT" "$JOB_NAME" "completed" "$FINAL_DURATION" 100
else
  "$STATUS_SCRIPT" "$JOB_NAME" "failed" "$FINAL_DURATION" 50
  exit 1
fi

0 commit comments

Comments
 (0)