Skip to content

Commit 21f5977

Browse files
committed
Add dashboard integration and monitoring features for weather data collection, and fix dependencies
- Introduced README updates for real-time monitoring and dashboard integration.
- Added configuration file for mosquito-alert-model-monitor dashboard.
- Enhanced deployment script to include dashboard setup.
- Created monitoring guide with detailed job types and metrics.
- Implemented status reporting scripts for job monitoring.
- Updated weather data collection scripts to support new monitoring features.
1 parent 438dcbc commit 21f5977

11 files changed

+747
-35
lines changed

README.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,28 @@ gantt
7070
Accumulating Archive :active, archive, 2025-08-01, 2025-08-21
7171
```
7272

73+
## Monitoring & Dashboard Integration
74+
75+
### 🖥️ **Real-time Monitoring**
76+
This project integrates with the [mosquito-alert-model-monitor](https://github.com/Mosquito-Alert/mosquito-alert-model-monitor) dashboard for real-time job monitoring.
77+
78+
**Monitored Jobs:**
79+
- `weather-forecast`: Municipal forecasts (every 6 hours) - **CRITICAL PRIORITY**
80+
- `weather-hourly`: Station observations (every 2 hours) - **MEDIUM PRIORITY**
81+
- `weather-historical`: Historical data updates (daily) - **LOW PRIORITY**
82+
- `municipal-forecast-priority`: Immediate municipal data - **CRITICAL PRIORITY**
83+
84+
**Setup Dashboard Monitoring:**
85+
```bash
86+
# Test integration
87+
./scripts/test_dashboard_integration.sh
88+
89+
# Check dashboard at: /path/to/mosquito-alert-model-monitor/docs/index.html
90+
```
91+
92+
**Status Reporting:**
93+
All SLURM scripts automatically report job status, progress, and resource usage to the monitoring dashboard.
94+
7395
## Features
7496
- **Real-time Observations**: Fetches current hourly weather from all AEMET stations
7597
- **Historical Data**: Updates and maintains daily historical weather dataset

config/dashboard_integration.yml

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Weather Data Collector - Dashboard Integration
2+
# ==============================================
3+
# Configuration for mosquito-alert-model-monitor dashboard
4+
5+
# Copy this to your mosquito-alert-model-monitor repository
6+
# in a file called config/weather_jobs.yml
7+
8+
weather_data_collector:
9+
description: "Spanish Weather Data Collection System"
10+
repository: "https://github.com/Mosquito-Alert/weather-data-collector-spain"
11+
jobs:
12+
weather-data-collector:
13+
name: "Complete Weather Collection"
14+
description: "Full weather data collection pipeline"
15+
schedule: "Every 6 hours"
16+
priority: "high"
17+
datasets:
18+
- "Daily Station Historical"
19+
- "Municipal Extended"
20+
- "Hourly Station Ongoing"
21+
22+
weather-forecast:
23+
name: "Municipal Forecasts"
24+
description: "7-day municipal forecasts for 8,129 Spanish municipalities"
25+
schedule: "Every 6 hours"
26+
priority: "critical"
27+
datasets:
28+
- "Municipal Extended (forecast portion)"
29+
30+
weather-hourly:
31+
name: "Hourly Station Data"
32+
description: "Current observations from AEMET stations"
33+
schedule: "Every 2 hours"
34+
priority: "medium"
35+
datasets:
36+
- "Hourly Station Ongoing"
37+
38+
weather-historical:
39+
name: "Historical Data Update"
40+
description: "Daily historical weather data maintenance"
41+
schedule: "Daily at 3 AM"
42+
priority: "low"
43+
datasets:
44+
- "Daily Station Historical"
45+
46+
municipal-forecast-priority:
47+
name: "Priority Municipal Data"
48+
description: "Immediate municipal data for model use"
49+
schedule: "On demand"
50+
priority: "critical"
51+
datasets:
52+
- "Municipal Extended (immediate availability)"
53+
54+
alerts:
55+
api_rate_limits:
56+
threshold: 3
57+
description: "AEMET API rate limit exceeded"
58+
59+
empty_responses:
60+
threshold: 5
61+
description: "Server returned empty responses"
62+
63+
forecast_data_age:
64+
threshold: 12 # hours
65+
description: "Forecast data is stale"
66+
67+
disk_space:
68+
threshold: 90 # percent
69+
description: "Low disk space for data outputs"

deploy_to_cluster.sh

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,26 @@ start_priority_generation() {
6262
echo "✅ Priority generation started - check logs/municipal_priority.log"
6363
}
6464

65-
# Function to setup cron jobs for continuous operation
65+
# Function to setup dashboard integration
66+
# Set up the monitoring-dashboard integration on the cluster over ssh.
# Globals:   CLUSTER_USER, CLUSTER_HOST, CLUSTER_PATH (read; defined outside this block)
# Arguments: none
# Outputs:   progress / warning messages on stdout
# Behavior:  if a directory named mosquito-alert-model-monitor exists on the
#            remote host, create its data/status subdirectory and run
#            scripts/update_weather_status.sh once from $CLUSTER_PATH;
#            otherwise print instructions for cloning the monitor repository.
setup_dashboard_integration() {
67+
echo "=== Setting up dashboard integration ==="
68+
69+
# Check if monitor repo exists on cluster
70+
# The path is relative, so it is resolved from whatever directory the remote
# shell starts in (presumably the remote user's home directory - confirm).
if ssh "$CLUSTER_USER@$CLUSTER_HOST" "[ -d mosquito-alert-model-monitor ]"; then
71+
echo "✅ Monitor repository found on cluster"
72+
73+
# Ensure status directory exists
74+
ssh "$CLUSTER_USER@$CLUSTER_HOST" "mkdir -p mosquito-alert-model-monitor/data/status"
75+
76+
# Test status reporting
77+
# $CLUSTER_PATH is expanded locally (double-quoted string) and reaches the
# remote shell unquoted, so a path containing spaces would be word-split there.
# NOTE(review): the exit status of this call is not checked here, so the
# success message below is printed even if the status script fails or is
# missing (unless the surrounding script enables `set -e` - not visible here).
ssh "$CLUSTER_USER@$CLUSTER_HOST" "cd $CLUSTER_PATH && ./scripts/update_weather_status.sh weather-test running 0 0"
78+
79+
echo "✅ Dashboard integration configured"
80+
else
81+
# Nothing is cloned automatically; the user is told how to do it by hand.
echo "⚠️ Monitor repository not found - clone it manually:"
82+
echo " git clone https://github.com/Mosquito-Alert/mosquito-alert-model-monitor.git"
83+
fi
84+
}
6685
setup_cron_jobs() {
6786
echo "=== Setting up cron jobs ==="
6887

@@ -103,18 +122,22 @@ main() {
103122
"start")
104123
start_priority_generation
105124
;;
125+
"dashboard")
126+
setup_dashboard_integration
127+
;;
106128
"cron")
107129
setup_cron_jobs
108130
;;
109131
"all")
110132
deploy_code
111133
setup_api_keys
112134
install_dependencies
135+
setup_dashboard_integration
113136
setup_cron_jobs
114137
start_priority_generation
115138
;;
116139
*)
117-
echo "Usage: $0 [code|keys|deps|start|cron|all]"
140+
echo "Usage: $0 [code|keys|deps|dashboard|start|cron|all]"
118141
exit 1
119142
;;
120143
esac

docs/monitoring.md

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
# Weather Data Collector - Monitoring Guide
2+
3+
## Dashboard Integration
4+
5+
This project is integrated with the [mosquito-alert-model-monitor](https://github.com/Mosquito-Alert/mosquito-alert-model-monitor) dashboard for comprehensive monitoring of weather data collection jobs.
6+
7+
## Monitored Components
8+
9+
### Job Types
10+
11+
| Job Name | Description | Schedule | Priority | SLURM Script |
12+
|----------|------------|----------|----------|--------------|
13+
| `weather-forecast` | Municipal forecasts collection | Every 6 hours | CRITICAL | `priority_municipal_forecast.sh` |
14+
| `weather-hourly` | Hourly station observations | Every 2 hours | MEDIUM | Included in `update_weather.sh` |
15+
| `weather-historical` | Historical data updates | Daily (3 AM) | LOW | `update_historical_weather.sh` |
16+
| `municipal-forecast-priority` | Immediate municipal data | On-demand | CRITICAL | `priority_municipal_forecast.sh` |
17+
| `weather-data-collector` | Complete collection pipeline | Every 6 hours | HIGH | `update_weather.sh` |
18+
19+
### Metrics Tracked
20+
21+
- **Job Status**: running, completed, failed
22+
- **Progress**: 0-100% completion
23+
- **Duration**: Execution time in seconds
24+
- **Resource Usage**: CPU and memory utilization
25+
- **Data Metrics**: Output file count and size
26+
- **API Health**: Rate limits, errors, response times
27+
28+
### Alert Conditions
29+
30+
- **API Rate Limits**: >3 rate limit hits
31+
- **Empty Responses**: >5 consecutive empty server responses
32+
- **Stale Data**: Forecast data >12 hours old
33+
- **Disk Space**: Disk holding the output directory is more than 90% full
34+
- **Failed Jobs**: Any job failing repeatedly
35+
36+
## Setup Instructions
37+
38+
### 1. Clone Monitor Repository
39+
40+
```bash
41+
# On cluster
42+
git clone https://github.com/Mosquito-Alert/mosquito-alert-model-monitor.git
43+
cd mosquito-alert-model-monitor
44+
45+
# Setup monitoring environment
46+
conda env create -f environment.yml
47+
conda activate mosquito-alert-monitor
48+
```
49+
50+
### 2. Test Integration
51+
52+
```bash
53+
# From weather project directory
54+
./scripts/test_dashboard_integration.sh
55+
```
56+
57+
### 3. Deploy Dashboard
58+
59+
```bash
60+
# Local rendering
61+
cd mosquito-alert-model-monitor
62+
quarto preview index.qmd
63+
64+
# GitHub Pages deployment (automatic on push)
65+
git add .
66+
git commit -m "Add weather monitoring integration"
67+
git push origin main
68+
```
69+
70+
## Status File Format
71+
72+
Each weather job creates or updates a JSON status file in the monitor repository's `data/status/` directory:
73+
74+
```json
75+
{
76+
"job_name": "weather-forecast",
77+
"status": "running",
78+
"last_updated": "2025-08-21T18:30:00Z",
79+
"duration": 1800,
80+
"progress": 75,
81+
"cpu_usage": 85.2,
82+
"memory_usage": 2048,
83+
"next_scheduled_run": "2025-08-22T00:30:00Z",
84+
"config": {
85+
"project_type": "weather_data_collection",
86+
"data_source": "AEMET OpenData API",
87+
"collection_scope": "Spain",
88+
"municipalities": 8129,
89+
"api_keys": 3,
90+
"output_datasets": 3
91+
},
92+
"metrics": {
93+
"output_files": 3,
94+
"total_size_mb": 245.6
95+
},
96+
"alerts": {
97+
"api_errors": false,
98+
"disk_space_low": false,
99+
"rate_limit_exceeded": false
100+
}
101+
}
102+
```
103+
104+
## Dashboard Views
105+
106+
### Main Dashboard
107+
- Real-time status of all weather jobs
108+
- Resource usage charts
109+
- Recent activity timeline
110+
- Alert notifications
111+
112+
### Job Details
113+
- Individual job logs and configuration
114+
- Performance trends over time
115+
- Error analysis and troubleshooting
116+
117+
### Historical Analytics
118+
- Success rates by job type
119+
- Performance trends
120+
- Data collection statistics
121+
122+
## Troubleshooting
123+
124+
### Common Issues
125+
126+
1. **Status Files Not Appearing**
127+
- Check monitor repository path in scripts
128+
- Verify write permissions on status directory
129+
- Ensure status script is executable
130+
131+
2. **Dashboard Not Updating**
132+
- Check JSON syntax in status files
133+
- Verify Quarto rendering process
134+
- Check GitHub Pages deployment
135+
136+
3. **Missing Resource Metrics**
137+
- Verify `ps` command availability
138+
- Check script permissions for system monitoring
139+
140+
### Log Files
141+
142+
Monitor these log files for debugging:
143+
144+
- `logs/weather_collection_*.out` - Main collection pipeline
145+
- `logs/municipal_priority_*.out` - Priority municipal data
146+
- `logs/get_forecast_data_*.out` - Forecast collection
147+
- `logs/get_latest_data_*.out` - Hourly observations
148+
- `logs/get_historical_data_*.out` - Historical updates
149+
150+
## Integration Commands
151+
152+
```bash
153+
# Manual status update
154+
./scripts/update_weather_status.sh "job_name" "status" duration progress
155+
156+
# Test dashboard integration
157+
./scripts/test_dashboard_integration.sh
158+
159+
# Set up dashboard integration on the cluster (use `all` for a full deploy that includes this step)
160+
./deploy_to_cluster.sh dashboard
161+
```

priority_municipal_data.sh

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#!/bin/bash
# Purpose: SLURM batch job that first collects municipal forecasts and then
#          runs the backwards municipal data generation, both via R CMD BATCH.
# Usage:   sbatch priority_municipal_data.sh
# Requests (see #SBATCH lines below): 1 node, 1 task, 1 CPU, 4G memory,
#          2-hour time limit, mail on BEGIN/END/FAIL.
2+
# filepath: /Users/palmer/research/weather-data-collector-spain/priority_municipal_data.sh
3+
#SBATCH --job-name=municipal-priority
4+
#SBATCH --partition=standard
5+
#SBATCH --nodes=1
6+
#SBATCH --ntasks=1
7+
#SBATCH --cpus-per-task=1
8+
#SBATCH --mem=4G
9+
#SBATCH --time=02:00:00
10+
#SBATCH --mail-type=BEGIN,END,FAIL
11+
12+
# Job stdout/stderr go to logs/, with %j in the file name (SLURM job ID - see sbatch docs).
# NOTE(review): these paths point into logs/, which this script only creates
# further down with `mkdir -p logs`; presumably SLURM opens them before the
# script body runs, so logs/ may need to exist at submission time - confirm.
#SBATCH --output=logs/municipal_priority_%j.out
13+
#SBATCH --error=logs/municipal_priority_%j.err
14+
15+
# Load required modules
16+
module load R/4.4.2-gfbf-2024a
17+
module load cURL/8.7.1-GCCcore-13.3.0
18+
19+
# Set working directory
20+
# NOTE(review): the result of cd is not checked and the script does not use
# `set -e`; if the directory is missing, everything below runs in the
# directory the job started in.
cd ~/research/weather-data-collector-spain
21+
22+
# Create logs directory
23+
mkdir -p logs
24+
25+
# Activate renv
26+
# renv::activate() runs in its own short-lived R process, separate from the
# R CMD BATCH invocations below. NOTE(review): whether the activation carries
# over to those later R sessions depends on renv's project files - confirm.
R --slave --no-restore --file=- <<EOF
27+
renv::activate()
28+
EOF
29+
30+
echo "Starting priority municipal data generation: $(date)"
31+
32+
# Get forecasts first (immediate availability)
33+
echo "Collecting municipal forecasts for immediate model use..."
34+
# R output is written to a timestamped file under logs/. The exit status is
# not checked, so the next step runs even if this one fails.
R CMD BATCH --no-save --no-restore code/get_forecast_data.R logs/priority_forecast_$(date +%Y%m%d_%H%M%S).out
35+
36+
# Generate backwards municipal data
37+
echo "Generating municipal data backwards from present..."
38+
# Same pattern as above: timestamped log file, exit status not checked, so
# the completion messages below are printed regardless of the outcome.
R CMD BATCH --no-save --no-restore code/generate_municipal_priority.R logs/priority_municipal_$(date +%Y%m%d_%H%M%S).out
39+
40+
echo "Priority municipal data generation completed: $(date)"
41+
echo "Models can now use: data/output/daily_municipal_extended.csv.gz"
42+
43+
# Submit: sbatch priority_municipal_data.sh

0 commit comments

Comments
 (0)