Skip to content

Uptime Monitoring #2313

Uptime Monitoring

Uptime Monitoring #2313

Workflow file for this run

# Uptime monitoring workflow: periodically probes the project's public
# endpoints and records outages as GitHub issues.
name: Uptime Monitoring

on:
  schedule:
    # Run every 15 minutes. GitHub starts scheduled workflows on a
    # best-effort basis, so individual runs may begin late under load.
    - cron: '*/15 * * * *'
  # Allow manual trigger
  workflow_dispatch:

# Scopes granted to the workflow's GITHUB_TOKEN.
permissions:
  # Needed by the github-script steps that open and comment on alert issues.
  issues: write
  contents: read
jobs:
  # Probes https://lumos-lang.org and, when it does not answer with HTTP 200,
  # collects network diagnostics and opens (or comments on) an alert issue.
  check-website:
    name: Check lumos-lang.org
    runs-on: ubuntu-latest
    steps:
      - name: Check website availability
        id: check
        run: |
          echo "Checking lumos-lang.org..."
          # curl exits non-zero on DNS, TLS or timeout errors. This step runs
          # with errexit enabled, so an unguarded failure would abort the
          # script before the outputs below are written and the alert issue
          # would carry an empty code and timestamp. `|| true` keeps the
          # script going; the default covers an empty write-out value.
          HTTP_CODE=$(curl -o /dev/null -s -w "%{http_code}" -L --max-time 30 \
            https://lumos-lang.org || true)
          HTTP_CODE=${HTTP_CODE:-000}
          echo "http_code=$HTTP_CODE" >> "$GITHUB_OUTPUT"
          echo "timestamp=$(date -u +"%Y-%m-%d %H:%M:%S UTC")" >> "$GITHUB_OUTPUT"
          if [ "$HTTP_CODE" -eq 200 ]; then
            echo "✅ Website is UP (HTTP $HTTP_CODE)"
          else
            echo "❌ Website is DOWN or unreachable (HTTP $HTTP_CODE)"
            exit 1
          fi

      # Best-effort troubleshooting output for the run log. Every command is
      # followed by `|| true` so a failing probe cannot mask the original
      # failure or stop the issue step below from running.
      - name: Additional diagnostics (on failure)
        if: failure()
        run: |
          echo "Running additional diagnostics..."
          echo "=== DNS Resolution ==="
          nslookup lumos-lang.org || true
          echo "=== Ping Test ==="
          # NOTE(review): ICMP may be filtered on hosted runners, in which
          # case this always fails regardless of site health - confirm.
          ping -c 3 lumos-lang.org || true
          echo "=== curl verbose ==="
          curl -v --max-time 30 https://lumos-lang.org || true

      - name: Create issue on failure
        if: failure()
        uses: actions/github-script@v8
        env:
          # Step outputs are handed over as environment variables instead of
          # being spliced into the script text with ${{ }}.
          HTTP_CODE: ${{ steps.check.outputs.http_code }}
          CHECKED_AT: ${{ steps.check.outputs.timestamp }}
        with:
          script: |
            const httpCode = process.env.HTTP_CODE || 'unknown';
            const timestamp = process.env.CHECKED_AT || new Date().toISOString();
            const { owner, repo } = context.repo;
            const runUrl = `${context.serverUrl}/${owner}/${repo}/actions/runs/${context.runId}`;

            // The crates.io job files issues under the same 'uptime-alert'
            // label, so taking "the first open alert" could attach website
            // downtime to the crates.io issue. Match on the title instead.
            const { data: openAlerts } = await github.rest.issues.listForRepo({
              owner,
              repo,
              state: 'open',
              labels: 'uptime-alert',
              per_page: 100
            });
            const websiteIssue = openAlerts.find(
              issue => issue.title.includes('lumos-lang.org')
            );

            if (websiteIssue) {
              // Append this failure to the existing website alert
              await github.rest.issues.createComment({
                owner,
                repo,
                issue_number: websiteIssue.number,
                body: `🔴 **Another downtime detected**\n\n` +
                      `- **Time**: ${timestamp}\n` +
                      `- **HTTP Code**: ${httpCode}\n` +
                      `- **Workflow Run**: ${runUrl}`
              });
              console.log(`Updated existing issue #${websiteIssue.number}`);
            } else {
              // No open website alert yet: create one
              await github.rest.issues.create({
                owner,
                repo,
                title: '🚨 lumos-lang.org is DOWN',
                labels: ['uptime-alert', 'priority:critical', 'area:infrastructure'],
                body: `## Website Downtime Detected\n\n` +
                      `Automated monitoring detected that **lumos-lang.org** is unreachable.\n\n` +
                      `### Details\n\n` +
                      `- **Time**: ${timestamp}\n` +
                      `- **HTTP Code**: ${httpCode}\n` +
                      `- **Expected**: 200 OK\n` +
                      `- **Workflow Run**: ${runUrl}\n\n` +
                      `### Action Required\n\n` +
                      `1. Check website hosting status\n` +
                      `2. Verify DNS configuration\n` +
                      `3. Check Cloudflare Pages deployment\n` +
                      `4. Review recent changes to docs-lumos repository\n\n` +
                      `### Monitoring\n\n` +
                      `This issue will automatically update with additional downtime events. ` +
                      `Close this issue once the website is confirmed to be back online.\n\n` +
                      `---\n` +
                      `*This issue was automatically created by the uptime monitoring workflow.*`
              });
              console.log('Created new uptime alert issue');
            }

  # Queries the crates.io API for each published LUMOS crate, retrying
  # transient failures, and opens (or comments on) an alert issue when a
  # crate stays unreachable.
  check-crates-io:
    name: Check crates.io packages
    runs-on: ubuntu-latest
    steps:
      - name: Check crates.io packages with retry
        id: check-crates
        run: |
          # check_crate NAME
          # Returns 0 as soon as the crates.io API answers 200 for NAME,
          # or 1 after max_retries unsuccessful attempts.
          check_crate() {
            local crate=$1
            local max_retries=3
            local retry_delay=10
            local attempt http_code
            for ((attempt = 1; attempt <= max_retries; attempt++)); do
              # `|| true`: a transport error is reported as a non-200 code
              # and retried, even if this function is called outside an `if`.
              http_code=$(curl -o /dev/null -s -w "%{http_code}" --max-time 30 \
                -H "User-Agent: lumos-uptime-monitor (https://github.com/getlumos/lumos)" \
                "https://crates.io/api/v1/crates/$crate" || true)
              http_code=${http_code:-000}
              if [ "$http_code" -eq 200 ]; then
                echo "✅ $crate is accessible (attempt $attempt)"
                return 0
              fi
              echo "⚠️ $crate returned HTTP $http_code (attempt $attempt/$max_retries)"
              if [ "$attempt" -lt "$max_retries" ]; then
                echo "Retrying in ${retry_delay}s..."
                sleep "$retry_delay"
              fi
            done
            echo "❌ $crate not accessible after $max_retries attempts"
            return 1
          }

          # Space-separated names of crates that failed every attempt.
          FAILED=""
          for crate in lumos-core lumos-cli lumos-lsp; do
            echo "=== Checking $crate ==="
            if ! check_crate "$crate"; then
              FAILED="${FAILED:+$FAILED }$crate"
            fi
          done

          if [ -n "$FAILED" ]; then
            echo "failed_crates=$FAILED" >> "$GITHUB_OUTPUT"
            exit 1
          fi
          echo "✅ All crates accessible"

      - name: Create or update issue on crates.io failure
        if: failure()
        uses: actions/github-script@v8
        env:
          # Passed as an environment variable instead of being spliced into
          # the script text with ${{ }}.
          FAILED_CRATES: ${{ steps.check-crates.outputs.failed_crates }}
        with:
          script: |
            const timestamp = new Date().toISOString();
            const failedCrates = (process.env.FAILED_CRATES || '').trim() || 'unknown';
            const { owner, repo } = context.repo;
            const runUrl = `${context.serverUrl}/${owner}/${repo}/actions/runs/${context.runId}`;

            // The 'uptime-alert' label is shared with the website job, so
            // pick out the crates.io alert by its title.
            const { data: openAlerts } = await github.rest.issues.listForRepo({
              owner,
              repo,
              state: 'open',
              labels: 'uptime-alert',
              per_page: 100
            });
            const cratesIssue = openAlerts.find(
              issue => issue.title.includes('crates.io')
            );

            if (cratesIssue) {
              // Append this failure to the existing crates.io alert
              await github.rest.issues.createComment({
                owner,
                repo,
                issue_number: cratesIssue.number,
                body: `🔴 **Another check failure**\n\n` +
                      `- **Time**: ${timestamp}\n` +
                      `- **Failed crates**: ${failedCrates}\n` +
                      `- **Workflow Run**: ${runUrl}`
              });
              console.log(`Updated existing issue #${cratesIssue.number}`);
            } else {
              // No open crates.io alert yet: create one
              await github.rest.issues.create({
                owner,
                repo,
                title: '🚨 crates.io packages unreachable',
                labels: ['uptime-alert', 'priority:high', 'area:infrastructure'],
                body: `## crates.io Downtime Detected\n\n` +
                      `Automated monitoring detected that LUMOS packages on crates.io are unreachable.\n\n` +
                      `### Details\n\n` +
                      `- **Time**: ${timestamp}\n` +
                      `- **Failed crates**: ${failedCrates}\n` +
                      `- **Workflow Run**: ${runUrl}\n\n` +
                      `### Packages to Check\n\n` +
                      `- lumos-core: https://crates.io/crates/lumos-core\n` +
                      `- lumos-cli: https://crates.io/crates/lumos-cli\n` +
                      `- lumos-lsp: https://crates.io/crates/lumos-lsp\n\n` +
                      `### Possible Causes\n\n` +
                      `1. crates.io is experiencing downtime\n` +
                      `2. Packages were yanked or unpublished\n` +
                      `3. Network issues\n\n` +
                      `### Monitoring\n\n` +
                      `This issue will automatically update with additional failures. ` +
                      `Close this issue once packages are confirmed accessible.\n\n` +
                      `---\n` +
                      `*This issue was automatically created by the uptime monitoring workflow.*`
              });
              console.log('Created new crates.io alert issue');
            }