# Flow - Random Stress Chaos #23

# Workflow: Flow - Random Stress Chaos
#
# Every hour (or on manual dispatch) this workflow:
#   1. picks one stress scenario at random,
#   2. resets the scenario service's chaos rate limit (best effort),
#   3. triggers the scenario through the scenario service API,
#   4. runs a 3-minute headless Locust load test against the mapped service,
#   5. runs tests/workflow_validation/validate_stress_metrics.py,
#   6. writes a step summary and runs
#      github_action_monitoring/export_to_newrelic.py (both always run).
#
# Workflow expressions (`${{ ... }}`) are passed to shell scripts through
# step-level `env:` instead of being spliced into the script text, so their
# values are always treated as data by the shell. The variables carry a
# `CHAOS_` prefix to avoid colliding with names the invoked tools may read.
name: Flow - Random Stress Chaos

on:
  schedule:
    # Run every hour
    - cron: '0 * * * *'
  workflow_dispatch:  # Allow manual trigger

jobs:
  trigger-stress:
    runs-on: ubuntu-latest
    environment: events
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install requests locust

      # Outputs: scenario, target_service, locust_file
      - name: Select random stress scenario
        id: random
        run: |
          # Array of stress scenarios
          scenarios=(
            "relibank-cpu-stress-test"
            "relibank-high-cpu-stress"
            "relibank-memory-stress-test"
            "relibank-high-memory-stress"
            "relibank-combined-stress-test"
          )

          # Select random scenario
          RANDOM_INDEX=$((RANDOM % ${#scenarios[@]}))
          SCENARIO=${scenarios[$RANDOM_INDEX]}
          echo "Selected stress scenario: $SCENARIO"
          echo "scenario=$SCENARIO" >> "$GITHUB_OUTPUT"

          # Determine target service and locust file:
          # cpu/combined scenarios -> transaction-service, otherwise bill-pay-service
          case "$SCENARIO" in
            *cpu* | *combined*)
              {
                echo "target_service=transaction-service"
                echo "locust_file=transaction_service_load.py"
              } >> "$GITHUB_OUTPUT"
              ;;
            *)
              {
                echo "target_service=bill-pay-service"
                echo "locust_file=bill_pay_service_load.py"
              } >> "$GITHUB_OUTPUT"
              ;;
          esac

      # Best effort: a failed reset is reported but never fails the job.
      - name: Reset rate limit
        env:
          CHAOS_SERVICE_URL: ${{ vars.SCENARIO_SERVICE_URL }}
        run: |
          echo "Resetting rate limit to ensure scenario can be triggered..."
          # --fail-with-body makes HTTP 4xx/5xx count as failures (so the
          # fallback message is printed) while still showing the response body.
          curl -sS --fail-with-body -X POST "$CHAOS_SERVICE_URL/api/chaos-rate-limit-reset" \
            || echo "Rate limit reset failed (may not be available)"

      # Fails the job unless the service answers with JSON whose `.status` is "success".
      - name: Trigger stress scenario
        id: trigger
        env:
          CHAOS_SERVICE_URL: ${{ vars.SCENARIO_SERVICE_URL }}
          CHAOS_SCENARIO: ${{ steps.random.outputs.scenario }}
          CHAOS_TARGET_SERVICE: ${{ steps.random.outputs.target_service }}
        run: |
          echo "Triggering stress scenario: $CHAOS_SCENARIO"
          echo "Target service: $CHAOS_TARGET_SERVICE"

          # -S keeps curl's own error message visible if the request itself fails.
          RESPONSE=$(curl -sS -X POST "$CHAOS_SERVICE_URL/api/trigger_stress/$CHAOS_SCENARIO")
          echo "$RESPONSE"

          # A non-JSON or unexpected body yields an empty STATUS instead of
          # aborting inside jq, so the failure message below is always printed.
          STATUS=$(jq -r '.status? // empty' <<< "$RESPONSE" 2>/dev/null || true)
          if [ "$STATUS" != "success" ]; then
            echo "Failed to trigger stress scenario"
            jq . <<< "$RESPONSE" || true
            exit 1
          fi
          echo "✓ Stress scenario triggered successfully"

      - name: Run load test during stress
        env:
          CHAOS_TARGET_SERVICE: ${{ steps.random.outputs.target_service }}
          CHAOS_LOCUST_FILE: ${{ steps.random.outputs.locust_file }}
          CHAOS_BASE_URL: ${{ vars.BASE_URL }}
        run: |
          echo "Starting load test on $CHAOS_TARGET_SERVICE..."

          # Run locust in headless mode for 3 minutes (matching stress duration)
          # 3 users, spawn rate of 1 user/second
          # The whole `cd && locust` list runs in the background, so the
          # directory change does not affect the rest of this script.
          cd demo_flows/stress_loadgen && \
            locust -f "$CHAOS_LOCUST_FILE" \
              --host="$CHAOS_BASE_URL" \
              --users=3 \
              --spawn-rate=1 \
              --run-time=3m \
              --headless \
              --only-summary &
          LOCUST_PID=$!
          echo "Locust running with PID: $LOCUST_PID"

          # Wait for stress experiment to complete (3 minutes)
          echo "Waiting 3 minutes for stress experiment and load test to complete..."
          sleep 180

          # Ensure locust has finished; a non-zero exit is reported but is
          # deliberately not treated as a step failure.
          wait "$LOCUST_PID" || echo "Locust exited with status $? (not treated as a step failure)"
          echo "✓ Load test completed"

      - name: Wait for metrics to stabilize
        run: |
          echo "Waiting 30 seconds for metrics to be ingested in New Relic..."
          sleep 30

      - name: Validate stress with New Relic
        env:
          NEW_RELIC_API_KEY: ${{ secrets.NR_USER_API_KEY }}
          NEW_RELIC_ACCOUNT_ID: ${{ vars.NR_ACCOUNT_ID }}
          CHAOS_SCENARIO: ${{ steps.random.outputs.scenario }}
        run: |
          # NOTE(review): the trailing `5` is passed through unchanged; its
          # meaning is defined by the script - confirm there before changing it.
          python tests/workflow_validation/validate_stress_metrics.py "$CHAOS_SCENARIO" 5

      - name: Summary
        if: always()
        env:
          CHAOS_SCENARIO: ${{ steps.random.outputs.scenario }}
          CHAOS_JOB_STATUS: ${{ job.status }}
        run: |
          # The scenario is empty when an earlier step failed before selection,
          # hence the fallback text.
          {
            echo "## Stress Chaos Test Summary"
            echo ""
            echo "**Scenario**: ${CHAOS_SCENARIO:-not selected}"
            echo "**Status**: $CHAOS_JOB_STATUS"
            echo ""
            echo "Check New Relic dashboards for detailed metrics during the stress period."
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Execute New Relic Log Export Script
        if: always()
        # Set the environment variables required by export_to_newrelic.py
        env:
          # --- GitHub Action Context Variables ---
          JOB_STATUS: ${{ job.status }}
          JOB_NAME: ${{ github.job }}
          RUN_ID: ${{ github.run_id }}
          REPOSITORY: ${{ github.repository }}
          SERVER_URL: ${{ github.server_url }}
          WORKFLOW: ${{ github.workflow }}
          # --- New Relic Configuration (set as repository secrets / variables) ---
          NR_LICENSE_KEY: ${{ secrets.NR_LICENSE_KEY_ALERTS }}
          NR_ACCOUNT_ID: ${{ vars.NR_ACCOUNT_ID_ALERTS }}
          # This job defines no strategy.matrix, so `matrix.environment` always
          # expanded to an empty string. Use the job's environment name
          # directly; keep in sync with `environment:` above.
          ENVIRONMENT: events
          # Base URL for the New Relic API endpoint (US region)
          NR_ENDPOINT_BASE: 'https://log-api.newrelic.com/log/v1'
        run: |
          cd github_action_monitoring && python export_to_newrelic.py