# Flow - Random Stress Chaos #23
# Viewer notice: this file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Workflow identity and triggers.
# The name is what the GitHub Actions UI shows for this workflow.
name: Flow - Random Stress Chaos

on:
  schedule:
    # Run every hour (minute 0 of each hour).
    - cron: '0 * * * *'
  # Allow manual trigger
  workflow_dispatch:
jobs:
  # Picks one stress scenario at random, triggers it on the scenario service,
  # drives load against the affected service while the stress runs, validates
  # the result through New Relic, and finally exports the job outcome.
  trigger-stress:
    runs-on: ubuntu-latest
    environment: events
    # Keep a hung run from holding a runner for the 360-minute default.
    timeout-minutes: 30
    # Least privilege: the steps below only need to read the repository.
    permissions:
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install requests locust

      - name: Select random stress scenario
        id: random
        run: |
          # Array of stress scenarios
          scenarios=("relibank-cpu-stress-test" "relibank-high-cpu-stress" "relibank-memory-stress-test" "relibank-high-memory-stress" "relibank-combined-stress-test")

          # Select random scenario
          RANDOM_INDEX=$((RANDOM % ${#scenarios[@]}))
          SCENARIO=${scenarios[$RANDOM_INDEX]}
          echo "Selected stress scenario: $SCENARIO"
          echo "scenario=$SCENARIO" >> "$GITHUB_OUTPUT"

          # Determine target service and locust file:
          # cpu/combined scenarios -> transaction-service, everything else
          # (the memory scenarios) -> bill-pay-service.
          if [[ "$SCENARIO" == *"cpu"* ]] || [[ "$SCENARIO" == *"combined"* ]]; then
            echo "target_service=transaction-service" >> "$GITHUB_OUTPUT"
            echo "locust_file=transaction_service_load.py" >> "$GITHUB_OUTPUT"
          else
            echo "target_service=bill-pay-service" >> "$GITHUB_OUTPUT"
            echo "locust_file=bill_pay_service_load.py" >> "$GITHUB_OUTPUT"
          fi

      - name: Reset rate limit
        env:
          SCENARIO_SERVICE_URL: ${{ vars.SCENARIO_SERVICE_URL }}
        run: |
          echo "Resetting rate limit to ensure scenario can be triggered..."
          # Best-effort: a failure is reported but does not fail the step.
          # --fail makes HTTP error statuses reach the fallback message as well
          # (plain curl exits 0 on them); --max-time bounds a hung connection.
          curl --fail --max-time 30 -X POST "${SCENARIO_SERVICE_URL}/api/chaos-rate-limit-reset" || echo "Rate limit reset failed (may not be available)"

      - name: Trigger stress scenario
        id: trigger
        env:
          SCENARIO_SERVICE_URL: ${{ vars.SCENARIO_SERVICE_URL }}
          SCENARIO: ${{ steps.random.outputs.scenario }}
          TARGET_SERVICE: ${{ steps.random.outputs.target_service }}
        run: |
          echo "Triggering stress scenario: $SCENARIO"
          echo "Target service: $TARGET_SERVICE"

          # Report a transport failure explicitly instead of letting `bash -e`
          # abort the step without any message.
          RESPONSE=$(curl -sS --max-time 60 -X POST "${SCENARIO_SERVICE_URL}/api/trigger_stress/${SCENARIO}") || {
            echo "Failed to trigger stress scenario"
            exit 1
          }
          echo "$RESPONSE"

          # A non-JSON body makes jq fail; treat that as "not success" so the
          # failure message below is still printed.
          STATUS=$(echo "$RESPONSE" | jq -r '.status' 2>/dev/null || true)
          if [ "$STATUS" != "success" ]; then
            echo "Failed to trigger stress scenario"
            echo "$RESPONSE" | jq . || true
            exit 1
          fi
          echo "✓ Stress scenario triggered successfully"

      - name: Run load test during stress
        env:
          TARGET_SERVICE: ${{ steps.random.outputs.target_service }}
          LOCUST_FILE: ${{ steps.random.outputs.locust_file }}
          BASE_URL: ${{ vars.BASE_URL }}
        run: |
          echo "Starting load test on $TARGET_SERVICE..."

          # Run locust in headless mode for 3 minutes (matching stress duration)
          # 3 users, spawn rate of 1 user/second.
          # The subshell keeps the `cd` local to the background job.
          (
            cd demo_flows/stress_loadgen &&
              locust -f "$LOCUST_FILE" \
                --host="$BASE_URL" \
                --users=3 \
                --spawn-rate=1 \
                --run-time=3m \
                --headless \
                --only-summary
          ) &
          LOCUST_PID=$!
          echo "Locust running with PID: $LOCUST_PID"

          # Wait for stress experiment to complete (3 minutes)
          echo "Waiting 3 minutes for stress experiment and load test to complete..."
          sleep 180

          # Ensure locust has finished. A non-zero exit is tolerated (this step
          # is best-effort), but the code is surfaced rather than hidden.
          LOCUST_EXIT=0
          wait "$LOCUST_PID" || LOCUST_EXIT=$?
          if [ "$LOCUST_EXIT" -ne 0 ]; then
            echo "Locust exited with code $LOCUST_EXIT (tolerated during stress)"
          fi
          echo "✓ Load test completed"

      - name: Wait for metrics to stabilize
        run: |
          echo "Waiting 30 seconds for metrics to be ingested in New Relic..."
          sleep 30

      - name: Validate stress with New Relic
        env:
          NEW_RELIC_API_KEY: ${{ secrets.NR_USER_API_KEY }}
          NEW_RELIC_ACCOUNT_ID: ${{ vars.NR_ACCOUNT_ID }}
          SCENARIO: ${{ steps.random.outputs.scenario }}
        run: |
          # Arguments: scenario name, then 5.
          # NOTE(review): the meaning of the second argument is defined by
          # validate_stress_metrics.py, which is not visible here - confirm.
          python tests/workflow_validation/validate_stress_metrics.py "$SCENARIO" 5

      - name: Summary
        if: always()
        env:
          # Falls back to "n/a" when the selection step never produced an output.
          SCENARIO: ${{ steps.random.outputs.scenario || 'n/a' }}
          JOB_STATUS: ${{ job.status }}
        run: |
          {
            echo "## Stress Chaos Test Summary"
            echo ""
            echo "**Scenario**: $SCENARIO"
            echo "**Status**: $JOB_STATUS"
            echo ""
            echo "Check New Relic dashboards for detailed metrics during the stress period."
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Execute New Relic Log Export Script
        if: always()
        # Set the environment variables required by export_to_newrelic.py
        env:
          # --- GitHub Action Context Variables ---
          JOB_STATUS: ${{ job.status }}
          JOB_NAME: ${{ github.job }}
          RUN_ID: ${{ github.run_id }}
          REPOSITORY: ${{ github.repository }}
          SERVER_URL: ${{ github.server_url }}
          WORKFLOW: ${{ github.workflow }}
          # --- New Relic Configuration (repository secrets / variables) ---
          NR_LICENSE_KEY: ${{ secrets.NR_LICENSE_KEY_ALERTS }}
          NR_ACCOUNT_ID: ${{ vars.NR_ACCOUNT_ID_ALERTS }}
          # This job defines no strategy.matrix, so `matrix.environment` would
          # always be empty; use the job's environment name instead.
          ENVIRONMENT: events
          # Base URL for the New Relic API endpoint (US region)
          NR_ENDPOINT_BASE: "https://log-api.newrelic.com/log/v1"
        run: |
          cd github_action_monitoring && python export_to_newrelic.py