Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 156 additions & 0 deletions .github/workflows/nightly-throughput-stress.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
name: Nightly Throughput Stress

on:
  # Nightly run at 11:00 UTC (3 AM PST), offset from the existing nightly.
  schedule:
    - cron: '00 11 * * *'

  # Also runs on pushes to this one branch.
  # NOTE(review): this looks like a trigger left over from developing the
  # workflow on its feature branch - confirm whether it should remain.
  push:
    branches:
      - add-nightly-throughput-stress-workflow

  # Manual runs. Every input is optional and carries a default.
  workflow_dispatch:
    inputs:
      duration:
        type: string
        description: 'Test duration (e.g., 6h, 1h)'
        required: false
        default: '6h'
      timeout:
        type: string
        description: 'Scenario timeout (should be greater than duration)'
        required: false
        default: '6h30m'
      job_timeout_minutes:
        type: number
        description: 'GitHub Actions job timeout in minutes'
        required: false
        default: 420

env:
  # Test length and scenario timeout. A workflow_dispatch input wins, then the
  # matching repository variable, then the literal default.
  TEST_DURATION: ${{ inputs.duration || vars.NIGHTLY_TEST_DURATION || '6h' }}
  TEST_TIMEOUT: ${{ inputs.timeout || vars.NIGHTLY_TEST_TIMEOUT || '6h30m' }}

  # Directory that receives the Temporal server log and the scenario log.
  WORKER_LOG_DIR: /tmp/throughput-stress-logs

  # Omes checkout (repository and ref) and the run ID passed to the scenario.
  OMES_REPO: temporalio/omes
  OMES_REF: main
  RUN_ID: ${{ github.run_id }}-throughput-stress
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May want to append the SDK name to the run ID, but if we do, we should do it for all SDKs.


jobs:
  # Runs the Omes throughput_stress scenario against the SDK checked out from
  # this repository, using a local Temporal dev server.
  throughput-stress:
    runs-on: ubuntu-latest-4-cores
    # Job timeout in minutes: the dispatch input, then the
    # NIGHTLY_JOB_TIMEOUT_MINUTES repository variable, then 420.
    timeout-minutes: ${{ fromJSON(inputs.job_timeout_minutes || (vars.NIGHTLY_JOB_TIMEOUT_MINUTES || '420')) }}

    steps:
      - name: Print test configuration
        run: |
          echo "=== Throughput Stress Test Configuration ==="
          echo "Duration: $TEST_DURATION"
          echo "Timeout: $TEST_TIMEOUT"
          echo "Run ID: $RUN_ID"
          echo "=========================================="

      # The SDK lands in the workspace root; Omes is checked out beside it
      # under ./omes.
      - name: Checkout SDK
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Checkout OMES
        uses: actions/checkout@v4
        with:
          repository: ${{ env.OMES_REPO }}
          ref: ${{ env.OMES_REF }}
          path: omes

      - name: Setup Go for OMES
        uses: actions/setup-go@v5
        with:
          go-version-file: omes/go.mod
          cache-dependency-path: omes/go.sum

      # NOTE(review): this second setup-go presumably becomes the active
      # toolchain for the remaining steps - confirm that is intended.
      - name: Setup Go for SDK
        uses: actions/setup-go@v5
        with:
          go-version: "stable"

      - name: Install Temporal CLI
        uses: temporalio/setup-temporal@v0

      - name: Setup log directory
        run: mkdir -p "$WORKER_LOG_DIR"

      # Started in the background; stdout and stderr go to the log directory.
      - name: Start Temporal Server
        run: |
          temporal server start-dev \
            --db-filename temporal-throughput-stress.sqlite \
            --sqlite-pragma journal_mode=WAL \
            --sqlite-pragma synchronous=OFF \
            --headless &> "$WORKER_LOG_DIR/temporal-server.log" &

      # This step never fails the job: continue-on-error is set, and the
      # pipeline's own status is tee's. The scenario's real exit code is
      # recorded in SCENARIO_EXIT_CODE for the steps below.
      - name: Run throughput stress scenario with local SDK
        working-directory: omes
        continue-on-error: true
        run: |
          # Use run-scenario-with-worker to build and run in one step
          # Pass the SDK directory as --version for local testing
          # Note: The hardcoded values below match OMES defaults, except:
          # - visibility-count-timeout: 5m (vs 3m default)
          #   to give CI a bit more time for visibility consistency
          go run ./cmd run-scenario-with-worker \
            --scenario throughput_stress \
            --language go \
            --version "$(pwd)/.." \
            --run-id "$RUN_ID" \
            --duration "$TEST_DURATION" \
            --timeout "$TEST_TIMEOUT" \
            --max-concurrent 10 \
            --option internal-iterations=10 \
            --option continue-as-new-after-iterations=3 \
            --option sleep-time=1s \
            --option visibility-count-timeout=5m \
            --option min-throughput-per-hour=1000 \
            2>&1 | tee "$WORKER_LOG_DIR/scenario.log"

          # PIPESTATUS[0] is the exit code of `go run`, not of tee.
          echo "SCENARIO_EXIT_CODE=${PIPESTATUS[0]}" >> "$GITHUB_ENV"

      # failure() stays false when the scenario fails, because the scenario
      # step cannot fail the job. The recorded exit code is checked as well
      # so that a failed scenario still uploads logs. An unset value (the
      # step never recorded a code) also counts as a failure.
      - name: Upload logs on failure/cancellation
        if: ${{ failure() || cancelled() || env.SCENARIO_EXIT_CODE != '0' }}
        uses: actions/upload-artifact@v4
        with:
          name: throughput-stress-logs
          path: ${{ env.WORKER_LOG_DIR }}
          retention-days: 30

      # Same condition as the log upload, so a failed scenario sends an alert.
      - name: Notify Slack on failure/cancellation
        if: ${{ failure() || cancelled() || env.SCENARIO_EXIT_CODE != '0' }}
        uses: slackapi/slack-github-action@v2
        with:
          webhook-type: incoming-webhook
          payload: |
            {
              "text": "Nightly Go throughput stress test failed",
              "blocks": [
                {
                  "type": "section",
                  "text": {
                    "type": "mrkdwn",
                    "text": "*Nightly Throughput Stress Failed* :x:\n\n*Duration:* ${{ env.TEST_DURATION }}\n*Run:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>\n*Triggered by:* ${{ github.event_name == 'schedule' && 'Scheduled' || github.actor }}"
                  }
                }
              ]
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_SDK_ALERTS_WEBHOOK }}

      # Turns the recorded scenario result into the job result. A missing
      # exit code is treated as a failure.
      - name: Fail if scenario failed
        if: always()
        run: |
          if [ "${SCENARIO_EXIT_CODE:-1}" != "0" ]; then
            echo "❌ Throughput stress test failed with exit code ${SCENARIO_EXIT_CODE:-unknown}"
            echo "Check the artifacts for detailed logs and state"
            exit 1
          else
            echo "✅ Throughput stress test completed successfully"
          fi
Loading