# nightly-throughput-stress.yml
name: Nightly Throughput Stress

on:
  # Scheduled nightly at 11:00 UTC (3 AM PST), offset from the existing nightly.
  schedule:
    - cron: '00 11 * * *'

  # NOTE(review): this branch-scoped push trigger looks like a leftover from
  # developing the workflow - confirm whether it should remain.
  push:
    branches:
      - add-nightly-throughput-stress-workflow

  # Manual runs; every input is optional and falls back to its default.
  workflow_dispatch:
    inputs:
      duration:
        type: string
        required: false
        default: '5h'
        description: 'Test duration (e.g., 6h, 1h)'
      timeout:
        type: string
        required: false
        default: '5h30m'
        description: 'Scenario timeout (should always be more than duration)'
      job_timeout_minutes:
        type: number
        required: false
        default: 360
        description: 'GitHub Actions job timeout in minutes'
# The workflow token is granted read access to repository contents only.
permissions:
  contents: read
env:
  # Scenario duration and timeout. Precedence for each value:
  # workflow_dispatch input, then configuration variable, then literal default.
  TEST_DURATION: "${{ inputs.duration || vars.NIGHTLY_TEST_DURATION || '5h' }}"
  TEST_TIMEOUT: "${{ inputs.timeout || vars.NIGHTLY_TEST_TIMEOUT || '5h30m' }}"

  # Directory that collects the server and scenario logs for artifact upload.
  WORKER_LOG_DIR: /tmp/throughput-stress-logs

  # Omes checkout (repository and ref) and the run identifier passed to it.
  OMES_REPO: temporalio/omes
  OMES_REF: main
  RUN_ID: "${{ github.run_id }}-throughput-stress"
jobs:
  # Builds the SDK from this checkout, starts a local dev Temporal server and
  # runs the Omes throughput_stress scenario with a Java worker pointed at the
  # checked-out SDK directory. Logs are uploaded and Slack is notified when the
  # job fails or is cancelled.
  throughput-stress:
    runs-on: ubuntu-latest-4-cores
    # Precedence: workflow_dispatch input, then configuration variable, then 360.
    timeout-minutes: ${{ fromJSON(inputs.job_timeout_minutes || vars.NIGHTLY_JOB_TIMEOUT_MINUTES || 360) }}
    steps:
      - name: Print test configuration
        run: |
          echo "=== Throughput Stress Test Configuration ==="
          echo "Duration: $TEST_DURATION"
          echo "Timeout: $TEST_TIMEOUT"
          echo "Run ID: $RUN_ID"
          echo "=========================================="

      - name: Checkout SDK
        uses: actions/checkout@v5
        with:
          submodules: recursive
          fetch-depth: 0

      # Omes is checked out into ./omes, next to the SDK sources.
      - name: Checkout OMES
        uses: actions/checkout@v5
        with:
          repository: ${{ env.OMES_REPO }}
          ref: ${{ env.OMES_REF }}
          path: omes

      # Go version and module cache key are taken from the Omes checkout.
      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version-file: omes/go.mod
          cache-dependency-path: omes/go.sum

      - name: Set up Java
        uses: actions/setup-java@v5
        with:
          java-version: "11"
          distribution: "temurin"

      - name: Set up Gradle
        uses: gradle/actions/setup-gradle@v4

      # Compile only; the test and virtualThreadTests tasks are excluded.
      - name: Build SDK
        run: ./gradlew build -x test -x virtualThreadTests

      - name: Install Temporal CLI
        uses: temporalio/setup-temporal@v0

      - name: Setup log directory
        run: mkdir -p "$WORKER_LOG_DIR"

      # The server is started in the background and its output is captured in
      # the log directory; no later step explicitly waits for it to be ready.
      - name: Start Temporal Server
        run: |
          temporal server start-dev \
            --db-filename temporal-throughput-stress.sqlite \
            --sqlite-pragma journal_mode=WAL \
            --sqlite-pragma synchronous=OFF \
            --headless &> "$WORKER_LOG_DIR/temporal-server.log" &

      - name: Run throughput stress scenario with local SDK
        working-directory: omes
        run: |
          # With pipefail the pipeline's exit status is that of the last command
          # to exit non-zero (zero only if every command succeeds). Without it
          # only the status of `tee` would be reported, which would hide a
          # scenario failure whenever `tee` itself succeeds.
          set -o pipefail
          # Use run-scenario-with-worker to build and run in one step.
          # Pass the SDK directory as --version for local testing.
          # Expansions are quoted because the duration and timeout values can
          # come from manually supplied workflow_dispatch inputs.
          # Note: The hardcoded values below match OMES defaults, except:
          # - visibility-count-timeout: 5m (vs 3m default)
          #   to give CI a bit more time for visibility consistency
          go run ./cmd run-scenario-with-worker \
            --scenario throughput_stress \
            --language java \
            --version "$(pwd)/.." \
            --run-id "$RUN_ID" \
            --duration "$TEST_DURATION" \
            --timeout "$TEST_TIMEOUT" \
            --max-concurrent 10 \
            --option internal-iterations=10 \
            --option continue-as-new-after-iterations=3 \
            --option sleep-time=1s \
            --option visibility-count-timeout=5m \
            --option min-throughput-per-hour=1000 \
            2>&1 | tee "$WORKER_LOG_DIR/scenario.log"

      # Logs are only kept when the job did not finish successfully.
      - name: Upload logs on failure
        if: failure() || cancelled()
        uses: actions/upload-artifact@v4
        with:
          name: throughput-stress-logs
          path: ${{ env.WORKER_LOG_DIR }}
          retention-days: 30

      # The webhook URL is supplied through the step environment below.
      - name: Notify Slack on failure
        if: failure() || cancelled()
        uses: slackapi/slack-github-action@v2
        with:
          webhook-type: incoming-webhook
          payload: |
            {
              "text": "Nightly Java throughput stress test failed",
              "blocks": [
                {
                  "type": "section",
                  "text": {
                    "type": "mrkdwn",
                    "text": "*Nightly Throughput Stress Failed* :x:\n\n*Repository:* ${{ github.repository }}\n*Duration:* ${{ env.TEST_DURATION }}\n*Run:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>\n*Triggered by:* ${{ github.event_name == 'schedule' && 'Scheduled' || github.actor }}"
                  }
                }
              ]
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_SDK_ALERTS_WEBHOOK }}