Skip to content

CI Stress

CI Stress #31

Workflow file for this run

# Manually dispatched stress run: repeats the selected test suite(s) a
# requested number of times, spread over a configurable number of parallel
# shards.
name: CI Stress
on:
  workflow_dispatch:
    # Every numeric input is declared as a string; the plan job is the place
    # where the values are validated before any shard starts.
    inputs:
      iterations:
        description: "Total iterations to run (split across shards)"
        required: true
        default: "100"
        type: string
      target:
        description: "Which test suite(s) to stress"
        required: true
        default: "all"
        type: choice
        options:
          - unit
          - selftests
          - aot
          - integration
          - all
      shards:
        description: "Number of parallel runners (iterations are divided across shards)"
        required: true
        default: "20"
        type: string
      filter:
        description: "Optional --self-test --filter prefix for the aot target (e.g. TTV_). Empty = full suite."
        required: false
        default: ""
        type: string
# Least privilege for the GITHUB_TOKEN: the jobs visible in this file only
# check out the repository, so read access to its contents is sufficient.
permissions:
  contents: read
jobs:
plan:
  # Validates the dispatch inputs and publishes the shard layout read by the
  # matrix jobs: the list of 1-based shard indices, the base number of
  # iterations per shard, and the remainder handed out to the first shards.
  name: Plan shards
  runs-on: ubuntu-latest
  outputs:
    shard-list: ${{ steps.shards.outputs.list }}
    per-shard: ${{ steps.shards.outputs.per-shard }}
    remainder: ${{ steps.shards.outputs.remainder }}
  steps:
    - id: shards
      # Inputs are passed through the environment rather than interpolated
      # into the script text.
      env:
        ITERS: ${{ inputs.iterations }}
        SHARDS: ${{ inputs.shards }}
      run: |
        set -euo pipefail
        # Digits only, capped at 18 so the value always fits in bash's 64-bit
        # arithmetic. The 10# prefix forces base 10: without it a leading
        # zero makes $(( )) read the value as octal ("0100" would plan 64
        # iterations and "08" would abort the step with a syntax error).
        if ! [[ "$ITERS" =~ ^[0-9]{1,18}$ ]] || (( 10#$ITERS < 1 )); then
          echo "iterations must be a positive integer; got '$ITERS'" >&2
          exit 1
        fi
        if ! [[ "$SHARDS" =~ ^[0-9]{1,18}$ ]] || (( 10#$SHARDS < 1 || 10#$SHARDS > 256 )); then
          echo "shards must be 1..256; got '$SHARDS'" >&2
          exit 1
        fi
        # Normalise once so every later use sees a plain decimal number.
        ITERS=$(( 10#$ITERS ))
        SHARDS=$(( 10#$SHARDS ))
        # Never plan more shards than iterations, so every shard gets work.
        if [ "$SHARDS" -gt "$ITERS" ]; then
          SHARDS="$ITERS"
        fi
        per=$(( ITERS / SHARDS ))
        rem=$(( ITERS % SHARDS ))
        # Compact JSON array [1,2,...,SHARDS] for fromJSON() in the matrices.
        list=$(seq 1 "$SHARDS" | jq -R -s -c 'split("\n") | map(select(length>0) | tonumber)')
        {
          echo "list=$list"
          echo "per-shard=$per"
          echo "remainder=$rem"
        } >> "$GITHUB_OUTPUT"
        echo "Plan: $ITERS iterations across $SHARDS shards (~$per each, +1 for first $rem)"
unit-tests:
  # Stress the unit-test project: restore and build once per shard, then run
  # `dotnet test` against the prebuilt output for this shard's iterations.
  name: Unit (shard ${{ matrix.shard }})
  needs: plan
  if: ${{ inputs.target == 'unit' || inputs.target == 'all' }}
  runs-on: windows-latest
  timeout-minutes: 350
  strategy:
    fail-fast: false
    matrix:
      shard: ${{ fromJSON(needs.plan.outputs.shard-list) }}
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
    - name: Setup .NET
      uses: actions/setup-dotnet@9a946fdbd5fb07b82b2f5a4466058b876ab72bb2  # v5.3.0
      with:
        dotnet-version: 10.0.x
    - name: Restore
      run: >-
        dotnet restore tests/Reactor.Tests/Reactor.Tests.csproj
        -p:Platform=x64
    - name: Build (once)
      run: >-
        dotnet build tests/Reactor.Tests/Reactor.Tests.csproj
        --no-restore --configuration Debug -p:Platform=x64
    - name: Stress loop
      shell: pwsh
      env:
        PER_SHARD: ${{ needs.plan.outputs.per-shard }}
        REMAINDER: ${{ needs.plan.outputs.remainder }}
        SHARD_INDEX: ${{ matrix.shard }}
      run: |
        # Every shard runs the base quota; shards whose index is within the
        # remainder run one extra iteration.
        $base  = [int]$env:PER_SHARD
        $extra = [int]$env:REMAINDER
        $idx   = [int]$env:SHARD_INDEX
        $count = $base
        if ($idx -le $extra) { $count = $base + 1 }
        if ($count -lt 1) {
          Write-Host "Shard $idx has no work."
          exit 0
        }

        # A failed iteration is recorded and the loop carries on, so the
        # shard always runs its full quota before reporting.
        $failures = [System.Collections.Generic.List[int]]::new()
        foreach ($i in 1..$count) {
          Write-Host "::group::Unit iteration $i / $count (shard $idx)"
          dotnet test tests/Reactor.Tests/Reactor.Tests.csproj --no-restore --no-build -p:Platform=x64 --logger "console;verbosity=normal"
          $code = $LASTEXITCODE
          Write-Host "::endgroup::"
          if ($code -eq 0) { continue }
          Write-Host "::warning::Unit iteration $i (shard $idx) failed with exit $code"
          [void]$failures.Add($i)
        }

        if ($failures.Count -gt 0) {
          Write-Host "::error::Shard $idx had $($failures.Count) failed iteration(s): $($failures -join ', ')"
          exit 1
        }
        Write-Host "Shard ${idx}: $count iterations passed."
selftests:
  # Stress the self-test project: restore and build once per shard, then run
  # `dotnet test` against the prebuilt output for this shard's iterations.
  name: Selftests (shard ${{ matrix.shard }})
  needs: plan
  if: ${{ inputs.target == 'selftests' || inputs.target == 'all' }}
  runs-on: windows-latest
  timeout-minutes: 350
  strategy:
    fail-fast: false
    matrix:
      shard: ${{ fromJSON(needs.plan.outputs.shard-list) }}
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
    - name: Setup .NET
      uses: actions/setup-dotnet@9a946fdbd5fb07b82b2f5a4466058b876ab72bb2  # v5.3.0
      with:
        dotnet-version: 10.0.x
    - name: Restore
      run: >-
        dotnet restore tests/Reactor.SelfTests/Reactor.SelfTests.csproj
        -p:Platform=x64
    - name: Build (once)
      run: >-
        dotnet build tests/Reactor.SelfTests/Reactor.SelfTests.csproj
        --no-restore --configuration Debug -p:Platform=x64
    - name: Stress loop
      shell: pwsh
      env:
        PER_SHARD: ${{ needs.plan.outputs.per-shard }}
        REMAINDER: ${{ needs.plan.outputs.remainder }}
        SHARD_INDEX: ${{ matrix.shard }}
      run: |
        # Every shard runs the base quota; shards whose index is within the
        # remainder run one extra iteration.
        $base  = [int]$env:PER_SHARD
        $extra = [int]$env:REMAINDER
        $idx   = [int]$env:SHARD_INDEX
        $count = $base
        if ($idx -le $extra) { $count = $base + 1 }
        if ($count -lt 1) {
          Write-Host "Shard $idx has no work."
          exit 0
        }

        # A failed iteration is recorded and the loop carries on, so the
        # shard always runs its full quota before reporting.
        $failures = [System.Collections.Generic.List[int]]::new()
        foreach ($i in 1..$count) {
          Write-Host "::group::Selftest iteration $i / $count (shard $idx)"
          dotnet test tests/Reactor.SelfTests/Reactor.SelfTests.csproj --no-restore --no-build -p:Platform=x64 --logger "console;verbosity=normal"
          $code = $LASTEXITCODE
          Write-Host "::endgroup::"
          if ($code -eq 0) { continue }
          Write-Host "::warning::Selftest iteration $i (shard $idx) failed with exit $code"
          [void]$failures.Add($i)
        }

        if ($failures.Count -gt 0) {
          Write-Host "::error::Shard $idx had $($failures.Count) failed iteration(s): $($failures -join ', ')"
          exit 1
        }
        Write-Host "Shard ${idx}: $count iterations passed."
integration-tests:
  # Stress the integration-test project: restore the solution and build the
  # project once per shard, then run `dotnet test` against the prebuilt
  # output for this shard's iterations.
  name: Integration (shard ${{ matrix.shard }})
  needs: plan
  if: ${{ inputs.target == 'integration' || inputs.target == 'all' }}
  runs-on: windows-latest
  timeout-minutes: 350
  strategy:
    fail-fast: false
    matrix:
      shard: ${{ fromJSON(needs.plan.outputs.shard-list) }}
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
    - name: Setup .NET
      uses: actions/setup-dotnet@9a946fdbd5fb07b82b2f5a4466058b876ab72bb2  # v5.3.0
      with:
        dotnet-version: 10.0.x
    - name: Restore
      run: dotnet restore Reactor.slnx
    - name: Build (once)
      run: >-
        dotnet build tests/Reactor.IntegrationTests/Reactor.IntegrationTests.csproj
        --no-restore --configuration Debug
    - name: Stress loop
      shell: pwsh
      env:
        PER_SHARD: ${{ needs.plan.outputs.per-shard }}
        REMAINDER: ${{ needs.plan.outputs.remainder }}
        SHARD_INDEX: ${{ matrix.shard }}
      run: |
        # Every shard runs the base quota; shards whose index is within the
        # remainder run one extra iteration.
        $base  = [int]$env:PER_SHARD
        $extra = [int]$env:REMAINDER
        $idx   = [int]$env:SHARD_INDEX
        $count = $base
        if ($idx -le $extra) { $count = $base + 1 }
        if ($count -lt 1) {
          Write-Host "Shard $idx has no work."
          exit 0
        }

        # A failed iteration is recorded and the loop carries on, so the
        # shard always runs its full quota before reporting.
        $failures = [System.Collections.Generic.List[int]]::new()
        foreach ($i in 1..$count) {
          Write-Host "::group::Integration iteration $i / $count (shard $idx)"
          dotnet test tests/Reactor.IntegrationTests/Reactor.IntegrationTests.csproj --no-restore --no-build --logger "console;verbosity=normal"
          $code = $LASTEXITCODE
          Write-Host "::endgroup::"
          if ($code -eq 0) { continue }
          Write-Host "::warning::Integration iteration $i (shard $idx) failed with exit $code"
          [void]$failures.Add($i)
        }

        if ($failures.Count -gt 0) {
          Write-Host "::error::Shard $idx had $($failures.Count) failed iteration(s): $($failures -join ', ')"
          exit 1
        }
        Write-Host "Shard ${idx}: $count iterations passed."
aot-selftests:
  # Stress the NativeAOT self-test host: publish it once per shard, then run
  # the executable for this shard's iterations, capturing each run's output
  # in a per-iteration TAP file that is uploaded when the shard fails.
  name: AOT Selftests (shard ${{ matrix.shard }})
  needs: plan
  if: ${{ inputs.target == 'aot' || inputs.target == 'all' }}
  runs-on: windows-latest
  timeout-minutes: 350
  strategy:
    fail-fast: false
    matrix:
      shard: ${{ fromJSON(needs.plan.outputs.shard-list) }}
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
    - name: Setup .NET
      uses: actions/setup-dotnet@9a946fdbd5fb07b82b2f5a4466058b876ab72bb2  # v5.3.0
      with:
        dotnet-version: 10.0.x
    # Publish the NativeAOT selftest host once per shard (same knobs as ci.yml).
    - name: Publish AOT host (once)
      run: >
        dotnet publish tests/Reactor.AppTests.Host
        -p:PublishAotInternal=true
        -p:Platform=x64
        -r win-x64
        -c Release
        -o ${{ runner.temp }}/aot-publish
        --nologo
    - name: Stress loop
      shell: pwsh
      env:
        PER_SHARD: ${{ needs.plan.outputs.per-shard }}
        REMAINDER: ${{ needs.plan.outputs.remainder }}
        SHARD_INDEX: ${{ matrix.shard }}
        FILTER: ${{ inputs.filter }}
      run: |
        $exe = Join-Path '${{ runner.temp }}/aot-publish' 'Reactor.AppTests.Host.exe'
        if (-not (Test-Path $exe)) { Write-Host "::error::AOT host not found at $exe"; exit 1 }
        # Every shard runs the base quota; shards whose index is within the
        # remainder run one extra iteration.
        $per = [int]$env:PER_SHARD
        $rem = [int]$env:REMAINDER
        $idx = [int]$env:SHARD_INDEX
        $count = if ($idx -le $rem) { $per + 1 } else { $per }
        if ($count -lt 1) { Write-Host "Shard $idx has no work."; exit 0 }
        # Deliberately not named $args: that is a PowerShell automatic
        # variable and should not be assigned to.
        $hostArgs = @('--self-test')
        if ($env:FILTER) { $hostArgs += @('--filter', $env:FILTER) }
        $failures = New-Object System.Collections.Generic.List[int]
        for ($i = 1; $i -le $count; $i++) {
          $tap = "aot-$idx-$i.tap"
          Write-Host "::group::AOT selftest iteration $i / $count (shard $idx)"
          & $exe @hostArgs 2>&1 | Tee-Object -FilePath $tap
          $code = $LASTEXITCODE
          # A nonzero exit OR any 'not ok' line is a failure for this iteration.
          # The pattern has to be evaluated as a regex: with -SimpleMatch the
          # '^' is searched for literally, so a failing line never matches.
          $notOk = [bool](Select-String -Path $tap -Pattern '^not ok\b' -Quiet)
          Write-Host "::endgroup::"
          if ($code -ne 0 -or $notOk) {
            Write-Host "::warning::AOT iteration $i (shard $idx) failed (exit $code, notOk=$notOk)"
            $failures.Add($i) | Out-Null
          }
        }
        if ($failures.Count -gt 0) {
          Write-Host "::error::Shard $idx had $($failures.Count) failed iteration(s): $($failures -join ', ')"
          exit 1
        }
        Write-Host "Shard ${idx}: $count AOT iterations passed."
    # Runs only when an earlier step failed; the glob picks up the
    # per-iteration files the stress loop wrote into the working directory.
    - name: Upload failing TAP
      if: ${{ failure() }}
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
      with:
        name: aot-stress-tap-shard-${{ matrix.shard }}
        path: aot-*.tap
        if-no-files-found: ignore
summary:
  # Final gate: runs even when upstream jobs fail or are skipped and turns
  # their results into a single pass/fail verdict.
  name: Stress summary
  needs: [plan, unit-tests, selftests, aot-selftests, integration-tests]
  if: ${{ always() }}
  runs-on: ubuntu-latest
  steps:
    - name: Report
      env:
        PLAN_RESULT: ${{ needs.plan.result }}
        UNIT_RESULT: ${{ needs.unit-tests.result }}
        SELFTESTS_RESULT: ${{ needs.selftests.result }}
        AOT_RESULT: ${{ needs.aot-selftests.result }}
        INTEGRATION_RESULT: ${{ needs.integration-tests.result }}
      run: |
        echo "Plan: $PLAN_RESULT"
        echo "Unit shards: $UNIT_RESULT"
        echo "Selftest shards: $SELFTESTS_RESULT"
        echo "AOT selftest shards: $AOT_RESULT"
        echo "Integration shards: $INTEGRATION_RESULT"
        # When planning does not succeed every shard job is skipped. Because
        # 'skipped' counts as OK below, that case must be rejected here or
        # the run would be reported as passing without testing anything.
        if [ "$PLAN_RESULT" != "success" ]; then
          echo "::error::Plan job result was '$PLAN_RESULT'; no stress shards ran."
          exit 1
        fi
        fail=0
        # 'skipped' is OK (target filter); 'success' is OK; anything else is a fail.
        for r in "$UNIT_RESULT" "$SELFTESTS_RESULT" "$AOT_RESULT" "$INTEGRATION_RESULT"; do
          case "$r" in
            success|skipped|"") ;;
            *) fail=1 ;;
          esac
        done
        if [ "$fail" -eq 1 ]; then
          echo "::error::One or more stress shards failed. See per-shard logs."
          exit 1
        fi
        echo "All shards passed."