Skip to content

feat(transport/cc): spurious congestion event detection #1172

feat(transport/cc): spurious congestion event detection

feat(transport/cc): spurious congestion event detection #1172

Workflow file for this run

# Benchmarks neqo against other QUIC implementations and against its own `main` branch.
name: Performance comparison

on:
  # Callable from other workflows; no inputs are declared for this trigger.
  workflow_call:
  pull_request:
    branches:
      - main
  merge_group:
  # Manual runs let the operator pick the testbed.
  workflow_dispatch:
    inputs:
      bencher:
        description: Which benchmarking testbed to run on.
        type: choice
        required: true
        default: on-prem
        options:
          - on-prem
          - gcp
env:
  # Absolute checkout root; the scripts use it to build paths after changing directory.
  WORKSPACE: ${{ github.workspace }}
  RUSTUP_TOOLCHAIN: stable
  # Cargo profile settings passed via the environment: keep debug info in bench
  # build-overrides and in release builds (presumably so profiles can be symbolicated).
  CARGO_PROFILE_BENCH_BUILD_OVERRIDE_DEBUG: "true"
  CARGO_PROFILE_RELEASE_DEBUG: "true"
  # C/C++ code keeps frame pointers, matching the `--call-graph fp` unwinding in PERF_OPT.
  CFLAGS: -fno-omit-frame-pointer
  CXXFLAGS: -fno-omit-frame-pointer
  # Arguments handed to `perf` by the comparison step.
  PERF_OPT: record -F2999 --call-graph fp -g
  SCCACHE_CACHE_SIZE: 128G
  SCCACHE_DIRECT: "true"
  # MTU applied to the loopback interface during the comparison.
  MTU: "1504" # https://github.com/microsoft/msquic/issues/4618
# One active run per pull request (or per ref for other events); a newer run
# cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

# The token only gets read access to repository contents.
permissions:
  contents: read
jobs:
  perfcompare:
    name: Performance comparison
    # `bencher == 'gcp'` selects a runner label derived from this run's id;
    # every other case (including triggers without a `bencher` input) falls
    # back to the `self-hosted` label.
    runs-on: ${{ inputs.bencher == 'gcp' && format('cirun-gcp-bencher--{0}', github.run_id) || 'self-hosted' }}
    defaults:
      run:
        # All `run` steps in this job execute under bash.
        shell: bash
    steps:
- name: Checkout mozilla/neqo
  uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
  with:
    repository: mozilla/neqo
    path: neqo
    submodules: recursive
    # Do not wipe the existing working tree before fetching.
    clean: false
    persist-credentials: false
# Make `origin/main` available locally; later steps build it and read files from it.
- run: git -C neqo fetch --no-tags --depth=1 origin main
# The four implementations neqo is compared against, each pinned to a fixed ref
# and checked out into its own directory without cleaning the existing tree.
- name: Checkout microsoft/msquic
  uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
  with:
    repository: microsoft/msquic
    ref: v2.5.3
    path: msquic
    submodules: recursive
    clean: false
    persist-credentials: false
- name: Checkout google/quiche
  uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
  with:
    repository: google/quiche
    # Google doesn't tag releases; just update the hash occasionally.
    ref: 1c35d630893fc2adacfffeb91bcb0561240cf5f1
    path: google-quiche
    submodules: recursive
    clean: false
    persist-credentials: false
- name: Checkout cloudflare/quiche
  uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
  with:
    repository: cloudflare/quiche
    ref: "0.24.5"
    path: quiche
    submodules: recursive
    clean: false
    persist-credentials: false
- name: Checkout aws/s2n-quic
  uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
  with:
    repository: aws/s2n-quic
    ref: v1.63.0
    path: s2n-quic
    submodules: recursive
    clean: false
    persist-credentials: false
- name: Set PATH and environment
  run: |
    # Put cargo's bin directory on PATH for all subsequent steps.
    printf '%s\n' "$HOME/.cargo/bin" >> "${GITHUB_PATH}"
# Uses the action bundled in the neqo checkout to set up the toolchain and the
# benchmarking/profiling tools used by later steps.
- name: Install Rust
  uses: ./neqo/.github/actions/rust
  with:
    token: ${{ secrets.GITHUB_TOKEN }}
    version: ${{ env.RUSTUP_TOOLCHAIN }}
    tools: hyperfine, flamegraph, samply
    # The Rust projects built by this job.
    workspaces: |
      neqo
      quiche
      s2n-quic
# Determines the NSS version to install: the higher of the minimum versions
# required by the checked-out branch and by `origin/main`, because both are
# built by this job. Exposed as the step output `minimum`.
- name: Get minimum NSS version
  id: nss-version
  run: |
    cd neqo
    cat neqo-crypto/min_version.txt > versions.txt
    git show origin/main:neqo-crypto/min_version.txt >> versions.txt
    # Use the maximum version from both branches. Version sort (-V) compares
    # the numeric components, so 3.100 ranks above 3.98; a plain lexical sort
    # would pick 3.98 as the "maximum".
    echo "minimum=$(sort -V -u versions.txt | tail -n1)" >> "$GITHUB_OUTPUT"
# Installs NSS through the action bundled in the neqo checkout, using the
# version computed by the `nss-version` step as the lower bound.
- name: Install NSS
  id: nss
  uses: ./neqo/.github/actions/nss
  with:
    minimum-version: ${{ steps.nss-version.outputs.minimum }}
# Builds neqo twice: `origin/main` into binaries/neqo-main (the baseline) and
# the originally checked-out commit into binaries/neqo.
- name: Build neqo
  run: |
    # See https://github.com/flamegraph-rs/flamegraph for why we append to RUSTFLAGS here.
    export RUSTFLAGS="-C linker=clang -C link-arg=-fuse-ld=lld -C link-arg=-Wl,--no-rosegment -C force-frame-pointers=yes $RUSTFLAGS"
    # Persist the flags so the later Rust builds in this job use them too.
    echo "RUSTFLAGS=$RUSTFLAGS" >> "$GITHUB_ENV"
    mkdir -p binaries/neqo-main binaries/neqo
    cd neqo
    # Build client and server from the current checkout and copy both into "$1".
    build_into() {
      cargo build --locked --release --features bench --bin neqo-client --bin neqo-server
      cp target/release/neqo-client target/release/neqo-server "$1"
    }
    git checkout origin/main
    build_into ../binaries/neqo-main/
    # Return to the commit that was checked out before the baseline build.
    git checkout -
    build_into ../binaries/neqo/
- name: Build msquic
  run: |
    mkdir -p msquic/build
    cd msquic/build
    # This seems to be needed now to avoid a spurious warning about "QUIC_TLS"...
    rm -f CMakeCache.txt
    # Configure with Ninja, including the tools and perf targets, then build.
    cmake -GNinja \
      -DCMAKE_BUILD_TYPE=RelWithDebInfo \
      -DQUIC_BUILD_TOOLS=1 \
      -DQUIC_BUILD_PERF=1 \
      ..
    cmake --build .
- name: Build google/quiche
  run: |
    cd google-quiche
    # Optimized build that keeps frame pointers and debug symbols; the sccache
    # directory is made writable from inside the bazel sandbox.
    bazel-7.0.0 build -c opt \
      --copt=-fno-omit-frame-pointer --copt=-g --strip=never \
      --sandbox_writable_path="$HOME/.cache/sccache" \
      quiche:quic_server quiche:quic_client
    bazel-7.0.0 shutdown
# Both builds rely on the RUSTFLAGS that the "Build neqo" step exported through GITHUB_ENV.
- name: Build cloudflare/quiche
  run: |
    # --locked not working
    (cd quiche && cargo build --release --bin quiche-client --bin quiche-server)
- name: Build aws/s2n-quic
  run: |
    # --locked not working
    (cd s2n-quic && cargo build --release --bin s2n-quic-qns)
# Puts the bencher into a quiet, reproducible state: turboboost and
# hyperthreading off, performance governor on. The script also creates the
# "cpu23", "cpu2" and "cpu3" CPU sets for use with cset; on the bencher,
# logical cores 2 and 3 have been isolated for use by the benchmarks.
- name: Prepare machine
  run: |
    sudo /root/bin/prep.sh
# Compare various configurations of neqo against the other QUIC implementations,
# and gather perf data alongside the hyperfine runs.
- name: Compare QUIC implementations
  env:
    HOST: 127.0.0.1
    PORT: 4433
    SIZE: 33554432 # 32 MiB
    RUNS: 100
    NSS_DB_PATH: ${{ github.workspace }}/neqo/test-fixture/db
  run: |
    # Scratch directory: served files, cert/key, and (in out/) downloaded files.
    # Its path is exported so the final cleanup step can remove it.
    TMP=$(mktemp -d)
    echo "COMPARE_TMP=$TMP" >> "$GITHUB_ENV"
    mkdir -p "$TMP/out" binaries hyperfine hyperfine-main
    touch results.txt
    # Make a cert and key for msquic and google.
    openssl req -nodes -new -x509 -keyout "$TMP/key" -out "$TMP/cert" -subj "/CN=DOMAIN" 2>/dev/null
    # Make test files to serve: one of $SIZE bytes for the timed runs and one
    # twenty times larger for the perf runs. Each file is named after its size.
    BIGSIZE=$((SIZE * 20))
    truncate -s "$SIZE" "$TMP/$SIZE"
    truncate -s "$BIGSIZE" "$TMP/$BIGSIZE"
    # Command templates per implementation. The _cc, _pacing, _flags and _disk
    # placeholders are substituted later by transmogrify.
    declare -A client_cmd=(
      [neqo]="binaries/neqo/neqo-client _cc _pacing _disk _flags -Q 1 https://$HOST:$PORT/$SIZE"
      [msquic]="msquic/build/bin/Release/quicinterop -test:D -custom:$HOST -port:$PORT -urls:https://$HOST:$PORT/$SIZE"
      [google]="google-quiche/bazel-bin/quiche/quic_client --disable_certificate_verification https://$HOST:$PORT/$SIZE"
      [quiche]="quiche/target/release/quiche-client _disk --no-verify https://$HOST:$PORT/$SIZE"
      [s2n]="s2n-quic/target/release/s2n-quic-qns interop client --tls rustls --disable-cert-verification _disk --local-ip $HOST https://$HOST:$PORT/$SIZE"
    )
    declare -A server_cmd=(
      [neqo]="binaries/neqo/neqo-server _cc _pacing _flags -Q 1 $HOST:$PORT"
      [msquic]="msquic/build/bin/Release/quicinteropserver -root:$TMP -listen:$HOST -port:$PORT -file:$TMP/cert -key:$TMP/key -noexit"
      [google]="google-quiche/bazel-bin/quiche/quic_server --generate_dynamic_responses --port $PORT --certificate_file $TMP/cert --key_file $TMP/key"
      [quiche]="quiche/target/release/quiche-server --root $TMP --listen $HOST:$PORT --cert $TMP/cert --key $TMP/key"
      [s2n]="s2n-quic/target/release/s2n-quic-qns interop server --www-dir $TMP --certificate $TMP/cert --private-key $TMP/key --ip $HOST --port $PORT"
    )
    # Per-client flags (substituted for _disk) that make the client write the
    # retrieved file to disk.
    declare -A disk_flags=(
      [neqo]="--output-dir ."
      [msquic]="" # msquic always dumps to disk :-(
      [google]="" # needs special handling below
      [quiche]="--dump-responses ."
      [s2n]="--download-dir ."
    )
    # Extra flags neqo needs when its peer is the given implementation.
    declare -A neqo_flags=(
      [neqo]=""
      [msquic]="-a hq-interop"
      [google]=""
      [quiche]=""
      [s2n]="-a hq-interop"
    )
# Replace the placeholders in a command template with actual values, and
# generate an extension to append to the result file name.
#
# Arguments:
#   $1  command template containing any of _cc, _pacing, _flags, _disk
#   $2  congestion controller name, or "" to drop the _cc placeholder
#   $3  "on" to keep pacing enabled; anything else inserts --no-pacing
#   $4  text substituted for _flags
#   $5  text substituted for _disk
# Results (globals read by the caller):
#   CMD  the expanded command line
#   EXT  file-name suffix: "-<cc>" if a controller was given, followed by
#        "-nopacing" if pacing is not "on"
function transmogrify {
  CMD=$1
  local cc=$2
  local pacing=$3
  local flags=$4
  local disk=$5
  # EXT is global, so rebuild it from scratch on every call. Otherwise a call
  # with an empty $cc would inherit (and keep extending) the previous suffix.
  EXT=""
  if [[ "$cc" != "" ]]; then
    CMD=${CMD//_cc/--cc $cc}
    EXT="-$cc"
  else
    CMD=${CMD//_cc/}
  fi
  if [[ "$pacing" == "on" ]]; then
    CMD=${CMD//_pacing/}
  else
    CMD=${CMD//_pacing/--no-pacing}
    EXT="$EXT-nopacing"
  fi
  CMD=${CMD//_flags/$flags}
  CMD=${CMD//_disk/$disk}
}
# Write a small R script that runs a two-sided Welch's t-test on two files of
# timings (baseline first, PR result second). It exits with status 1 when the
# difference is significant at the 5% level and 0 otherwise; the report step
# uses that to highlight significant changes.
cat > welch.R <<'EOF'
argv <- commandArgs(trailingOnly = TRUE)
baseline_times <- scan(argv[1], what = numeric())
result_times <- scan(argv[2], what = numeric())
welch <- t.test(baseline_times, result_times, alternative = "two.sided")
significance_level <- 0.05
quit(status = as.integer(welch$p.value < significance_level))
EOF
# Main comparison loop. For every client/server pair that is run, and every
# congestion-control/pacing combination chosen for it, this:
#   1. does one transfer to disk and checks the downloaded size,
#   2. times the transfer with hyperfine (also against the `main` build when
#      neqo is involved),
#   3. records perf profiles of a longer transfer, and
#   4. appends one Markdown table row to steps.md.
#
# See https://github.com/microsoft/msquic/issues/4618#issuecomment-2422611592
sudo ip link set dev lo mtu "$MTU"
for server in "${!server_cmd[@]}"; do
  cp "$(echo "${server_cmd[$server]}" | cut -f1 -d' ')" binaries/
  for client in "${!client_cmd[@]}"; do
    # Only run an implementation against itself or against neqo. Do not run
    # msquic and s2n-quic against google-quiche and quiche; the latter only support H3.
    if [[ "$client" != "$server" && "$client" != "neqo" && "$server" != "neqo" ]]; then
      echo "Skipping $client vs. $server"
      continue
    fi
    echo "*** Running $client vs. $server"
    cp "$(echo "${client_cmd[$client]}" | cut -f1 -d' ')" binaries/
    # google-quiche binaries don't have write permissions by default?!
    chmod u+w binaries/*
    # google and msquic don't let us configure the congestion control or pacing.
    # TODO: Support quiche and s2n-quic here.
    if [[ "$client" != "neqo" && "$server" == "neqo" || "$client" == "neqo" && "$server" != "neqo" ]]; then
      cc_opt=("cubic")
      pacing_opt=("on")
    elif [[ "$client" == "neqo" && "$server" == "neqo" ]]; then
      cc_opt=("reno" "cubic")
      pacing_opt=("on" "")
    else
      cc_opt=("")
      pacing_opt=("")
    fi
    for cc in "${cc_opt[@]}"; do
      for pacing in "${pacing_opt[@]}"; do
        echo "*** Running size verification test for $client vs. $server"
        transmogrify "${server_cmd[$server]}" "$cc" "$pacing" "${neqo_flags[$client]}" ""
        FILENAME="$client-$server$EXT"
        SERVER_CMD="$CMD"
        # pkill only matches on the first 15 characters of the command?!
        SERVER_TAG="$(basename "$(echo "${server_cmd[$server]}" | cut -f1 -d' ')" | cut -c1-15)"
        transmogrify "${client_cmd[$client]}" "$cc" "$pacing" "${neqo_flags[$server]}" "${disk_flags[$client]}"
        cd "$TMP/out"
        # Do a single test run where we check the size of the file that is downloaded.
        # This is to ensure that the server actually served the file, and that the client
        # downloaded it. We're not doing this during hyperfine or perf runs, because the
        # writing-to-disk incurs overheads we're not interested in.
        # shellcheck disable=SC2086
        $WORKSPACE/$SERVER_CMD &
        sleep 0.2
        if [ "$client" == "google" ]; then
          # The google client has no disk flag; capture its output as the "file".
          # shellcheck disable=SC2086
          $WORKSPACE/$CMD > "$SIZE" 2>&1
        else
          # shellcheck disable=SC2086
          $WORKSPACE/$CMD > /dev/null 2>&1
        fi
        pkill "$SERVER_TAG"
        pgrep "$SERVER_TAG" || true
        # Sanity check the size of the last retrieved file.
        # google/quiche outputs the HTTP header, too, so we can't just check for -eq.
        FILE_SIZE=$(wc -c <"$TMP/out/$SIZE")
        if [ "$FILE_SIZE" -lt "$SIZE" ]; then
          echo "Error: File size mismatch for $client vs. $server, $SIZE expected, got $FILE_SIZE"
          exit 1
        fi
        # Don't write to disk following this.
        transmogrify "${client_cmd[$client]}" "$cc" "$pacing" "${neqo_flags[$server]}" ""
        # Make a tag string for this test.
        COMMAND="$client vs. $server${cc:+ (}$cc${pacing:+, paced}${cc:+)}"
        if [[ "$client" == "neqo" || "$server" == "neqo" ]]; then
          # Baseline: the same commands with the first "neqo" path component
          # rewritten to "neqo-main", i.e. the binaries built from `main`.
          echo "*** Running main-branch hyperfine tests for $client vs. $server"
          nice -n -20 setarch --addr-no-randomize hyperfine \
            --command-name "$COMMAND" --time-unit millisecond \
            --export-json "$WORKSPACE/hyperfine-main/$FILENAME.json" \
            --output null --warmup 5 --min-runs "$RUNS" \
            --prepare "$WORKSPACE/${SERVER_CMD/neqo/neqo-main} & echo \$! >> /cpusets/cpu2/tasks; sleep 0.2" \
            --conclude "pkill $SERVER_TAG" \
            "echo \$\$ >> /cpusets/cpu3/tasks; $WORKSPACE/${CMD/neqo/neqo-main}"
        fi
        # Now do the actual hyperfine runs. The server is started before each
        # run and added to the cpu2 set; the client adds itself to cpu3.
        echo "*** Running PR-branch hyperfine tests for $client vs. $server"
        nice -n -20 setarch --addr-no-randomize hyperfine \
          --command-name "$COMMAND" --time-unit millisecond \
          --export-json "$WORKSPACE/hyperfine/$FILENAME.json" \
          --export-markdown "$WORKSPACE/hyperfine/$FILENAME.md" \
          --output null --warmup 5 --min-runs "$RUNS" \
          --prepare "$WORKSPACE/$SERVER_CMD & echo \$! >> /cpusets/cpu2/tasks; sleep 0.2" \
          --conclude "pkill $SERVER_TAG" \
          "echo \$\$ >> /cpusets/cpu3/tasks; $WORKSPACE/$CMD"
        # Do a longer run with perf separately. We used to just wrap the hyperfine command above in perf,
        # but that uses different processes for the individual runs, and there is apparently no way to merge
        # the perf profiles of those different runs.
        echo "*** Running PR-branch perf tests for $client vs. $server"
        pgrep "$SERVER_TAG" || true
        # shellcheck disable=SC2086
        nice -n -20 setarch --addr-no-randomize cset proc --set=cpu2 --exec \
          perf -- $PERF_OPT -o "$WORKSPACE/$FILENAME.server.perf" $WORKSPACE/$SERVER_CMD &
        sleep 0.2
        # Request the larger test file for the perf run.
        CMD=${CMD//$SIZE/$BIGSIZE}
        # shellcheck disable=SC2086
        nice -n -20 setarch --addr-no-randomize cset proc --set=cpu3 --exec \
          perf -- $PERF_OPT -o "$WORKSPACE/$FILENAME.client.perf" $WORKSPACE/$CMD > /dev/null 2>&1
        pkill "$SERVER_TAG"
        cd "$WORKSPACE"
        echo "*** Preparing results report for $client vs. $server"
        # Bold the row label for neqo-vs-neqo with cubic and pacing, and for
        # every other implementation run against itself.
        if [[ "$client" == "neqo" && "$server" == "neqo" && "$cc" == "cubic" && "$pacing" == "on" ||
              "$client" == "$server" && "$client" != "neqo" && "$server" != "neqo" ]]; then
          BOLD="**"
        else
          BOLD=""
        fi
        # Take the data row of hyperfine's Markdown table, strip backticks,
        # apply the bold marker to the first cell and keep the first columns.
        grep -Ev '^\|(:| Command)' < "hyperfine/$FILENAME.md" | \
          sed -E -e 's/`//g;' -e "s/(^\| )([^\|]*) \|/\1$BOLD\2$BOLD \|/" | \
          cut -f1-5 -d\| | tr -d '\n' >> steps.md
        # Compute throughput in MiB/s.
        RESULT="hyperfine/$FILENAME.json"
        MEAN=$(jq -r '.results[0].mean' "$RESULT")
        MIBS=$(bc -l <<< "($SIZE / 1048576) / $MEAN")
        # Standard deviation of the throughput by first-order error propagation:
        # for T = S / t, sigma_T ~= S * sigma_t / mean_t^2. Both JSON values are
        # in seconds, so the units match. Dividing S by sigma_t instead would
        # report a larger uncertainty the less the timings vary.
        MIBS_RANGE=$(jq -r --argjson size "$SIZE" \
          '.results[0] | ($size / 1048576) * (.stddev // 0) / (.mean * .mean)' "$RESULT")
        printf "| %.1f ± %.1f " "$MIBS" "$MIBS_RANGE" >> steps.md
        BASELINE="hyperfine-main/$FILENAME.json"
        # Figure out if any performance difference to `main` is statistically relevant, and indicate that.
        if [ -e "$BASELINE" ]; then
          BASELINE_MEAN=$(jq -r '.results[0].mean' "$BASELINE")
          # Even though we tell hyperfine to use milliseconds, it still outputs in seconds when dumping to JSON.
          DELTA=$(bc -l <<< "($MEAN - $BASELINE_MEAN) * 1000")
          PERCENT=$(bc -l <<< "(($MEAN - $BASELINE_MEAN) / $BASELINE_MEAN) * 100")
          # If a performance change is statistically significant, highlight it.
          jq -r '.results[0].times[]' "$BASELINE" > baseline.txt
          jq -r '.results[0].times[]' "$RESULT" > result.txt
          # welch.R exits non-zero when the difference is significant. Any
          # other Rscript failure is therefore also treated as significant.
          if ! Rscript welch.R baseline.txt result.txt 2> /dev/null; then
            if (( $(bc -l <<< "$DELTA > 0") )); then
              echo "Performance has regressed: $BASELINE_MEAN -> $MEAN"
              SYMBOL=":broken_heart:"
              FORMAT='**'
            else
              echo "Performance has improved: $BASELINE_MEAN -> $MEAN"
              SYMBOL=":green_heart:"
              FORMAT='**'
            fi
          else
            echo "No statistically significant change: $BASELINE_MEAN -> $MEAN"
            SYMBOL=""
            FORMAT=""
          fi
          rm result.txt baseline.txt
          printf "| %s %s%.1f%s | %s%.1f%%%s |\n" "$SYMBOL" "$FORMAT" "$DELTA" "$FORMAT" "$FORMAT" "$PERCENT" "$FORMAT" >> steps.md
        elif [ "$client" == "neqo" ] || [ "$server" == "neqo" ]; then
          echo No cached baseline from main found.
          echo '| :question: | :question: |' >> steps.md
        else
          echo '| | |' >> steps.md
        fi
      done
    done
  done
done
# Assemble the final table in comparison.md: a caption, the header rows, and
# the collected rows from steps.md in sorted order. Leading bold markers are
# removed before sorting so they don't affect the order, and put back after.
{
  printf '%s\n\n' "Transfer of $SIZE bytes over loopback, min. $RUNS runs. All unit-less numbers are in milliseconds."
  # shellcheck disable=SC2016
  printf '%s\n' \
    '| Client vs. server (params) | Mean ± σ | Min | Max | MiB/s ± σ | Δ `main` | Δ `main` |' \
    '|:---|---:|---:|---:|---:|---:|---:|'
  sed -E -e 's/^\| \*\*/\| /' steps.md | sort | sed -E -e 's/^\| ([^\|]+\*\* \|)/\| \*\*\1/'
} > comparison.md
# The scratch directory is no longer needed.
rm -r "$TMP"
# Undo "Prepare machine": re-enable turboboost and hyperthreading, switch back
# to the powersave governor and remove all CPU sets. Runs whether the earlier
# steps succeeded, failed or were cancelled.
- name: Restore machine
  if: ${{ success() || failure() || cancelled() }}
  run: |
    sudo /root/bin/unprep.sh
    # In case the previous test failed:
    sudo ip link set dev lo mtu 65536
# Turns every recorded perf profile in the workspace into two derived files.
- name: Post-process perf data
  run: |
    for profile in *.perf; do
      # Convert for profiler.firefox.com
      samply import "$profile" -o "$profile.samply.json.gz" --save-only --presymbolicate
      # Generate flamegraphs
      flamegraph --perfdata "$profile" --palette rust -o "${profile//.perf/.svg}"
    done
# Wraps the comparison table in a short report, publishes it as the step
# summary, and records the baseline SHA, the testbed name and the triggering
# event payload as files in the workspace.
- name: Format results as Markdown
  id: results
  env:
    TESTBED: ${{ inputs.bencher == 'gcp' && 'GCP' || 'On-prem' }}
    EVENT_PATH: ${{ github.event_path }}
  run: |
    # The commit on `origin/main` that the results are compared against.
    SHA=$(git -C neqo log origin/main -1 --format=%H)
    echo "$SHA" > main-sha.txt
    {
      printf '%s\n\n' "### Client/server transfer results"
      printf '%s\n\n' "Performance differences relative to $SHA."
      cat comparison.md
    } >> results.md
    cat results.md > "$GITHUB_STEP_SUMMARY"
    echo "$TESTBED" > testbed.txt
    cp "$EVENT_PATH" event.json
# Two artifacts, both named after the repository and the PR head SHA: the
# converted profiles together with the binaries they were recorded from, and
# the flamegraphs, reports and raw hyperfine results.
- name: Export profiler.firefox.com data
  id: export_samply
  uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
  with:
    name: ${{ github.event.repository.name }}-${{ github.event.pull_request.head.sha }}-perfcompare-samply
    compression-level: 9
    path: |
      *.samply.json.gz
      binaries
- name: Export performance data
  id: export_perf
  uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
  with:
    name: ${{ github.event.repository.name }}-${{ github.event.pull_request.head.sha }}-perfcompare-perf
    compression-level: 9
    path: |
      *.svg
      *.txt
      *.md
      event.json
      results.*
      results-main.*
      hyperfine
      hyperfine-main
# Hands the report to the action bundled in the neqo checkout, together with a
# line of Markdown linking to the two uploaded artifacts.
- name: Export PR comment data
  uses: ./neqo/.github/actions/pr-comment-data-export
  with:
    name: ${{ github.workflow }}
    contents: results.md
    log-md: ${{ format('[Download data for `profiler.firefox.com`]({0}) or [download performance comparison data]({1}).', steps.export_samply.outputs.artifact-url, steps.export_perf.outputs.artifact-url) }}
# Fails the job when the comparison found a statistically significant
# regression relative to `main`.
- name: Fail on regression
  # Don't check for regressions when running on main.
  if: github.ref != 'refs/heads/main' && github.event.merge_group.base_ref != 'refs/heads/main'
  run: |
    # The comparison step only creates results.txt empty, so checking that
    # file alone can never detect anything. It marks significant regressions
    # in the results table with ":broken_heart:", so look for that marker in
    # comparison.md as well. The results.txt check is kept in case something
    # else writes regression messages there.
    if grep -q "Performance has regressed." results.txt ||
       grep -qsF ":broken_heart:" comparison.md; then
      echo "Performance regression detected."
      exit 1
    else
      echo "No performance regression detected."
    fi
# Best-effort cleanup that always runs. Failures are ignored, so missing files
# or directories do not fail the job.
- name: Remove benchmark artifacts
  if: ${{ always() }}
  run: |
    # Plain files in the workspace root (no -r, so directories are left alone here).
    rm -- * || true
    # Generated directories, plus the scratch directory exported by the comparison step.
    rm -r -- binaries comment-data hyperfine hyperfine-main "$COMPARE_TMP" || true