diff --git a/.github/workflows/benchmark_cargo_cmp.yml b/.github/workflows/benchmark_cargo_cmp.yml index 5bbe2ad422..ba532209f0 100644 --- a/.github/workflows/benchmark_cargo_cmp.yml +++ b/.github/workflows/benchmark_cargo_cmp.yml @@ -87,7 +87,6 @@ jobs: run: "./scripts/bin/infra perf cargo ${{ github.event_name == 'pull_request' && '--pr-benchmark' || (inputs.dryRun == true && '--dry-run' || '') }} comparison" env: BENCHER_API_TOKEN: "${{ secrets.BENCHER_API_TOKEN }}" - BENCHER_PR_START_POINT_HASH: "${{ github.event.pull_request.base.sha }}" BENCHER_PR_HEAD_HASH: "${{ github.event.pull_request.head.sha }}" GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/benchmark_cargo_slang.yml b/.github/workflows/benchmark_cargo_slang.yml index 9282f54ddc..dfd950d893 100644 --- a/.github/workflows/benchmark_cargo_slang.yml +++ b/.github/workflows/benchmark_cargo_slang.yml @@ -87,7 +87,6 @@ jobs: run: "./scripts/bin/infra perf cargo ${{ github.event_name == 'pull_request' && '--pr-benchmark' || (inputs.dryRun == true && '--dry-run' || '') }} slang" env: BENCHER_API_TOKEN: "${{ secrets.BENCHER_API_TOKEN }}" - BENCHER_PR_START_POINT_HASH: "${{ github.event.pull_request.base.sha }}" BENCHER_PR_HEAD_HASH: "${{ github.event.pull_request.head.sha }}" GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/benchmark_cargo_slang_v2.yml b/.github/workflows/benchmark_cargo_slang_v2.yml index a5d1974b37..e5889de1b5 100644 --- a/.github/workflows/benchmark_cargo_slang_v2.yml +++ b/.github/workflows/benchmark_cargo_slang_v2.yml @@ -20,13 +20,12 @@ on: branches: - "main" - # Run on pull requests (smoke test always, full benchmark with 'ci:perf' label): + # Run on every pull request (full benchmark, non-blocking): pull_request: types: - "opened" - "synchronize" - "reopened" - - "labeled" permissions: {} @@ -78,21 +77,14 @@ jobs: echo "SLANG_BENCHER_PROJECT=${{ inputs.bencherProject }}" >> $GITHUB_ENV if: "${{ inputs.bencherProject }}" - - name: "Smoke Test > infra perf cargo --smoke slang-v2" - if: "${{ github.event_name == 'pull_request' && !contains(github.event.pull_request.labels.*.name, 'ci:perf') }}" - run: "./scripts/bin/infra perf cargo --smoke slang-v2" - - name: "Benchmark > infra perf cargo slang-v2" - if: "${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci:perf') }}" run: "./scripts/bin/infra perf cargo ${{ github.event_name == 'pull_request' && '--pr-benchmark' || (inputs.dryRun == true && '--dry-run' || '') }} slang-v2" env: BENCHER_API_TOKEN: "${{ secrets.BENCHER_API_TOKEN }}" - BENCHER_PR_START_POINT_HASH: "${{ github.event.pull_request.base.sha }}" BENCHER_PR_HEAD_HASH: "${{ github.event.pull_request.head.sha }}" GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" - name: "Benchmark > Upload Benchmarking Data" - if: "${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci:perf') }}" uses: "actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a" # v7.0.1 with: name: "benchmarking-data" diff --git a/.github/workflows/benchmark_npm.yml b/.github/workflows/benchmark_npm.yml index d93f3ab4ab..9293d0e5f6 100644 --- a/.github/workflows/benchmark_npm.yml +++ b/.github/workflows/benchmark_npm.yml @@ -89,6 +89,5 @@ jobs: run: "./scripts/bin/infra perf npm ${{ github.event_name == 'pull_request' && '--pr-benchmark' || (inputs.dryRun == true && '--dry-run' || '') }}" env: BENCHER_API_TOKEN: "${{ secrets.BENCHER_API_TOKEN }}" - BENCHER_PR_START_POINT_HASH: "${{ github.event.pull_request.base.sha }}" BENCHER_PR_HEAD_HASH: "${{ github.event.pull_request.head.sha }}" GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" diff --git a/crates/infra/cli/src/commands/perf/cargo/mod.rs b/crates/infra/cli/src/commands/perf/cargo/mod.rs index 5bc69c7cf3..0631701911 100644 --- a/crates/infra/cli/src/commands/perf/cargo/mod.rs +++ b/crates/infra/cli/src/commands/perf/cargo/mod.rs @@ -151,8 +151,8 @@ impl CargoController { // 1% threshold: iai-callgrind uses deterministic hardware counters (not wall clock), // so any change reflects a real code change, not noise. - // We also keep the window small (only 2 measurements), for the same reason. - let threshold = |measure| BencherThreshold::new(measure, "0.01").with_max_sample_size("2"); + // We also keep the window small (only 1 measurement), for the same reason. + let threshold = |measure| BencherThreshold::new(measure, "0.01").with_max_sample_size("1"); // We don't add thresholds for l1-hits, l2-hits, and ram-hits, since there's not a simple // rule that could catch all cases (ie more l1-hits is better if total bytes read remains the same, diff --git a/crates/infra/cli/src/toolchains/bencher/mod.rs b/crates/infra/cli/src/toolchains/bencher/mod.rs index 215be54f40..aa715ea212 100644 --- a/crates/infra/cli/src/toolchains/bencher/mod.rs +++ b/crates/infra/cli/src/toolchains/bencher/mod.rs @@ -80,15 +80,12 @@ pub(crate) fn run_bench( .expect("GITHUB_HEAD_REF must be set for --pr-benchmark (are you running in a PR?)"); let base_ref = std::env::var("GITHUB_BASE_REF") .expect("GITHUB_BASE_REF must be set for --pr-benchmark (are you running in a PR?)"); - let start_point_hash = std::env::var("BENCHER_PR_START_POINT_HASH") - .expect("BENCHER_PR_START_POINT_HASH must be set for --pr-benchmark"); let head_hash = std::env::var("BENCHER_PR_HEAD_HASH") .expect("BENCHER_PR_HEAD_HASH must be set for --pr-benchmark"); command = command .property("--branch", &head_ref) .property("--start-point", &base_ref) - .property("--start-point-hash", &start_point_hash) .flag("--start-point-reset") .property("--hash", &head_hash);