diff --git a/.github/workflows/build-solutions.yml b/.github/workflows/build-solutions.yml index ea41dd7aba1..c0829a4a87f 100644 --- a/.github/workflows/build-solutions.yml +++ b/.github/workflows/build-solutions.yml @@ -73,5 +73,11 @@ jobs: dotnet restore src/Nethermind/Nethermind.Runner/Nethermind.Runner.csproj --locked-mode dotnet restore src/Nethermind/Nethermind.Runner/Nethermind.Runner.csproj -r ${{ matrix.target.rid }} -p:PublishReadyToRun=true + - name: Decompress PGO callchain profile + run: | + gz="src/Nethermind/Nethermind.Runner/pgo/nethermind.callchain.json.gz" + json="src/Nethermind/Nethermind.Runner/pgo/nethermind.callchain.json" + if [ -f "$gz" ] && [ ! -f "$json" ]; then gzip -d -k "$gz"; fi + - name: Publish Nethermind.Runner for ${{ matrix.target.rid }} run: dotnet publish src/Nethermind/Nethermind.Runner/Nethermind.Runner.csproj -c release -r ${{ matrix.target.rid }} --no-restore --no-self-contained diff --git a/.github/workflows/clean-expb-runner.yml b/.github/workflows/clean-expb-runner.yml new file mode 100644 index 00000000000..94dc1a21b33 --- /dev/null +++ b/.github/workflows/clean-expb-runner.yml @@ -0,0 +1,53 @@ +name: Clean up EXPB runner + +on: + workflow_dispatch: + +permissions: {} + +jobs: + cleanup: + name: Clean up stale EXPB resources + runs-on: [self-hosted, reproducible-benchmarks] + timeout-minutes: 10 + steps: + - name: Force clean all EXPB resources + shell: bash + run: | + set -euo pipefail + echo "=== Disk usage before cleanup ===" + df -h / /mnt/sda 2>/dev/null | tail -2 + + echo "=== Killing all EXPB containers ===" + timeout 30 bash -c 'docker ps -aq --filter "name=expb-executor" 2>/dev/null | xargs -r docker rm -f 2>/dev/null' || true + timeout 30 bash -c 'docker ps -aq --filter "ancestor=nethermindeth/nethermind" 2>/dev/null | xargs -r docker rm -f 2>/dev/null' || true + + echo "=== Removing EXPB networks ===" + docker network ls --filter "name=expb-executor" -q 2>/dev/null | xargs -r docker network rm 2>/dev/null || 
true + + echo "=== Removing EXPB volumes ===" + docker volume ls -q --filter "name=expb-executor" 2>/dev/null | xargs -r docker volume rm 2>/dev/null || true + + echo "=== Unmounting stale overlays ===" + mount | grep "expb-data/work" | awk '{print $3}' | xargs -r sudo umount -l 2>/dev/null || true + sudo rm -rf /mnt/sda/expb-data/work/upper /mnt/sda/expb-data/work/work /mnt/sda/expb-data/work/merged 2>/dev/null || true + + echo "=== Removing EXPB lock file ===" + rm -f /tmp/expb.lock + + echo "=== Pruning stopped containers ===" + docker container prune -f 2>/dev/null || true + + echo "=== Removing old PGO/EXPB images ===" + docker images --format '{{.Repository}}:{{.Tag}} {{.ID}}' 2>/dev/null | \ + grep -E "nethermindeth/nethermind:pgo-|grafana/alloy|grafana/k6" | \ + awk '{print $2}' | sort -u | \ + xargs -r docker rmi -f 2>/dev/null || true + docker image prune -f 2>/dev/null || true + docker builder prune -f --keep-storage 4G 2>/dev/null || true + + echo "=== Cleaning old EXPB output directories (keep last 3) ===" + ls -dt /mnt/sda/expb-data/outputs/expb-executor-* 2>/dev/null | tail -n +4 | xargs -r sudo rm -rf 2>/dev/null || true + + echo "=== Disk usage after cleanup ===" + df -h / /mnt/sda 2>/dev/null | tail -2 diff --git a/.github/workflows/collect-pgo-profile.yml b/.github/workflows/collect-pgo-profile.yml index b618e480a47..0ba2fec300c 100644 --- a/.github/workflows/collect-pgo-profile.yml +++ b/.github/workflows/collect-pgo-profile.yml @@ -53,68 +53,88 @@ jobs: inputs: '{ "image-name": "nethermind", "tag": "${{ env.PGO_TAG }}", - "dockerfile": "Dockerfile.pgo", + "dockerfile": "tools/PgoTrim/Dockerfile.pgo", "build-config": "release" }' - - name: Wait for publish-docker.yml to complete + - name: Trigger publish-docker.yml with Dockerfile.pgo-sampling + uses: benc-uk/workflow-dispatch@v1 + with: + workflow: publish-docker.yml + ref: ${{ github.ref }} + token: ${{ github.token }} + inputs: '{ + "image-name": "nethermind", + "tag": "${{ env.PGO_TAG 
}}-sampling", + "dockerfile": "tools/PgoTrim/Dockerfile.pgo-sampling", + "build-config": "release" + }' + + - name: Wait for publish-docker.yml builds to complete env: GITHUB_TOKEN: ${{ github.token }} shell: bash run: | set -euo pipefail - echo "Waiting for publish-docker.yml to start and complete..." + echo "Waiting for publish-docker.yml runs to start and complete..." max_discovery=300 poll=15 max_completion=7200 - elapsed=0 - run_id="" - - # Phase 1: discover the run (filter by head_sha to avoid matching unrelated runs) head_sha="${{ github.sha }}" - while [[ -z "${run_id}" ]]; do - response=$(gh api "repos/${{ github.repository }}/actions/workflows/publish-docker.yml/runs?per_page=5&branch=${{ github.ref_name }}&head_sha=${head_sha}" 2>/dev/null || echo '{"workflow_runs":[]}') - run_id=$(echo "${response}" | jq -r '.workflow_runs[0].id // empty') - if [[ -n "${run_id}" ]]; then - echo "Found publish-docker run: ${run_id}" + # Phase 1: discover both runs (two dispatches for pgo + pgo-sampling) + elapsed=0 + run_ids=() + while [[ "${#run_ids[@]}" -lt 2 ]]; do + response=$(gh api "repos/${{ github.repository }}/actions/workflows/publish-docker.yml/runs?per_page=10&branch=${{ github.ref_name }}&head_sha=${head_sha}" 2>/dev/null || echo '{"workflow_runs":[]}') + mapfile -t run_ids < <(echo "${response}" | jq -r '.workflow_runs[].id // empty' | head -2) + + if [[ "${#run_ids[@]}" -ge 2 ]]; then + echo "Found ${#run_ids[@]} publish-docker runs: ${run_ids[*]}" break fi elapsed=$((elapsed + poll)) if [[ "${elapsed}" -ge "${max_discovery}" ]]; then - echo "No publish-docker.yml run found within ${max_discovery}s." - exit 1 + echo "Found ${#run_ids[@]}/2 publish-docker.yml runs within ${max_discovery}s." + if [[ "${#run_ids[@]}" -eq 0 ]]; then + exit 1 + fi + echo "Proceeding with ${#run_ids[@]} run(s)." + break fi - echo "Waiting for run to appear... (${elapsed}/${max_discovery}s)" + echo "Waiting for runs to appear... 
(${#run_ids[@]}/2 found, ${elapsed}/${max_discovery}s)" sleep "${poll}" done - # Phase 2: wait for completion - elapsed=0 - while true; do - run_data=$(gh api "repos/${{ github.repository }}/actions/runs/${run_id}" 2>/dev/null || echo '{}') - status=$(echo "${run_data}" | jq -r '.status // "unknown"') - conclusion=$(echo "${run_data}" | jq -r '.conclusion // empty') - - if [[ "${status}" == "completed" ]]; then - if [[ "${conclusion}" == "success" ]]; then - echo "publish-docker.yml completed successfully." - exit 0 - else - echo "publish-docker.yml finished with conclusion: ${conclusion}" - exit 1 + # Phase 2: wait for all discovered runs to complete + for run_id in "${run_ids[@]}"; do + echo "Waiting for run ${run_id}..." + elapsed=0 + while true; do + run_data=$(gh api "repos/${{ github.repository }}/actions/runs/${run_id}" 2>/dev/null || echo '{}') + status=$(echo "${run_data}" | jq -r '.status // "unknown"') + conclusion=$(echo "${run_data}" | jq -r '.conclusion // empty') + + if [[ "${status}" == "completed" ]]; then + if [[ "${conclusion}" == "success" ]]; then + echo "Run ${run_id} completed successfully." + break + else + echo "Run ${run_id} finished with conclusion: ${conclusion}" + exit 1 + fi fi - fi - elapsed=$((elapsed + poll)) - if [[ "${elapsed}" -ge "${max_completion}" ]]; then - echo "Timeout waiting for publish-docker.yml (${max_completion}s)." - exit 1 - fi - echo "Status: ${status} — waiting... (${elapsed}/${max_completion}s)" - sleep "${poll}" + elapsed=$((elapsed + poll)) + if [[ "${elapsed}" -ge "${max_completion}" ]]; then + echo "Timeout waiting for run ${run_id} (${max_completion}s)." + exit 1 + fi + echo "Run ${run_id}: ${status} — waiting... 
(${elapsed}/${max_completion}s)" + sleep "${poll}" + done done collect: @@ -123,6 +143,15 @@ jobs: runs-on: [self-hosted, reproducible-benchmarks] timeout-minutes: 720 steps: + - name: Check EXPB lock + shell: bash + run: | + if [[ -f /tmp/expb.lock ]]; then + echo "EXPB lock file exists — another run is active or a previous run was cancelled." + echo "Run the 'Clean up EXPB runner' workflow to force-clear if stuck." + exit 1 + fi + - name: Check out repository uses: actions/checkout@v6 @@ -166,6 +195,19 @@ jobs: a\ bind: /nethermind/pgo a\ mode: rw }' "${rendered}" + # Patch config for PGO collection: + # - 10000 blocks for broader method coverage + # - No TC delay override — Tier-1 recompilation produces richer edge/block + # counts in the .jit file (TC delay is only needed for the sampling pass + # where perf map validity matters) + python3 -c " + import yaml, sys + p = sys.argv[1] + c = yaml.safe_load(open(p)) + for s in c.get('scenarios', {}).values(): + s['amount'] = 10000 + yaml.safe_dump(c, open(p, 'w'), default_flow_style=False, sort_keys=False) + " "${rendered}" echo "rendered_config_file=${rendered}" >> "${GITHUB_OUTPUT}" echo "Rendered config:" cat "${rendered}" @@ -178,12 +220,11 @@ jobs: echo "uv is required but not found in PATH." 
exit 1 fi - uv tool install --force --from "git+https://github.com/NethermindEth/execution-payloads-benchmarks@56f83b112a93436a66468b863ed9e47bcd5feba6" expb + uv tool install --force --from "git+https://github.com/NethermindEth/execution-payloads-benchmarks@feat/security-opt-support" expb echo "$(uv tool dir --bin)" >> "${GITHUB_PATH}" - name: Run EXPB for PGO collection id: run-expb - continue-on-error: true shell: bash working-directory: ${{ env.EXPB_DATA_DIR }} run: | @@ -217,10 +258,10 @@ jobs: exit "${expb_exit}" - name: Extract PGO data from EXPB output - if: always() shell: bash run: | set -euo pipefail + rm -rf "${RUNNER_TEMP}/pgo-data" mkdir -p "${RUNNER_TEMP}/pgo-data" # EXPB stores extra_volumes under {outputs_dir}/{scenario}/volumes/{name}/ @@ -250,14 +291,163 @@ jobs: echo "=== .jit files (edge/block profiling data) ===" find "${RUNNER_TEMP}/pgo-data" -name "*.jit" -type f -ls 2>/dev/null || true + # Fail if no trace data was collected — no point running subsequent steps + if ! 
find "${RUNNER_TEMP}/pgo-data" -name "*.nettrace" -type f 2>/dev/null | grep -q .; then + echo "ERROR: No .nettrace files found — PGO collection failed" + exit 1 + fi + + - name: Run CPU sampling pass + shell: bash + env: + CONFIG_FILE: ${{ steps.resolve.outputs.config_file }} + run: | + set -euo pipefail + sampling_tag="${{ env.PGO_TAG }}-sampling" + + # Allow perf_event_open inside containers (host-level sysctl) + sudo sysctl -w kernel.perf_event_paranoid=-1 || true + + # Clean up this run's containers/networks/volumes from the main PGO collection step + timeout 30 bash -c 'docker ps -aq --filter "name=expb-executor-nethermind-pgo-collect" 2>/dev/null | xargs -r docker rm -f 2>/dev/null' || true + timeout 10 bash -c 'docker network ls --filter "name=expb-executor-nethermind-pgo-collect" -q 2>/dev/null | xargs -r docker network rm 2>/dev/null' || true + docker volume ls -q --filter "name=expb-executor-nethermind-pgo-collect" 2>/dev/null | xargs -r docker volume rm 2>/dev/null || true + + echo "Rendering sampling config from base config..." 
+ sampling_config="${RUNNER_TEMP}/rendered-sampling-config.yaml" + # Render directly from the base config — don't depend on RUNNER_TEMP files + # from earlier steps which may be cleaned up + sed \ + -e "s#<>#${sampling_tag}#g" \ + -e 's#<>#0#g' \ + -e 's#^\([[:space:]]*\)nethermind:#\1nethermind-pgo-sampling:#g' \ + "${EXPB_DATA_DIR}/${CONFIG_FILE}" \ + > "${sampling_config}" + # Inject pgo volume (same as main config) + sed -i '/^[[:space:]]*mode: r$/{ + a\ pgo: + a\ bind: /nethermind/pgo + a\ mode: rw + }' "${sampling_config}" + # Patch sampling config for SPGO: + # - seccomp=unconfined for perf_event_open + # - TC_CallCountingDelayMs=900000 in Dockerfile prevents Tier-1 recompilation + # during sampling (recompiled methods get new addresses not in perf map) + # - COLLECTSEC=580 for perfcollect — must finish before container stops + # - amount=10000 (2x default) for more representative CPU sampling + python3 -c " + import yaml, sys + p = sys.argv[1] + c = yaml.safe_load(open(p)) + for s in c.get('scenarios', {}).values(): + s['security_opt'] = ['seccomp=unconfined'] + s['amount'] = 10000 + env = s.get('extra_env', {}) or {} + env['COLLECTSEC'] = '580' + s['extra_env'] = env + yaml.safe_dump(c, open(p, 'w'), default_flow_style=False, sort_keys=False) + " "${sampling_config}" + echo "Rendered sampling config:" + cat "${sampling_config}" + + echo "Running CPU sampling pass via EXPB..." 
+          expb execute-scenarios \
+            --config-file "${sampling_config}" \
+            --per-payload-metrics \
+            --print-logs \
+            2>&1 | tee "${RUNNER_TEMP}/expb-sampling-run.log"
+
+          # Extract sampling trace (perfcollect produces sampling.trace.zip)
+          expb_outputs="${EXPB_DATA_DIR}/outputs"
+          sampling_vol=$(find "${expb_outputs}" -maxdepth 3 -type d -name "pgo" -path "*/volumes/pgo" \
+            -printf '%T@ %p\n' 2>/dev/null | sort -nr | head -1 | cut -d' ' -f2- || true)
+          if [[ -n "${sampling_vol}" ]]; then
+            echo "Sampling volume contents:"
+            ls -la "${sampling_vol}/" 2>/dev/null || true
+            # Copy perfcollect.log for debugging
+            cp "${sampling_vol}/perfcollect.log" "${RUNNER_TEMP}/pgo-data/" 2>/dev/null || true
+            # perfcollect produces sampling.trace.zip (perf + LTTng bundle)
+            sampling_file=$(find "${sampling_vol}" -name "sampling.trace.zip" -type f | head -1 || true)
+            if [[ -n "${sampling_file}" ]]; then
+              # Repack zip with paths relative to the trace root.
+              # perfcollect inside Docker produces paths like nethermind/pgo/sampling.trace/...
+              # but CtfTraceEventSource expects lttngTrace/... and perf.data.txt at the root.
+              repack_dir="${RUNNER_TEMP}/sampling-repack"
+              mkdir -p "${repack_dir}"
+              cd "${repack_dir}"
+              unzip -q "${sampling_file}"
+              trace_root=$(find . -name "lttngTrace" -type d | head -1 | xargs -r dirname || true)  # -r / || true: leave trace_root empty (zip from repack root) instead of aborting under pipefail when no lttngTrace dir exists
+              if [[ -n "${trace_root}" && "${trace_root}" != "." ]]; then
+                echo "Repacking zip: stripping prefix ${trace_root}/"
+                cd "${trace_root}"
+              fi
+              zip -qr "${RUNNER_TEMP}/pgo-data/sampling.trace.zip" .
+              cd "${RUNNER_TEMP}"
+              rm -rf "${repack_dir}"
+            fi
+          fi
+
+          if [[ -f "${RUNNER_TEMP}/pgo-data/sampling.trace.zip" ]]; then
+            echo "CPU sampling trace collected:"
+            ls -lh "${RUNNER_TEMP}/pgo-data/sampling.trace.zip"
+            echo "Zip contents:"
+            unzip -l "${RUNNER_TEMP}/pgo-data/sampling.trace.zip" | head -20 || true  # listing is informational; head closing the pipe early must not fail the step via SIGPIPE + pipefail
+          else
+            echo "No sampling trace found — call graph data will not be available."
+ fi + - name: Set up .NET - if: always() uses: actions/setup-dotnet@v5 with: cache: false + - name: Process traces with PgoTrim + id: pgotrim + shell: bash + run: | + set -euo pipefail + pgo_dir="${RUNNER_TEMP}/pgo-data" + + # Convert sampling .trace.zip to .etlx (injects MethodDetails CTF mapping) + sampling_trace="${pgo_dir}/sampling.trace.zip" + sampling_etlx="${RUNNER_TEMP}/sampling-converted.etlx" + if [[ -f "${sampling_trace}" ]]; then + echo "Converting sampling trace (injecting MethodDetails CTF mapping)..." + dotnet run --project "${GITHUB_WORKSPACE}/tools/PgoTrim" -c Release -- \ + convert-trace "${sampling_trace}" "${sampling_etlx}" || true + + # Extract perf sample IPs for SPGO — placed alongside .etlx so + # dotnet-pgo finds it via Path.ChangeExtension(etlx, ".spgo") + echo "Extracting perf sample IPs for SPGO..." + dotnet run --project "${GITHUB_WORKSPACE}/tools/PgoTrim" -c Release -- \ + extract-spgo "${sampling_trace}" "${sampling_etlx%.etlx}.spgo" || true + + # Generate CallChainProfile JSON for crossgen2 CallFrequency method layout + if [[ -f "${sampling_etlx}" ]]; then + echo "Generating call chain profile for crossgen2..." + callchain_json="${RUNNER_TEMP}/nethermind.callchain.json" + dotnet run --project "${GITHUB_WORKSPACE}/tools/PgoTrim" -c Release -- \ + generate-callchain "${sampling_etlx}" "${callchain_json}" || true + if [[ -s "${callchain_json}" ]]; then + gzip -k "${callchain_json}" + echo "callchain_gz=${callchain_json}.gz" >> "${GITHUB_OUTPUT}" + fi + fi + fi + + # Compress .jit edge/block profiling data for runtime PGO + jit_file=$(find "${pgo_dir}" -name "*.jit" -type f -printf '%T@ %p\n' 2>/dev/null | sort -nr | head -1 | cut -d' ' -f2- || true) + jit_gz="${RUNNER_TEMP}/nethermind.jit.gz" + if [[ -n "${jit_file}" ]]; then + echo "Compressing ${jit_file} with PgoTrim..." 
+ dotnet run --project "${GITHUB_WORKSPACE}/tools/PgoTrim" -c Release -- \ + "${jit_file}" "${jit_gz}" --min-block 0 --min-edge 0 + echo "jit_gz=${jit_gz}" >> "${GITHUB_OUTPUT}" + else + echo "No .jit file found — skipping edge profiling." + fi + - name: Build dotnet-pgo from source - if: always() shell: bash run: | set -euo pipefail @@ -265,6 +455,53 @@ jobs: git clone --depth 1 --branch v10.0.5 \ https://github.com/dotnet/runtime.git "${RUNNER_TEMP}/dotnet-runtime" cd "${RUNNER_TEMP}/dotnet-runtime" + + # Build TraceEvent from our PerfView PR branch which has the MethodDetails + # CTF mapping fix. This lets dotnet-pgo read perfcollect .trace.zip directly + # (both LTTng CLR events AND perf CPU samples). + git clone --depth 1 --branch fix/add-methoddetails-ctf-mapping \ + https://github.com/benaadams/perfview.git "${RUNNER_TEMP}/perfview" + # Strip SupportFiles package ref and its file copy entries (private feed). + # Keep Dia2Lib.dll and TraceReloggerLib.dll as compile refs from the NuGet cache. 
+ python3 -c " + import re, sys + p = sys.argv[1] + with open(p) as f: xml = f.read() + xml = re.sub(r'', '', xml) + xml = re.sub(r']*>.*?', '', xml, flags=re.DOTALL) + with open(p, 'w') as f: f.write(xml) + " "${RUNNER_TEMP}/perfview/src/TraceEvent/TraceEvent.csproj" + # Copy compile-time reference DLLs from the NuGet TraceEvent 3.1.30 package + # (Dia2Lib.dll and TraceReloggerLib.dll provide Windows COM interop types) + nuget_te=$(find ~/.nuget/packages/microsoft.diagnostics.tracing.traceevent/3.1.* -name "Dia2Lib.dll" -path "*/netstandard*" 2>/dev/null | head -1 | xargs -r dirname || true) + if [[ -n "${nuget_te}" ]]; then + cp "${nuget_te}/Dia2Lib.dll" "${nuget_te}/TraceReloggerLib.dll" \ + "${RUNNER_TEMP}/perfview/src/TraceEvent/" 2>/dev/null || true + # Add as compile references + sed -i '/<\/ItemGroup>/i\ Dia2Lib.dll<\/HintPath><\/Reference>\n TraceReloggerLib.dll<\/HintPath><\/Reference>' \ + "${RUNNER_TEMP}/perfview/src/TraceEvent/TraceEvent.csproj" + fi + # Replace NuGet TraceEvent reference with project reference to our patched source + pgo_csproj="src/coreclr/tools/dotnet-pgo/dotnet-pgo.csproj" + sed -i 's||\n |' "${pgo_csproj}" + # Patch dotnet-pgo with Nethermind extensions: + # - .spgo sample loading for SPGO block attribution + # - .callgraph loading for Pettis-Hansen method layout + # - Safe SmoothFlow wrapper for disconnected flow graph crash + # Copy our .cs file into the source tree (compiled as part of dotnet-pgo) + cp "${GITHUB_WORKSPACE}/tools/PgoTrim/dotnet-pgo-patches/NethermindPgoPatches.cs" \ + "src/coreclr/tools/dotnet-pgo/NethermindPgoPatches.cs" + pgo_program="src/coreclr/tools/dotnet-pgo/Program.cs" + # Insert calls to our patches at the right points: + # 1. Load .spgo samples before the "Samples outside managed code" print + sed -i '/PrintOutput(\$"Samples outside managed code: {correlator.SamplesOutsideManagedCode}");/i\ + NethermindPgoPatches.LoadSpgoSamples(etlFileName, correlator);' "${pgo_program}" + # 2. 
Load .callgraph after the SampledProfileTraceData loop (after "}" closing the GenerateCallGraph block) + # Insert right before the instrumentationDataByMethod dictionary creation + sed -i '/Dictionary instrumentationDataByMethod/i\ + NethermindPgoPatches.LoadCallGraph(etlFileName, GetMethodMemMap(), callGraph, exclusiveSamples);' "${pgo_program}" + # 3. Replace SmoothAllProfiles with safe version + sed -i 's|correlator.SmoothAllProfiles();|NethermindPgoPatches.SafeSmoothAllProfiles(correlator);|' "${pgo_program}" # Build only dotnet-pgo (--projects bypasses subset traversal, avoids ILCompiler) ./build.sh -c Release \ --projects "$(pwd)/src/coreclr/tools/dotnet-pgo/dotnet-pgo.csproj" @@ -288,7 +525,6 @@ jobs: - name: Convert traces to .mibc and merge with edge profiling id: convert - if: always() shell: bash run: | set -euo pipefail @@ -301,7 +537,7 @@ jobs: fi # Step 1: Create .mibc from EventPipe trace (method-level hot/cold) - trace_file=$(find "${pgo_dir}" -name "*.nettrace" -type f -printf '%T@ %p\n' 2>/dev/null | sort -nr | head -1 | cut -d' ' -f2- || true) + trace_file=$(find "${pgo_dir}" -name "*.nettrace" -not -name "sampling*" -type f -printf '%T@ %p\n' 2>/dev/null | sort -nr | head -1 | cut -d' ' -f2- || true) if [[ -z "${trace_file}" ]]; then echo "No .nettrace file found under: ${pgo_dir}" ls -laR "${pgo_dir}" @@ -330,41 +566,79 @@ jobs: fi dotnet-pgo create-mibc "${create_mibc_args[@]}" - cp "${trace_mibc}" "${mibc_file}" + # Step 1b: If CPU sampling trace exists, convert and merge for call graph data + sampling_trace="${pgo_dir}/sampling.trace.zip" + if [[ -f "${sampling_trace}" ]]; then + echo "Converting CPU sampling trace to .mibc..." 
+ sampling_mibc="${RUNNER_TEMP}/sampling.mibc" + # Use PgoTrim-converted .etlx (has MethodDetails) with .spgo alongside + # (perf sample IPs that dotnet-pgo's patched SPGO code reads) + sampling_etlx="${RUNNER_TEMP}/sampling-converted.etlx" + sampling_input="${sampling_trace}" + if [[ -f "${sampling_etlx}" ]]; then + sampling_input="${sampling_etlx}" + echo "Using converted .etlx with .spgo samples" + fi + sampling_args=( + --trace "${sampling_input}" + --output "${sampling_mibc}" + ) + if [[ -d "${ref_dir}" ]]; then + for dll in "${ref_dir}"/*.dll; do + sampling_args+=(--reference "${dll}") + done + fi + dotnet-pgo create-mibc --spgo --spgo-min-samples 20 "${sampling_args[@]}" || true + + if [[ -f "${sampling_mibc}" ]]; then + main_size=$(stat -c%s "${trace_mibc}" 2>/dev/null || echo 0) + sampling_size=$(stat -c%s "${sampling_mibc}" 2>/dev/null || echo 0) + echo "Main .mibc: ${main_size} bytes, Sampling .mibc: ${sampling_size} bytes" + + echo "Merging main + sampling .mibc profiles..." + dotnet-pgo merge \ + --input "${trace_mibc}" \ + --input "${sampling_mibc}" \ + --output "${mibc_file}" + echo "Merged .mibc profile:" + ls -lh "${mibc_file}" + else + echo "Sampling .mibc creation failed — using main trace only." + cp "${trace_mibc}" "${mibc_file}" + fi + else + echo "No sampling trace — using main trace only." + cp "${trace_mibc}" "${mibc_file}" + fi - echo "Generated .mibc profile:" + echo "Final .mibc profile:" ls -lh "${mibc_file}" echo "mibc_file=${mibc_file}" >> "${GITHUB_OUTPUT}" - # Step 2: Trim .jit edge/block profiling data for runtime PGO - # .jit files (from DOTNET_WritePGOData) use a JIT-internal text format read - # back via DOTNET_ReadPGOData — they can't be merged into .mibc by dotnet-pgo. - jit_file=$(find "${pgo_dir}" -name "*.jit" -type f -printf '%T@ %p\n' 2>/dev/null | sort -nr | head -1 | cut -d' ' -f2- || true) - jit_gz="${RUNNER_TEMP}/nethermind.jit.gz" - if [[ -n "${jit_file}" ]]; then - echo "Trimming ${jit_file} with PgoTrim..." 
- dotnet run --project "${GITHUB_WORKSPACE}/tools/PgoTrim" -c Release -- \ - "${jit_file}" "${jit_gz}" --min-block 100 --min-edge 250 - echo "jit_gz=${jit_gz}" >> "${GITHUB_OUTPUT}" - else - echo "No .jit file found — skipping edge profiling trim." - fi + # .jit compression is handled by the "Process traces with PgoTrim" step above - name: Upload .mibc artifact - if: always() uses: actions/upload-artifact@v7 with: name: nethermind-pgo-profile path: ${{ steps.convert.outputs.mibc_file }} - retention-days: 90 + retention-days: 1 - - name: Upload trimmed .jit.gz artifact - if: always() && steps.convert.outputs.jit_gz != '' + - name: Upload compressed .jit.gz artifact + if: steps.pgotrim.outputs.jit_gz != '' uses: actions/upload-artifact@v7 with: name: nethermind-pgo-jit - path: ${{ steps.convert.outputs.jit_gz }} - retention-days: 90 + path: ${{ steps.pgotrim.outputs.jit_gz }} + retention-days: 1 + + - name: Upload callchain profile + if: steps.pgotrim.outputs.callchain_gz != '' + uses: actions/upload-artifact@v7 + with: + name: nethermind-pgo-callchain + path: ${{ steps.pgotrim.outputs.callchain_gz }} + retention-days: 1 - name: Upload raw PGO data if: always() @@ -373,10 +647,24 @@ jobs: name: nethermind-pgo-raw-data path: ${{ runner.temp }}/pgo-data/ if-no-files-found: warn - retention-days: 30 + retention-days: 7 + + - name: Clean up Docker resources + if: always() + shell: bash + run: | + # Clean up this run's containers, networks, volumes, and images + docker ps -aq --filter "name=expb-executor-nethermind-pgo" 2>/dev/null | xargs -r docker rm -f 2>/dev/null || true + docker network ls --filter "name=expb-executor-nethermind-pgo" -q 2>/dev/null | xargs -r docker network rm 2>/dev/null || true + docker volume ls -q --filter "name=expb-executor-nethermind-pgo" 2>/dev/null | xargs -r docker volume rm 2>/dev/null || true + # Remove this run's PGO images + docker images --format '{{.Repository}}:{{.Tag}} {{.ID}}' 2>/dev/null | \ + grep "nethermindeth/nethermind:${{ 
env.PGO_TAG }}" | \ + awk '{print $2}' | sort -u | \ + xargs -r docker rmi -f 2>/dev/null || true update-pgo-profile: - name: Create PR to update PGO profile + name: Update PGO profile needs: collect runs-on: ubuntu-latest if: success() @@ -392,11 +680,17 @@ jobs: - name: Download .jit.gz artifact uses: actions/download-artifact@v7 - continue-on-error: true with: name: nethermind-pgo-jit path: src/Nethermind/Nethermind.Runner/pgo/ + - name: Download callchain profile + uses: actions/download-artifact@v7 + continue-on-error: true + with: + name: nethermind-pgo-callchain + path: src/Nethermind/Nethermind.Runner/pgo/ + - name: Check for changes id: diff run: | @@ -408,8 +702,19 @@ jobs: echo "PGO profile has changed." fi - - name: Create pull request - if: steps.diff.outputs.changed == 'true' + - name: Commit to branch (non-master) + if: steps.diff.outputs.changed == 'true' && github.ref != 'refs/heads/master' + run: | + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git add src/Nethermind/Nethermind.Runner/pgo/nethermind.mibc + git add src/Nethermind/Nethermind.Runner/pgo/nethermind.jit.gz 2>/dev/null || true + git add src/Nethermind/Nethermind.Runner/pgo/nethermind.callchain.json.gz 2>/dev/null || true + git commit -m "chore(pgo): update PGO profile" + git push + + - name: Create pull request (master) + if: steps.diff.outputs.changed == 'true' && github.ref == 'refs/heads/master' env: GH_TOKEN: ${{ github.token }} run: | @@ -419,6 +724,7 @@ jobs: git checkout -b "${branch}" git add src/Nethermind/Nethermind.Runner/pgo/nethermind.mibc git add src/Nethermind/Nethermind.Runner/pgo/nethermind.jit.gz 2>/dev/null || true + git add src/Nethermind/Nethermind.Runner/pgo/nethermind.callchain.json.gz 2>/dev/null || true git commit -m "chore(pgo): update PGO profile from weekly collection" git push -u origin "${branch}" gh pr create \ diff --git a/.github/workflows/evm-opcode-benchmark-diff.yml 
b/.github/workflows/evm-opcode-benchmark-diff.yml index f8a1c93059f..f797ac269e6 100644 --- a/.github/workflows/evm-opcode-benchmark-diff.yml +++ b/.github/workflows/evm-opcode-benchmark-diff.yml @@ -112,6 +112,15 @@ jobs: git worktree add --detach -f "${base_dir}" "${base_sha}" echo "BASE_WORKTREE=${base_dir}" >> "${GITHUB_ENV}" + - name: Decompress PGO callchain profiles + shell: bash + run: | + for dir in . "${BASE_WORKTREE}"; do + gz="${dir}/src/Nethermind/Nethermind.Runner/pgo/nethermind.callchain.json.gz" + json="${dir}/src/Nethermind/Nethermind.Runner/pgo/nethermind.callchain.json" + if [ -f "$gz" ] && [ ! -f "$json" ]; then gzip -d -k "$gz"; fi + done + - name: Run base benchmark shell: bash run: | diff --git a/.github/workflows/run-block-processing-benchmark.yml b/.github/workflows/run-block-processing-benchmark.yml index 179adafd892..b9c395bca4b 100644 --- a/.github/workflows/run-block-processing-benchmark.yml +++ b/.github/workflows/run-block-processing-benchmark.yml @@ -90,6 +90,12 @@ jobs: cache: true cache-dependency-path: src/Nethermind/Nethermind.Runner/packages.lock.json + - name: Decompress PGO callchain profile + run: | + gz="src/Nethermind/Nethermind.Runner/pgo/nethermind.callchain.json.gz" + json="src/Nethermind/Nethermind.Runner/pgo/nethermind.callchain.json" + if [ -f "$gz" ] && [ ! -f "$json" ]; then gzip -d -k "$gz"; fi + - name: Build base branch run: | dotnet restore ${{ env.BENCHMARK_PROJECT }} @@ -120,6 +126,12 @@ jobs: ref: ${{ github.event.pull_request.head.sha || github.ref }} clean: false + - name: Decompress PGO callchain profile (PR) + run: | + gz="src/Nethermind/Nethermind.Runner/pgo/nethermind.callchain.json.gz" + json="src/Nethermind/Nethermind.Runner/pgo/nethermind.callchain.json" + if [ -f "$gz" ] && [ ! 
-f "$json" ]; then gzip -d -k "$gz"; fi + - name: Build PR branch run: | dotnet restore ${{ env.BENCHMARK_PROJECT }} diff --git a/.github/workflows/run-expb-reproducible-benchmarks.yml b/.github/workflows/run-expb-reproducible-benchmarks.yml index bf54267b4b1..f89c6a91ec9 100644 --- a/.github/workflows/run-expb-reproducible-benchmarks.yml +++ b/.github/workflows/run-expb-reproducible-benchmarks.yml @@ -638,15 +638,15 @@ jobs: # Background: tail Nethermind container logs for live visibility ( - sleep 5 - for i in $(seq 1 60); do - cid=$(docker ps --filter "name=nethermind" --format '{{.ID}}' 2>/dev/null | head -1) + sleep 10 + for i in $(seq 1 120); do + cid=$(docker ps --filter "name=expb-executor" --filter "ancestor=nethermindeth/nethermind" --format '{{.ID}}' 2>/dev/null | head -1) if [[ -z "$cid" ]]; then cid=$(docker ps --format '{{.ID}} {{.Image}}' 2>/dev/null | grep nethermind | head -1 | cut -d' ' -f1) fi if [[ -n "$cid" ]]; then echo "=== Tailing Nethermind container ${cid} logs ===" >&3 - docker logs -f "$cid" 2>&1 | sed 's/^/[nethermind] /' >&3 + docker logs -f "$cid" 2>&1 | sed -u 's/^/[nethermind] /' >&3 break fi sleep 2 @@ -1394,15 +1394,15 @@ jobs: # Background: tail Nethermind container logs for live visibility ( - sleep 5 - for i in $(seq 1 60); do - cid=$(docker ps --filter "name=nethermind" --format '{{.ID}}' 2>/dev/null | head -1) + sleep 10 + for i in $(seq 1 120); do + cid=$(docker ps --filter "name=expb-executor" --filter "ancestor=nethermindeth/nethermind" --format '{{.ID}}' 2>/dev/null | head -1) if [[ -z "$cid" ]]; then cid=$(docker ps --format '{{.ID}} {{.Image}}' 2>/dev/null | grep nethermind | head -1 | cut -d' ' -f1) fi if [[ -n "$cid" ]]; then echo "=== Tailing Nethermind container ${cid} logs ===" >&3 - docker logs -f "$cid" 2>&1 | sed 's/^/[nethermind] /' >&3 + docker logs -f "$cid" 2>&1 | sed -u 's/^/[nethermind] /' >&3 break fi sleep 2 diff --git a/Directory.Build.targets b/Directory.Build.targets index c84a993db57..66cc56221a3 
100644 --- a/Directory.Build.targets +++ b/Directory.Build.targets @@ -1,8 +1,28 @@ + + + false + + + true + Speed $(MSBuildThisFileDirectory)src/Nethermind/Nethermind.Runner/pgo/nethermind.mibc + $(MSBuildThisFileDirectory)src/Nethermind/Nethermind.Runner/pgo/nethermind.callchain.json $(NethermindPgoProfile) + + + + + $(Crossgen2ExtraCommandLineArgs);--opt-cross-module:*;--hot-cold-splitting;--file-layout:methodorder + $(Crossgen2ExtraCommandLineArgs);--callchain-profile:$(NethermindCallChainProfile);--method-layout:callfrequency + $(Crossgen2ExtraCommandLineArgs);--method-layout:pettishansen diff --git a/Directory.Packages.props b/Directory.Packages.props index 3afde4cb2a1..5d87641cb08 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -92,5 +92,6 @@ + \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 453e82093d8..ad8bea0c56b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,6 +20,9 @@ RUN arch=$([ "$TARGETARCH" = "amd64" ] && echo "x64" || echo "$TARGETARCH") && \ cd src/Nethermind/Nethermind.Runner && \ dotnet restore --locked-mode && \ dotnet restore -r "linux-${arch}" -p:PublishReadyToRun=true && \ + if [ -f pgo/nethermind.callchain.json.gz ] && [ ! -f pgo/nethermind.callchain.json ]; then \ + gzip -d -k pgo/nethermind.callchain.json.gz; \ + fi && \ dotnet publish -c $BUILD_CONFIG -r "linux-${arch}" -o /publish --no-restore --no-self-contained \ -p:SourceRevisionId=$COMMIT_HASH diff --git a/Dockerfile.chiseled b/Dockerfile.chiseled index 967d57def70..c60c6b056bb 100644 --- a/Dockerfile.chiseled +++ b/Dockerfile.chiseled @@ -20,6 +20,9 @@ RUN arch=$([ "$TARGETARCH" = "amd64" ] && echo "x64" || echo "$TARGETARCH") && \ cd src/Nethermind/Nethermind.Runner && \ dotnet restore --locked-mode && \ dotnet restore -r "linux-${arch}" -p:PublishReadyToRun=true && \ + if [ -f pgo/nethermind.callchain.json.gz ] && [ ! 
-f pgo/nethermind.callchain.json ]; then \ + gzip -d -k pgo/nethermind.callchain.json.gz; \ + fi && \ dotnet publish -c $BUILD_CONFIG -r "linux-${arch}" -o /publish --no-restore --no-self-contained \ -p:SourceRevisionId=$COMMIT_HASH @@ -27,7 +30,15 @@ RUN arch=$([ "$TARGETARCH" = "amd64" ] && echo "x64" || echo "$TARGETARCH") && \ RUN cd /publish && \ mkdir keystore && \ mkdir logs && \ - mkdir nethermind_db + mkdir nethermind_db && \ + mkdir -p pgo + +# Decompress edge/block PGO data for runtime JIT optimization +COPY src/Nethermind/Nethermind.Runner/pgo/ /tmp/pgo/ +RUN if [ -s /tmp/pgo/nethermind.jit.gz ]; then \ + gzip -d -c /tmp/pgo/nethermind.jit.gz > /publish/pgo/nethermind.jit; \ + fi && \ + rm -rf /tmp/pgo FROM mcr.microsoft.com/dotnet/aspnet:10.0.5-noble-chiseled@sha256:1191b4891ae8b1a8184b2de52b2c6332dfb27c30b58d282632044357db63761d @@ -35,6 +46,12 @@ WORKDIR /nethermind # Chiseled image has no shell — large pages cannot be auto-detected. # To enable: docker run -e DOTNET_GCLargePages=1 -e DOTNET_GCRegionRange=0x800000000 ... + +# Enable edge/block PGO and profile-driven inlining. +# The JIT silently skips PGO loading when the file doesn't exist. 
+ENV DOTNET_ReadPGOData=1 +ENV DOTNET_PGODataPath=/nethermind/pgo/nethermind.jit +ENV DOTNET_JitInlinePolicyProfile=1 VOLUME /nethermind/keystore VOLUME /nethermind/logs VOLUME /nethermind/nethermind_db diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh index df6ceac2bef..b8bdf8e722d 100755 --- a/scripts/entrypoint.sh +++ b/scripts/entrypoint.sh @@ -66,10 +66,13 @@ if [[ -z "${DOTNET_GCLargePages:-}" ]]; then fi fi -# Enable edge/block PGO if the trimmed profile exists -if [[ -z "${DOTNET_ReadPGOData:-}" ]] && [[ -f "/nethermind/pgo/nethermind.jit" ]]; then +# Enable edge/block PGO if the profile exists +if [[ -z "${DOTNET_ReadPGOData:-}" ]] && [[ -s "/nethermind/pgo/nethermind.jit" ]]; then export DOTNET_ReadPGOData=1 export DOTNET_PGODataPath=/nethermind/pgo/nethermind.jit + # Use profile-driven inlining: inline more aggressively at hot call sites, + # less at cold ones, based on the seeded PGO frequency data. + export DOTNET_JitInlinePolicyProfile=1 echo "Edge/block PGO enabled: ${DOTNET_PGODataPath}" fi diff --git a/src/Nethermind/Nethermind.Evm/VirtualMachine.Warmup.cs b/src/Nethermind/Nethermind.Evm/VirtualMachine.Warmup.cs index 28d4672fe9e..c7001d40d09 100644 --- a/src/Nethermind/Nethermind.Evm/VirtualMachine.Warmup.cs +++ b/src/Nethermind/Nethermind.Evm/VirtualMachine.Warmup.cs @@ -146,6 +146,23 @@ static void AddPrecompileCall(List codeToDeploy) codeToDeploy.Add((byte)Instruction.POP); } + // Opcodes that interact with world state or code repository. + // Skip these during warmup to avoid polluting GDV type histograms with the + // warmup state type — real block processing uses a different IWorldState + // implementation, and a bimodal histogram causes the JIT to emit type-check + // guards instead of direct devirtualization. 
+ private static readonly HashSet StateOpcodes = + [ + (int)Instruction.BALANCE, (int)Instruction.SELFBALANCE, + (int)Instruction.SLOAD, (int)Instruction.SSTORE, + (int)Instruction.TLOAD, (int)Instruction.TSTORE, + (int)Instruction.EXTCODESIZE, (int)Instruction.EXTCODECOPY, (int)Instruction.EXTCODEHASH, + (int)Instruction.CALL, (int)Instruction.STATICCALL, (int)Instruction.DELEGATECALL, (int)Instruction.CALLCODE, + (int)Instruction.CREATE, (int)Instruction.CREATE2, + (int)Instruction.SELFDESTRUCT, + (int)Instruction.LOG0, (int)Instruction.LOG1, (int)Instruction.LOG2, (int)Instruction.LOG3, (int)Instruction.LOG4, + ]; + private static void RunOpCodes(VirtualMachine vm, IWorldState state, VmState vmState, IReleaseSpec spec) where TTracingInst : struct, IFlag { @@ -158,17 +175,36 @@ private static void RunOpCodes(VirtualMachine vm, IWor TGasPolicy gas = TGasPolicy.FromLong(long.MaxValue); int pc = 0; + // Values that exercise common (non-trivial, non-failing) opcode paths: + // - Not 0 or 1 (avoids fast-path/short-circuit branches) + // - Multi-word (exercises full 256-bit arithmetic) + // - Different from each other (avoids equality fast-paths) + // These influence Tier-0 PGO edge counts — we want the common-case branches + // profiled, not the degenerate/edge-case branches. 
+ UInt256 a = new(0x1234567890ABCDEF, 0xFEDCBA0987654321, 0x1111111111111111, 0x2222222222222222); + UInt256 b = new(0x42, 0, 0, 0); // small but > 1 + UInt256 c = new(0xDEADBEEFCAFEBABE, 0x0123456789ABCDEF, 0x3333333333333333, 0x4444444444444444); + for (int repeat = 0; repeat < WarmUpIterations; repeat++) { for (int i = 0; i < opcodes.Length; i++) { - // LOG4 needs 6 values on stack - stack.PushOne(); - stack.PushOne(); - stack.PushOne(); - stack.PushOne(); - stack.PushOne(); - stack.PushOne(); + // Skip state-interacting opcodes — their GDV type profiling during + // warmup would record the wrong IWorldState type, creating bimodal + // histograms that prevent devirtualization during real execution. + if (StateOpcodes.Contains(i)) + continue; + + // Push representative values so arithmetic opcodes take common paths: + // a / b → multi-word division (not by 0 or 1) + // a % c → remainder with c > result (common case) + // a * b % c → mulmod where modulus > product (common case) + stack.PushUInt256(in a); + stack.PushUInt256(in b); + stack.PushUInt256(in c); + stack.PushUInt256(in a); + stack.PushUInt256(in b); + stack.PushUInt256(in c); opcodes[i](vm, ref stack, ref gas, ref pc); if (vm.ReturnData is VmState returnState) diff --git a/src/Nethermind/Nethermind.Runner/Nethermind.Runner.csproj b/src/Nethermind/Nethermind.Runner/Nethermind.Runner.csproj index d0a462cabc0..ce10f55de95 100644 --- a/src/Nethermind/Nethermind.Runner/Nethermind.Runner.csproj +++ b/src/Nethermind/Nethermind.Runner/Nethermind.Runner.csproj @@ -22,8 +22,6 @@ true - true - Speed @@ -106,6 +104,15 @@ Always + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + diff --git a/src/Nethermind/Nethermind.Runner/pgo/nethermind.callchain.json.gz b/src/Nethermind/Nethermind.Runner/pgo/nethermind.callchain.json.gz new file mode 100644 index 00000000000..1eca8e0623d Binary files /dev/null and b/src/Nethermind/Nethermind.Runner/pgo/nethermind.callchain.json.gz differ diff --git 
a/src/Nethermind/Nethermind.Runner/pgo/nethermind.jit.gz b/src/Nethermind/Nethermind.Runner/pgo/nethermind.jit.gz index f39c62550bf..0c9c3a60b07 100644 Binary files a/src/Nethermind/Nethermind.Runner/pgo/nethermind.jit.gz and b/src/Nethermind/Nethermind.Runner/pgo/nethermind.jit.gz differ diff --git a/src/Nethermind/Nethermind.Runner/pgo/nethermind.mibc b/src/Nethermind/Nethermind.Runner/pgo/nethermind.mibc index 224e05d645d..a1e036a2868 100644 Binary files a/src/Nethermind/Nethermind.Runner/pgo/nethermind.mibc and b/src/Nethermind/Nethermind.Runner/pgo/nethermind.mibc differ diff --git a/tools/PgoTrim/CallChainGenerator.cs b/tools/PgoTrim/CallChainGenerator.cs new file mode 100644 index 00000000000..1661816eb5c --- /dev/null +++ b/tools/PgoTrim/CallChainGenerator.cs @@ -0,0 +1,239 @@ +// SPDX-FileCopyrightText: 2026 Demerzel Solutions Limited +// SPDX-License-Identifier: LGPL-3.0-only + +using System.Globalization; +using System.Text.Json; +using Microsoft.Diagnostics.Tracing.Etlx; +using Microsoft.Diagnostics.Tracing.Parsers.Clr; + +/// +/// Reads a .callgraph file (callee_ip caller_ip pairs from SpgoExtractor) and +/// resolves IPs to method names using the .etlx MethodMemoryMap. Outputs a +/// CallChainProfile JSON file for crossgen2's --callchain-profile / CallFrequency layout. +/// +/// Also collects per-method size data from the .etlx for potential CDS (Cache-Directed Sort) +/// implementation. +/// +/// JSON format expected by crossgen2's CallChainProfile: +/// { +/// "CallerMethodName": [["CalleeA", "CalleeB"], [100, 50]], +/// ... 
+/// } +/// +static class CallChainGenerator +{ + public static int Generate(string etlxPath, string outputJsonPath) + { + string callGraphPath = Path.ChangeExtension(etlxPath, ".callgraph"); + if (!File.Exists(callGraphPath)) + { + Console.Error.WriteLine($"No .callgraph file found at: {callGraphPath}"); + return 1; + } + + if (!File.Exists(etlxPath)) + { + Console.Error.WriteLine($"No .etlx file found at: {etlxPath}"); + return 1; + } + + Console.WriteLine($"Loading method map from {etlxPath}..."); + + // Build IP -> method name map from .etlx CLR events + Dictionary methodsByName = new(); + Dictionary ipToMethod = new(); + + using (TraceLog traceLog = TraceLog.OpenOrConvert(etlxPath, new TraceLogOptions { KeepAllEvents = true })) + { + foreach (TraceProcess process in traceLog.Processes) + { + // Build from MethodLoadVerbose events (have native start address + size) + foreach (MethodLoadUnloadVerboseTraceData evt in + process.EventsInProcess.ByEventType()) + { + string name = FormatMethodName(evt); + ulong start = evt.MethodStartAddress; + int size = evt.MethodSize; + + if (start == 0 || size == 0) + continue; + + // Register all IPs in this method's range + // We only need the start address for lookup - use range check later + if (!methodsByName.ContainsKey(name)) + { + methodsByName[name] = new MethodInfo(name, start, size); + } + + ipToMethod[start] = name; + } + } + } + + Console.WriteLine($" {methodsByName.Count:N0} methods resolved from .etlx"); + + // Build sorted array for binary search IP resolution + List<(ulong start, int size, string name)> sortedMethods = new(); + foreach (MethodInfo mi in methodsByName.Values) + { + sortedMethods.Add((mi.Start, mi.Size, mi.Name)); + } + sortedMethods.Sort((a, b) => a.start.CompareTo(b.start)); + + // Parse .callgraph and aggregate caller -> callee -> count + Dictionary> callGraph = new(); + Dictionary exclusiveSamples = new(); + int resolvedEdges = 0; + int unresolvedEdges = 0; + int totalLines = 0; + + 
Console.WriteLine($"Processing {callGraphPath}..."); + + foreach (string line in File.ReadLines(callGraphPath)) + { + ReadOnlySpan span = line.AsSpan().Trim(); + if (span.IsEmpty || span[0] == '#') + continue; + + totalLines++; + int space = span.IndexOf(' '); + if (space <= 0) + continue; + + if (!ulong.TryParse(span.Slice(0, space), NumberStyles.HexNumber, null, out ulong calleeIp)) + continue; + if (!ulong.TryParse(span.Slice(space + 1), NumberStyles.HexNumber, null, out ulong callerIp)) + continue; + + string? calleeName = ResolveIp(sortedMethods, calleeIp); + string? callerName = ResolveIp(sortedMethods, callerIp); + + // Count exclusive samples for callee + if (calleeName != null) + { + if (exclusiveSamples.TryGetValue(calleeName, out int count)) + exclusiveSamples[calleeName] = count + 1; + else + exclusiveSamples[calleeName] = 1; + } + + if (callerName != null && calleeName != null && callerName != calleeName) + { + if (!callGraph.TryGetValue(callerName, out Dictionary? callees)) + { + callees = new Dictionary(); + callGraph[callerName] = callees; + } + if (callees.TryGetValue(calleeName, out int edgeCount)) + callees[calleeName] = edgeCount + 1; + else + callees[calleeName] = 1; + + resolvedEdges++; + } + else + { + unresolvedEdges++; + } + } + + Console.WriteLine($" {totalLines:N0} lines, {resolvedEdges:N0} resolved edges, {unresolvedEdges:N0} unresolved"); + Console.WriteLine($" {callGraph.Count:N0} unique callers, {exclusiveSamples.Count:N0} methods with samples"); + + // Write CallChainProfile JSON + Console.WriteLine($"Writing {outputJsonPath}..."); + using (FileStream fs = File.Create(outputJsonPath)) + using (Utf8JsonWriter writer = new Utf8JsonWriter(fs, new JsonWriterOptions { Indented = false })) + { + writer.WriteStartObject(); + + foreach (KeyValuePair> caller in callGraph) + { + writer.WritePropertyName(caller.Key); + writer.WriteStartArray(); + + // First array: callee names + writer.WriteStartArray(); + foreach (KeyValuePair callee in 
caller.Value) + { + writer.WriteStringValue(callee.Key); + } + writer.WriteEndArray(); + + // Second array: call counts + writer.WriteStartArray(); + foreach (KeyValuePair callee in caller.Value) + { + writer.WriteNumberValue(callee.Value); + } + writer.WriteEndArray(); + + writer.WriteEndArray(); + } + + writer.WriteEndObject(); + } + + // Also write a method-sizes file for potential CDS implementation + string sizesPath = Path.ChangeExtension(outputJsonPath, ".sizes"); + int sizesWritten = 0; + using (StreamWriter sw = new StreamWriter(sizesPath)) + { + sw.WriteLine("# method_name native_size_bytes exclusive_samples"); + foreach (MethodInfo mi in methodsByName.Values) + { + int samples = exclusiveSamples.GetValueOrDefault(mi.Name, 0); + sw.WriteLine($"{mi.Name}\t{mi.Size}\t{samples}"); + sizesWritten++; + } + } + + Console.WriteLine($" CallChain JSON: {new FileInfo(outputJsonPath).Length:N0} bytes"); + Console.WriteLine($" Method sizes: {sizesPath} ({sizesWritten:N0} methods)"); + + return resolvedEdges > 0 ? 0 : 1; + } + + static string? 
ResolveIp(List<(ulong start, int size, string name)> sortedMethods, ulong ip) + { + // Binary search for the method containing this IP + int lo = 0; + int hi = sortedMethods.Count - 1; + + while (lo <= hi) + { + int mid = lo + (hi - lo) / 2; + ulong methodStart = sortedMethods[mid].start; + + if (ip < methodStart) + { + hi = mid - 1; + } + else if (ip >= methodStart + (ulong)sortedMethods[mid].size) + { + lo = mid + 1; + } + else + { + return sortedMethods[mid].name; + } + } + + return null; + } + + static string FormatMethodName(MethodLoadUnloadVerboseTraceData evt) + { + // Format: Namespace.Type.Method(ArgTypes) + // crossgen2's CallChainProfile resolves by matching against MethodDesc.ToString() + string ns = evt.MethodNamespace; + string name = evt.MethodName; + string sig = evt.MethodSignature; + + if (!string.IsNullOrEmpty(ns)) + return $"{ns}.{name}({sig})"; + return $"{name}({sig})"; + } + + record MethodInfo(string Name, ulong Start, int Size); +} diff --git a/tools/PgoTrim/Dockerfile.lttng-coreclr b/tools/PgoTrim/Dockerfile.lttng-coreclr new file mode 100644 index 00000000000..31fbd99b5b2 --- /dev/null +++ b/tools/PgoTrim/Dockerfile.lttng-coreclr @@ -0,0 +1,36 @@ +# SPDX-FileCopyrightText: 2026 Demerzel Solutions Limited +# SPDX-License-Identifier: LGPL-3.0-only +# +# Builds libcoreclr.so + libcoreclrtraceptprovider.so with LTTng enabled. +# The Microsoft .NET SDK ships a dummy event provider (no-op FireEtXplat* +# functions). This image rebuilds coreclr with FEATURE_EVENTSOURCE_XPLAT=1 +# so the event dispatch code calls tracepoint() and perfcollect can capture +# CLR events for SPGO profiling. +# +# Build & push (one-time, only needed when .NET version changes): +# docker buildx build --platform=linux/amd64 \ +# -f tools/PgoTrim/Dockerfile.lttng-coreclr \ +# -t nethermindeth/nethermind:lttng-coreclr-10.0.5 \ +# --push . 
+ +FROM --platform=linux/amd64 mcr.microsoft.com/dotnet/sdk:10.0.201-noble@sha256:478b9038d187e5b5c29bfa8173ded5d29e864b5ad06102a12106380ee01e2e49 + +ARG DOTNET_RUNTIME_VERSION=10.0.5 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential cmake clang llvm lld ninja-build \ + python-is-python3 git curl \ + libicu-dev liblttng-ust-dev libssl-dev libkrb5-dev && \ + rm -rf /var/lib/apt/lists/* + +RUN git clone --depth 1 --branch v${DOTNET_RUNTIME_VERSION} \ + https://github.com/dotnet/runtime.git /runtime + +# Build only the coreclr runtime subset. +# Produces both libcoreclr.so (with LTTng event dispatch) and +# libcoreclrtraceptprovider.so (with LTTng tracepoint definitions). +RUN cd /runtime && \ + ./build.sh clr.runtime -c Release -a x64 + +WORKDIR /runtime/artifacts/bin/coreclr/linux.x64.Release diff --git a/Dockerfile.pgo b/tools/PgoTrim/Dockerfile.pgo similarity index 91% rename from Dockerfile.pgo rename to tools/PgoTrim/Dockerfile.pgo index 736aca2b2c5..3a8d3a3b761 100644 --- a/Dockerfile.pgo +++ b/tools/PgoTrim/Dockerfile.pgo @@ -48,7 +48,10 @@ ENV DOTNET_GCRegionRange=0x800000000 ENV DOTNET_EnableEventPipe=1 ENV DOTNET_EventPipeCircularMB=256 ENV DOTNET_EventPipeOutputPath=/nethermind/pgo/nethermind-{pid}.nettrace -ENV DOTNET_EventPipeConfig=Microsoft-Windows-DotNETRuntime:0x1E000080018:4 +# JIT/R2R method events + PGO instrumentation data. +# Level 5 (Verbose) is required for SampledProfileTraceData events that dotnet-pgo +# uses to build caller->callee edge weights for Pettis-Hansen method layout. 
+ENV DOTNET_EventPipeConfig=Microsoft-Windows-DotNETRuntime:0x1E000080018:5 # Write edge/block profiling data after Tier-1 compilation for branch optimization ENV DOTNET_TieredPGO=1 ENV DOTNET_WritePGOData=1 diff --git a/tools/PgoTrim/Dockerfile.pgo-sampling b/tools/PgoTrim/Dockerfile.pgo-sampling new file mode 100644 index 00000000000..65902999eb7 --- /dev/null +++ b/tools/PgoTrim/Dockerfile.pgo-sampling @@ -0,0 +1,177 @@ +# SPDX-FileCopyrightText: 2026 Demerzel Solutions Limited +# SPDX-License-Identifier: LGPL-3.0-only +# +# Collects CPU sampling profile data via perf + LTTng for SPGO. +# perfcollect produces .trace.zip which dotnet-pgo --spgo converts +# to .mibc with sample-based block counts for method layout optimization. +# +# The Microsoft .NET 10 SDK does not ship libcoreclrtraceptprovider.so +# (LTTng tracepoint probes) — PR dotnet/runtime#113876 made it optional. +# We build it from source and inject it so the runtime emits LTTng events. +# +# This image is run AFTER the main PGO collection (Dockerfile.pgo). +# The two .mibc files are merged to combine edge/type data with SPGO samples. + +FROM --platform=$BUILDPLATFORM mcr.microsoft.com/dotnet/sdk:10.0.201-noble@sha256:478b9038d187e5b5c29bfa8173ded5d29e864b5ad06102a12106380ee01e2e49 AS build + +ARG BUILD_CONFIG=release +ARG CI=true +ARG COMMIT_HASH +ARG SOURCE_DATE_EPOCH +ARG TARGETARCH + +WORKDIR /nethermind + +COPY src/Nethermind src/Nethermind +COPY Directory.*.props Directory.Build.targets ./ +COPY global.json . +COPY nuget.config . 
+ +RUN arch=$([ "$TARGETARCH" = "amd64" ] && echo "x64" || echo "$TARGETARCH") && \ + rid="linux-${arch}" && \ + cd src/Nethermind/Nethermind.Runner && \ + dotnet restore --locked-mode && \ + dotnet publish -c $BUILD_CONFIG -r $rid -o /publish --no-self-contained \ + -p:PublishReadyToRun=false \ + -p:SourceRevisionId=$COMMIT_HASH + +# A temporary symlink to support the old executable name +RUN ln -sr /publish/nethermind /publish/Nethermind.Runner + +# Pre-built coreclr with LTTng tracepoint support. +# The Microsoft SDK ships a dummy provider (no-op FireEtXplat* functions). +# This image was built from dotnet/runtime v10.0.5 with FEATURE_EVENTSOURCE_XPLAT=1. +# Rebuild with: docker buildx build --platform=linux/amd64 -f tools/PgoTrim/Dockerfile.lttng-coreclr -t nethermindeth/nethermind:lttng-coreclr-10.0.5 --push . +FROM --platform=linux/amd64 nethermindeth/nethermind:lttng-coreclr-10.0.5 AS lttng-build + +# Runtime stage +FROM mcr.microsoft.com/dotnet/sdk:10.0.201-noble@sha256:478b9038d187e5b5c29bfa8173ded5d29e864b5ad06102a12106380ee01e2e49 + +ARG DOTNET_RUNTIME_VERSION=10.0.5 + +WORKDIR /nethermind + +# Install perf + LTTng for perfcollect +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + linux-tools-generic \ + lttng-tools \ + liblttng-ust-dev \ + zip \ + unzip && \ + curl -sSL https://aka.ms/perfcollect -o /usr/local/bin/perfcollect && \ + chmod +x /usr/local/bin/perfcollect && \ + # Patch perfcollect for dotnet-pgo SPGO support: + # 1. Add MethodDetails tracepoint (keyword MethodDiagnostic) — predates perfcollect's list + # 2. Add TypeKeyword to default events — needed for BulkType events + # 3. Add AssemblyLoad to second LoaderKeyword array — the duplicate declaration + # overwrites the first which had AssemblyLoad. dotnet-pgo needs AssemblyLoadUnload. + # 4. 
Add JittedMethodILToNativeMapKeyword — needed for SPGO block attribution + sed -i '/DotNETRuntime:MethodJittingStarted_V1/a\ DotNETRuntime:MethodDetails' /usr/local/bin/perfcollect && \ + sed -i '/EnableLTTngEvents \${DotNETRuntime_CompilationKeyword\[@\]}/a\ EnableLTTngEvents ${DotNETRuntime_TypeKeyword[@]}\n EnableLTTngEvents ${DotNETRuntime_JittedMethodILToNativeMapKeyword[@]}' /usr/local/bin/perfcollect && \ + # perfcollect only lists MethodILToNativeMap (v0) but runtime fires _V1 — add it + sed -i '/DotNETRuntime:MethodILToNativeMap$/a\ DotNETRuntime:MethodILToNativeMap_V1' /usr/local/bin/perfcollect && \ + sed -i '/DotNETRuntime:ModuleUnload_V2/a\ DotNETRuntime:AssemblyLoad\n DotNETRuntime:AssemblyLoad_V1\n DotNETRuntime:AssemblyUnload\n DotNETRuntime:AssemblyUnload_V1' /usr/local/bin/perfcollect && \ + rm -rf /var/lib/apt/lists/* + +# Replace libcoreclr.so with our LTTng-enabled build and add the +# tracepoint provider .so that it dlopen's at startup +COPY --from=lttng-build /runtime/artifacts/bin/coreclr/linux.x64.Release/libcoreclr.so \ + /runtime/artifacts/bin/coreclr/linux.x64.Release/libcoreclrtraceptprovider.so \ + /usr/share/dotnet/shared/Microsoft.NETCore.App/${DOTNET_RUNTIME_VERSION}/ + +# Override runtimeconfig.json settings not suitable for PGO collection +ENV DOTNET_System_Diagnostics_Tracing_EventSource_IsSupported=true +ENV DOTNET_GCLargePages=0 +ENV DOTNET_GCRegionRange=0x800000000 + +# Delay Tier-1 recompilation so the perf map (written at Tier-0 JIT time) +# stays valid for the entire perfcollect window. Without this, Tier-1 +# recompiles methods to new addresses and 97% of CPU samples fall outside +# managed code (unresolvable IPs). +ENV DOTNET_TC_CallCountingDelayMs=900000 + +# Enable LTTng tracepoints + perf map for managed symbol resolution +ENV DOTNET_PerfMapEnabled=1 +ENV DOTNET_EnableEventLog=1 +# Without DOTNET_LTTngConfig, ActivateAllKeywordsOfAllProviders enables all +# keywords at Verbose level — the broadest possible event set. 
+# Explicit config was tried (0x60000A0018:5) but MethodILToNativeMap_V1 +# events were still not emitted. Using all-keywords to diagnose. + +VOLUME /nethermind/keystore +VOLUME /nethermind/logs +VOLUME /nethermind/nethermind_db +VOLUME /nethermind/pgo + +EXPOSE 8545 8551 30303 + +COPY --from=build /publish . + +# perfcollect runs in background collecting perf CPU samples + LTTng CLR events. +# Nethermind runs in background; we wait for perfcollect to finish first +# (collection + post-processing) to ensure .trace.zip is written before +# Docker's 10s stop grace period kills the container. +COPY <<'SCRIPT' /nethermind/trace-entrypoint.sh +#!/bin/bash + +# Prevent restart loops: if we already ran and failed, exit cleanly +# so Docker's "unless-stopped" restart policy doesn't keep retrying. +if [[ -f /nethermind/pgo/.started ]]; then + echo "trace: container already ran — exiting to prevent restart loop" + exit 0 +fi +touch /nethermind/pgo/.started + +mkdir -p /nethermind/pgo + +# Forward SIGTERM/SIGINT to Nethermind for graceful shutdown +trap 'kill -TERM $nethermind_pid 2>/dev/null' TERM INT + +echo "trace: starting perfcollect (perf + LTTng)" +COLLECTSEC="${COLLECTSEC:-120}" +echo "trace: collectsec=${COLLECTSEC}" +perfcollect collect /nethermind/pgo/sampling -collectsec "${COLLECTSEC}" &> /nethermind/pgo/perfcollect.log & +trace_pid=$! + +# Wait for LTTng session to be active before starting Nethermind. +# The runtime registers UST tracepoints via dlopen at startup; +# the LTTng session must be active to capture them. +echo "trace: waiting for LTTng session..." +for i in $(seq 1 30); do + if grep -q "Collection started" /nethermind/pgo/perfcollect.log 2>/dev/null; then + echo "trace: LTTng session active" + break + fi + sleep 1 +done + +if ! 
kill -0 $trace_pid 2>/dev/null; then + echo "trace: ERROR — perfcollect exited early:" + cat /nethermind/pgo/perfcollect.log 2>/dev/null + echo "trace: starting Nethermind without perfcollect" +else + echo "trace: perfcollect running (pid $trace_pid)" +fi + +echo "trace: starting Nethermind" +./nethermind "$@" & +nethermind_pid=$! + +# Wait for perfcollect to finish (collection + post-processing) +wait $trace_pid 2>/dev/null || true +echo "trace: perfcollect finished" +echo "trace: perfcollect log:" +cat /nethermind/pgo/perfcollect.log 2>/dev/null || true +echo "trace: pgo volume contents:" +ls -la /nethermind/pgo/ 2>/dev/null || true + +# Wait for Nethermind — SIGTERM from Docker is forwarded via the trap above +wait $nethermind_pid 2>/dev/null +nethermind_exit=$? +echo "trace: Nethermind exited with code $nethermind_exit" +exit $nethermind_exit +SCRIPT +RUN chmod +x /nethermind/trace-entrypoint.sh + +ENTRYPOINT ["./trace-entrypoint.sh"] diff --git a/tools/PgoTrim/PgoTrim.csproj b/tools/PgoTrim/PgoTrim.csproj index b9b1bb1334c..01c41c5b765 100644 --- a/tools/PgoTrim/PgoTrim.csproj +++ b/tools/PgoTrim/PgoTrim.csproj @@ -2,4 +2,11 @@ Exe + + + + + + + diff --git a/tools/PgoTrim/Program.cs b/tools/PgoTrim/Program.cs index 26147c27f23..11ca4569308 100644 --- a/tools/PgoTrim/Program.cs +++ b/tools/PgoTrim/Program.cs @@ -15,6 +15,40 @@ // // Usage: dotnet run -- [output.jit.gz] [--min-block 100] [--min-edge 250] +// Subcommand: convert-trace — inject missing MethodDetails CTF mapping and convert to .etlx +if (args.Length >= 1 && args[0] == "convert-trace") +{ + if (args.Length < 3) + { + Console.Error.WriteLine("Usage: PgoTrim convert-trace "); + return 1; + } + return TraceConverter.Convert(args[1], args[2]); +} + +// Subcommand: extract-spgo — extract perf sample IPs for dotnet-pgo SPGO +if (args.Length >= 1 && args[0] == "extract-spgo") +{ + if (args.Length < 3) + { + Console.Error.WriteLine("Usage: PgoTrim extract-spgo "); + return 1; + } + return 
SpgoExtractor.Extract(args[1], args[2]); +} + +// Subcommand: generate-callchain — resolve .callgraph IPs to method names for crossgen2 CallFrequency layout +if (args.Length >= 1 && args[0] == "generate-callchain") +{ + if (args.Length < 3) + { + Console.Error.WriteLine("Usage: PgoTrim generate-callchain "); + Console.Error.WriteLine(" Reads .callgraph file (same base name as .etlx) and resolves IPs to method names."); + return 1; + } + return CallChainGenerator.Generate(args[1], args[2]); +} + int minBlock = 100; int minEdge = 250; string? inputPath = null; diff --git a/tools/PgoTrim/SpgoExtractor.cs b/tools/PgoTrim/SpgoExtractor.cs new file mode 100644 index 00000000000..abced75a1ab --- /dev/null +++ b/tools/PgoTrim/SpgoExtractor.cs @@ -0,0 +1,97 @@ +// SPDX-FileCopyrightText: 2026 Demerzel Solutions Limited +// SPDX-License-Identifier: LGPL-3.0-only + +using System.Globalization; +using System.IO; +using System.IO.Compression; +using System.Linq; + +/// +/// Extracts CPU sample data from perfcollect's perf.data.txt: +/// - .spgo file: one hex leaf IP per line for SPGO basic block attribution +/// - .callgraph file: callee_ip caller_ip pairs for Pettis-Hansen method layout +/// +static class SpgoExtractor +{ + public static int Extract(string traceZipPath, string outputPath) + { + if (!traceZipPath.EndsWith(".trace.zip", StringComparison.OrdinalIgnoreCase)) + { + Console.Error.WriteLine($"Input must be a .trace.zip file: {traceZipPath}"); + return 1; + } + + Console.WriteLine($"Extracting perf sample IPs from {traceZipPath}..."); + + using ZipArchive zip = ZipFile.OpenRead(traceZipPath); + ZipArchiveEntry? 
perfEntry = zip.Entries.FirstOrDefault(e => e.Name == "perf.data.txt"); + if (perfEntry == null) + { + Console.Error.WriteLine("No perf.data.txt found in .trace.zip"); + return 1; + } + + string callGraphPath = Path.ChangeExtension(outputPath, ".callgraph"); + + int sampleCount = 0; + int frameCount = 0; + int edgeCount = 0; + + using (Stream stream = perfEntry.Open()) + using (StreamReader reader = new StreamReader(stream)) + using (StreamWriter spgoWriter = new StreamWriter(outputPath)) + using (StreamWriter cgWriter = new StreamWriter(callGraphPath)) + { + cgWriter.WriteLine("# callee_ip caller_ip"); + + string? line; + while ((line = reader.ReadLine()) != null) + { + // Stack frame lines start with \t and contain a hex IP: + // 7dc812345678 MethodName+0x1a3 (/path/to/module) + if (line.Length > 0 && line[0] == '\t') + { + ReadOnlySpan trimmed = line.AsSpan().TrimStart(); + int spaceIdx = trimmed.IndexOf(' '); + if (spaceIdx > 0 && + ulong.TryParse(trimmed[..spaceIdx], NumberStyles.HexNumber, null, out _)) + { + frameCount++; + // Leaf IP for SPGO block attribution + string leafIp = trimmed[..spaceIdx].ToString(); + spgoWriter.WriteLine(leafIp); + sampleCount++; + + // Read caller frame (second frame) for call graph + line = reader.ReadLine(); + if (line != null && line.Length > 0 && line[0] == '\t') + { + frameCount++; + ReadOnlySpan callerTrimmed = line.AsSpan().TrimStart(); + int callerSpace = callerTrimmed.IndexOf(' '); + if (callerSpace > 0 && + ulong.TryParse(callerTrimmed[..callerSpace], NumberStyles.HexNumber, null, out _)) + { + cgWriter.Write(leafIp); + cgWriter.Write(' '); + cgWriter.WriteLine(callerTrimmed[..callerSpace]); + edgeCount++; + } + + // Skip remaining frames until next sample header + while ((line = reader.ReadLine()) != null && line.Length > 0 && line[0] == '\t') + frameCount++; + } + } + } + } + } + + Console.WriteLine($" {sampleCount:N0} samples extracted ({frameCount:N0} total frames)"); + Console.WriteLine($" {edgeCount:N0} 
caller-callee edges extracted"); + Console.WriteLine($" Output: {outputPath}"); + Console.WriteLine($" Output: {callGraphPath}"); + + return sampleCount > 0 ? 0 : 1; + } +} diff --git a/tools/PgoTrim/TraceConverter.cs b/tools/PgoTrim/TraceConverter.cs new file mode 100644 index 00000000000..a759bd0ed5d --- /dev/null +++ b/tools/PgoTrim/TraceConverter.cs @@ -0,0 +1,126 @@ +// SPDX-FileCopyrightText: 2026 Demerzel Solutions Limited +// SPDX-License-Identifier: LGPL-3.0-only + +using System.Collections; +using System.Reflection; +using Microsoft.Diagnostics.Tracing; +using Microsoft.Diagnostics.Tracing.Etlx; +using Microsoft.Diagnostics.Tracing.Parsers; +using Microsoft.Diagnostics.Tracing.Parsers.Clr; + +/// +/// Converts a perfcollect .trace.zip to .etlx with the missing MethodDetails CTF mapping. +/// Uses the same CreateFromLinuxEventSources path as TraceLog.CreateFromLttngTextDataFile +/// but with the MethodDetails mapping injected first. +/// +static class TraceConverter +{ + public static int Convert(string inputPath, string outputPath) + { + if (!inputPath.EndsWith(".trace.zip", StringComparison.OrdinalIgnoreCase)) + { + Console.Error.WriteLine($"Input must be a .trace.zip file: {inputPath}"); + return 1; + } + + string etlxPath = outputPath.EndsWith(".etlx", StringComparison.OrdinalIgnoreCase) + ? outputPath + : Path.ChangeExtension(outputPath, ".etlx"); + + Version? 
teVersion = typeof(TraceLog).Assembly.GetName().Version; + Console.WriteLine($"TraceEvent version: {teVersion}"); + Console.WriteLine($"Converting {inputPath} to {etlxPath}..."); + + using (CtfTraceEventSource ctfSource = new CtfTraceEventSource(inputPath)) + { + // Register CLR parser (populates _eventMapping with known mappings) + new ClrTraceEventParser(ctfSource); + + // Inject missing CTF mappings + // CtfEventMapping(eventName, providerGuid, opcode, id, version) + InjectCtfMapping(ctfSource, "DotNETRuntime:MethodDetails", + ClrTraceEventParser.ProviderGuid, opcode: 43, id: 72, version: 0); + // Use version=0 for the V1 event — TraceLog's template lookup matches + // on (providerGuid, eventId, opcode) and the only registered template + // for ID 190 is version 0. Version 1 events won't match if we use version=1. + InjectCtfMapping(ctfSource, "DotNETRuntime:MethodILToNativeMap_V1", + ClrTraceEventParser.ProviderGuid, opcode: 87, id: 190, version: 0); + + // Use the same path as CreateFromLttngTextDataFile: + // CreateFromLinuxEventSources(source, etlxPath, options) + // This is an internal method — call via reflection + Console.WriteLine("Processing events via CreateFromLinuxEventSources..."); + MethodInfo? 
method = typeof(TraceLog).GetMethod("CreateFromLinuxEventSources", + BindingFlags.NonPublic | BindingFlags.Static); + + if (method != null) + { + method.Invoke(null, [ctfSource, etlxPath, new TraceLogOptions { KeepAllEvents = true }]); + } + else + { + Console.Error.WriteLine("Error: Could not find CreateFromLinuxEventSources"); + return 1; + } + } + + // Verify the .etlx has the events dotnet-pgo needs + Console.WriteLine("Verifying via OpenOrConvert (same path as dotnet-pgo)..."); + using TraceLog traceLog = TraceLog.OpenOrConvert(etlxPath, new TraceLogOptions { KeepAllEvents = true }); + int totalMethodDetails = 0; + int totalMethodLoad = 0; + int totalJitStart = 0; + int totalILToNativeMap = 0; + + foreach (TraceProcess process in traceLog.Processes) + { + int md = process.EventsInProcess.ByEventType().Count(); + int ml = process.EventsInProcess.ByEventType().Count(); + int js = process.EventsInProcess.ByEventType().Count(); + int il = process.EventsInProcess.ByEventType().Count(); + totalMethodDetails += md; + totalMethodLoad += ml; + totalJitStart += js; + totalILToNativeMap += il; + + if (md > 0 || ml > 0) + { + Console.WriteLine($" {process.Name} (PID {process.ProcessID}): " + + $"MethodDetails={md:N0} MethodLoadVerbose={ml:N0} JittingStarted={js:N0} ILToNativeMap={il:N0}"); + } + } + + Console.WriteLine($"Total: MethodDetails={totalMethodDetails:N0} " + + $"MethodLoadVerbose={totalMethodLoad:N0} JittingStarted={totalJitStart:N0} " + + $"ILToNativeMap={totalILToNativeMap:N0}"); + Console.WriteLine($"Output: {etlxPath} ({new FileInfo(etlxPath).Length / 1024.0 / 1024.0:F1} MB)"); + + return totalMethodDetails > 0 ? 0 : 1; + } + + /// <summary> + /// Adds (or overwrites) the entry for <paramref name="eventName"/> in the source's private + /// <c>_eventMapping</c> dictionary, which is reached via reflection. The stored value is built by + /// invoking the first public constructor of the type named <c>CtfEventMapping</c> with + /// (eventName, providerGuid, opcode, id, version). + /// Best-effort: if the field value is not an IDictionary, or no such constructor is found, + /// a warning is written to stderr and nothing is injected. + /// </summary> + static void InjectCtfMapping(CtfTraceEventSource source, string eventName, + Guid providerGuid, int opcode, int id, int version) + { + FieldInfo? 
field = typeof(CtfTraceEventSource).GetField("_eventMapping", + BindingFlags.NonPublic | BindingFlags.Instance); + + if (field?.GetValue(source) is not IDictionary dict) + { + Console.Error.WriteLine("Warning: Could not access _eventMapping"); + return; + } + + Type? mappingType = typeof(CtfTraceEventSource).Assembly.GetTypes() + .FirstOrDefault(t => t.Name == "CtfEventMapping"); + ConstructorInfo? ctor = mappingType?.GetConstructors().FirstOrDefault(); + + if (ctor == null) + { + Console.Error.WriteLine("Warning: Could not find CtfEventMapping constructor"); + return; + } + + dict[eventName] = ctor.Invoke([eventName, providerGuid, opcode, id, version]); + Console.WriteLine($"Injected: {eventName} -> opcode={opcode}, id={id}"); + } +} diff --git a/tools/PgoTrim/dotnet-pgo-patches/NethermindPgoPatches.cs b/tools/PgoTrim/dotnet-pgo-patches/NethermindPgoPatches.cs new file mode 100644 index 00000000000..753f195dc75 --- /dev/null +++ b/tools/PgoTrim/dotnet-pgo-patches/NethermindPgoPatches.cs @@ -0,0 +1,150 @@ +// SPDX-FileCopyrightText: 2026 Demerzel Solutions Limited +// SPDX-License-Identifier: LGPL-3.0-only +// +// Supplementary file compiled into dotnet-pgo to support Nethermind's PGO pipeline. +// Reads .spgo (leaf sample IPs) and .callgraph (caller-callee IP pairs) files +// produced by PgoTrim from perfcollect's perf.data.txt. +// +// Copied into the dotnet-pgo source tree at build time by collect-pgo-profile.yml. + +#nullable enable + +using System; +using System.Collections.Generic; +using System.Globalization; +using System.IO; + +using Internal.TypeSystem; + +namespace Microsoft.Diagnostics.Tools.Pgo +{ + internal static class NethermindPgoPatches + { + /// + /// Reads a .spgo file (one hex IP per line) and attributes each sample + /// to the correlator for SPGO basic block attribution. 
+        ///
+        /// <param name="etlxPath">Path of the .etlx trace; the samples file is this path with its extension changed to <c>.spgo</c>.</param>
+        /// <param name="correlator">Receives one sample (weight 1) per successfully parsed instruction pointer.</param>
+        public static void LoadSpgoSamples(string etlxPath, SampleCorrelator correlator)
+        {
+            string spgoFile = Path.ChangeExtension(etlxPath, ".spgo");
+            if (!File.Exists(spgoFile))
+                return;
+
+            int count = 0;
+            foreach (string line in File.ReadLines(spgoFile))
+            {
+                // Lines that do not parse as a hex number (blank lines, stray text) are skipped.
+                if (TryParseHexIp(line, out ulong ip))
+                {
+                    correlator.AttributeSamplesToIP(ip, 1);
+                    count++;
+                }
+            }
+            Program.PrintOutput($"Supplementary .spgo samples loaded: {count}");
+        }
+
+        /// <summary>
+        /// Reads a .callgraph file (callee_hex_ip caller_hex_ip per line) and
+        /// populates the call graph and exclusive sample counts for Pettis-Hansen.
+        /// Empty lines, lines starting with '#', and lines that do not consist of
+        /// two parseable hex values are ignored.
+        /// </summary>
+        /// <param name="etlxPath">Path of the .etlx trace; the input file is this path with its extension changed to <c>.callgraph</c>.</param>
+        /// <param name="mmap">Resolves an instruction pointer to a method via <c>GetMethod</c>; a null result means unresolved.</param>
+        /// <param name="callGraph">caller -> (callee -> weight). Incremented once per line whose two IPs both resolve.</param>
+        /// <param name="exclusiveSamples">method -> sample count. Incremented once per line whose callee IP resolves.</param>
+        /// <remarks>Does nothing when any of the three object arguments is null or the file does not exist.</remarks>
+        public static void LoadCallGraph(
+            string etlxPath,
+            MethodMemoryMap mmap,
+            Dictionary<MethodDesc, Dictionary<MethodDesc, int>> callGraph,
+            Dictionary<MethodDesc, int> exclusiveSamples)
+        {
+            if (callGraph == null || exclusiveSamples == null || mmap == null)
+                return;
+
+            string callGraphFile = Path.ChangeExtension(etlxPath, ".callgraph");
+            if (!File.Exists(callGraphFile))
+                return;
+
+            int edgeCount = 0;
+            int sampleCount = 0;
+
+            foreach (string line in File.ReadLines(callGraphFile))
+            {
+                ReadOnlySpan<char> span = line.AsSpan().Trim();
+                if (span.IsEmpty || span[0] == '#')
+                    continue;
+
+                // Callee and caller are separated by a space or a tab.
+                int separator = span.IndexOfAny(' ', '\t');
+                if (separator <= 0)
+                    continue;
+
+                if (!TryParseHexIp(span.Slice(0, separator), out ulong calleeIp))
+                    continue;
+                if (!TryParseHexIp(span.Slice(separator + 1), out ulong callerIp))
+                    continue;
+
+                MethodDesc callee = mmap.GetMethod(calleeIp);
+                MethodDesc caller = mmap.GetMethod(callerIp);
+
+                // Without a resolved callee the line contributes neither a sample nor an edge.
+                if (callee == null)
+                    continue;
+
+                // Count exclusive samples for the callee (leaf of stack)
+                sampleCount++;
+                exclusiveSamples[callee] = exclusiveSamples.GetValueOrDefault(callee) + 1;
+
+                if (caller == null)
+                    continue;
+
+                // Add caller->callee edge for Pettis-Hansen
+                edgeCount++;
+                if (!callGraph.TryGetValue(caller, out Dictionary<MethodDesc, int>? callees))
+                {
+                    callees = new Dictionary<MethodDesc, int>();
+                    callGraph[caller] = callees;
+                }
+                callees[callee] = callees.GetValueOrDefault(callee) + 1;
+            }
+
+            Program.PrintOutput($"Supplementary .callgraph loaded: {edgeCount:N0} edges, {sampleCount:N0} exclusive samples");
+        }
+
+        /// <summary>
+        /// Wraps SmoothAllProfiles with per-method try-catch so one method
+        /// with a disconnected flow graph doesn't kill the entire SPGO pass.
+        /// FlowSmoothing.MakeGraphFeasible crashes with "Stack empty" for some methods.
+        /// </summary>
+        /// <param name="correlator">Correlator whose per-method profiles are smoothed.</param>
+        /// <remarks>
+        /// Failures are reported on stderr and never rethrown, except in the fallback path
+        /// where the original <c>SmoothAllProfiles</c> is called directly.
+        /// </remarks>
+        public static void SafeSmoothAllProfiles(SampleCorrelator correlator)
+        {
+            // Access internal _methodInf via reflection since SmoothAllProfiles
+            // iterates it directly and we need per-method error handling.
+            System.Reflection.FieldInfo? field = typeof(SampleCorrelator).GetField("_methodInf",
+                System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);
+            if (field?.GetValue(correlator) is not System.Collections.IDictionary dict)
+            {
+                // Fallback: call original and let it throw if it throws
+                correlator.SmoothAllProfiles();
+                return;
+            }
+
+            int unresolved = 0;
+            foreach (System.Collections.DictionaryEntry entry in dict)
+            {
+                try
+                {
+                    // PerMethodInfo.Profile.SmoothFlow()
+                    object? pmi = entry.Value;
+                    object? profile = pmi?.GetType().GetProperty("Profile")?.GetValue(pmi);
+                    System.Reflection.MethodInfo? smoothMethod = profile?.GetType().GetMethod("SmoothFlow");
+                    if (smoothMethod == null)
+                    {
+                        // Reflection lookup failed; count it instead of skipping silently.
+                        unresolved++;
+                        continue;
+                    }
+                    smoothMethod.Invoke(profile, null);
+                }
+                catch (Exception ex)
+                {
+                    // Reflection wraps the real failure; prefer the inner message when present.
+                    string innerMsg = ex.InnerException?.Message ?? ex.Message;
+                    Console.Error.WriteLine($"Warning: SmoothFlow failed for method {entry.Key}: {innerMsg}");
+                }
+            }
+
+            if (unresolved > 0)
+                Console.Error.WriteLine($"Warning: SmoothFlow was not invoked for {unresolved:N0} method(s): Profile/SmoothFlow could not be resolved via reflection");
+        }
+
+        /// <summary>
+        /// Parses a hexadecimal instruction pointer, ignoring surrounding whitespace
+        /// and an optional <c>0x</c>/<c>0X</c> prefix.
+        /// </summary>
+        /// <returns><c>true</c> when <paramref name="text"/> was a valid hex number.</returns>
+        private static bool TryParseHexIp(ReadOnlySpan<char> text, out ulong ip)
+        {
+            text = text.Trim();
+            if (text.StartsWith("0x", StringComparison.OrdinalIgnoreCase))
+                text = text.Slice(2);
+
+            // Invariant culture: the input is machine-generated, never localized.
+            return ulong.TryParse(text, NumberStyles.HexNumber, CultureInfo.InvariantCulture, out ip);
+        }
+    }
+}