Skip to content

Commit 971cfdb

Browse files
Bloxsterclaude
andcommitted
ci(docs): auto-update hardware requirements disk sizes from sync CI
Adds a CI pipeline that automatically measures and publishes up-to-date disk usage figures for the hardware requirements docs page. - qa-sync-from-scratch: measure `du -sb` before data dir cleanup, upload per-chain artifact (disk-usage-<chain>-full.txt) - qa-sync-from-scratch (minimal node): same, artifact named -minimal - update-disk-sizes.yml: new workflow triggered by workflow_run on both sync workflows; downloads artifacts, runs update-disk-sizes.py, pushes to docs/auto/disk-sizes branch and opens a draft PR - docs/site/src/data/disk-sizes.json: single source of truth for disk usage values (full/minimal updated by CI; archive remains manual) - docs/site/scripts/update-disk-sizes.py: merges artifact bytes into the JSON, formatting SI units (GB/TB) - hardware-requirements.mdx: imports disk-sizes.json and renders values dynamically; removes the static dated :::info note Archive node measurements are out of scope for this PR and will be added in a follow-up once snapshot machine access is confirmed. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 99d9217 commit 971cfdb

6 files changed

Lines changed: 319 additions & 10 deletions

File tree

.github/workflows/qa-sync-from-scratch-minimal-node.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,20 @@ jobs:
118118
name: fd-leak-analysis-${{ env.CHAIN }}
119119
path: ${{ github.workspace }}/fd-leak-analysis-${{ env.CHAIN }}.md
120120

121+
# Record the size of the Erigon data directory before the clean-up step
# removes it. The result is published as a per-chain artifact that the
# "Docs - Update disk sizes" workflow consumes.
- name: Measure disk usage
  if: ${{ always() && steps.test_step.outputs.test_executed == 'true' }}
  run: |
    # du can exit non-zero on a partially unreadable tree while still
    # printing a total, so validate the value itself, not the exit status.
    BYTES=$(du -sb "$ERIGON_DATA_DIR" | awk '{print $1}')
    case "$BYTES" in
      '' | *[!0-9]*)
        # Never publish an empty or garbled measurement.
        echo "::warning::Could not measure disk usage of $ERIGON_DATA_DIR; no artifact written"
        exit 0
        ;;
    esac
    echo "Disk usage for $CHAIN minimal: ${BYTES} bytes"
    echo "$BYTES" > "${{ github.workspace }}/disk-usage-${{ env.CHAIN }}-minimal.txt"

- name: Upload disk usage
  if: ${{ always() && steps.test_step.outputs.test_executed == 'true' }}
  uses: actions/upload-artifact@v6
  with:
    name: disk-usage-${{ env.CHAIN }}-minimal
    path: ${{ github.workspace }}/disk-usage-${{ env.CHAIN }}-minimal.txt
    # The measure step may legitimately skip writing the file.
    if-no-files-found: warn
134+
121135
- name: Clean up Erigon data directory
122136
if: always()
123137
run: |

.github/workflows/qa-sync-from-scratch.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,20 @@ jobs:
133133
name: fd-leak-analysis-${{ env.CHAIN }}
134134
path: ${{ github.workspace }}/fd-leak-analysis-${{ env.CHAIN }}.md
135135

136+
# Record the size of the Erigon data directory before the clean-up step
# removes it. The result is published as a per-chain artifact that the
# "Docs - Update disk sizes" workflow consumes.
- name: Measure disk usage
  if: ${{ always() && steps.test_step.outputs.test_executed == 'true' }}
  run: |
    # du can exit non-zero on a partially unreadable tree while still
    # printing a total, so validate the value itself, not the exit status.
    BYTES=$(du -sb "$ERIGON_DATA_DIR" | awk '{print $1}')
    case "$BYTES" in
      '' | *[!0-9]*)
        # Never publish an empty or garbled measurement.
        echo "::warning::Could not measure disk usage of $ERIGON_DATA_DIR; no artifact written"
        exit 0
        ;;
    esac
    echo "Disk usage for $CHAIN full: ${BYTES} bytes"
    echo "$BYTES" > "${{ github.workspace }}/disk-usage-${{ env.CHAIN }}-full.txt"

- name: Upload disk usage
  if: ${{ always() && steps.test_step.outputs.test_executed == 'true' }}
  uses: actions/upload-artifact@v6
  with:
    name: disk-usage-${{ env.CHAIN }}-full
    path: ${{ github.workspace }}/disk-usage-${{ env.CHAIN }}-full.txt
    # The measure step may legitimately skip writing the file.
    if-no-files-found: warn
149+
136150
- name: Clean up Erigon data directory
137151
if: always()
138152
run: |
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
name: Docs - Update disk sizes

# Triggers after a successful sync-from-scratch run (full or minimal node).
# Downloads the disk-usage artifacts, updates docs/site/src/data/disk-sizes.json,
# and opens (or updates) a draft PR against main.
on:
  workflow_run:
    workflows:
      - "QA - Sync from scratch"
      - "QA - Sync from scratch (minimal node)"
    types:
      - completed
    branches:
      - release/3.*
      - main
  workflow_dispatch:
    inputs:
      run_id:
        description: "Workflow run ID to pull disk-usage artifacts from"
        required: true
      prune_mode:
        description: "Prune mode measured in that run"
        required: true
        default: "full"
        type: choice
        options:
          - full
          - minimal

permissions:
  contents: write
  pull-requests: write
  actions: read

env:
  # Long-lived branch that carries the pending docs update.
  UPDATE_BRANCH: docs/auto/disk-sizes
  # Data file rendered by the hardware requirements page.
  DISK_SIZES_JSON: docs/site/src/data/disk-sizes.json

jobs:
  update-disk-sizes:
    # For workflow_run: only proceed on success.
    # For workflow_dispatch: always proceed.
    if: >
      github.event_name == 'workflow_dispatch' ||
      github.event.workflow_run.conclusion == 'success'
    runs-on: ubuntu-latest

    steps:
      - name: Checkout main
        uses: actions/checkout@v6
        with:
          ref: main

      - name: Determine prune mode and source run ID
        # Event data is passed through env instead of being interpolated into
        # the script, so its content can never be executed as shell code.
        env:
          EVENT_NAME: ${{ github.event_name }}
          DISPATCH_PRUNE_MODE: ${{ github.event.inputs.prune_mode }}
          DISPATCH_RUN_ID: ${{ github.event.inputs.run_id }}
          TRIGGER_WORKFLOW_NAME: ${{ github.event.workflow_run.name }}
          TRIGGER_RUN_ID: ${{ github.event.workflow_run.id }}
        run: |
          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
            PRUNE_MODE="$DISPATCH_PRUNE_MODE"
            SOURCE_RUN_ID="$DISPATCH_RUN_ID"
          elif [[ "$TRIGGER_WORKFLOW_NAME" == *"minimal"* ]]; then
            PRUNE_MODE="minimal"
            SOURCE_RUN_ID="$TRIGGER_RUN_ID"
          else
            PRUNE_MODE="full"
            SOURCE_RUN_ID="$TRIGGER_RUN_ID"
          fi

          # Validate before exporting: anything written to GITHUB_ENV becomes
          # an environment variable for every later step.
          case "$PRUNE_MODE" in
            full | minimal) ;;
            *)
              echo "::error::Unsupported prune mode: $PRUNE_MODE"
              exit 1
              ;;
          esac
          case "$SOURCE_RUN_ID" in
            '' | *[!0-9]*)
              echo "::error::Run ID must be numeric, got: $SOURCE_RUN_ID"
              exit 1
              ;;
          esac

          echo "PRUNE_MODE=$PRUNE_MODE" >> "$GITHUB_ENV"
          echo "SOURCE_RUN_ID=$SOURCE_RUN_ID" >> "$GITHUB_ENV"

      - name: Start from values pending in the open update PR
        # Full and minimal runs each update only their own entries. Without
        # this step the branch would be rebuilt from main every time, and the
        # force-push below would silently drop the other mode's pending update.
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          OPEN_PR=$(gh pr list --head "$UPDATE_BRANCH" --state open --json number --jq '.[0].number' 2>/dev/null || true)
          if [ -n "$OPEN_PR" ] \
            && git fetch --depth=1 origin "$UPDATE_BRANCH" \
            && git checkout FETCH_HEAD -- "$DISK_SIZES_JSON"; then
            echo "Seeded $DISK_SIZES_JSON from open PR #$OPEN_PR"
          else
            echo "No pending update found; starting from main"
          fi

      - name: Download disk usage artifacts
        uses: actions/download-artifact@v4
        with:
          run-id: ${{ env.SOURCE_RUN_ID }}
          github-token: ${{ secrets.GITHUB_TOKEN }}
          pattern: disk-usage-*
          path: ./artifacts
          merge-multiple: true

      - name: List downloaded artifacts
        run: ls -lh ./artifacts/ 2>/dev/null || echo "No artifacts found"

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.x"

      - name: Update disk-sizes.json
        run: |
          python3 docs/site/scripts/update-disk-sizes.py \
            ./artifacts \
            "$DISK_SIZES_JSON" \
            "$PRUNE_MODE"

      - name: Check for changes
        id: diff
        # Compares the working tree with the index, i.e. with the pending PR
        # values when they were seeded above, otherwise with main.
        run: |
          if git diff --quiet "$DISK_SIZES_JSON"; then
            echo "changed=false" >> "$GITHUB_OUTPUT"
            echo "No changes to disk-sizes.json"
          else
            echo "changed=true" >> "$GITHUB_OUTPUT"
            git diff "$DISK_SIZES_JSON"
          fi

      - name: Configure git
        if: steps.diff.outputs.changed == 'true'
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

      - name: Commit and push to update branch
        if: steps.diff.outputs.changed == 'true'
        run: |
          # The branch is always a single commit on top of current main, so
          # it is recreated and force-pushed rather than appended to.
          git checkout -B "$UPDATE_BRANCH"
          git add "$DISK_SIZES_JSON"
          git commit -m "chore(docs): auto-update measured disk sizes [$(date +%Y-%m-%d)]"
          git push --force origin "$UPDATE_BRANCH"

      - name: Create or update draft PR
        if: steps.diff.outputs.changed == 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          TODAY=$(date +%Y-%m-%d)
          EXISTING=$(gh pr list --head "$UPDATE_BRANCH" --state open --json number --jq '.[0].number' 2>/dev/null)

          if [ -z "$EXISTING" ]; then
            # Quoted heredoc delimiter: the body is taken literally, so the
            # backticks in it are not treated as command substitution.
            PR_BODY=$(cat <<'EOF'
          ## Automated disk size update

          Opened automatically by the **Docs - Update disk sizes** CI workflow after a successful sync-from-scratch run.

          ### What changed
          `docs/site/src/data/disk-sizes.json` — `full` and/or `minimal` mode disk usage updated from the latest CI measurement. The hardware requirements page consumes this file directly, so merging this PR is all that's needed.

          ### What this does NOT update
          - **Archive** mode: requires manual measurement from always-on snapshot machines.
          - **Recommended disk size** column: intentionally static (should remain a comfortable buffer above current usage).

          ### Before merging
          - Values should show gradual growth vs. the previous entry — a sudden drop or doubling is suspect.
          - If both `full` and `minimal` runs completed today, both sets of chains are included (the branch is force-pushed by each run).

          🤖 Auto-generated by [update-disk-sizes.yml](/.github/workflows/update-disk-sizes.yml)
          EOF
          )
            gh pr create \
              --draft \
              --head "$UPDATE_BRANCH" \
              --base main \
              --title "docs: auto-update disk size measurements ($TODAY)" \
              --body "$PR_BODY"
            echo "Created new draft PR for branch $UPDATE_BRANCH"
          else
            echo "PR #$EXISTING already open for branch $UPDATE_BRANCH — branch updated via force-push"
          fi

docs/site/docs/get-started/hardware-requirements.mdx

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ sidebar_position: 2
66

77
import Tabs from '@theme/Tabs';
88
import TabItem from '@theme/TabItem';
9+
import diskSizes from '@site/src/data/disk-sizes.json';
910

1011
# Hardware Requirements
1112

@@ -26,25 +27,21 @@ A locally mounted **SSD** (Solid-State Drive) or **NVMe** (Non-Volatile Memory E
2627

2728
The amount of disk space recommended and RAM you need depends on the [sync mode](../fundamentals/sync-modes) you want to run. **Current Disk Usage** values listed below are obtained using the standard Erigon + [Caplin](../fundamentals/caplin) configuration, with the sole exception of the `--prune.mode` flag.
2829

29-
:::info
30-
Please note that these figures are from September 2025, and the blockchain data size is continuously increasing.
31-
:::
32-
3330
<Tabs>
3431
<TabItem value="ethereum-mainnet" label="Ethereum mainnet">
3532
| Sync Mode | Current Disk Usage | Disk Size (Recommended) | RAM (Required) | RAM (Recommended) |
3633
| --- | --- | --- | --- | --- |
37-
| Archive | 1.77 TB | 4 TB | 32 GB | 64 GB |
38-
| Full (Default) | 920 GB | 2 TB | 16 GB | 32 GB |
39-
| Minimal | 350 GB | 1 TB | 16 GB | 64 GB |
34+
| Archive | {diskSizes.networks.mainnet.archive.display} | 4 TB | 32 GB | 64 GB |
35+
| Full (Default) | {diskSizes.networks.mainnet.full.display} | 2 TB | 16 GB | 32 GB |
36+
| Minimal | {diskSizes.networks.mainnet.minimal.display} | 1 TB | 16 GB | 64 GB |
4037

4138
</TabItem>
4239
<TabItem value="gnosis-chain" label="Gnosis Chain">
4340
| **Sync Mode** | **Current Disk Usage** | **Disk Size (Recommended)** | **RAM (Required)** | **RAM (Recommended)** |
4441
| -------------- | ---------------------- | --------------------------- | ------------------ | --------------------- |
45-
| Archive | 539 GB | 1 TB | 16 GB | 32 GB |
46-
| Full (Default) | 462 GB | 1 TB | 8 GB | 16 GB |
47-
| Minimal | 128 GB | 500 GB | 8 GB | 16 GB |
42+
| Archive | {diskSizes.networks.gnosis.archive.display} | 1 TB | 16 GB | 32 GB |
43+
| Full (Default) | {diskSizes.networks.gnosis.full.display} | 1 TB | 8 GB | 16 GB |
44+
| Minimal | {diskSizes.networks.gnosis.minimal.display} | 500 GB | 8 GB | 16 GB |
4845
</TabItem>
4946
<TabItem value="polygon" label="Polygon">
5047
:::warning
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#!/usr/bin/env python3
2+
"""Update docs/site/src/data/disk-sizes.json from CI artifact data.
3+
4+
Called by the update-disk-sizes GH Actions workflow after a successful
5+
sync-from-scratch run. Reads disk-usage-<chain>-<mode>.txt files produced
6+
by the sync workflows and updates the corresponding entries in the JSON.
7+
8+
Usage:
9+
python3 update-disk-sizes.py <artifacts-dir> <json-path> <prune-mode>
10+
11+
Arguments:
12+
artifacts-dir Directory containing disk-usage-<chain>-<mode>.txt files
13+
json-path Path to disk-sizes.json
14+
prune-mode 'full' or 'minimal'
15+
"""
16+
import json
17+
import sys
18+
from datetime import date
19+
from pathlib import Path
20+
21+
22+
def format_bytes(b: int) -> str:
    """Format a byte count as a human-readable SI string (powers of 1000).

    Args:
        b: Size in bytes.

    Returns:
        ``"<x.xx> TB"`` for a terabyte or more, otherwise a whole number of
        ``GB`` or ``MB``. A value that would round up to 1000 of a unit is
        promoted to the next unit, so the result is never ``"1000 GB"`` or
        ``"1000 MB"``.
    """
    tb = 1_000_000_000_000
    gb = 1_000_000_000
    mb = 1_000_000

    if b >= tb:
        return f"{b / tb:.2f} TB"
    if b >= gb:
        whole_gb = round(b / gb)
        if whole_gb >= 1000:
            # 999.5 GB and above rounds to a full terabyte.
            return f"{b / tb:.2f} TB"
        return f"{whole_gb} GB"
    whole_mb = round(b / mb)
    if whole_mb >= 1000:
        # 999.5 MB and above rounds to a full gigabyte.
        return f"{round(b / gb)} GB"
    return f"{whole_mb} MB"
29+
30+
31+
def main() -> None:
    """Merge measured disk usage from CI artifacts into disk-sizes.json.

    Takes three positional command-line arguments: the artifacts directory,
    the JSON path and the prune mode. Exits with status 1 on a wrong argument
    count or an unsupported prune mode. The JSON file is rewritten only when
    at least one artifact produced an update.
    """
    if len(sys.argv) != 4:
        print(f"Usage: {sys.argv[0]} <artifacts-dir> <json-path> <prune-mode>")
        sys.exit(1)

    _, artifacts_arg, json_arg, prune_mode = sys.argv
    artifacts_dir = Path(artifacts_arg)
    json_path = Path(json_arg)

    if prune_mode not in ("full", "minimal"):
        print(f"Error: prune-mode must be 'full' or 'minimal', got {prune_mode!r}")
        sys.exit(1)

    today = date.today().isoformat()

    with open(json_path) as src:
        data = json.load(src)

    networks = data["networks"]
    # Length of the trailing "-<mode>" part of each artifact stem.
    mode_suffix_len = len(prune_mode) + 1
    updated_chains = []

    for measurement in sorted(artifacts_dir.glob(f"disk-usage-*-{prune_mode}.txt")):
        # Stem looks like "disk-usage-<chain>-<mode>"; drop both ends.
        without_prefix = measurement.stem[len("disk-usage-"):]
        chain = without_prefix[:-mode_suffix_len]

        if chain not in networks:
            print(f"Skipping unknown chain: {chain!r}")
            continue

        raw = measurement.read_text().strip()
        try:
            bytes_val = int(raw)
        except ValueError:
            print(f"Could not parse bytes from {measurement}: {raw!r}")
            continue

        display = format_bytes(bytes_val)
        networks[chain][prune_mode] = {
            "bytes": bytes_val,
            "display": display,
            "measured_at": today,
            "source": "ci",
        }
        print(f"Updated {chain}/{prune_mode}: {display} ({bytes_val:,} bytes)")
        updated_chains.append(chain)

    if not updated_chains:
        print("No updates made — no matching artifact files found")
        return

    data["ci_last_updated"] = today
    with open(json_path, "w") as dst:
        json.dump(data, dst, indent=2)
        dst.write("\n")
    print(f"Wrote {json_path}")


if __name__ == "__main__":
    main()

docs/site/src/data/disk-sizes.json

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
{
2+
"ci_last_updated": "2025-09-01",
3+
"networks": {
4+
"mainnet": {
5+
"archive": {
6+
"bytes": null,
7+
"display": "1.77 TB",
8+
"measured_at": "2025-09-01",
9+
"source": "manual"
10+
},
11+
"full": {
12+
"bytes": null,
13+
"display": "920 GB",
14+
"measured_at": "2025-09-01",
15+
"source": "manual"
16+
},
17+
"minimal": {
18+
"bytes": null,
19+
"display": "350 GB",
20+
"measured_at": "2025-09-01",
21+
"source": "manual"
22+
}
23+
},
24+
"gnosis": {
25+
"archive": {
26+
"bytes": null,
27+
"display": "539 GB",
28+
"measured_at": "2025-09-01",
29+
"source": "manual"
30+
},
31+
"full": {
32+
"bytes": null,
33+
"display": "462 GB",
34+
"measured_at": "2025-09-01",
35+
"source": "manual"
36+
},
37+
"minimal": {
38+
"bytes": null,
39+
"display": "128 GB",
40+
"measured_at": "2025-09-01",
41+
"source": "manual"
42+
}
43+
}
44+
}
45+
}

0 commit comments

Comments
 (0)