# Update solo-test.yml #13
#
# Workflow file for this run
name: Solo Test Sandbox

on:
  push:
    branches:
      - 4900-solo-reduce-json-rpc-relay-memory-footprint

permissions:
  contents: read

jobs:
  solo-test:
    runs-on: hiero-smart-contracts-linux-large
    timeout-minutes: 50
    strategy:
      fail-fast: false
      matrix:
        # Relay container memory limits to benchmark, highest to lowest.
        memory_limit: ["1000Mi", "512Mi", "256Mi"]
    permissions:
      contents: read
      checks: write
    env:
      SOLO_CLUSTER_NAME: solo
      SOLO_NAMESPACE: solo
      SOLO_CLUSTER_SETUP_NAMESPACE: solo-cluster
      SOLO_DEPLOYMENT: solo-deployment
    name: Solo Test (${{ matrix.memory_limit }})
    steps:
      # NOTE(review): the only trigger above is `push`, so `inputs.operator_id`
      # is always empty unless a workflow_dispatch/workflow_call trigger is
      # added — confirm whether this step is still needed.
      - name: Set env variables
        run: |
          if [ -n "${{ inputs.operator_id }}" ]; then
            echo "OPERATOR_ID_MAIN=${{ inputs.operator_id }}" >> $GITHUB_ENV
          fi

      - name: Harden Runner
        uses: step-security/harden-runner@5ef0c079ce82195b2a36a210272d6b661572d83e # v2.14.2
        with:
          egress-policy: audit

      - name: Checkout Code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Setup node
        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0
        with:
          node-version: 22

      # Set up kind; needed for configuring the solo environment
      - name: Setup Kind
        uses: helm/kind-action@a1b0e391336a6ee6713a0583f8c6240d70863de3 # v1.12.0
        with:
          install_only: true
          node_image: kindest/node:v1.31.4@sha256:2cb39f7295fe7eafee0842b1052a599a4fb0f8bcf3f83d96c7f4864c357c6c30
          version: v0.26.0
          kubectl_version: v1.31.4
          verbosity: 3
          wait: 120s

      - name: Install Solo
        run: npm install -g @hashgraph/solo

      - name: Configure and run solo
        run: |
          kind create cluster -n "${SOLO_CLUSTER_NAME}"

          # metrics-server is not bundled with Kind; --kubelet-insecure-tls is required
          # because Kind kubelets use self-signed certificates.
          # See: https://github.com/kubernetes-sigs/metrics-server#requirements
          kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
          kubectl patch deployment metrics-server -n kube-system \
            --type=json \
            -p '[{"op":"add","path":"/spec/template/spec/containers/0/args/-","value":"--kubelet-insecure-tls"}]'
          kubectl rollout status deployment/metrics-server -n kube-system --timeout=120s

          # initialize solo
          solo init
          solo cluster-ref config connect --cluster-ref kind-${SOLO_CLUSTER_NAME} --context kind-${SOLO_CLUSTER_NAME}
          solo deployment config create -n "${SOLO_NAMESPACE}" --deployment "${SOLO_DEPLOYMENT}"
          solo deployment cluster attach --deployment "${SOLO_DEPLOYMENT}" --cluster-ref kind-${SOLO_CLUSTER_NAME} --num-consensus-nodes 1
          solo keys consensus generate --gossip-keys --tls-keys --deployment "${SOLO_DEPLOYMENT}"
          solo cluster-ref config setup -s "${SOLO_CLUSTER_SETUP_NAMESPACE}"
          solo consensus network deploy --deployment "${SOLO_DEPLOYMENT}"
          solo consensus node setup --deployment "${SOLO_DEPLOYMENT}"
          solo consensus node start --deployment "${SOLO_DEPLOYMENT}"
          solo mirror node add --deployment "${SOLO_DEPLOYMENT}" --cluster-ref kind-${SOLO_CLUSTER_NAME} --enable-ingress --pinger

      - name: Run Solo Relay
        run: |
          # Write a Helm values override applying the matrixed memory limit to
          # the relay container, then deploy the relay with it.
          cat <<EOF > relay-resources.yaml
          relay:
            resources:
              requests:
                cpu: 0
                memory: 0
              limits:
                cpu: 1100m
                memory: ${{ matrix.memory_limit }}
          EOF
          cat relay-resources.yaml
          solo relay node add -i node1 --deployment "${SOLO_DEPLOYMENT}" -f relay-resources.yaml

      - name: check Relay Resources
        run: |
          echo "Describing Relay pod resources:"
          kubectl -n "${SOLO_NAMESPACE}" describe pod relay-1

      - name: Port-forward Consensus Node
        run: |
          # Free port 50211 if anything is already bound, then background a
          # port-forward to the consensus node pod for the test client.
          sudo kill -9 $(sudo lsof -ti :50211) || true
          kubectl port-forward -n "${SOLO_NAMESPACE}" network-node1-0 50211:50211 &

      - name: Checkout Relay repo
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          ref: 4900-solo-reduce-json-rpc-relay-memory-footprint

      - name: Install packages
        run: npm ci

      - name: Build Relay
        run: npm run build

      - name: Record test start time
        id: test-timing
        run: echo "start=$(date +%s)" >> "$GITHUB_OUTPUT"

      - name: Start json-rpc-relay-test-client
        env:
          CHAIN_ID: "0x12a"
          MIRROR_NODE_URL: "http://127.0.0.1:8081"
          HEDERA_NETWORK: '{"127.0.0.1:50211":"0.0.3"}'
          OPERATOR_ID_MAIN: "0.0.2"
          OPERATOR_KEY_MAIN: "302e020100300506032b65700422042091132178e72057a1d7528025956fe39b0b847f200ab59b2fdd367017f3087137"
          REDIS_ENABLED: "false"
          USE_ASYNC_TX_PROCESSING: "false"
          E2E_RELAY_HOST: "http://localhost:7546"
          SDK_LOG_LEVEL: trace
          USE_INTERNAL_RELAY: "false"
        run: npm run acceptancetest:xts

      - name: Generate Report
        if: always()
        env:
          MEMORY_LIMIT: ${{ matrix.memory_limit }}
          TEST_STATUS: ${{ job.status }}
          TEST_START: ${{ steps.test-timing.outputs.start }}
        run: |
          set +e # individual command failures must not abort the report

          # ── Wall-clock test duration ──────────────────────────────
          END_TIME=$(date +%s)
          WALL_SECS=$(( END_TIME - ${TEST_START:-$END_TIME} ))
          WALL_TIME="$(( WALL_SECS / 60 ))m $(( WALL_SECS % 60 ))s"

          # ── Test results from JUnit XML (Mocha format) ───────────
          # Parse the root <testsuites> element which carries aggregated totals.
          # Uses awk with '"' delimiter — portable, no PCRE dependency.
          TOTAL=0; PASSED=0; FAILURES=0; DURATION_S="0.0"
          if compgen -G "test-results.*.xml" > /dev/null 2>&1; then
            TOTAL=$(awk -F'"' '/<testsuites / { for(i=1;i<=NF;i++) if($(i) ~ / tests=$/) s+=$(i+1) } END { print s+0 }' test-results.*.xml)
            FAILURES=$(awk -F'"' '/<testsuites / { for(i=1;i<=NF;i++) if($(i) ~ / failures=$/) s+=$(i+1) } END { print s+0 }' test-results.*.xml)
            DURATION_S=$(awk -F'"' '/<testsuites / { for(i=1;i<=NF;i++) if($(i) ~ / time=$/) s+=$(i+1) } END { printf "%.1f", s }' test-results.*.xml)
            PASSED=$(( TOTAL - FAILURES ))
          fi

          # ── TPS estimate (test-cases / wall-clock seconds) ───────
          TPS="N/A"
          if [ "$TOTAL" -gt 0 ] 2>/dev/null && [ "$WALL_SECS" -gt 0 ] 2>/dev/null; then
            TPS=$(awk "BEGIN { printf \"%.2f\", $TOTAL / $WALL_SECS }")
          fi

          # ── Pod resource snapshot via metrics-server ─────────────
          # Capture both header and no-header forms before the heredoc.
          # Embedded $(...) inside a heredoc runs in a subshell that does not
          # inherit the runner environment reliably; pre-computing avoids that.
          TOP_FULL=$(kubectl top pods -n "${SOLO_NAMESPACE}" 2>&1) || TOP_FULL="(kubectl top unavailable)"
          TOP_OUTPUT=$(echo "$TOP_FULL" | tail -n +2) # strip header row for parsing

          # ── Relay-specific metrics from kubectl top ──────────────
          RELAY_CPU="N/A"; RELAY_MEM="N/A"
          RELAY_WS_CPU="N/A"; RELAY_WS_MEM="N/A"
          RELAY_LINE=$(echo "$TOP_OUTPUT" | grep -E '^relay-[0-9]+-' | grep -v -- '-ws-' | head -1)
          RELAY_WS_LINE=$(echo "$TOP_OUTPUT" | grep -E '^relay-[0-9]+-ws-' | head -1)
          [ -n "$RELAY_LINE" ] && RELAY_CPU=$(echo "$RELAY_LINE" | awk '{print $2}') \
            && RELAY_MEM=$(echo "$RELAY_LINE" | awk '{print $3}')
          [ -n "$RELAY_WS_LINE" ] && RELAY_WS_CPU=$(echo "$RELAY_WS_LINE" | awk '{print $2}') \
            && RELAY_WS_MEM=$(echo "$RELAY_WS_LINE" | awk '{print $3}')

          # ── Consensus node metrics ───────────────────────────────
          NODE_CPU="N/A"; NODE_MEM="N/A"
          NODE_LINE=$(echo "$TOP_OUTPUT" | grep -E '^network-node' | head -1)
          [ -n "$NODE_LINE" ] && NODE_CPU=$(echo "$NODE_LINE" | awk '{print $2}') \
            && NODE_MEM=$(echo "$NODE_LINE" | awk '{print $3}')

          # ── OOMKill detection ────────────────────────────────────
          # Scan both current and last container states for an OOMKilled
          # termination reason across every pod in the namespace.
          OOM_STATUS="None detected"
          OOM_PODS=$(kubectl get pods -n "${SOLO_NAMESPACE}" -o json 2>/dev/null | python3 -c "
          import sys, json
          data = json.load(sys.stdin)
          oom = set()
          for p in data.get('items', []):
              for c in p.get('status', {}).get('containerStatuses', []):
                  for sk in ('state', 'lastState'):
                      if c.get(sk, {}).get('terminated', {}).get('reason') == 'OOMKilled':
                          oom.add(p['metadata']['name'])
          print(', '.join(sorted(oom)) if oom else '')
          " 2>/dev/null) || OOM_PODS=""
          [ -n "$OOM_PODS" ] && OOM_STATUS="OOMKilled: $OOM_PODS"

          # ── Relay restart count ──────────────────────────────────
          RELAY_RESTARTS=$(kubectl get pods -n "${SOLO_NAMESPACE}" --no-headers 2>/dev/null \
            | awk '/^relay-/ { sum += $4 } END { print sum+0 }') || RELAY_RESTARTS="N/A"

          # ── Relay pod resource limits from spec ──────────────────
          RELAY_POD=$(kubectl get pods -n "${SOLO_NAMESPACE}" --no-headers \
            -o custom-columns=":metadata.name" 2>/dev/null \
            | grep -E '^relay-[0-9]+-[^w]' | head -1) || RELAY_POD=""
          CONFIGURED_LIMITS="(not found)"
          if [ -n "$RELAY_POD" ]; then
            CONFIGURED_LIMITS=$(kubectl get pod "$RELAY_POD" -n "${SOLO_NAMESPACE}" \
              -o jsonpath='{range .spec.containers[*]}{.name}: cpu={.resources.limits.cpu}, mem={.resources.limits.memory}{"\n"}{end}' \
              2>/dev/null) || CONFIGURED_LIMITS="(query failed)"
          fi

          # ── Echo all KPIs to step log (always visible regardless of summary write) ──
          echo "============================================"
          echo " Solo Memory Benchmark — ${MEMORY_LIMIT}"
          echo "============================================"
          echo " Job Status    : ${TEST_STATUS}"
          echo " Wall-Clock    : ${WALL_TIME}"
          echo " Tests Total   : ${TOTAL}"
          echo " Tests Passed  : ${PASSED}"
          echo " Tests Failed  : ${FAILURES}"
          echo " Suite Time(s) : ${DURATION_S}"
          echo " Est. TPS      : ${TPS}"
          echo "--------------------------------------------"
          echo " Relay (rpc)   : CPU=${RELAY_CPU} MEM=${RELAY_MEM} LIMIT=${MEMORY_LIMIT}"
          echo " Relay (ws)    : CPU=${RELAY_WS_CPU} MEM=${RELAY_WS_MEM}"
          echo " Consensus     : CPU=${NODE_CPU} MEM=${NODE_MEM}"
          echo " Relay Limits  : ${CONFIGURED_LIMITS}"
          echo " OOMKills      : ${OOM_STATUS}"
          echo " Restarts      : ${RELAY_RESTARTS}"
          echo "============================================"
          echo ""
          echo "==> All pod resources:"
          echo "${TOP_FULL}"

          # ── Write GitHub Job Summary ─────────────────────────────
          # All variables are pre-computed; no command substitutions inside
          # the heredoc to avoid subshell environment inheritance issues.
          # Use printf to append each section: avoids heredoc quoting/escaping
          # pitfalls and makes each write independently verifiable.
          printf '## Solo Memory Benchmark — `%s`\n\n' "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
          printf '| Key | Value |\n|---|---|\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Memory Limit | `%s` |\n' "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
          printf '| Runner | `hiero-smart-contracts-linux-large` |\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Test Suite | `acceptancetest:xts` |\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Job Status | `%s` |\n' "${TEST_STATUS}" >> "${GITHUB_STEP_SUMMARY}"
          printf '| Wall-Clock Duration | `%s` |\n\n' "${WALL_TIME}" >> "${GITHUB_STEP_SUMMARY}"
          printf '### Test Results\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Total | Passed | Failed | Suite Duration (s) | Est. TPS |\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '|:---:|:---:|:---:|:---:|:---:|\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| %s | %s | %s | %s | %s |\n\n' \
            "${TOTAL}" "${PASSED}" "${FAILURES}" "${DURATION_S}" "${TPS}" >> "${GITHUB_STEP_SUMMARY}"
          printf '### Relay Resource Consumption (snapshot at test end)\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Component | CPU | Memory (RSS) | Configured Limit |\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '|---|:---:|:---:|:---:|\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| relay (rpc) | %s | %s | `%s` |\n' "${RELAY_CPU}" "${RELAY_MEM}" "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
          printf '| relay (ws) | %s | %s | `%s` |\n\n' "${RELAY_WS_CPU}" "${RELAY_WS_MEM}" "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
          printf '### Consensus Node Resources\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Component | CPU | Memory |\n|---|:---:|:---:|\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| network-node1 | %s | %s |\n\n' "${NODE_CPU}" "${NODE_MEM}" >> "${GITHUB_STEP_SUMMARY}"
          printf '### Health & Stability\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Metric | Value |\n|---|---|\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| OOMKill Events | %s |\n' "${OOM_STATUS}" >> "${GITHUB_STEP_SUMMARY}"
          printf '| Relay Restart Count | %s |\n\n' "${RELAY_RESTARTS}" >> "${GITHUB_STEP_SUMMARY}"
          printf '<details><summary>All Pod Resources (<code>kubectl top</code>)</summary>\n\n```\n%s\n```\n\n</details>\n\n' \
            "${TOP_FULL}" >> "${GITHUB_STEP_SUMMARY}"
          printf '<details><summary>Relay Container Limits (from pod spec)</summary>\n\n```\n%s\n```\n\n</details>\n\n' \
            "${CONFIGURED_LIMITS}" >> "${GITHUB_STEP_SUMMARY}"
          printf -- '---\n> **Reading guide:**\n> - Relay Memory ≈ limit → under pressure; OOMKills likely at higher load.\n> - Relay Memory ≪ limit → room for further reduction.\n> - OOMKills or Restarts > 0 → the limit is too aggressive for this workload.\n' \
            >> "${GITHUB_STEP_SUMMARY}"
          echo "Report written to GitHub Job Summary."