# Update solo-test.yml #13
# (The following two lines were GitHub blob-view UI chrome captured by the
# paste, kept here as comments rather than bare prose so the file parses:)
# "This file contains hidden or bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below."
# "Learn more about bidirectional Unicode characters"
---
# Sandbox CI workflow: deploys a single-node Hedera network via Solo inside a
# Kind cluster, runs the JSON-RPC relay under several container memory limits
# (matrix), executes the acceptance test suite against it, and writes a
# per-limit benchmark report to the GitHub Job Summary.
name: Solo Test Sandbox

on:
  push:
    branches:
      - 4900-solo-reduce-json-rpc-relay-memory-footprint

permissions:
  contents: read

jobs:
  solo-test:
    runs-on: hiero-smart-contracts-linux-large
    timeout-minutes: 50
    strategy:
      fail-fast: false  # let every memory limit run to completion even if one fails
      matrix:
        # Relay container memory limits under test, largest to smallest.
        # Quoted so no YAML consumer ever re-types them.
        memory_limit: ["1000Mi", "512Mi", "256Mi"]
    permissions:
      contents: read
      checks: write
    env:
      SOLO_CLUSTER_NAME: solo
      SOLO_NAMESPACE: solo
      SOLO_CLUSTER_SETUP_NAMESPACE: solo-cluster
      SOLO_DEPLOYMENT: solo-deployment
    name: Solo Test (${{ matrix.memory_limit }})
    steps:
      # NOTE(review): this workflow only declares a `push` trigger, so
      # `inputs.operator_id` is always empty and this step is a no-op; the
      # `-n` guard keeps it harmless. Confirm whether a workflow_dispatch /
      # workflow_call input was intended.
      - name: Set env variables
        run: |
          if [ -n "${{ inputs.operator_id }}" ]; then
            echo "OPERATOR_ID_MAIN=${{ inputs.operator_id }}" >> $GITHUB_ENV
          fi

      - name: Harden Runner
        uses: step-security/harden-runner@5ef0c079ce82195b2a36a210272d6b661572d83e # v2.14.2
        with:
          egress-policy: audit

      - name: Checkout Code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Setup node
        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0
        with:
          node-version: 22

      # Set up kind; needed for configuring the solo environment
      - name: Setup Kind
        uses: helm/kind-action@a1b0e391336a6ee6713a0583f8c6240d70863de3 # v1.12.0
        with:
          install_only: true
          node_image: kindest/node:v1.31.4@sha256:2cb39f7295fe7eafee0842b1052a599a4fb0f8bcf3f83d96c7f4864c357c6c30
          version: v0.26.0
          kubectl_version: v1.31.4
          verbosity: 3
          wait: 120s

      - name: Install Solo
        run: npm install -g @hashgraph/solo

      - name: Configure and run solo
        run: |
          kind create cluster -n "${SOLO_CLUSTER_NAME}"
          # metrics-server is not bundled with Kind; --kubelet-insecure-tls is required
          # because Kind kubelets use self-signed certificates.
          # See: https://github.com/kubernetes-sigs/metrics-server#requirements
          kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
          kubectl patch deployment metrics-server -n kube-system \
            --type=json \
            -p '[{"op":"add","path":"/spec/template/spec/containers/0/args/-","value":"--kubelet-insecure-tls"}]'
          kubectl rollout status deployment/metrics-server -n kube-system --timeout=120s
          # initialize solo
          solo init
          solo cluster-ref config connect --cluster-ref kind-${SOLO_CLUSTER_NAME} --context kind-${SOLO_CLUSTER_NAME}
          solo deployment config create -n "${SOLO_NAMESPACE}" --deployment "${SOLO_DEPLOYMENT}"
          solo deployment cluster attach --deployment "${SOLO_DEPLOYMENT}" --cluster-ref kind-${SOLO_CLUSTER_NAME} --num-consensus-nodes 1
          solo keys consensus generate --gossip-keys --tls-keys --deployment "${SOLO_DEPLOYMENT}"
          solo cluster-ref config setup -s "${SOLO_CLUSTER_SETUP_NAMESPACE}"
          solo consensus network deploy --deployment "${SOLO_DEPLOYMENT}"
          solo consensus node setup --deployment "${SOLO_DEPLOYMENT}"
          solo consensus node start --deployment "${SOLO_DEPLOYMENT}"
          solo mirror node add --deployment "${SOLO_DEPLOYMENT}" --cluster-ref kind-${SOLO_CLUSTER_NAME} --enable-ingress --pinger

      - name: Run Solo Relay
        run: |
          # Values-file override pinning the relay container to the matrix limit.
          # The heredoc body is indented at the block-scalar base so the written
          # file starts at column 0 with correct relative YAML indentation.
          cat <<EOF > relay-resources.yaml
          relay:
            resources:
              requests:
                cpu: 0
                memory: 0
              limits:
                cpu: 1100m
                memory: ${{ matrix.memory_limit }}
          EOF
          cat relay-resources.yaml
          solo relay node add -i node1 --deployment "${SOLO_DEPLOYMENT}" -f relay-resources.yaml

      - name: check Relay Resources
        run: |
          echo "Describing Relay pod resources:"
          kubectl -n "${SOLO_NAMESPACE}" describe pod relay-1

      - name: Port-forward Consensus Node
        run: |
          # Free port 50211 if anything holds it, then background the forward.
          sudo kill -9 $(sudo lsof -ti :50211) || true
          kubectl port-forward -n "${SOLO_NAMESPACE}" network-node1-0 50211:50211 &

      - name: Checkout Relay repo
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          ref: 4900-solo-reduce-json-rpc-relay-memory-footprint

      - name: Install packages
        run: npm ci

      - name: Build Relay
        run: npm run build

      - name: Record test start time
        id: test-timing
        run: echo "start=$(date +%s)" >> "$GITHUB_OUTPUT"

      - name: Start json-rpc-relay-test-client
        env:
          CHAIN_ID: "0x12a"
          MIRROR_NODE_URL: "http://127.0.0.1:8081"
          HEDERA_NETWORK: '{"127.0.0.1:50211":"0.0.3"}'
          # Quoted: "0.0.2" would otherwise be an ambiguous plain scalar.
          OPERATOR_ID_MAIN: "0.0.2"
          # NOTE(review): presumably the well-known local-development operator
          # key (operator 0.0.2 on local networks), not a production secret —
          # confirm before reusing this workflow elsewhere.
          OPERATOR_KEY_MAIN: "302e020100300506032b65700422042091132178e72057a1d7528025956fe39b0b847f200ab59b2fdd367017f3087137"
          # Boolean-looking env values quoted: Actions passes env as strings,
          # so "false" is what the consumer receives either way.
          REDIS_ENABLED: "false"
          USE_ASYNC_TX_PROCESSING: "false"
          E2E_RELAY_HOST: "http://localhost:7546"
          SDK_LOG_LEVEL: trace
          USE_INTERNAL_RELAY: "false"
        run: npm run acceptancetest:xts

      - name: Generate Report
        if: always()
        env:
          MEMORY_LIMIT: ${{ matrix.memory_limit }}
          TEST_STATUS: ${{ job.status }}
          TEST_START: ${{ steps.test-timing.outputs.start }}
        run: |
          set +e  # individual command failures must not abort the report
          # ── Wall-clock test duration ──────────────────────────────
          END_TIME=$(date +%s)
          WALL_SECS=$(( END_TIME - ${TEST_START:-$END_TIME} ))
          WALL_TIME="$(( WALL_SECS / 60 ))m $(( WALL_SECS % 60 ))s"
          # ── Test results from JUnit XML (Mocha format) ───────────
          # Parse the root <testsuites> element which carries aggregated totals.
          # Uses awk with '"' delimiter — portable, no PCRE dependency.
          TOTAL=0; PASSED=0; FAILURES=0; DURATION_S="0.0"
          if compgen -G "test-results.*.xml" > /dev/null 2>&1; then
            TOTAL=$(awk -F'"' '/<testsuites / { for(i=1;i<=NF;i++) if($(i) ~ / tests=$/) s+=$(i+1) } END { print s+0 }' test-results.*.xml)
            FAILURES=$(awk -F'"' '/<testsuites / { for(i=1;i<=NF;i++) if($(i) ~ / failures=$/) s+=$(i+1) } END { print s+0 }' test-results.*.xml)
            DURATION_S=$(awk -F'"' '/<testsuites / { for(i=1;i<=NF;i++) if($(i) ~ / time=$/) s+=$(i+1) } END { printf "%.1f", s }' test-results.*.xml)
            PASSED=$(( TOTAL - FAILURES ))
          fi
          # ── TPS estimate (test-cases / wall-clock seconds) ───────
          TPS="N/A"
          if [ "$TOTAL" -gt 0 ] 2>/dev/null && [ "$WALL_SECS" -gt 0 ] 2>/dev/null; then
            TPS=$(awk "BEGIN { printf \"%.2f\", $TOTAL / $WALL_SECS }")
          fi
          # ── Pod resource snapshot via metrics-server ─────────────
          # Capture both header and no-header forms before the heredoc.
          # Embedded $(...) inside a heredoc runs in a subshell that does not
          # inherit the runner environment reliably; pre-computing avoids that.
          TOP_FULL=$(kubectl top pods -n "${SOLO_NAMESPACE}" 2>&1) || TOP_FULL="(kubectl top unavailable)"
          TOP_OUTPUT=$(echo "$TOP_FULL" | tail -n +2)  # strip header row for parsing
          # ── Relay-specific metrics from kubectl top ──────────────
          RELAY_CPU="N/A"; RELAY_MEM="N/A"
          RELAY_WS_CPU="N/A"; RELAY_WS_MEM="N/A"
          RELAY_LINE=$(echo "$TOP_OUTPUT" | grep -E '^relay-[0-9]+-' | grep -v -- '-ws-' | head -1)
          RELAY_WS_LINE=$(echo "$TOP_OUTPUT" | grep -E '^relay-[0-9]+-ws-' | head -1)
          [ -n "$RELAY_LINE" ] && RELAY_CPU=$(echo "$RELAY_LINE" | awk '{print $2}') \
            && RELAY_MEM=$(echo "$RELAY_LINE" | awk '{print $3}')
          [ -n "$RELAY_WS_LINE" ] && RELAY_WS_CPU=$(echo "$RELAY_WS_LINE" | awk '{print $2}') \
            && RELAY_WS_MEM=$(echo "$RELAY_WS_LINE" | awk '{print $3}')
          # ── Consensus node metrics ───────────────────────────────
          NODE_CPU="N/A"; NODE_MEM="N/A"
          NODE_LINE=$(echo "$TOP_OUTPUT" | grep -E '^network-node' | head -1)
          [ -n "$NODE_LINE" ] && NODE_CPU=$(echo "$NODE_LINE" | awk '{print $2}') \
            && NODE_MEM=$(echo "$NODE_LINE" | awk '{print $3}')
          # ── OOMKill detection ────────────────────────────────────
          # The python3 -c script must sit flush with the block-scalar base so
          # its first line has no leading indent after YAML strips the common
          # indentation (a leading space would be a Python IndentationError).
          OOM_STATUS="None detected"
          OOM_PODS=$(kubectl get pods -n "${SOLO_NAMESPACE}" -o json 2>/dev/null | python3 -c "
          import sys, json
          data = json.load(sys.stdin)
          oom = set()
          for p in data.get('items', []):
              for c in p.get('status', {}).get('containerStatuses', []):
                  for sk in ('state', 'lastState'):
                      if c.get(sk, {}).get('terminated', {}).get('reason') == 'OOMKilled':
                          oom.add(p['metadata']['name'])
          print(', '.join(sorted(oom)) if oom else '')
          " 2>/dev/null) || OOM_PODS=""
          [ -n "$OOM_PODS" ] && OOM_STATUS="OOMKilled: $OOM_PODS"
          # ── Relay restart count ──────────────────────────────────
          RELAY_RESTARTS=$(kubectl get pods -n "${SOLO_NAMESPACE}" --no-headers 2>/dev/null \
            | awk '/^relay-/ { sum += $4 } END { print sum+0 }') || RELAY_RESTARTS="N/A"
          # ── Relay pod resource limits from spec ──────────────────
          RELAY_POD=$(kubectl get pods -n "${SOLO_NAMESPACE}" --no-headers \
            -o custom-columns=":metadata.name" 2>/dev/null \
            | grep -E '^relay-[0-9]+-[^w]' | head -1) || RELAY_POD=""
          CONFIGURED_LIMITS="(not found)"
          if [ -n "$RELAY_POD" ]; then
            CONFIGURED_LIMITS=$(kubectl get pod "$RELAY_POD" -n "${SOLO_NAMESPACE}" \
              -o jsonpath='{range .spec.containers[*]}{.name}: cpu={.resources.limits.cpu}, mem={.resources.limits.memory}{"\n"}{end}' \
              2>/dev/null) || CONFIGURED_LIMITS="(query failed)"
          fi
          # ── Echo all KPIs to step log (always visible regardless of summary write) ──
          echo "============================================"
          echo " Solo Memory Benchmark — ${MEMORY_LIMIT}"
          echo "============================================"
          echo " Job Status    : ${TEST_STATUS}"
          echo " Wall-Clock    : ${WALL_TIME}"
          echo " Tests Total   : ${TOTAL}"
          echo " Tests Passed  : ${PASSED}"
          echo " Tests Failed  : ${FAILURES}"
          echo " Suite Time(s) : ${DURATION_S}"
          echo " Est. TPS      : ${TPS}"
          echo "--------------------------------------------"
          echo " Relay (rpc)   : CPU=${RELAY_CPU} MEM=${RELAY_MEM} LIMIT=${MEMORY_LIMIT}"
          echo " Relay (ws)    : CPU=${RELAY_WS_CPU} MEM=${RELAY_WS_MEM}"
          echo " Consensus     : CPU=${NODE_CPU} MEM=${NODE_MEM}"
          echo " Relay Limits  : ${CONFIGURED_LIMITS}"
          echo " OOMKills      : ${OOM_STATUS}"
          echo " Restarts      : ${RELAY_RESTARTS}"
          echo "============================================"
          echo ""
          echo "==> All pod resources:"
          echo "${TOP_FULL}"
          # ── Write GitHub Job Summary ─────────────────────────────
          # All variables are pre-computed; no command substitutions inside
          # the heredoc to avoid subshell environment inheritance issues.
          # Use printf to append each section: avoids heredoc quoting/escaping
          # pitfalls and makes each write independently verifiable.
          printf '## Solo Memory Benchmark — `%s`\n\n' "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
          printf '| Key | Value |\n|---|---|\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Memory Limit | `%s` |\n' "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
          printf '| Runner | `hiero-smart-contracts-linux-large` |\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Test Suite | `acceptancetest:xts` |\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Job Status | `%s` |\n' "${TEST_STATUS}" >> "${GITHUB_STEP_SUMMARY}"
          printf '| Wall-Clock Duration | `%s` |\n\n' "${WALL_TIME}" >> "${GITHUB_STEP_SUMMARY}"
          printf '### Test Results\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Total | Passed | Failed | Suite Duration (s) | Est. TPS |\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '|:---:|:---:|:---:|:---:|:---:|\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| %s | %s | %s | %s | %s |\n\n' \
            "${TOTAL}" "${PASSED}" "${FAILURES}" "${DURATION_S}" "${TPS}" >> "${GITHUB_STEP_SUMMARY}"
          printf '### Relay Resource Consumption (snapshot at test end)\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Component | CPU | Memory (RSS) | Configured Limit |\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '|---|:---:|:---:|:---:|\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| relay (rpc) | %s | %s | `%s` |\n' "${RELAY_CPU}" "${RELAY_MEM}" "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
          printf '| relay (ws) | %s | %s | `%s` |\n\n' "${RELAY_WS_CPU}" "${RELAY_WS_MEM}" "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
          printf '### Consensus Node Resources\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Component | CPU | Memory |\n|---|:---:|:---:|\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| network-node1 | %s | %s |\n\n' "${NODE_CPU}" "${NODE_MEM}" >> "${GITHUB_STEP_SUMMARY}"
          printf '### Health & Stability\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Metric | Value |\n|---|---|\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| OOMKill Events | %s |\n' "${OOM_STATUS}" >> "${GITHUB_STEP_SUMMARY}"
          printf '| Relay Restart Count | %s |\n\n' "${RELAY_RESTARTS}" >> "${GITHUB_STEP_SUMMARY}"
          printf '<details><summary>All Pod Resources (<code>kubectl top</code>)</summary>\n\n```\n%s\n```\n\n</details>\n\n' \
            "${TOP_FULL}" >> "${GITHUB_STEP_SUMMARY}"
          printf '<details><summary>Relay Container Limits (from pod spec)</summary>\n\n```\n%s\n```\n\n</details>\n\n' \
            "${CONFIGURED_LIMITS}" >> "${GITHUB_STEP_SUMMARY}"
          printf -- '---\n> **Reading guide:**\n> - Relay Memory ≈ limit → under pressure; OOMKills likely at higher load.\n> - Relay Memory ≪ limit → room for further reduction.\n> - OOMKills or Restarts > 0 → the limit is too aggressive for this workload.\n' \
            >> "${GITHUB_STEP_SUMMARY}"
          echo "Report written to GitHub Job Summary."