Skip to content

Commit 1a8510c

Browse files
committed
Update solo-test.yml
Signed-off-by: Logan Nguyen <logan.nguyen@swirldslabs.com>
1 parent 29dc803 commit 1a8510c

File tree

1 file changed

+84
-75
lines changed

1 file changed

+84
-75
lines changed

.github/workflows/solo-test.yml

Lines changed: 84 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -169,24 +169,27 @@ jobs:
169169
fi
170170
171171
# ── Pod resource snapshot via metrics-server ─────────────
172-
echo "==> Pod resource usage at end of test run:"
173-
TOP_OUTPUT=$(kubectl top pods -n "${SOLO_NAMESPACE}" --no-headers 2>&1) || true
174-
echo "$TOP_OUTPUT"
172+
# Capture the full kubectl top table (header row included) once, up front.
173+
# The parsed metrics and the raw table shown in the report then come from
174+
# the same snapshot, and a kubectl failure falls back to a placeholder string.
175+
TOP_FULL=$(kubectl top pods -n "${SOLO_NAMESPACE}" 2>&1) || TOP_FULL="(kubectl top unavailable)"
176+
TOP_OUTPUT=$(echo "$TOP_FULL" | tail -n +2) # strip header row for parsing
175177
176178
# ── Relay-specific metrics from kubectl top ──────────────
177179
RELAY_CPU="N/A"; RELAY_MEM="N/A"
178180
RELAY_WS_CPU="N/A"; RELAY_WS_MEM="N/A"
179-
if [ -n "$TOP_OUTPUT" ]; then
180-
RELAY_LINE=$(echo "$TOP_OUTPUT" | grep -E '^relay-[0-9]+-' | grep -v -- '-ws-' | head -1)
181-
RELAY_WS_LINE=$(echo "$TOP_OUTPUT" | grep -E '^relay-[0-9]+-ws-' | head -1)
182-
[ -n "$RELAY_LINE" ] && RELAY_CPU=$(echo "$RELAY_LINE" | awk '{print $2}') && RELAY_MEM=$(echo "$RELAY_LINE" | awk '{print $3}')
183-
[ -n "$RELAY_WS_LINE" ] && RELAY_WS_CPU=$(echo "$RELAY_WS_LINE" | awk '{print $2}') && RELAY_WS_MEM=$(echo "$RELAY_WS_LINE" | awk '{print $3}')
184-
fi
185-
186-
# ── Consensus node metrics ──────────────────────────────
181+
RELAY_LINE=$(echo "$TOP_OUTPUT" | grep -E '^relay-[0-9]+-' | grep -v -- '-ws-' | head -1)
182+
RELAY_WS_LINE=$(echo "$TOP_OUTPUT" | grep -E '^relay-[0-9]+-ws-' | head -1)
183+
[ -n "$RELAY_LINE" ] && RELAY_CPU=$(echo "$RELAY_LINE" | awk '{print $2}') \
184+
&& RELAY_MEM=$(echo "$RELAY_LINE" | awk '{print $3}')
185+
[ -n "$RELAY_WS_LINE" ] && RELAY_WS_CPU=$(echo "$RELAY_WS_LINE" | awk '{print $2}') \
186+
&& RELAY_WS_MEM=$(echo "$RELAY_WS_LINE" | awk '{print $3}')
187+
188+
# ── Consensus node metrics ──────────────────────────────
187189
NODE_CPU="N/A"; NODE_MEM="N/A"
188190
NODE_LINE=$(echo "$TOP_OUTPUT" | grep -E '^network-node' | head -1)
189-
[ -n "$NODE_LINE" ] && NODE_CPU=$(echo "$NODE_LINE" | awk '{print $2}') && NODE_MEM=$(echo "$NODE_LINE" | awk '{print $3}')
191+
[ -n "$NODE_LINE" ] && NODE_CPU=$(echo "$NODE_LINE" | awk '{print $2}') \
192+
&& NODE_MEM=$(echo "$NODE_LINE" | awk '{print $3}')
190193
191194
# ── OOMKill detection ────────────────────────────────────
192195
OOM_STATUS="None detected"
@@ -200,80 +203,86 @@ jobs:
200203
if c.get(sk, {}).get('terminated', {}).get('reason') == 'OOMKilled':
201204
oom.add(p['metadata']['name'])
202205
print(', '.join(sorted(oom)) if oom else '')
203-
" 2>/dev/null) || true
204-
[ -n "$OOM_PODS" ] && OOM_STATUS="**OOMKilled:** $OOM_PODS"
206+
" 2>/dev/null) || OOM_PODS=""
207+
[ -n "$OOM_PODS" ] && OOM_STATUS="OOMKilled: $OOM_PODS"
205208
206209
# ── Relay restart count ──────────────────────────────────
207210
RELAY_RESTARTS=$(kubectl get pods -n "${SOLO_NAMESPACE}" --no-headers 2>/dev/null \
208211
| awk '/^relay-/ { sum += $4 } END { print sum+0 }') || RELAY_RESTARTS="N/A"
209212
210213
# ── Relay pod resource limits from spec ──────────────────
211-
echo "==> Relay pod resource configuration:"
212214
RELAY_POD=$(kubectl get pods -n "${SOLO_NAMESPACE}" --no-headers \
213215
-o custom-columns=":metadata.name" 2>/dev/null \
214-
| grep -E '^relay-[0-9]+-[^w]' | head -1) || true
216+
| grep -E '^relay-[0-9]+-[^w]' | head -1) || RELAY_POD=""
215217
CONFIGURED_LIMITS="(not found)"
216218
if [ -n "$RELAY_POD" ]; then
217219
CONFIGURED_LIMITS=$(kubectl get pod "$RELAY_POD" -n "${SOLO_NAMESPACE}" \
218-
-o jsonpath='{range .spec.containers[*]}{.name}: cpu={.resources.limits.cpu}, mem={.resources.limits.memory}{"\n"}{end}' 2>/dev/null) || true
219-
echo "$CONFIGURED_LIMITS"
220+
-o jsonpath='{range .spec.containers[*]}{.name}: cpu={.resources.limits.cpu}, mem={.resources.limits.memory}{"\n"}{end}' \
221+
2>/dev/null) || CONFIGURED_LIMITS="(query failed)"
220222
fi
221223
224+
# ── Echo all KPIs to step log (always visible regardless of summary write) ──
225+
echo "============================================"
226+
echo " Solo Memory Benchmark — ${MEMORY_LIMIT}"
227+
echo "============================================"
228+
echo " Job Status : ${TEST_STATUS}"
229+
echo " Wall-Clock : ${WALL_TIME}"
230+
echo " Tests Total : ${TOTAL}"
231+
echo " Tests Passed : ${PASSED}"
232+
echo " Tests Failed : ${FAILURES}"
233+
echo " Suite Time(s) : ${DURATION_S}"
234+
echo " Est. TPS : ${TPS}"
235+
echo "--------------------------------------------"
236+
echo " Relay (rpc) : CPU=${RELAY_CPU} MEM=${RELAY_MEM} LIMIT=${MEMORY_LIMIT}"
237+
echo " Relay (ws) : CPU=${RELAY_WS_CPU} MEM=${RELAY_WS_MEM}"
238+
echo " Consensus : CPU=${NODE_CPU} MEM=${NODE_MEM}"
239+
echo " Relay Limits : ${CONFIGURED_LIMITS}"
240+
echo " OOMKills : ${OOM_STATUS}"
241+
echo " Restarts : ${RELAY_RESTARTS}"
242+
echo "============================================"
243+
echo ""
244+
echo "==> All pod resources:"
245+
echo "${TOP_FULL}"
246+
222247
# ── Write GitHub Job Summary ─────────────────────────────
223-
cat >> "$GITHUB_STEP_SUMMARY" <<SUMMARY
224-
## Solo Memory Benchmark — \`${MEMORY_LIMIT}\`
225-
226-
| Key | Value |
227-
|---|---|
228-
| Memory Limit | \`${MEMORY_LIMIT}\` |
229-
| Runner | \`hiero-smart-contracts-linux-large\` |
230-
| Test Suite | \`acceptancetest:xts\` |
231-
| Job Status | \`${TEST_STATUS}\` |
232-
| Wall-Clock Duration | \`${WALL_TIME}\` |
233-
234-
### Test Results
235-
| Total | Passed | Failed | Suite Duration (s) | Est. TPS (tests / wall-s) |
236-
|:---:|:---:|:---:|:---:|:---:|
237-
| ${TOTAL} | ${PASSED} | ${FAILURES} | ${DURATION_S} | ${TPS} |
238-
239-
### Relay Resource Consumption (snapshot at test end)
240-
| Component | CPU | Memory (RSS) | Configured Limit |
241-
|---|:---:|:---:|:---:|
242-
| relay (rpc) | ${RELAY_CPU} | ${RELAY_MEM} | \`${MEMORY_LIMIT}\` |
243-
| relay (ws) | ${RELAY_WS_CPU} | ${RELAY_WS_MEM} | \`${MEMORY_LIMIT}\` |
244-
245-
### Consensus Node Resources
246-
| Component | CPU | Memory |
247-
|---|:---:|:---:|
248-
| network-node1 | ${NODE_CPU} | ${NODE_MEM} |
249-
250-
### Health & Stability
251-
| Metric | Value |
252-
|---|---|
253-
| OOMKill Events | ${OOM_STATUS} |
254-
| Relay Restart Count | ${RELAY_RESTARTS} |
255-
256-
<details><summary>All Pod Resources (<code>kubectl top</code>)</summary>
257-
258-
\`\`\`
259-
$(kubectl top pods -n "${SOLO_NAMESPACE}" 2>&1 || echo "(unavailable)")
260-
\`\`\`
261-
262-
</details>
263-
264-
<details><summary>Relay Container Limits (from pod spec)</summary>
265-
266-
\`\`\`
267-
${CONFIGURED_LIMITS}
268-
\`\`\`
269-
270-
</details>
271-
272-
---
273-
> **Reading guide:**
274-
> - Relay Memory ≈ limit → under memory pressure; OOMKills likely at higher load.
275-
> - Relay Memory ≪ limit → room for further reduction.
276-
> - OOMKills or Restarts > 0 → the limit is too aggressive for this workload.
277-
SUMMARY
278-
279-
echo "DOD Report written to GitHub Job Summary."
248+
# All values are pre-computed above, so no command substitutions are
249+
# needed while writing the summary.
250+
# Use printf instead of a heredoc to append each section: avoids heredoc
251+
# quoting/escaping pitfalls and makes each write independently verifiable.
252+
printf '## Solo Memory Benchmark — `%s`\n\n' "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
253+
printf '| Key | Value |\n|---|---|\n' >> "${GITHUB_STEP_SUMMARY}"
254+
printf '| Memory Limit | `%s` |\n' "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
255+
printf '| Runner | `hiero-smart-contracts-linux-large` |\n' >> "${GITHUB_STEP_SUMMARY}"
256+
printf '| Test Suite | `acceptancetest:xts` |\n' >> "${GITHUB_STEP_SUMMARY}"
257+
printf '| Job Status | `%s` |\n' "${TEST_STATUS}" >> "${GITHUB_STEP_SUMMARY}"
258+
printf '| Wall-Clock Duration | `%s` |\n\n' "${WALL_TIME}" >> "${GITHUB_STEP_SUMMARY}"
259+
260+
printf '### Test Results\n' >> "${GITHUB_STEP_SUMMARY}"
261+
printf '| Total | Passed | Failed | Suite Duration (s) | Est. TPS |\n' >> "${GITHUB_STEP_SUMMARY}"
262+
printf '|:---:|:---:|:---:|:---:|:---:|\n' >> "${GITHUB_STEP_SUMMARY}"
263+
printf '| %s | %s | %s | %s | %s |\n\n' \
264+
"${TOTAL}" "${PASSED}" "${FAILURES}" "${DURATION_S}" "${TPS}" >> "${GITHUB_STEP_SUMMARY}"
265+
266+
printf '### Relay Resource Consumption (snapshot at test end)\n' >> "${GITHUB_STEP_SUMMARY}"
267+
printf '| Component | CPU | Memory (RSS) | Configured Limit |\n' >> "${GITHUB_STEP_SUMMARY}"
268+
printf '|---|:---:|:---:|:---:|\n' >> "${GITHUB_STEP_SUMMARY}"
269+
printf '| relay (rpc) | %s | %s | `%s` |\n' "${RELAY_CPU}" "${RELAY_MEM}" "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
270+
printf '| relay (ws) | %s | %s | `%s` |\n\n' "${RELAY_WS_CPU}" "${RELAY_WS_MEM}" "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
271+
272+
printf '### Consensus Node Resources\n' >> "${GITHUB_STEP_SUMMARY}"
273+
printf '| Component | CPU | Memory |\n|---|:---:|:---:|\n' >> "${GITHUB_STEP_SUMMARY}"
274+
printf '| network-node1 | %s | %s |\n\n' "${NODE_CPU}" "${NODE_MEM}" >> "${GITHUB_STEP_SUMMARY}"
275+
276+
printf '### Health & Stability\n' >> "${GITHUB_STEP_SUMMARY}"
277+
printf '| Metric | Value |\n|---|---|\n' >> "${GITHUB_STEP_SUMMARY}"
278+
printf '| OOMKill Events | %s |\n' "${OOM_STATUS}" >> "${GITHUB_STEP_SUMMARY}"
279+
printf '| Relay Restart Count | %s |\n\n' "${RELAY_RESTARTS}" >> "${GITHUB_STEP_SUMMARY}"
280+
281+
printf '<details><summary>All Pod Resources (<code>kubectl top</code>)</summary>\n\n```\n%s\n```\n\n</details>\n\n' \
282+
"${TOP_FULL}" >> "${GITHUB_STEP_SUMMARY}"
283+
printf '<details><summary>Relay Container Limits (from pod spec)</summary>\n\n```\n%s\n```\n\n</details>\n\n' \
284+
"${CONFIGURED_LIMITS}" >> "${GITHUB_STEP_SUMMARY}"
285+
printf '---\n> **Reading guide:**\n> - Relay Memory ≈ limit → under pressure; OOMKills likely at higher load.\n> - Relay Memory ≪ limit → room for further reduction.\n> - OOMKills or Restarts > 0 → the limit is too aggressive for this workload.\n' \
286+
>> "${GITHUB_STEP_SUMMARY}"
287+
288+
echo "Report written to GitHub Job Summary."

0 commit comments

Comments
 (0)