@@ -169,24 +169,27 @@ jobs:
169169 fi
170170
171171 # ── Pod resource snapshot via metrics-server ─────────────
172- echo "==> Pod resource usage at end of test run:"
173- TOP_OUTPUT=$(kubectl top pods -n "${SOLO_NAMESPACE}" --no-headers 2>&1) || true
174- echo "$TOP_OUTPUT"
172+ # Capture both header and no-header forms before the heredoc.
173+ # Embedded $(...) inside a heredoc runs in a subshell that does not
174+ # inherit the runner environment reliably; pre-computing avoids that.
175+ TOP_FULL=$(kubectl top pods -n "${SOLO_NAMESPACE}" 2>&1) || TOP_FULL="(kubectl top unavailable)"
176+ TOP_OUTPUT=$(echo "$TOP_FULL" | tail -n +2) # strip header row for parsing
175177
176178 # ── Relay-specific metrics from kubectl top ──────────────
177179 RELAY_CPU="N/A"; RELAY_MEM="N/A"
178180 RELAY_WS_CPU="N/A"; RELAY_WS_MEM="N/A"
179- if [ -n "$TOP_OUTPUT" ]; then
180- RELAY_LINE =$(echo "$TOP_OUTPUT" | grep -E '^relay-[0-9]+-' | grep -v -- ' -ws-' | head -1)
181- RELAY_WS_LINE =$(echo "$TOP_OUTPUT " | grep -E '^relay-[0-9]+-ws-' | head -1)
182- [ -n "$RELAY_LINE" ] && RELAY_CPU=$(echo "$RELAY_LINE" | awk '{print $2}') && RELAY_MEM=$(echo "$RELAY_LINE" | awk '{print $3}')
183- [ -n "$RELAY_WS_LINE" ] && RELAY_WS_CPU=$(echo "$RELAY_WS_LINE" | awk '{print $2}') && RELAY_WS_MEM=$(echo "$RELAY_WS_LINE" | awk '{print $3}')
184- fi
185-
186- # ── Consensus node metrics ──────────────────────────────
181+ RELAY_LINE=$(echo "$TOP_OUTPUT" | grep -E '^relay-[0-9]+-' | grep -v -- '-ws-' | head -1)
182+ RELAY_WS_LINE=$(echo "$TOP_OUTPUT" | grep -E '^relay-[0-9]+-ws-' | head -1)
183+ [ -n "$RELAY_LINE" ] && RELAY_CPU=$(echo "$RELAY_LINE" | awk '{print $2}') \
184+ && RELAY_MEM=$(echo "$RELAY_LINE" | awk '{print $3}')
185+ [ -n "$RELAY_WS_LINE" ] && RELAY_WS_CPU=$(echo "$RELAY_WS_LINE" | awk '{print $2}') \
186+ && RELAY_WS_MEM=$(echo "$RELAY_WS_LINE" | awk '{print $3}')
187+
188+ # ── Consensus node metrics ───────────────────────────────
187189 NODE_CPU="N/A"; NODE_MEM="N/A"
188190 NODE_LINE=$(echo "$TOP_OUTPUT" | grep -E '^network-node' | head -1)
189- [ -n "$NODE_LINE" ] && NODE_CPU=$(echo "$NODE_LINE" | awk '{print $2}') && NODE_MEM=$(echo "$NODE_LINE" | awk '{print $3}')
191+ [ -n "$NODE_LINE" ] && NODE_CPU=$(echo "$NODE_LINE" | awk '{print $2}') \
192+ && NODE_MEM=$(echo "$NODE_LINE" | awk '{print $3}')
190193
191194 # ── OOMKill detection ────────────────────────────────────
192195 OOM_STATUS="None detected"
@@ -200,80 +203,86 @@ jobs:
200203 if c.get(sk, {}).get('terminated', {}).get('reason') == 'OOMKilled':
201204 oom.add(p['metadata']['name'])
202205 print(', '.join(sorted(oom)) if oom else '')
203- " 2>/dev/null) || true
204- [ -n "$OOM_PODS" ] && OOM_STATUS="** OOMKilled:** $OOM_PODS"
206+ " 2>/dev/null) || OOM_PODS=""
207+ [ -n "$OOM_PODS" ] && OOM_STATUS="OOMKilled: $OOM_PODS"
205208
206209 # ── Relay restart count ──────────────────────────────────
207210 RELAY_RESTARTS=$(kubectl get pods -n "${SOLO_NAMESPACE}" --no-headers 2>/dev/null \
208211 | awk '/^relay-/ { sum += $4 } END { print sum+0 }') || RELAY_RESTARTS="N/A"
209212
210213 # ── Relay pod resource limits from spec ──────────────────
211- echo "==> Relay pod resource configuration:"
212214 RELAY_POD=$(kubectl get pods -n "${SOLO_NAMESPACE}" --no-headers \
213215 -o custom-columns=":metadata.name" 2>/dev/null \
214- | grep -E '^relay-[0-9]+-[^w]' | head -1) || true
216+ | grep -E '^relay-[0-9]+-[^w]' | head -1) || RELAY_POD=""
215217 CONFIGURED_LIMITS="(not found)"
216218 if [ -n "$RELAY_POD" ]; then
217219 CONFIGURED_LIMITS=$(kubectl get pod "$RELAY_POD" -n "${SOLO_NAMESPACE}" \
218- -o jsonpath='{range .spec.containers[*]}{.name}: cpu={.resources.limits.cpu}, mem={.resources.limits.memory}{"\n"}{end}' 2>/dev/null) || true
219- echo "$ CONFIGURED_LIMITS"
220+ -o jsonpath='{range .spec.containers[*]}{.name}: cpu={.resources.limits.cpu}, mem={.resources.limits.memory}{"\n"}{end}' \
221+ 2>/dev/null) || CONFIGURED_LIMITS="(query failed)"
220222 fi
221223
224+ # ── Echo all KPIs to step log (always visible regardless of summary write) ──
225+ echo "============================================"
226+ echo " Solo Memory Benchmark — ${MEMORY_LIMIT}"
227+ echo "============================================"
228+ echo " Job Status : ${TEST_STATUS}"
229+ echo " Wall-Clock : ${WALL_TIME}"
230+ echo " Tests Total : ${TOTAL}"
231+ echo " Tests Passed : ${PASSED}"
232+ echo " Tests Failed : ${FAILURES}"
233+ echo " Suite Time(s) : ${DURATION_S}"
234+ echo " Est. TPS : ${TPS}"
235+ echo "--------------------------------------------"
236+ echo " Relay (rpc) : CPU=${RELAY_CPU} MEM=${RELAY_MEM} LIMIT=${MEMORY_LIMIT}"
237+ echo " Relay (ws) : CPU=${RELAY_WS_CPU} MEM=${RELAY_WS_MEM}"
238+ echo " Consensus : CPU=${NODE_CPU} MEM=${NODE_MEM}"
239+ echo " Relay Limits : ${CONFIGURED_LIMITS}"
240+ echo " OOMKills : ${OOM_STATUS}"
241+ echo " Restarts : ${RELAY_RESTARTS}"
242+ echo "============================================"
243+ echo ""
244+ echo "==> All pod resources:"
245+ echo "${TOP_FULL}"
246+
222247 # ── Write GitHub Job Summary ─────────────────────────────
223- cat >> "$GITHUB_STEP_SUMMARY" <<SUMMARY
224- ## Solo Memory Benchmark — \`${MEMORY_LIMIT}\`
225-
226- | Key | Value |
227- |---|---|
228- | Memory Limit | \`${MEMORY_LIMIT}\` |
229- | Runner | \`hiero-smart-contracts-linux-large\` |
230- | Test Suite | \`acceptancetest:xts\` |
231- | Job Status | \`${TEST_STATUS}\` |
232- | Wall-Clock Duration | \`${WALL_TIME}\` |
233-
234- ### Test Results
235- | Total | Passed | Failed | Suite Duration (s) | Est. TPS (tests / wall-s) |
236- |:---:|:---:|:---:|:---:|:---:|
237- | ${TOTAL} | ${PASSED} | ${FAILURES} | ${DURATION_S} | ${TPS} |
238-
239- ### Relay Resource Consumption (snapshot at test end)
240- | Component | CPU | Memory (RSS) | Configured Limit |
241- |---|:---:|:---:|:---:|
242- | relay (rpc) | ${RELAY_CPU} | ${RELAY_MEM} | \`${MEMORY_LIMIT}\` |
243- | relay (ws) | ${RELAY_WS_CPU} | ${RELAY_WS_MEM} | \`${MEMORY_LIMIT}\` |
244-
245- ### Consensus Node Resources
246- | Component | CPU | Memory |
247- |---|:---:|:---:|
248- | network-node1 | ${NODE_CPU} | ${NODE_MEM} |
249-
250- ### Health & Stability
251- | Metric | Value |
252- |---|---|
253- | OOMKill Events | ${OOM_STATUS} |
254- | Relay Restart Count | ${RELAY_RESTARTS} |
255-
256- <details><summary>All Pod Resources (<code>kubectl top</code>)</summary>
257-
258- \`\`\`
259- $(kubectl top pods -n "${SOLO_NAMESPACE}" 2>&1 || echo "(unavailable)")
260- \`\`\`
261-
262- </details>
263-
264- <details><summary>Relay Container Limits (from pod spec)</summary>
265-
266- \`\`\`
267- ${CONFIGURED_LIMITS}
268- \`\`\`
269-
270- </details>
271-
272- ---
273- > **Reading guide:**
274- > - Relay Memory ≈ limit → under memory pressure; OOMKills likely at higher load.
275- > - Relay Memory ≪ limit → room for further reduction.
276- > - OOMKills or Restarts > 0 → the limit is too aggressive for this workload.
277- SUMMARY
278-
279- echo "DOD Report written to GitHub Job Summary."
248+ # All variables are pre-computed; no command substitutions inside
249+ # the heredoc to avoid subshell environment inheritance issues.
250+ # Use printf to append each section: avoids heredoc quoting/escaping
251+ # pitfalls and makes each write independently verifiable.
252+ printf '## Solo Memory Benchmark — `%s`\n\n' "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
253+ printf '| Key | Value |\n|---|---|\n' >> "${GITHUB_STEP_SUMMARY}"
254+ printf '| Memory Limit | `%s` |\n' "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
255+ printf '| Runner | `hiero-smart-contracts-linux-large` |\n' >> "${GITHUB_STEP_SUMMARY}"
256+ printf '| Test Suite | `acceptancetest:xts` |\n' >> "${GITHUB_STEP_SUMMARY}"
257+ printf '| Job Status | `%s` |\n' "${TEST_STATUS}" >> "${GITHUB_STEP_SUMMARY}"
258+ printf '| Wall-Clock Duration | `%s` |\n\n' "${WALL_TIME}" >> "${GITHUB_STEP_SUMMARY}"
259+
260+ printf '### Test Results\n' >> "${GITHUB_STEP_SUMMARY}"
261+ printf '| Total | Passed | Failed | Suite Duration (s) | Est. TPS |\n' >> "${GITHUB_STEP_SUMMARY}"
262+ printf '|:---:|:---:|:---:|:---:|:---:|\n' >> "${GITHUB_STEP_SUMMARY}"
263+ printf '| %s | %s | %s | %s | %s |\n\n' \
264+ "${TOTAL}" "${PASSED}" "${FAILURES}" "${DURATION_S}" "${TPS}" >> "${GITHUB_STEP_SUMMARY}"
265+
266+ printf '### Relay Resource Consumption (snapshot at test end)\n' >> "${GITHUB_STEP_SUMMARY}"
267+ printf '| Component | CPU | Memory (RSS) | Configured Limit |\n' >> "${GITHUB_STEP_SUMMARY}"
268+ printf '|---|:---:|:---:|:---:|\n' >> "${GITHUB_STEP_SUMMARY}"
269+ printf '| relay (rpc) | %s | %s | `%s` |\n' "${RELAY_CPU}" "${RELAY_MEM}" "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
270+ printf '| relay (ws) | %s | %s | `%s` |\n\n' "${RELAY_WS_CPU}" "${RELAY_WS_MEM}" "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
271+
272+ printf '### Consensus Node Resources\n' >> "${GITHUB_STEP_SUMMARY}"
273+ printf '| Component | CPU | Memory |\n|---|:---:|:---:|\n' >> "${GITHUB_STEP_SUMMARY}"
274+ printf '| network-node1 | %s | %s |\n\n' "${NODE_CPU}" "${NODE_MEM}" >> "${GITHUB_STEP_SUMMARY}"
275+
276+ printf '### Health & Stability\n' >> "${GITHUB_STEP_SUMMARY}"
277+ printf '| Metric | Value |\n|---|---|\n' >> "${GITHUB_STEP_SUMMARY}"
278+ printf '| OOMKill Events | %s |\n' "${OOM_STATUS}" >> "${GITHUB_STEP_SUMMARY}"
279+ printf '| Relay Restart Count | %s |\n\n' "${RELAY_RESTARTS}" >> "${GITHUB_STEP_SUMMARY}"
280+
281+ printf '<details><summary>All Pod Resources (<code>kubectl top</code>)</summary>\n\n```\n%s\n```\n\n</details>\n\n' \
282+ "${TOP_FULL}" >> "${GITHUB_STEP_SUMMARY}"
283+ printf '<details><summary>Relay Container Limits (from pod spec)</summary>\n\n```\n%s\n```\n\n</details>\n\n' \
284+ "${CONFIGURED_LIMITS}" >> "${GITHUB_STEP_SUMMARY}"
285+ printf '---\n> **Reading guide:**\n> - Relay Memory ≈ limit → under pressure; OOMKills likely at higher load.\n> - Relay Memory ≪ limit → room for further reduction.\n> - OOMKills or Restarts > 0 → the limit is too aggressive for this workload.\n' \
286+ >> "${GITHUB_STEP_SUMMARY}"
287+
288+ echo "Report written to GitHub Job Summary."
0 commit comments