# Update solo-test.yml #13
# (The following two lines were GitHub blob-view UI chrome captured by the
# paste, kept here as comments rather than bare prose so the file parses:)
# "This file contains hidden or bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below."
# "Learn more about bidirectional Unicode characters"
---
# Sandbox CI workflow: deploys a single-node Hedera network via Solo inside a
# Kind cluster, runs the JSON-RPC relay under several container memory limits
# (matrix), executes the acceptance test suite against it, and writes a
# per-limit benchmark report to the GitHub Job Summary.
name: Solo Test Sandbox

on:
  push:
    branches:
      - 4900-solo-reduce-json-rpc-relay-memory-footprint

permissions:
  contents: read

jobs:
  solo-test:
    runs-on: hiero-smart-contracts-linux-large
    timeout-minutes: 50
    strategy:
      fail-fast: false  # let every memory limit run to completion even if one fails
      matrix:
        # Relay container memory limits under test, largest to smallest.
        # Quoted so no YAML consumer ever re-types them.
        memory_limit: ["1000Mi", "512Mi", "256Mi"]
    permissions:
      contents: read
      checks: write
    env:
      SOLO_CLUSTER_NAME: solo
      SOLO_NAMESPACE: solo
      SOLO_CLUSTER_SETUP_NAMESPACE: solo-cluster
      SOLO_DEPLOYMENT: solo-deployment
    name: Solo Test (${{ matrix.memory_limit }})
    steps:
      # NOTE(review): this workflow only declares a `push` trigger, so
      # `inputs.operator_id` is always empty and this step is a no-op; the
      # `-n` guard keeps it harmless. Confirm whether a workflow_dispatch /
      # workflow_call input was intended.
      - name: Set env variables
        run: |
          if [ -n "${{ inputs.operator_id }}" ]; then
            echo "OPERATOR_ID_MAIN=${{ inputs.operator_id }}" >> $GITHUB_ENV
          fi

      - name: Harden Runner
        uses: step-security/harden-runner@5ef0c079ce82195b2a36a210272d6b661572d83e # v2.14.2
        with:
          egress-policy: audit

      - name: Checkout Code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Setup node
        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0
        with:
          node-version: 22

      # Set up kind; needed for configuring the solo environment
      - name: Setup Kind
        uses: helm/kind-action@a1b0e391336a6ee6713a0583f8c6240d70863de3 # v1.12.0
        with:
          install_only: true
          node_image: kindest/node:v1.31.4@sha256:2cb39f7295fe7eafee0842b1052a599a4fb0f8bcf3f83d96c7f4864c357c6c30
          version: v0.26.0
          kubectl_version: v1.31.4
          verbosity: 3
          wait: 120s

      - name: Install Solo
        run: npm install -g @hashgraph/solo

      - name: Configure and run solo
        run: |
          kind create cluster -n "${SOLO_CLUSTER_NAME}"
          # metrics-server is not bundled with Kind; --kubelet-insecure-tls is required
          # because Kind kubelets use self-signed certificates.
          # See: https://github.com/kubernetes-sigs/metrics-server#requirements
          kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
          kubectl patch deployment metrics-server -n kube-system \
            --type=json \
            -p '[{"op":"add","path":"/spec/template/spec/containers/0/args/-","value":"--kubelet-insecure-tls"}]'
          kubectl rollout status deployment/metrics-server -n kube-system --timeout=120s
          # initialize solo
          solo init
          solo cluster-ref config connect --cluster-ref kind-${SOLO_CLUSTER_NAME} --context kind-${SOLO_CLUSTER_NAME}
          solo deployment config create -n "${SOLO_NAMESPACE}" --deployment "${SOLO_DEPLOYMENT}"
          solo deployment cluster attach --deployment "${SOLO_DEPLOYMENT}" --cluster-ref kind-${SOLO_CLUSTER_NAME} --num-consensus-nodes 1
          solo keys consensus generate --gossip-keys --tls-keys --deployment "${SOLO_DEPLOYMENT}"
          solo cluster-ref config setup -s "${SOLO_CLUSTER_SETUP_NAMESPACE}"
          solo consensus network deploy --deployment "${SOLO_DEPLOYMENT}"
          solo consensus node setup --deployment "${SOLO_DEPLOYMENT}"
          solo consensus node start --deployment "${SOLO_DEPLOYMENT}"
          solo mirror node add --deployment "${SOLO_DEPLOYMENT}" --cluster-ref kind-${SOLO_CLUSTER_NAME} --enable-ingress --pinger

      - name: Run Solo Relay
        run: |
          # Values-file override pinning the relay container to the matrix limit.
          # The heredoc body is indented at the block-scalar base so the written
          # file starts at column 0 with correct relative YAML indentation.
          cat <<EOF > relay-resources.yaml
          relay:
            resources:
              requests:
                cpu: 0
                memory: 0
              limits:
                cpu: 1100m
                memory: ${{ matrix.memory_limit }}
          EOF
          cat relay-resources.yaml
          solo relay node add -i node1 --deployment "${SOLO_DEPLOYMENT}" -f relay-resources.yaml

      - name: check Relay Resources
        run: |
          echo "Describing Relay pod resources:"
          kubectl -n "${SOLO_NAMESPACE}" describe pod relay-1

      - name: Port-forward Consensus Node
        run: |
          # Free port 50211 if anything holds it, then background the forward.
          sudo kill -9 $(sudo lsof -ti :50211) || true
          kubectl port-forward -n "${SOLO_NAMESPACE}" network-node1-0 50211:50211 &

      - name: Checkout Relay repo
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          ref: 4900-solo-reduce-json-rpc-relay-memory-footprint

      - name: Install packages
        run: npm ci

      - name: Build Relay
        run: npm run build

      - name: Record test start time
        id: test-timing
        run: echo "start=$(date +%s)" >> "$GITHUB_OUTPUT"

      - name: Start json-rpc-relay-test-client
        env:
          CHAIN_ID: "0x12a"
          MIRROR_NODE_URL: "http://127.0.0.1:8081"
          HEDERA_NETWORK: '{"127.0.0.1:50211":"0.0.3"}'
          # Quoted: "0.0.2" would otherwise be an ambiguous plain scalar.
          OPERATOR_ID_MAIN: "0.0.2"
          # NOTE(review): presumably the well-known local-development operator
          # key (operator 0.0.2 on local networks), not a production secret —
          # confirm before reusing this workflow elsewhere.
          OPERATOR_KEY_MAIN: "302e020100300506032b65700422042091132178e72057a1d7528025956fe39b0b847f200ab59b2fdd367017f3087137"
          # Boolean-looking env values quoted: Actions passes env as strings,
          # so "false" is what the consumer receives either way.
          REDIS_ENABLED: "false"
          USE_ASYNC_TX_PROCESSING: "false"
          E2E_RELAY_HOST: "http://localhost:7546"
          SDK_LOG_LEVEL: trace
          USE_INTERNAL_RELAY: "false"
        run: npm run acceptancetest:xts

      - name: Generate Report
        if: always()
        env:
          MEMORY_LIMIT: ${{ matrix.memory_limit }}
          TEST_STATUS: ${{ job.status }}
          TEST_START: ${{ steps.test-timing.outputs.start }}
        run: |
          set +e  # individual command failures must not abort the report
          # ── Wall-clock test duration ──────────────────────────────
          END_TIME=$(date +%s)
          WALL_SECS=$(( END_TIME - ${TEST_START:-$END_TIME} ))
          WALL_TIME="$(( WALL_SECS / 60 ))m $(( WALL_SECS % 60 ))s"
          # ── Test results from JUnit XML (Mocha format) ───────────
          # Parse the root <testsuites> element which carries aggregated totals.
          # Uses awk with '"' delimiter — portable, no PCRE dependency.
          TOTAL=0; PASSED=0; FAILURES=0; DURATION_S="0.0"
          if compgen -G "test-results.*.xml" > /dev/null 2>&1; then
            TOTAL=$(awk -F'"' '/<testsuites / { for(i=1;i<=NF;i++) if($(i) ~ / tests=$/) s+=$(i+1) } END { print s+0 }' test-results.*.xml)
            FAILURES=$(awk -F'"' '/<testsuites / { for(i=1;i<=NF;i++) if($(i) ~ / failures=$/) s+=$(i+1) } END { print s+0 }' test-results.*.xml)
            DURATION_S=$(awk -F'"' '/<testsuites / { for(i=1;i<=NF;i++) if($(i) ~ / time=$/) s+=$(i+1) } END { printf "%.1f", s }' test-results.*.xml)
            PASSED=$(( TOTAL - FAILURES ))
          fi
          # ── TPS estimate (test-cases / wall-clock seconds) ───────
          TPS="N/A"
          if [ "$TOTAL" -gt 0 ] 2>/dev/null && [ "$WALL_SECS" -gt 0 ] 2>/dev/null; then
            TPS=$(awk "BEGIN { printf \"%.2f\", $TOTAL / $WALL_SECS }")
          fi
          # ── Pod resource snapshot via metrics-server ─────────────
          # Capture both header and no-header forms before the heredoc.
          # Embedded $(...) inside a heredoc runs in a subshell that does not
          # inherit the runner environment reliably; pre-computing avoids that.
          TOP_FULL=$(kubectl top pods -n "${SOLO_NAMESPACE}" 2>&1) || TOP_FULL="(kubectl top unavailable)"
          TOP_OUTPUT=$(echo "$TOP_FULL" | tail -n +2)  # strip header row for parsing
          # ── Relay-specific metrics from kubectl top ──────────────
          RELAY_CPU="N/A"; RELAY_MEM="N/A"
          RELAY_WS_CPU="N/A"; RELAY_WS_MEM="N/A"
          RELAY_LINE=$(echo "$TOP_OUTPUT" | grep -E '^relay-[0-9]+-' | grep -v -- '-ws-' | head -1)
          RELAY_WS_LINE=$(echo "$TOP_OUTPUT" | grep -E '^relay-[0-9]+-ws-' | head -1)
          [ -n "$RELAY_LINE" ] && RELAY_CPU=$(echo "$RELAY_LINE" | awk '{print $2}') \
            && RELAY_MEM=$(echo "$RELAY_LINE" | awk '{print $3}')
          [ -n "$RELAY_WS_LINE" ] && RELAY_WS_CPU=$(echo "$RELAY_WS_LINE" | awk '{print $2}') \
            && RELAY_WS_MEM=$(echo "$RELAY_WS_LINE" | awk '{print $3}')
          # ── Consensus node metrics ───────────────────────────────
          NODE_CPU="N/A"; NODE_MEM="N/A"
          NODE_LINE=$(echo "$TOP_OUTPUT" | grep -E '^network-node' | head -1)
          [ -n "$NODE_LINE" ] && NODE_CPU=$(echo "$NODE_LINE" | awk '{print $2}') \
            && NODE_MEM=$(echo "$NODE_LINE" | awk '{print $3}')
          # ── OOMKill detection ────────────────────────────────────
          # The python3 -c script must sit flush with the block-scalar base so
          # its first line has no leading indent after YAML strips the common
          # indentation (a leading space would be a Python IndentationError).
          OOM_STATUS="None detected"
          OOM_PODS=$(kubectl get pods -n "${SOLO_NAMESPACE}" -o json 2>/dev/null | python3 -c "
          import sys, json
          data = json.load(sys.stdin)
          oom = set()
          for p in data.get('items', []):
              for c in p.get('status', {}).get('containerStatuses', []):
                  for sk in ('state', 'lastState'):
                      if c.get(sk, {}).get('terminated', {}).get('reason') == 'OOMKilled':
                          oom.add(p['metadata']['name'])
          print(', '.join(sorted(oom)) if oom else '')
          " 2>/dev/null) || OOM_PODS=""
          [ -n "$OOM_PODS" ] && OOM_STATUS="OOMKilled: $OOM_PODS"
          # ── Relay restart count ──────────────────────────────────
          RELAY_RESTARTS=$(kubectl get pods -n "${SOLO_NAMESPACE}" --no-headers 2>/dev/null \
            | awk '/^relay-/ { sum += $4 } END { print sum+0 }') || RELAY_RESTARTS="N/A"
          # ── Relay pod resource limits from spec ──────────────────
          RELAY_POD=$(kubectl get pods -n "${SOLO_NAMESPACE}" --no-headers \
            -o custom-columns=":metadata.name" 2>/dev/null \
            | grep -E '^relay-[0-9]+-[^w]' | head -1) || RELAY_POD=""
          CONFIGURED_LIMITS="(not found)"
          if [ -n "$RELAY_POD" ]; then
            CONFIGURED_LIMITS=$(kubectl get pod "$RELAY_POD" -n "${SOLO_NAMESPACE}" \
              -o jsonpath='{range .spec.containers[*]}{.name}: cpu={.resources.limits.cpu}, mem={.resources.limits.memory}{"\n"}{end}' \
              2>/dev/null) || CONFIGURED_LIMITS="(query failed)"
          fi
          # ── Echo all KPIs to step log (always visible regardless of summary write) ──
          echo "============================================"
          echo " Solo Memory Benchmark — ${MEMORY_LIMIT}"
          echo "============================================"
          echo " Job Status    : ${TEST_STATUS}"
          echo " Wall-Clock    : ${WALL_TIME}"
          echo " Tests Total   : ${TOTAL}"
          echo " Tests Passed  : ${PASSED}"
          echo " Tests Failed  : ${FAILURES}"
          echo " Suite Time(s) : ${DURATION_S}"
          echo " Est. TPS      : ${TPS}"
          echo "--------------------------------------------"
          echo " Relay (rpc)   : CPU=${RELAY_CPU} MEM=${RELAY_MEM} LIMIT=${MEMORY_LIMIT}"
          echo " Relay (ws)    : CPU=${RELAY_WS_CPU} MEM=${RELAY_WS_MEM}"
          echo " Consensus     : CPU=${NODE_CPU} MEM=${NODE_MEM}"
          echo " Relay Limits  : ${CONFIGURED_LIMITS}"
          echo " OOMKills      : ${OOM_STATUS}"
          echo " Restarts      : ${RELAY_RESTARTS}"
          echo "============================================"
          echo ""
          echo "==> All pod resources:"
          echo "${TOP_FULL}"
          # ── Write GitHub Job Summary ─────────────────────────────
          # All variables are pre-computed; no command substitutions inside
          # the heredoc to avoid subshell environment inheritance issues.
          # Use printf to append each section: avoids heredoc quoting/escaping
          # pitfalls and makes each write independently verifiable.
          printf '## Solo Memory Benchmark — `%s`\n\n' "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
          printf '| Key | Value |\n|---|---|\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Memory Limit | `%s` |\n' "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
          printf '| Runner | `hiero-smart-contracts-linux-large` |\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Test Suite | `acceptancetest:xts` |\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Job Status | `%s` |\n' "${TEST_STATUS}" >> "${GITHUB_STEP_SUMMARY}"
          printf '| Wall-Clock Duration | `%s` |\n\n' "${WALL_TIME}" >> "${GITHUB_STEP_SUMMARY}"
          printf '### Test Results\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Total | Passed | Failed | Suite Duration (s) | Est. TPS |\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '|:---:|:---:|:---:|:---:|:---:|\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| %s | %s | %s | %s | %s |\n\n' \
            "${TOTAL}" "${PASSED}" "${FAILURES}" "${DURATION_S}" "${TPS}" >> "${GITHUB_STEP_SUMMARY}"
          printf '### Relay Resource Consumption (snapshot at test end)\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Component | CPU | Memory (RSS) | Configured Limit |\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '|---|:---:|:---:|:---:|\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| relay (rpc) | %s | %s | `%s` |\n' "${RELAY_CPU}" "${RELAY_MEM}" "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
          printf '| relay (ws) | %s | %s | `%s` |\n\n' "${RELAY_WS_CPU}" "${RELAY_WS_MEM}" "${MEMORY_LIMIT}" >> "${GITHUB_STEP_SUMMARY}"
          printf '### Consensus Node Resources\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Component | CPU | Memory |\n|---|:---:|:---:|\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| network-node1 | %s | %s |\n\n' "${NODE_CPU}" "${NODE_MEM}" >> "${GITHUB_STEP_SUMMARY}"
          printf '### Health & Stability\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| Metric | Value |\n|---|---|\n' >> "${GITHUB_STEP_SUMMARY}"
          printf '| OOMKill Events | %s |\n' "${OOM_STATUS}" >> "${GITHUB_STEP_SUMMARY}"
          printf '| Relay Restart Count | %s |\n\n' "${RELAY_RESTARTS}" >> "${GITHUB_STEP_SUMMARY}"
          printf '<details><summary>All Pod Resources (<code>kubectl top</code>)</summary>\n\n```\n%s\n```\n\n</details>\n\n' \
            "${TOP_FULL}" >> "${GITHUB_STEP_SUMMARY}"
          printf '<details><summary>Relay Container Limits (from pod spec)</summary>\n\n```\n%s\n```\n\n</details>\n\n' \
            "${CONFIGURED_LIMITS}" >> "${GITHUB_STEP_SUMMARY}"
          printf -- '---\n> **Reading guide:**\n> - Relay Memory ≈ limit → under pressure; OOMKills likely at higher load.\n> - Relay Memory ≪ limit → room for further reduction.\n> - OOMKills or Restarts > 0 → the limit is too aggressive for this workload.\n' \
            >> "${GITHUB_STEP_SUMMARY}"
          echo "Report written to GitHub Job Summary."