Skip to content

Commit 7a93ab3

Browse files
feat: add --runtime-id to integration tests, add merge queue CI (#14)
* feat: add --runtime-id to integration tests, add merge queue CI - Add --runtime-id argument to all three integration test scripts (default: "ascend-tools"); resolves runtime by ID, falls back to first runtime in the list if not found - Add .github/workflows/integration.yml (merge_group trigger only) - Fix flaky pause/resume tests: increase health wait loops, add run_flow_with_retry to rest.py, fix sleep-before-check ordering Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * chore: add workflow_dispatch to integration CI, merge_group to CI - Add manual trigger (workflow_dispatch) to integration workflow - Add merge_group trigger to CI workflow so checks run in the queue Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: revert merge_group trigger on CI workflow CI (lint/test) runs on PRs already — no need to re-run in the merge queue. Only integration tests run there. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: remove flaky health=null assertion, add merge_group to CI - Remove health=null wait loop after pause from all three test suites. We already assert paused=true (synchronous). The health field clearing is async pod shutdown timing, not something the SDK controls. - Add merge_group trigger to ci.yml so the check/check required status actually runs in the merge queue (was hanging forever without it). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9c42d00 commit 7a93ab3

File tree

5 files changed

+155
-41
lines changed

5 files changed

+155
-41
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ on:
55
branches: [main]
66
pull_request:
77
branches: [main]
8+
merge_group:
89

910
jobs:
1011
check:

.github/workflows/integration.yml

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
name: Integration
2+
3+
on:
4+
merge_group:
5+
workflow_dispatch:
6+
7+
concurrency:
8+
group: "${{ github.workflow }}-${{ github.event.merge_group.ref || github.ref }}"
9+
cancel-in-progress: true
10+
11+
jobs:
12+
app-dev:
13+
name: Integration (app-dev)
14+
runs-on: ubuntu-latest
15+
steps:
16+
- uses: actions/checkout@v4
17+
- uses: actions/setup-python@v5
18+
with:
19+
python-version: "3.13"
20+
- uses: actions/cache@v4
21+
with:
22+
path: |
23+
~/.cargo/registry
24+
~/.cargo/git
25+
~/.cache/uv
26+
target
27+
key: integration-${{ runner.os }}-${{ hashFiles('**/Cargo.lock', 'uv.lock') }}
28+
restore-keys: integration-${{ runner.os }}-
29+
- run: bin/setup
30+
- run: bin/build
31+
- name: CLI integration tests
32+
run: ./tests/integration.sh
33+
env:
34+
ASCEND_SERVICE_ACCOUNT_ID: ${{ secrets.APP_DEV_ASCEND_SERVICE_ACCOUNT_ID }}
35+
ASCEND_SERVICE_ACCOUNT_KEY: ${{ secrets.APP_DEV_ASCEND_SERVICE_ACCOUNT_KEY }}
36+
ASCEND_INSTANCE_API_URL: ${{ secrets.APP_DEV_ASCEND_INSTANCE_API_URL }}
37+
- name: Python SDK integration tests
38+
run: ./tests/integration.py
39+
env:
40+
ASCEND_SERVICE_ACCOUNT_ID: ${{ secrets.APP_DEV_ASCEND_SERVICE_ACCOUNT_ID }}
41+
ASCEND_SERVICE_ACCOUNT_KEY: ${{ secrets.APP_DEV_ASCEND_SERVICE_ACCOUNT_KEY }}
42+
ASCEND_INSTANCE_API_URL: ${{ secrets.APP_DEV_ASCEND_INSTANCE_API_URL }}
43+
- name: REST API integration tests
44+
run: ./tests/rest.py
45+
env:
46+
ASCEND_SERVICE_ACCOUNT_ID: ${{ secrets.APP_DEV_ASCEND_SERVICE_ACCOUNT_ID }}
47+
ASCEND_SERVICE_ACCOUNT_KEY: ${{ secrets.APP_DEV_ASCEND_SERVICE_ACCOUNT_KEY }}
48+
ASCEND_INSTANCE_API_URL: ${{ secrets.APP_DEV_ASCEND_INSTANCE_API_URL }}

tests/integration.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
ASCEND_SERVICE_ACCOUNT_KEY, and ASCEND_INSTANCE_API_URL set.
1313
"""
1414

15+
import argparse
1516
import os
1617
import sys
1718
import time
@@ -83,6 +84,16 @@ def run_flow_with_retry(
8384

8485

8586
def main():
87+
parser = argparse.ArgumentParser(
88+
description="ascend-tools Python SDK integration tests"
89+
)
90+
parser.add_argument(
91+
"--runtime-id",
92+
default="ascend-tools",
93+
help="Runtime ID to test against (default: ascend-tools)",
94+
)
95+
args = parser.parse_args()
96+
8697
# ---------- preflight ----------
8798

8899
print("=== preflight ===")
@@ -114,7 +125,13 @@ def main():
114125

115126
check(True, f"list_runtimes returned {len(runtimes)} runtime(s)")
116127

117-
runtime = runtimes[0]
128+
by_id = client.list_runtimes(id=args.runtime_id)
129+
if by_id:
130+
runtime = by_id[0]
131+
else:
132+
print(f" runtime '{args.runtime_id}' not found, falling back to first runtime")
133+
runtime = runtimes[0]
134+
118135
runtime_uuid = runtime["uuid"]
119136
runtime_id = runtime["id"]
120137
print(f" using runtime: {runtime_id} ({runtime_uuid})")
@@ -377,14 +394,6 @@ def main():
377394
got_paused = client.get_runtime(uuid=runtime_uuid)
378395
check(got_paused.get("paused") is True, "get_runtime confirms paused")
379396

380-
# health may take a moment to clear after pause (runtime pods shutting down)
381-
for delay in (1, 2, 3):
382-
if got_paused.get("health") is None:
383-
break
384-
time.sleep(delay)
385-
got_paused = client.get_runtime(uuid=runtime_uuid)
386-
check(got_paused.get("health") is None, "paused runtime has health=None")
387-
388397
# run_flow without resume should fail on a paused runtime
389398
try:
390399
client.run_flow(runtime_uuid=runtime_uuid, flow_name=flow_name)

tests/integration.sh

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,16 @@ PASS=0
99
FAIL=0
1010
SKIP=0
1111

12+
# ---------- args ----------
13+
14+
RUNTIME_ID_FILTER="ascend-tools"
15+
while [[ $# -gt 0 ]]; do
16+
case "$1" in
17+
--runtime-id) RUNTIME_ID_FILTER="$2"; shift 2 ;;
18+
*) echo "unknown arg: $1" >&2; exit 1 ;;
19+
esac
20+
done
21+
1222
pass() { echo " PASS: $1"; PASS=$((PASS + 1)); }
1323
fail() { echo " FAIL: $1$2"; FAIL=$((FAIL + 1)); }
1424
skip() { echo " SKIP: $1"; SKIP=$((SKIP + 1)); }
@@ -90,8 +100,16 @@ else
90100
exit 0
91101
fi
92102

93-
RUNTIME_UUID=$(echo "$JSON" | jq -r '.[0].uuid')
94-
RUNTIME_ID=$(echo "$JSON" | jq -r '.[0].id')
103+
FILTERED=$($CLI -o json runtime list --id "$RUNTIME_ID_FILTER" 2>&1)
104+
FILTERED_COUNT=$(echo "$FILTERED" | jq 'length')
105+
if [ "$FILTERED_COUNT" -gt 0 ]; then
106+
RUNTIME_UUID=$(echo "$FILTERED" | jq -r '.[0].uuid')
107+
RUNTIME_ID=$(echo "$FILTERED" | jq -r '.[0].id')
108+
else
109+
echo " runtime '$RUNTIME_ID_FILTER' not found, falling back to first runtime"
110+
RUNTIME_UUID=$(echo "$JSON" | jq -r '.[0].uuid')
111+
RUNTIME_ID=$(echo "$JSON" | jq -r '.[0].id')
112+
fi
95113
echo " using runtime: $RUNTIME_ID ($RUNTIME_UUID)"
96114

97115
# get runtime
@@ -330,18 +348,6 @@ else
330348
fail "runtime pause" "expected paused=true, got $PAUSED"
331349
fi
332350

333-
# wait for health to clear
334-
for delay in 1 2 3; do
335-
HEALTH=$(${CLI} -o json runtime get "$RUNTIME_UUID" 2>&1 | jq -r '.health')
336-
[ "$HEALTH" = "null" ] && break
337-
sleep "$delay"
338-
done
339-
if [ "$HEALTH" = "null" ]; then
340-
pass "paused runtime has health=null"
341-
else
342-
fail "paused runtime health" "expected null, got $HEALTH"
343-
fi
344-
345351
# flow run without --resume should fail
346352
PAUSED_ERR=$($CLI -o json flow run "$FLOW_NAME" -r "$RUNTIME_UUID" 2>&1 || true)
347353
if echo "$PAUSED_ERR" | grep -qi "paused\|resume\|no health status\|initializing\|starting"; then

tests/rest.py

Lines changed: 68 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
ASCEND_INSTANCE_API_URL environment variables.
1313
"""
1414

15+
import argparse
1516
import base64
1617
import json
1718
import os
@@ -217,8 +218,10 @@ def run_flow(
217218
)
218219
else:
219220
health = runtime.get("health")
220-
if health != "running":
221+
if health and health != "running":
221222
raise RuntimeError(f"Runtime health is '{health}', expected 'running'.")
223+
if not health:
224+
raise RuntimeError("Runtime has no health status yet.")
222225
path = (
223226
f"/api/v1/runtimes/{_encode(runtime_uuid)}/flows/{_encode(flow_name)}:run"
224227
)
@@ -295,12 +298,51 @@ def print_summary():
295298
print("all tests passed")
296299

297300

301+
# ---------------------------------------------------------------------------
302+
# Helpers
303+
# ---------------------------------------------------------------------------
304+
305+
306+
def run_flow_with_retry(
307+
client: AscendClient,
308+
runtime_uuid: str,
309+
flow_name: str,
310+
spec: dict | None = None,
311+
resume: bool = False,
312+
) -> dict:
313+
"""Run a flow with retries for transient runtime readiness states."""
314+
last_error: Exception | None = None
315+
for delay in (0, 2, 3, 5, 5):
316+
if delay:
317+
time.sleep(delay)
318+
try:
319+
return client.run_flow(runtime_uuid, flow_name, spec=spec, resume=resume)
320+
except RuntimeError as e:
321+
msg = str(e).lower()
322+
if "starting" in msg or "no health status" in msg or "initializing" in msg:
323+
last_error = e
324+
continue
325+
raise
326+
327+
if last_error is not None:
328+
raise last_error
329+
raise RuntimeError("run_flow retry exhausted")
330+
331+
298332
# ---------------------------------------------------------------------------
299333
# Tests
300334
# ---------------------------------------------------------------------------
301335

302336

303337
def main():
338+
parser = argparse.ArgumentParser(description="Ascend REST API integration tests")
339+
parser.add_argument(
340+
"--runtime-id",
341+
default="ascend-tools",
342+
help="Runtime ID to test against (default: ascend-tools)",
343+
)
344+
args = parser.parse_args()
345+
304346
# ---------- preflight ----------
305347

306348
print("=== preflight ===")
@@ -340,7 +382,13 @@ def main():
340382

341383
check(True, f"list_runtimes returned {len(runtimes)} runtime(s)")
342384

343-
runtime = runtimes[0]
385+
by_id = client.list_runtimes(id=args.runtime_id)
386+
if by_id:
387+
runtime = by_id[0]
388+
else:
389+
print(f" runtime '{args.runtime_id}' not found, falling back to first runtime")
390+
runtime = runtimes[0]
391+
344392
runtime_uuid = runtime["uuid"]
345393
runtime_id = runtime["id"]
346394
is_paused = runtime.get("paused", False)
@@ -468,8 +516,8 @@ def main():
468516

469517
print("=== trigger flow run ===")
470518

471-
trigger = client.run_flow(runtime_uuid, flow_name, resume=is_paused)
472-
is_paused = False # runtime is now running
519+
# Runtime may already be paused from previous sessions; use resume=True for baseline trigger.
520+
trigger = run_flow_with_retry(client, runtime_uuid, flow_name, resume=True)
473521
check(isinstance(trigger, dict), "run_flow returns dict")
474522
check(
475523
trigger.get("event_uuid") is not None,
@@ -532,34 +580,44 @@ def main():
532580

533581
print("=== run_flow with spec ===")
534582

535-
trigger2 = client.run_flow(runtime_uuid, flow_name, spec={})
583+
trigger2 = run_flow_with_retry(
584+
client, runtime_uuid, flow_name, spec={}, resume=True
585+
)
536586
check(trigger2.get("event_uuid") is not None, "run_flow with empty spec works")
537587

538588
# spec with full_refresh
539-
trigger3_fr = client.run_flow(runtime_uuid, flow_name, spec={"full_refresh": True})
589+
trigger3_fr = run_flow_with_retry(
590+
client, runtime_uuid, flow_name, spec={"full_refresh": True}, resume=True
591+
)
540592
check(
541593
trigger3_fr.get("event_uuid") is not None,
542594
"run_flow with full_refresh=True works",
543595
)
544596

545597
# spec with parameters
546-
trigger3_params = client.run_flow(
547-
runtime_uuid, flow_name, spec={"parameters": {"key": "value"}}
598+
trigger3_params = run_flow_with_retry(
599+
client,
600+
runtime_uuid,
601+
flow_name,
602+
spec={"parameters": {"key": "value"}},
603+
resume=True,
548604
)
549605
check(
550606
trigger3_params.get("event_uuid") is not None,
551607
"run_flow with parameters works",
552608
)
553609

554610
# spec with multiple fields
555-
trigger3_multi = client.run_flow(
611+
trigger3_multi = run_flow_with_retry(
612+
client,
556613
runtime_uuid,
557614
flow_name,
558615
spec={
559616
"run_tests": False,
560617
"halt_flow_on_error": True,
561618
"runner_overrides": {"size": "Medium"},
562619
},
620+
resume=True,
563621
)
564622
check(
565623
trigger3_multi.get("event_uuid") is not None,
@@ -579,14 +637,6 @@ def main():
579637
got_paused = client.get_runtime(runtime_uuid)
580638
check(got_paused.get("paused") is True, "get_runtime confirms paused")
581639

582-
# health may take a moment to clear after pause (runtime pods shutting down)
583-
for delay in (1, 2, 3):
584-
if got_paused.get("health") is None:
585-
break
586-
time.sleep(delay)
587-
got_paused = client.get_runtime(runtime_uuid)
588-
check(got_paused.get("health") is None, "paused runtime has health=None")
589-
590640
# run_flow without resume should fail on a paused runtime
591641
try:
592642
client.run_flow(runtime_uuid, flow_name)
@@ -600,7 +650,7 @@ def main():
600650

601651
print("=== runtime resume via flow run ===")
602652

603-
trigger3 = client.run_flow(runtime_uuid, flow_name, resume=True)
653+
trigger3 = run_flow_with_retry(client, runtime_uuid, flow_name, resume=True)
604654
check(
605655
trigger3.get("event_uuid") is not None, "run_flow with resume=True succeeds"
606656
)

0 commit comments

Comments
 (0)