-
Notifications
You must be signed in to change notification settings - Fork 464
504 lines (441 loc) · 19.1 KB
/
test-integration.yml
File metadata and controls
504 lines (441 loc) · 19.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
name: Integration Tests

# Triggers: PRs targeting main, pushes to main / version tags, and manual runs.
on:
  # NOTE(review): pull_request_target runs with the base repository's secrets,
  # while the integration-tests job checks out and executes the PR head.
  # Confirm this is acceptable for PRs coming from forks.
  pull_request_target:
    branches:
      - main
    paths:
      - 'manager/**'
      - 'docker-proxy/**'
      - 'hiclaw-controller/**'
      - 'tests/**'
      - '.github/workflows/test-integration.yml'

  push:
    branches:
      - main
    tags:
      - 'v*'
    paths:
      - 'manager/**'
      - 'docker-proxy/**'
      - 'hiclaw-controller/**'
      - 'tests/**'

  workflow_dispatch:
    inputs:
      test_filter:
        description: >-
          Test filter pattern (e.g. "01 02 14").
          Leave empty to run all non-GitHub tests.
        required: false
        default: ''
      baseline_version:
        description: >-
          Re-generate baseline for a specific release version (e.g. v1.2.3).
          Pulls published image, runs tests, uploads baseline to that release.
          Leave empty to run normal integration tests.
        required: false
        default: ''
      worker_runtime:
        description: 'Worker runtime to use'
        required: false
        type: choice
        options:
          - openclaw
          - copaw
        default: 'openclaw'
      model:
        description: 'LLM model to use'
        required: false
        default: 'qwen3.5-plus'
# Workflow-wide environment, visible to every job and step below.
env:
  # Image references handed to the installer in the "Install HiClaw" step.
  MANAGER_IMAGE: 'hiclaw/manager-agent:ci-test'
  MANAGER_COPAW_IMAGE: 'hiclaw/manager-copaw:ci-test'
  WORKER_IMAGE: 'hiclaw/worker-agent:ci-test'
  COPAW_WORKER_IMAGE: 'hiclaw/copaw-worker:ci-test'
  DOCKER_PROXY_IMAGE: 'hiclaw/docker-proxy:ci-test'
  # Tests that do not require a GitHub token
  NON_GITHUB_TESTS: '01 02 03 04 05 06 14 15 17 18 19 20 100'
jobs:
  # Builds the images from the checked-out ref, installs a manager, runs the
  # integration suite, and publishes metrics / artifacts.
  integration-tests:
    # The baseline_version input says "Leave empty to run normal integration
    # tests": when it is set on a manual dispatch only the release-baseline job
    # should run, so skip this job in that case.
    if: github.event_name != 'workflow_dispatch' || inputs.baseline_version == ''
    runs-on: ubuntu-latest
    timeout-minutes: 120
    permissions:
      contents: write
      pull-requests: write
      actions: read
    steps:
# Reclaims runner disk space before the image builds.
- name: Free Up Disk Space
  # Pinned to a release tag instead of the mutable `main` branch: this job
  # holds secrets and a write-scoped token, so upstream changes must not be
  # picked up implicitly. (Pinning to the full commit SHA is stricter still.)
  uses: jlumbroso/free-disk-space@v1.3.1
  with:
    tool-cache: false
    android: true
    dotnet: true
    haskell: true
    large-packages: true
    swap-storage: true
# Checks out the PR head (for pull_request_target) or the pushed commit.
- name: Checkout code
  uses: actions/checkout@v4
  with:
    ref: ${{ github.event.pull_request.head.sha || github.sha }}
    repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }}
    # The checked-out tree may come from a fork and is executed by later steps;
    # do not leave the workflow token in .git/config. No later step uses git
    # credentials (uploads and comments go through `gh` with an explicit token).
    persist-credentials: false
# Buildx is required by the image build targets that follow.
- name: Set up Docker Buildx
  uses: docker/setup-buildx-action@v3

# Pre-pull the shared base image from the us-west-1 registry.
- name: Pull base images
  run: |
    docker pull higress-registry.us-west-1.cr.aliyuncs.com/higress/openclaw-base:latest
# Builds the ci-test images for the selected worker runtime.
- name: Build images
  env:
    # Passed through the environment rather than expanded into the script text.
    CI_WORKER_RUNTIME: ${{ inputs.worker_runtime || 'openclaw' }}
  run: |
    if [ "$CI_WORKER_RUNTIME" = "copaw" ]; then
      BUILD_TARGETS="build-manager-copaw build-copaw-worker build-docker-proxy"
    else
      # Build copaw-worker too — team workers always use copaw runtime
      BUILD_TARGETS="build-manager build-worker build-copaw-worker build-docker-proxy"
    fi
    # BUILD_TARGETS is deliberately unquoted so it splits into separate make goals.
    make ${BUILD_TARGETS} VERSION=ci-test HIGRESS_REGISTRY=higress-registry.us-west-1.cr.aliyuncs.com \
      DOCKER_BUILD_ARGS="--build-arg APT_MIRROR= --build-arg PIP_INDEX_URL=https://pypi.org/simple/"
# Command-line tools used by the install and test scripts.
- name: Install dependencies
  run: |
    sudo apt-get update
    sudo apt-get install -y jq curl unzip
# Runs the non-interactive installer against the images built above.
- name: Install HiClaw
  env:
    HICLAW_LLM_API_KEY: ${{ secrets.HICLAW_LLM_API_KEY }}
    # Dispatch inputs are free text (model) or user-selected (runtime); hand them
    # over as environment variables so they are never parsed as shell source.
    CI_WORKER_RUNTIME: ${{ inputs.worker_runtime || 'openclaw' }}
    CI_MODEL: ${{ inputs.model || 'qwen3.5-plus' }}
  run: |
    RUNTIME="$CI_WORKER_RUNTIME"
    MODEL="$CI_MODEL"
    # Image names come from the workflow-level env block.
    if [ "$RUNTIME" = "copaw" ]; then
      WORKER_IMG="$COPAW_WORKER_IMAGE"
      MANAGER_IMG="$MANAGER_COPAW_IMAGE"
      MANAGER_RUNTIME="copaw"
    else
      WORKER_IMG="$WORKER_IMAGE"
      MANAGER_IMG="$MANAGER_IMAGE"
      MANAGER_RUNTIME="openclaw"
    fi
    HICLAW_NON_INTERACTIVE=1 \
    HICLAW_VERSION=ci-test \
    HICLAW_MOUNT_SOCKET=1 \
    HICLAW_MATRIX_E2EE=0 \
    HICLAW_LLM_PROVIDER=qwen \
    HICLAW_MANAGER_RUNTIME="$MANAGER_RUNTIME" \
    HICLAW_DEFAULT_WORKER_RUNTIME="$RUNTIME" \
    HICLAW_DEFAULT_MODEL="$MODEL" \
    HICLAW_INSTALL_MANAGER_IMAGE="$MANAGER_IMG" \
    HICLAW_INSTALL_MANAGER_COPAW_IMAGE="$MANAGER_COPAW_IMAGE" \
    HICLAW_INSTALL_WORKER_IMAGE="$WORKER_IMG" \
    HICLAW_INSTALL_COPAW_WORKER_IMAGE="$COPAW_WORKER_IMAGE" \
    HICLAW_INSTALL_DOCKER_PROXY_IMAGE="$DOCKER_PROXY_IMAGE" \
    bash ./install/hiclaw-install.sh manager
# Polls the two in-container HTTP endpoints (ports 8001 and 6167) up to
# 60 times, 5 seconds apart; dumps the manager log tail and fails on timeout.
- name: Wait for Manager to be ready
  run: |
    attempt=1
    while [ "$attempt" -le 60 ]; do
      if docker exec hiclaw-manager curl -sf http://127.0.0.1:8001/ >/dev/null 2>&1 \
        && docker exec hiclaw-manager curl -sf http://127.0.0.1:6167/_matrix/client/versions >/dev/null 2>&1; then
        echo "Manager ready, waiting 60s for agent initialization..."
        sleep 60
        exit 0
      fi
      echo "Waiting... ($attempt/60)"
      sleep 5
      attempt=$((attempt + 1))
    done
    echo "Manager did not become ready in time"
    docker logs hiclaw-manager --tail 50
    exit 1
# Creates the yolo-mode marker file inside the manager workspace.
- name: Enable YOLO mode
  run: |
    docker exec hiclaw-manager touch /root/manager-workspace/yolo-mode
# Runs the selected tests against the already-installed manager.
- name: Run integration tests
  id: tests
  env:
    HICLAW_LLM_API_KEY: ${{ secrets.HICLAW_LLM_API_KEY }}
    # Free-text dispatch input: pass it via the environment so it cannot be
    # interpreted as shell code.
    CI_TEST_FILTER: ${{ github.event.inputs.test_filter }}
  run: |
    # Fall back to the non-GitHub test set when no filter was supplied.
    FILTER="${CI_TEST_FILTER:-$NON_GITHUB_TESTS}"
    TEST_GATEWAY_PORT=18080 TEST_CONSOLE_PORT=18001 TEST_MANAGER_CONTAINER=hiclaw-manager \
      ./tests/run-all-tests.sh --skip-build --use-existing --test-filter "$FILTER"
# ============================================================
# Metrics: download latest release baseline for comparison
# ============================================================
- name: Download latest release baseline
  if: github.event_name == 'pull_request_target'
  continue-on-error: true
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    mkdir -p baseline-metrics
    # Find latest release with a metrics-baseline.json asset
    # Note: gh release list --json does not support the "assets" field,
    # so we use the GitHub API directly.
    # With --paginate the jq filter is applied to each page separately, so emit
    # one tag per matching release and keep only the first line; a per-page
    # "[...] | .[0]" would yield one value (possibly null) per page.
    LATEST=$(gh api "repos/${GITHUB_REPOSITORY}/releases" --paginate --jq \
      '.[] | select(.assets | map(.name) | any(. == "metrics-baseline.json")) | .tag_name' \
      2>/dev/null | head -n 1 || true)
    if [ -n "$LATEST" ]; then
      echo "Downloading baseline from release $LATEST"
      gh release download "$LATEST" --pattern "metrics-baseline.json" --dir baseline-metrics --repo "$GITHUB_REPOSITORY"
    else
      echo "No release baseline found"
    fi
# Summarises the metrics of the tests that ran, compares them with the release
# baseline (if one was downloaded) and creates or updates a single PR comment.
- name: Generate metrics comparison and post PR comment
  if: github.event_name == 'pull_request_target'
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Event values are passed via the environment, not expanded into the script.
    CI_TEST_FILTER: ${{ github.event.inputs.test_filter }}
    CI_PR_NUMBER: ${{ github.event.pull_request.number }}
  run: |
    # NOTE(review): this file comes from the checked-out ref (the PR head) and is
    # sourced while GITHUB_TOKEN is in the environment — confirm that is intended.
    source tests/lib/agent-metrics.sh

    # Collect current summary across all tests that ran
    FILTER="${CI_TEST_FILTER:-$NON_GITHUB_TESTS}"
    TEST_NAMES=$(echo "$FILTER" | tr ' ' '\n' | while read -r n; do
      f=$(ls tests/output/metrics-*${n}*.json 2>/dev/null | head -1)
      [ -n "$f" ] && basename "$f" .json | sed 's/^metrics-//'
    done | tr '\n' ' ')
    # TEST_NAMES is unquoted on purpose: each name is a separate argument.
    CURRENT=$(generate_metrics_summary $TEST_NAMES)

    BASELINE=""
    if [ -f "baseline-metrics/metrics-baseline.json" ]; then
      BASELINE=$(cat baseline-metrics/metrics-baseline.json)
    fi
    COMPARISON=$(compare_metrics_with_baseline "$CURRENT" "$BASELINE")
    REPORT=$(generate_comparison_markdown "$COMPARISON")

    # Append debug log artifact link. The blank line before '---' keeps it a
    # horizontal rule; directly under text it would become a setext heading.
    ARTIFACT_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
    REPORT="${REPORT}

    ---
    📦 [Download debug logs & test artifacts](${ARTIFACT_URL}#artifacts)"

    mkdir -p tests/output
    echo "$COMPARISON" > tests/output/metrics-comparison.json
    echo "$REPORT" > tests/output/metrics-report.md

    PR_NUM="$CI_PR_NUMBER"
    REPO="$GITHUB_REPOSITORY"
    # --paginate: the existing report may not be on the first page of comments.
    EXISTING=$(gh api --paginate "repos/$REPO/issues/$PR_NUM/comments" \
      --jq '.[] | select(.body | startswith("## 📊 CI Metrics Report")) | .id' | head -1)
    if [ -n "$EXISTING" ]; then
      gh api --method PATCH "repos/$REPO/issues/comments/$EXISTING" -f body="$REPORT"
    else
      gh api --method POST "repos/$REPO/issues/$PR_NUM/comments" -f body="$REPORT"
    fi
# ============================================================
# Artifacts
# ============================================================
# Runs the export script over the last 2 hours and reports the newest
# directory found under debug-log/.
# NOTE(review): --no-redact is used and the output is uploaded later —
# confirm the logs cannot contain secrets.
- name: Export debug logs
  if: always()
  continue-on-error: true
  run: |
    python3 scripts/export-debug-log.py --range 2h --no-redact
    newest_dir=$(ls -td debug-log/*/ 2>/dev/null | head -1)
    if [ -z "$newest_dir" ]; then
      echo "No debug logs generated"
    else
      echo "Debug log exported: $newest_dir"
    fi
# On failure, posts (or updates) a single PR comment containing the captured
# test results and the tail of the exported debug logs.
- name: Post failure comment to PR
  if: github.event_name == 'pull_request_target' && failure()
  continue-on-error: true
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    CI_PR_NUMBER: ${{ github.event.pull_request.number }}
  run: |
    PR_NUM="$CI_PR_NUMBER"
    REPO="$GITHUB_REPOSITORY"
    ARTIFACT_URL="https://github.com/${REPO}/actions/runs/${{ github.run_id }}"

    # Collect test result summary
    TEST_SUMMARY=""
    if ls tests/output/result-*.txt 1>/dev/null 2>&1; then
      TEST_SUMMARY=$(cat tests/output/result-*.txt 2>/dev/null | head -50)
    fi

    # Collect debug log tail
    DEBUG_TAIL=""
    DEBUG_DIR=$(ls -td debug-log/*/ 2>/dev/null | head -1)
    if [ -n "$DEBUG_DIR" ]; then
      # Last 20 lines of each *.log file, capped at 80 lines overall
      DEBUG_TAIL=$(find "$DEBUG_DIR" -name "*.log" -exec tail -20 {} + 2>/dev/null | tail -80)
    fi

    # Blank lines separate the markdown blocks so that the code fences inside
    # <details> and the trailing '---' rule render as intended.
    BODY="## ❌ Integration Tests Failed

    **Commit:** ${{ github.event.pull_request.head.sha }}
    **Workflow run:** [#${{ github.run_number }}](${ARTIFACT_URL})

    <details>
    <summary>Test Results</summary>

    \`\`\`
    ${TEST_SUMMARY:-No test output captured.}
    \`\`\`

    </details>

    <details>
    <summary>Debug Log (tail)</summary>

    \`\`\`
    ${DEBUG_TAIL:-No debug logs available.}
    \`\`\`

    </details>

    ---
    📦 [Download full debug logs & test artifacts](${ARTIFACT_URL}#artifacts)"

    # Update or create comment (--paginate: the earlier comment may not be on
    # the first page of results)
    EXISTING=$(gh api --paginate "repos/$REPO/issues/$PR_NUM/comments" \
      --jq '.[] | select(.body | startswith("## ❌ Integration Tests Failed")) | .id' | head -1)
    if [ -n "$EXISTING" ]; then
      gh api --method PATCH "repos/$REPO/issues/comments/$EXISTING" -f body="$BODY"
    else
      gh api --method POST "repos/$REPO/issues/$PR_NUM/comments" -f body="$BODY"
    fi
# Gathers test output and debug logs into one directory; copy errors are
# ignored so this step never fails.
- name: Collect test artifacts
  if: always()
  run: |
    mkdir -p test-artifacts
    if [ -d "tests/output" ]; then
      cp -r tests/output/* test-artifacts/ 2>/dev/null || true
    fi
    if [ -d "debug-log" ]; then
      cp -r debug-log test-artifacts/debug-log 2>/dev/null || true
    fi
# Publishes the collected directory as a workflow artifact kept for 7 days.
- name: Upload test artifacts
  if: always()
  uses: actions/upload-artifact@v4
  with:
    name: 'test-artifacts-${{ github.sha }}'
    path: 'test-artifacts/'
    retention-days: 7
# ============================================================
# Release baseline: generate and upload on tag push
# ============================================================
# Writes the metrics summary of the non-GitHub tests to metrics-baseline.json
# and prints its totals / by_role sections.
- name: Generate release baseline
  if: startsWith(github.ref, 'refs/tags/v')
  run: |
    source tests/lib/agent-metrics.sh
    TEST_NAMES=$(echo "$NON_GITHUB_TESTS" | tr ' ' '\n' | while read num; do
      match=$(ls tests/output/metrics-*${num}*.json 2>/dev/null | head -1)
      if [ -n "$match" ]; then
        stem=$(basename "$match" .json)
        echo "${stem#metrics-}"
      fi
    done | tr '\n' ' ')
    generate_metrics_summary $TEST_NAMES > metrics-baseline.json
    echo "Baseline generated for ${GITHUB_REF_NAME}"
    jq '{totals: .totals, by_role: .by_role}' metrics-baseline.json
# Attaches the baseline to the release named after the pushed tag,
# replacing any existing asset of the same name.
- name: Upload baseline to release
  if: startsWith(github.ref, 'refs/tags/v')
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    gh release upload "$GITHUB_REF_NAME" metrics-baseline.json \
      --repo "$GITHUB_REPOSITORY" --clobber
    echo "✅ Baseline uploaded to release ${GITHUB_REF_NAME}"
# Zips debug-log/ (when present) and attaches it to the tag's release,
# even if earlier steps failed.
- name: Upload debug log to release
  if: startsWith(github.ref, 'refs/tags/v') && always()
  continue-on-error: true
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    # Nothing to upload when no debug logs were exported.
    if [ ! -d debug-log ]; then
      exit 0
    fi
    cd debug-log && zip -r ../debug-log.zip . && cd ..
    gh release upload "$GITHUB_REF_NAME" debug-log.zip \
      --repo "$GITHUB_REPOSITORY" --clobber
    echo "✅ Debug log uploaded to release ${GITHUB_REF_NAME}"
# Best-effort teardown: neither command is allowed to fail the job.
- name: Cleanup
  if: always()
  run: |
    make uninstall || true
    docker system prune -af || true
# Writes a short run description to the job summary page.
- name: Test Summary
  if: always()
  env:
    CI_TEST_FILTER: ${{ github.event.inputs.test_filter }}
    CI_WORKER_RUNTIME: ${{ inputs.worker_runtime || 'openclaw' }}
    CI_MODEL: ${{ inputs.model || 'qwen3.5-plus' }}
  run: |
    # Report what actually ran: the dispatch filter when given, and the manager
    # image that matches the selected runtime (same choice as "Install HiClaw").
    TESTS="${CI_TEST_FILTER:-$NON_GITHUB_TESTS}"
    if [ "$CI_WORKER_RUNTIME" = "copaw" ]; then
      MANAGER_IMG="$MANAGER_COPAW_IMAGE"
    else
      MANAGER_IMG="$MANAGER_IMAGE"
    fi
    {
      echo "### Integration Test Summary"
      echo "- Tests: \`$TESTS\`"
      echo "- Manager Image: \`$MANAGER_IMG\`"
      echo "- Worker Runtime: \`$CI_WORKER_RUNTIME\`"
      echo "- Model: \`$CI_MODEL\`"
    } >> "$GITHUB_STEP_SUMMARY"
# ============================================================
# Manual baseline re-generation for a specific release version
# ============================================================
# Runs only for manual dispatches that supply baseline_version.
release-baseline:
  if: ${{ github.event_name == 'workflow_dispatch' && inputs.baseline_version != '' }}
  runs-on: ubuntu-latest
  timeout-minutes: 120
  permissions:
    # Needed to upload assets to the release.
    contents: write
  steps:
# Validates the requested version and exposes it as the `version` step output.
- name: Resolve target version
  id: version
  env:
    # Free-text dispatch input: read it from the environment so it is never
    # parsed as shell source.
    CI_BASELINE_VERSION: ${{ inputs.baseline_version }}
  run: |
    # The value is later used as a git ref, a Docker tag and a release name, so
    # restrict it to Docker-tag-safe characters. This also blocks newlines that
    # could inject extra entries into GITHUB_OUTPUT.
    case "$CI_BASELINE_VERSION" in
      ''|*[!A-Za-z0-9._-]*)
        echo "Invalid baseline_version: '$CI_BASELINE_VERSION'" >&2
        exit 1
        ;;
    esac
    echo "version=$CI_BASELINE_VERSION" >> "$GITHUB_OUTPUT"
# Check out the repository at the requested release ref.
- name: Checkout code
  uses: actions/checkout@v4
  with:
    ref: '${{ steps.version.outputs.version }}'

- name: Set up Docker Buildx
  uses: docker/setup-buildx-action@v3

# Command-line tools used by the install and test scripts.
- name: Install dependencies
  run: |
    sudo apt-get update
    sudo apt-get install -y jq curl unzip
# Pulls the published images for the target version and re-tags them under
# the local hiclaw/* names that the installer step is given.
- name: Pull published images
  env:
    # Read from the environment instead of expanding the value into the script.
    CI_BASELINE_VERSION: ${{ steps.version.outputs.version }}
  run: |
    VERSION="$CI_BASELINE_VERSION"
    REGISTRY=higress-registry.cn-hangzhou.cr.aliyuncs.com/higress
    docker pull "${REGISTRY}/hiclaw-manager:${VERSION}"
    docker pull "${REGISTRY}/hiclaw-worker:${VERSION}"
    docker pull "${REGISTRY}/hiclaw-docker-proxy:${VERSION}"
    docker tag "${REGISTRY}/hiclaw-manager:${VERSION}" "hiclaw/manager-agent:${VERSION}"
    docker tag "${REGISTRY}/hiclaw-worker:${VERSION}" "hiclaw/worker-agent:${VERSION}"
    docker tag "${REGISTRY}/hiclaw-docker-proxy:${VERSION}" "hiclaw/docker-proxy:${VERSION}"
# Runs the non-interactive installer against the re-tagged release images.
- name: Install HiClaw
  env:
    HICLAW_LLM_API_KEY: ${{ secrets.HICLAW_LLM_API_KEY }}
    # Read from the environment instead of expanding the value into the script.
    CI_BASELINE_VERSION: ${{ steps.version.outputs.version }}
  run: |
    VERSION="$CI_BASELINE_VERSION"
    HICLAW_NON_INTERACTIVE=1 \
    HICLAW_VERSION="$VERSION" \
    HICLAW_MOUNT_SOCKET=1 \
    HICLAW_MATRIX_E2EE=0 \
    HICLAW_LLM_PROVIDER=qwen \
    HICLAW_INSTALL_MANAGER_IMAGE="hiclaw/manager-agent:${VERSION}" \
    HICLAW_INSTALL_WORKER_IMAGE="hiclaw/worker-agent:${VERSION}" \
    HICLAW_INSTALL_DOCKER_PROXY_IMAGE="hiclaw/docker-proxy:${VERSION}" \
    bash ./install/hiclaw-install.sh manager
# Polls the two in-container HTTP endpoints (ports 8001 and 6167) up to
# 60 times, 5 seconds apart.
- name: Wait for Manager to be ready
  run: |
    for i in {1..60}; do
      if docker exec hiclaw-manager curl -sf http://127.0.0.1:8001/ >/dev/null 2>&1; then
        if docker exec hiclaw-manager curl -sf http://127.0.0.1:6167/_matrix/client/versions >/dev/null 2>&1; then
          echo "Manager ready, waiting 60s for agent initialization..."
          sleep 60
          exit 0
        fi
      fi
      echo "Waiting... ($i/60)"
      sleep 5
    done
    # Same timeout diagnostics as the integration-tests job; previously this
    # step failed without saying why.
    echo "Manager did not become ready in time"
    docker logs hiclaw-manager --tail 50
    exit 1
# Creates the yolo-mode marker file inside the manager workspace.
- name: Enable YOLO mode
  run: |
    docker exec hiclaw-manager touch /root/manager-workspace/yolo-mode
# Runs the non-GitHub test set against the installed release.
- name: Run integration tests
  env:
    HICLAW_LLM_API_KEY: ${{ secrets.HICLAW_LLM_API_KEY }}
  run: |
    export TEST_GATEWAY_PORT=18080
    export TEST_CONSOLE_PORT=18001
    export TEST_MANAGER_CONTAINER=hiclaw-manager
    ./tests/run-all-tests.sh --skip-build --use-existing --test-filter "$NON_GITHUB_TESTS"
# Writes the metrics summary of the non-GitHub tests to metrics-baseline.json
# and prints its totals / by_role sections.
- name: Generate release baseline
  env:
    # Read from the environment instead of expanding the value into the script.
    CI_BASELINE_VERSION: ${{ steps.version.outputs.version }}
  run: |
    VERSION="$CI_BASELINE_VERSION"
    source tests/lib/agent-metrics.sh
    TEST_NAMES=$(echo "$NON_GITHUB_TESTS" | tr ' ' '\n' | while read -r n; do
      f=$(ls tests/output/metrics-*${n}*.json 2>/dev/null | head -1)
      [ -n "$f" ] && basename "$f" .json | sed 's/^metrics-//'
    done | tr '\n' ' ')
    # TEST_NAMES is unquoted on purpose: each name is a separate argument.
    generate_metrics_summary $TEST_NAMES > metrics-baseline.json
    echo "Baseline generated for version ${VERSION}"
    jq '{totals: .totals, by_role: .by_role}' metrics-baseline.json
# Runs the export script over the last 2 hours and zips debug-log/ when it
# produced at least one sub-directory.
# NOTE(review): --no-redact is used and the archive is attached to the
# release later — confirm the logs cannot contain secrets.
- name: Export debug logs
  if: always()
  continue-on-error: true
  run: |
    python3 scripts/export-debug-log.py --range 2h --no-redact
    newest_dir=$(ls -td debug-log/*/ 2>/dev/null | head -1)
    if [ -z "$newest_dir" ]; then
      echo "No debug logs generated"
    else
      cd debug-log && zip -r ../debug-log.zip . && cd ..
      echo "Debug log exported: $(du -sh debug-log.zip | cut -f1)"
    fi
# Attaches the regenerated baseline to the target release, replacing any
# existing asset of the same name.
- name: Upload baseline to release
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Read from the environment instead of expanding the value into the script.
    CI_BASELINE_VERSION: ${{ steps.version.outputs.version }}
  run: |
    VERSION="$CI_BASELINE_VERSION"
    gh release upload "${VERSION}" metrics-baseline.json \
      --repo "$GITHUB_REPOSITORY" --clobber
    echo "✅ Baseline uploaded to release ${VERSION}"
# Attaches debug-log.zip (when the export step produced it) to the target
# release, even if earlier steps failed.
- name: Upload debug log to release
  if: always()
  continue-on-error: true
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Read from the environment instead of expanding the value into the script.
    CI_BASELINE_VERSION: ${{ steps.version.outputs.version }}
  run: |
    VERSION="$CI_BASELINE_VERSION"
    if [ -f debug-log.zip ]; then
      gh release upload "${VERSION}" debug-log.zip \
        --repo "$GITHUB_REPOSITORY" --clobber
      echo "✅ Debug log uploaded to release ${VERSION}"
    else
      echo "No debug log to upload"
    fi
# Best-effort teardown: neither command is allowed to fail the job.
- name: Cleanup
  if: always()
  run: |
    make uninstall || true
    docker system prune -af || true