CI Notify #1162
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow: runs after the "CI Validation (MUSA GPU)" workflow completes and decides
# whether to post a result notification to DingTalk.
| name: CI Notify | |
| on: | |
| workflow_run: | |
| workflows: ["CI Validation (MUSA GPU)"] | |
| types: [completed] | |
# Read-only token scopes for the run, job and artifact API calls made below.
| permissions: | |
| actions: read | |
| contents: read | |
# One concurrency group per triggering run id: a newer notify run for the same
# run id cancels one that is still in progress.
| concurrency: | |
| group: ${{ github.workflow }}-${{ github.event.workflow_run.id }} | |
| cancel-in-progress: true | |
# Attempt budgets. MAX_AUTO_RERUNS is the fallback; the MAX_RERUNS_<NAME> entries are
# looked up from the job name (non-alphanumeric characters replaced by "_", then
# upper-cased) in the "Compute rerun state" and "Fetch workflow jobs" steps.
# The value is compared against the number of failed attempts including the first run.
| env: | |
| MAX_AUTO_RERUNS: 5 | |
| MAX_RERUNS_PREPARE_METADATA: 5 | |
| MAX_RERUNS_FORMAT_CHECK: 5 | |
| MAX_RERUNS_BUILD_CURRENT: 5 | |
| MAX_RERUNS_INTEGRATION_TEST: 5 | |
| MAX_RERUNS_BUILD_BASELINE: 5 | |
| MAX_RERUNS_T_PERFORMANCE: 5 | |
| MAX_RERUNS_T_ACCURACY: 5 | |
| MAX_RERUNS_BD_MODEL_1: 5 | |
| MAX_RERUNS_BD_MODEL_2: 5 | |
| MAX_RERUNS_BD_MODEL_3: 5 | |
| MAX_RERUNS_TRAINING: 5 | |
| MAX_RERUNS_FINAL_SUMMARY: 5 | |
| jobs: | |
| notify: | |
| name: Send DingTalk Notification | |
# Cancelled runs never produce a notification.
| if: ${{ github.event.workflow_run.conclusion != 'cancelled' }} | |
| runs-on: ubuntu-latest | |
| steps: | |
# Step: snapshot the triggering run. Records the attempt number carried by the
# workflow_run event next to the run's live attempt/status/conclusion from the REST API,
# so later steps can tell whether the run moved on after the event fired.
| - name: Fetch run metadata | |
| id: runmeta | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| REPO: ${{ github.repository }} | |
| RUN_ID: ${{ github.event.workflow_run.id }} | |
# Attempt number from the event payload; falls back to 1 when it is empty.
| EVENT_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt || 1 }} | |
| run: | | |
| set -euo pipefail | |
| RUN_FILE="$RUNNER_TEMP/workflow-run.json" | |
# Live state of the run at the time this step executes.
| gh api "repos/$REPO/actions/runs/$RUN_ID" > "$RUN_FILE" | |
| echo "event_run_attempt=$EVENT_RUN_ATTEMPT" >> "$GITHUB_OUTPUT" | |
# jq's `//` supplies a default when the field is null or missing.
| echo "current_run_attempt=$(jq -r '.run_attempt // 1' "$RUN_FILE")" >> "$GITHUB_OUTPUT" | |
| echo "current_status=$(jq -r '.status // "unknown"' "$RUN_FILE")" >> "$GITHUB_OUTPUT" | |
| echo "current_conclusion=$(jq -r '.conclusion // "unknown"' "$RUN_FILE")" >> "$GITHUB_OUTPUT" | |
# Written with the multi-line "name<<DELIMITER" output syntax; jq -c keeps the JSON on one line.
| { | |
| echo "pull_requests_json<<EOF" | |
| jq -c '.pull_requests // []' "$RUN_FILE" | |
| echo "EOF" | |
| } >> "$GITHUB_OUTPUT" | |
# Step: for failed runs only, poll the run up to six times (5 s apart, no sleep after
# the last poll) and mark the event as stale if the run starts a new attempt or leaves
# the "completed" status in the meantime.
# Outputs: stale ("yes"/"no"), reason (text), settled_attempt (last attempt number seen).
| - name: Wait for failure run to stabilize | |
| id: stability | |
| if: github.event.workflow_run.conclusion == 'failure' | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| REPO: ${{ github.repository }} | |
| RUN_ID: ${{ github.event.workflow_run.id }} | |
| EVENT_RUN_ATTEMPT: ${{ steps.runmeta.outputs.event_run_attempt }} | |
| run: | | |
| set -euo pipefail | |
# Defaults describe the "nothing changed" outcome; the loop overwrites them on change.
| STALE="no" | |
| REASON="run remained on the completed failure attempt" | |
| SETTLED_ATTEMPT="$EVENT_RUN_ATTEMPT" | |
| for i in 1 2 3 4 5 6; do | |
| RUN_FILE="$RUNNER_TEMP/workflow-run-stability-$i.json" | |
| gh api "repos/$REPO/actions/runs/$RUN_ID" > "$RUN_FILE" | |
| SETTLED_ATTEMPT="$(jq -r '.run_attempt // 1' "$RUN_FILE")" | |
| RUN_STATUS="$(jq -r '.status // "unknown"' "$RUN_FILE")" | |
# A different attempt number means the run was re-run after the event was emitted.
| if [[ "$SETTLED_ATTEMPT" != "$EVENT_RUN_ATTEMPT" ]]; then | |
| STALE="yes" | |
| REASON="run advanced to attempt $SETTLED_ATTEMPT before notification" | |
| break | |
| fi | |
# Same attempt number but no longer "completed".
| if [[ "$RUN_STATUS" != "completed" ]]; then | |
| STALE="yes" | |
| REASON="run moved back to status $RUN_STATUS before notification" | |
| break | |
| fi | |
# Skip the pause after the final poll.
| if [[ "$i" != "6" ]]; then | |
| sleep 5 | |
| fi | |
| done | |
| echo "stale=$STALE" >> "$GITHUB_OUTPUT" | |
| echo "reason=$REASON" >> "$GITHUB_OUTPUT" | |
| echo "settled_attempt=$SETTLED_ATTEMPT" >> "$GITHUB_OUTPUT" | |
# Step: decide whether the failed jobs of the latest attempt still have attempt budget.
# Outputs: will_auto_rerun ("yes"/"no") and reason (human-readable explanation).
# NOTE(review): this workflow only reports the decision; the rerun itself is presumably
# triggered elsewhere - confirm.
- name: Compute rerun state
  id: retry_state
  uses: actions/github-script@v7
  env:
    RUN_ATTEMPT: ${{ steps.runmeta.outputs.current_run_attempt }}
    STABILITY_STALE: ${{ steps.stability.outputs.stale || 'no' }}
    STABILITY_REASON: ${{ steps.stability.outputs.reason || '' }}
    MAX_AUTO_RERUNS: ${{ env.MAX_AUTO_RERUNS }}
    MAX_RERUNS_PREPARE_METADATA: ${{ env.MAX_RERUNS_PREPARE_METADATA }}
    MAX_RERUNS_FORMAT_CHECK: ${{ env.MAX_RERUNS_FORMAT_CHECK }}
    MAX_RERUNS_BUILD_CURRENT: ${{ env.MAX_RERUNS_BUILD_CURRENT }}
    MAX_RERUNS_INTEGRATION_TEST: ${{ env.MAX_RERUNS_INTEGRATION_TEST }}
    MAX_RERUNS_BUILD_BASELINE: ${{ env.MAX_RERUNS_BUILD_BASELINE }}
    MAX_RERUNS_T_PERFORMANCE: ${{ env.MAX_RERUNS_T_PERFORMANCE }}
    MAX_RERUNS_T_ACCURACY: ${{ env.MAX_RERUNS_T_ACCURACY }}
    MAX_RERUNS_BD_MODEL_1: ${{ env.MAX_RERUNS_BD_MODEL_1 }}
    MAX_RERUNS_BD_MODEL_2: ${{ env.MAX_RERUNS_BD_MODEL_2 }}
    MAX_RERUNS_BD_MODEL_3: ${{ env.MAX_RERUNS_BD_MODEL_3 }}
    MAX_RERUNS_TRAINING: ${{ env.MAX_RERUNS_TRAINING }}
    MAX_RERUNS_FINAL_SUMMARY: ${{ env.MAX_RERUNS_FINAL_SUMMARY }}
  with:
    script: |
      const { owner, repo } = context.repo;
      const runId = context.payload.workflow_run.id;
      const runConclusion = context.payload.workflow_run.conclusion;
      const attempt = Number(process.env.RUN_ATTEMPT || "1");

      // Emit both outputs in one place so every exit path stays consistent.
      const finish = (willRerun, reason) => {
        core.setOutput("will_auto_rerun", willRerun ? "yes" : "no");
        core.setOutput("reason", reason);
      };

      // Parse a limit from the environment. Missing, empty or non-numeric values
      // use the fallback: a NaN limit would make every ">= limit" check false and
      // the job would look retryable forever. An explicit 0 is honoured.
      const parseLimit = (raw, fallback) => {
        if (raw === undefined || raw === "") {
          return fallback;
        }
        const value = Number(raw);
        return Number.isFinite(value) ? value : fallback;
      };
      const defaultLimit = parseLimit(process.env.MAX_AUTO_RERUNS, 5);

      // Job name -> MAX_RERUNS_<NAME>: non-alphanumerics become "_", then upper-case.
      const getJobLimit = (jobName) => {
        const envVarName = `MAX_RERUNS_${jobName
          .replace(/[^a-zA-Z0-9_]/g, "_")
          .toUpperCase()}`;
        return parseLimit(process.env[envVarName], defaultLimit);
      };

      // The stability step already saw the run move on; nothing to evaluate.
      if (process.env.STABILITY_STALE === "yes") {
        finish(false, process.env.STABILITY_REASON || "stale workflow_run event");
        return;
      }
      if (runConclusion !== "failure") {
        finish(false, "run is not in failure state");
        return;
      }

      // Count, per job name, in how many attempts (1..attempt) the job failed.
      // A Map is used because job names are arbitrary strings and must not collide
      // with Object.prototype keys such as "constructor" or "__proto__".
      const failedJobCounts = new Map();
      const currentlyFailedJobs = new Set();
      for (let i = 1; i <= attempt; i += 1) {
        const jobs = await github.paginate(
          github.rest.actions.listJobsForWorkflowRunAttempt,
          {
            owner,
            repo,
            run_id: runId,
            attempt_number: i,
            per_page: 100,
          }
        );
        for (const job of jobs) {
          if (job.conclusion !== "failure") {
            continue;
          }
          failedJobCounts.set(job.name, (failedJobCounts.get(job.name) ?? 0) + 1);
          if (i === attempt) {
            currentlyFailedJobs.add(job.name);
          }
        }
      }
      if (currentlyFailedJobs.size === 0) {
        finish(false, "no currently failed jobs in latest attempt");
        return;
      }

      // Classify each job that failed in the latest attempt against its limit.
      const exhaustedJobs = [];
      const retryableJobs = [];
      const detailLines = [];
      for (const jobName of [...currentlyFailedJobs].sort()) {
        const limit = getJobLimit(jobName);
        const failedAttempts = failedJobCounts.get(jobName) ?? 1;
        const rerunsUsed = Math.max(0, failedAttempts - 1);
        const maxReruns = Math.max(0, limit - 1);
        detailLines.push(
          `${jobName}: total attempts ${failedAttempts}/${limit} including the first run, auto reruns used ${rerunsUsed}/${maxReruns}`
        );
        if (failedAttempts >= limit) {
          exhaustedJobs.push(jobName);
        } else {
          retryableJobs.push(jobName);
        }
      }
      const details = detailLines.join("; ");

      // A rerun is expected only when no failed job has exhausted its budget.
      if (retryableJobs.length > 0 && exhaustedJobs.length === 0) {
        finish(true, `current failed jobs still have retry budget: ${details}`);
        return;
      }
      finish(
        false,
        exhaustedJobs.length > 0
          ? `auto rerun stopped because at least one current failed job exhausted its limit: ${details}`
          : `no retryable failed jobs remain: ${details}`
      );
# Step: turn the collected state into a notify/skip decision.
# Outputs: notify ("yes"/"no"), style ("daily" | "pr" | "manual" | "skip"), reason (text).
| - name: Decide notification policy | |
| id: policy | |
| env: | |
| RUN_EVENT: ${{ github.event.workflow_run.event }} | |
| RUN_CONCLUSION: ${{ github.event.workflow_run.conclusion }} | |
| CURRENT_STATUS: ${{ steps.runmeta.outputs.current_status }} | |
| CURRENT_RUN_ATTEMPT: ${{ steps.runmeta.outputs.current_run_attempt }} | |
| EVENT_RUN_ATTEMPT: ${{ steps.runmeta.outputs.event_run_attempt }} | |
# The stability step is skipped for non-failure runs, hence the defaults.
| STABILITY_STALE: ${{ steps.stability.outputs.stale || 'no' }} | |
| STABILITY_REASON: ${{ steps.stability.outputs.reason || '' }} | |
| WILL_AUTO_RERUN: ${{ steps.retry_state.outputs.will_auto_rerun }} | |
| RETRY_REASON: ${{ steps.retry_state.outputs.reason }} | |
| run: | | |
| set -euo pipefail | |
# Default is "skip"; it also applies to any event type not matched below.
| NOTIFY="no" | |
| STYLE="skip" | |
| REASON="notification skipped by policy" | |
# The first matching branch wins: the four skip conditions are checked before
# the per-event branches that enable the notification.
| if [[ "$STABILITY_STALE" == "yes" ]]; then | |
| REASON="$STABILITY_REASON" | |
| elif [[ "$CURRENT_STATUS" != "completed" ]]; then | |
| REASON="run is currently $CURRENT_STATUS" | |
| elif [[ "$CURRENT_RUN_ATTEMPT" != "$EVENT_RUN_ATTEMPT" ]]; then | |
| REASON="run attempt advanced from $EVENT_RUN_ATTEMPT to $CURRENT_RUN_ATTEMPT" | |
| elif [[ "$RUN_CONCLUSION" == "failure" && "$WILL_AUTO_RERUN" == "yes" ]]; then | |
| REASON="run failed and will be auto-rerun: $RETRY_REASON" | |
| elif [[ "$RUN_EVENT" == "schedule" ]]; then | |
| NOTIFY="yes" | |
| STYLE="daily" | |
| REASON="daily run should always notify" | |
| elif [[ "$RUN_EVENT" == "pull_request" ]]; then | |
| NOTIFY="yes" | |
| STYLE="pr" | |
| REASON="pull request should always notify" | |
| elif [[ "$RUN_EVENT" == "workflow_dispatch" ]]; then | |
| NOTIFY="yes" | |
| STYLE="manual" | |
| REASON="manual run should always notify" | |
| fi | |
| echo "notify=$NOTIFY" >> "$GITHUB_OUTPUT" | |
| echo "style=$STYLE" >> "$GITHUB_OUTPUT" | |
# NOTE(review): written as a single-line output; REASON can embed job names via
# RETRY_REASON - confirm those can never contain a newline.
| echo "reason=$REASON" >> "$GITHUB_OUTPUT" | |
# Step: log the decision for debugging.
# The values are handed over through env instead of being expanded with ${{ }} inside
# the script: the reason strings embed job names from the triggering run, and expanding
# them into the shell source would let a crafted name (e.g. containing "$(...)") run
# commands here.
- name: Print notification policy
  env:
    POLICY_NOTIFY: ${{ steps.policy.outputs.notify }}
    POLICY_STYLE: ${{ steps.policy.outputs.style }}
    POLICY_REASON: ${{ steps.policy.outputs.reason }}
    EVENT_RUN_ATTEMPT: ${{ steps.runmeta.outputs.event_run_attempt }}
    CURRENT_RUN_ATTEMPT: ${{ steps.runmeta.outputs.current_run_attempt }}
    WILL_AUTO_RERUN: ${{ steps.retry_state.outputs.will_auto_rerun }}
    RETRY_REASON: ${{ steps.retry_state.outputs.reason }}
  run: |
    # printf '%s' prints the values verbatim, without any further interpretation.
    printf 'notify=%s\n' "$POLICY_NOTIFY"
    printf 'style=%s\n' "$POLICY_STYLE"
    printf 'reason=%s\n' "$POLICY_REASON"
    printf 'event_run_attempt=%s\n' "$EVENT_RUN_ATTEMPT"
    printf 'current_run_attempt=%s\n' "$CURRENT_RUN_ATTEMPT"
    printf 'will_auto_rerun=%s\n' "$WILL_AUTO_RERUN"
    printf 'retry_reason=%s\n' "$RETRY_REASON"
# Step: save the run's artifact listing to a temp file for the later steps.
# Output: artifacts_file (path of the saved JSON).
| - name: Fetch run artifacts | |
| id: artifacts | |
| if: steps.policy.outputs.notify == 'yes' | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| REPO: ${{ github.repository }} | |
| RUN_ID: ${{ github.event.workflow_run.id }} | |
| run: | | |
| set -euo pipefail | |
| ARTIFACTS_FILE="$RUNNER_TEMP/workflow-run-artifacts.json" | |
# Single request with per_page=100; no further pages are fetched.
| gh api "repos/$REPO/actions/runs/$RUN_ID/artifacts?per_page=100" > "$ARTIFACTS_FILE" | |
| echo "artifacts_file=$ARTIFACTS_FILE" >> "$GITHUB_OUTPUT" | |
# Step: download and unpack the "summary-final" artifact when the run produced one.
# Outputs: found ("yes"/"no"); when found, summary_file and summary_json hold the paths
# of final-summary.md / final-summary.json (each empty if that file is not in the zip).
| - name: Download final summary artifact | |
| id: summary | |
| if: steps.policy.outputs.notify == 'yes' | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| ARTIFACTS_FILE: ${{ steps.artifacts.outputs.artifacts_file }} | |
| REPO: ${{ github.repository }} | |
| run: | | |
| set -euo pipefail | |
# First matching artifact id; "|| true" keeps pipefail from aborting when nothing matches.
| ARTIFACT_ID="$(jq -r '.artifacts[] | select(.name=="summary-final") | .id' "$ARTIFACTS_FILE" | head -n1 || true)" | |
# A missing summary is not an error: report found=no and stop here.
| if [[ -z "$ARTIFACT_ID" ]]; then | |
| echo "found=no" >> "$GITHUB_OUTPUT" | |
| exit 0 | |
| fi | |
| ZIP_FILE="$RUNNER_TEMP/summary-final.zip" | |
| OUT_DIR="$RUNNER_TEMP/summary-final" | |
| mkdir -p "$OUT_DIR" | |
| gh api "repos/$REPO/actions/artifacts/$ARTIFACT_ID/zip" > "$ZIP_FILE" | |
| unzip -q "$ZIP_FILE" -d "$OUT_DIR" | |
# Search the whole extracted tree; the first match of each name is used.
| SUMMARY_FILE="$(find "$OUT_DIR" -type f -name 'final-summary.md' | head -n1 || true)" | |
| SUMMARY_JSON="$(find "$OUT_DIR" -type f -name 'final-summary.json' | head -n1 || true)" | |
| echo "found=yes" >> "$GITHUB_OUTPUT" | |
| echo "summary_file=$SUMMARY_FILE" >> "$GITHUB_OUTPUT" | |
| echo "summary_json=$SUMMARY_JSON" >> "$GITHUB_OUTPUT" | |
# Step: save the job list of the current attempt and a per-job attempt summary.
# Outputs: jobs_file (jobs JSON of the current attempt) and retry_file (JSON object
# keyed by job name: {attempts, reruns_used, max_reruns, limit}).
- name: Fetch workflow jobs
  id: jobs
  if: steps.policy.outputs.notify == 'yes'
  env:
    GH_TOKEN: ${{ github.token }}
    REPO: ${{ github.repository }}
    RUN_ID: ${{ github.event.workflow_run.id }}
    ATTEMPT: ${{ steps.runmeta.outputs.current_run_attempt }}
    MAX_AUTO_RERUNS: ${{ env.MAX_AUTO_RERUNS }}
    MAX_RERUNS_PREPARE_METADATA: ${{ env.MAX_RERUNS_PREPARE_METADATA }}
    MAX_RERUNS_FORMAT_CHECK: ${{ env.MAX_RERUNS_FORMAT_CHECK }}
    MAX_RERUNS_BUILD_CURRENT: ${{ env.MAX_RERUNS_BUILD_CURRENT }}
    MAX_RERUNS_INTEGRATION_TEST: ${{ env.MAX_RERUNS_INTEGRATION_TEST }}
    MAX_RERUNS_BUILD_BASELINE: ${{ env.MAX_RERUNS_BUILD_BASELINE }}
    MAX_RERUNS_T_PERFORMANCE: ${{ env.MAX_RERUNS_T_PERFORMANCE }}
    MAX_RERUNS_T_ACCURACY: ${{ env.MAX_RERUNS_T_ACCURACY }}
    MAX_RERUNS_BD_MODEL_1: ${{ env.MAX_RERUNS_BD_MODEL_1 }}
    MAX_RERUNS_BD_MODEL_2: ${{ env.MAX_RERUNS_BD_MODEL_2 }}
    MAX_RERUNS_BD_MODEL_3: ${{ env.MAX_RERUNS_BD_MODEL_3 }}
    MAX_RERUNS_TRAINING: ${{ env.MAX_RERUNS_TRAINING }}
    MAX_RERUNS_FINAL_SUMMARY: ${{ env.MAX_RERUNS_FINAL_SUMMARY }}
  run: |
    set -euo pipefail
    JOBS_FILE="$RUNNER_TEMP/workflow-run-jobs.json"
    gh api "repos/$REPO/actions/runs/$RUN_ID/attempts/$ATTEMPT/jobs?per_page=100" > "$JOBS_FILE"
    echo "jobs_file=$JOBS_FILE" >> "$GITHUB_OUTPUT"
    RETRY_FILE="$RUNNER_TEMP/workflow-run-retry-info.json"
    # The script is read from stdin; its stdout (the JSON) goes to RETRY_FILE,
    # diagnostics go to stderr so they show up in the step log instead.
    node - "$ATTEMPT" "$REPO" "$RUN_ID" > "$RETRY_FILE" <<'NODE_SCRIPT'
    const { execFileSync } = require('child_process');

    const attempt = Number.parseInt(process.argv[2], 10) || 1;
    const repo = process.argv[3];
    const runId = process.argv[4];

    // Missing, empty or non-numeric limits use the fallback instead of becoming NaN.
    const parseLimit = (raw, fallback) => {
      if (raw === undefined || raw === '') {
        return fallback;
      }
      const value = Number(raw);
      return Number.isFinite(value) ? value : fallback;
    };
    const defaultLimit = parseLimit(process.env.MAX_AUTO_RERUNS, 5);

    // Job name -> MAX_RERUNS_<NAME>: non-alphanumerics become "_", then upper-case.
    const getJobLimit = (jobName) => {
      const envVarName = `MAX_RERUNS_${jobName.replace(/[^a-zA-Z0-9_]/g, '_').toUpperCase()}`;
      return parseLimit(process.env[envVarName], defaultLimit);
    };

    // Map keyed by job name: job names are arbitrary strings and must not collide
    // with Object.prototype keys.
    const retryInfo = new Map();
    for (let i = 1; i <= attempt; i += 1) {
      let data;
      try {
        // Argument array instead of a shell string: nothing in the URL ("?", "&")
        // is left for a shell to interpret.
        const out = execFileSync(
          'gh',
          ['api', `repos/${repo}/actions/runs/${runId}/attempts/${i}/jobs?per_page=100`],
          { encoding: 'utf8', stdio: ['ignore', 'pipe', 'inherit'], maxBuffer: 16 * 1024 * 1024 }
        );
        data = JSON.parse(out);
      } catch (error) {
        // Best effort: one unreadable attempt must not block the notification,
        // but it is reported rather than silently dropped.
        console.error(`::warning::could not load jobs for attempt ${i}: ${error.message}`);
        continue;
      }
      for (const job of data.jobs || []) {
        let info = retryInfo.get(job.name);
        if (!info) {
          const limit = getJobLimit(job.name);
          info = {
            attempts: 0,
            reruns_used: 0,
            max_reruns: Math.max(0, limit - 1),
            limit,
          };
          retryInfo.set(job.name, info);
        }
        // Counts every attempt in which the job is listed, whatever its conclusion.
        info.attempts += 1;
        info.reruns_used = Math.max(0, info.attempts - 1);
      }
    }
    console.log(JSON.stringify(Object.fromEntries(retryInfo)));
    NODE_SCRIPT
    echo "retry_file=$RETRY_FILE" >> "$GITHUB_OUTPUT"
# Step: determine the pull requests associated with the run.
# Output: pull_requests_json (JSON array, "[]" when none are known).
| - name: Resolve PR context | |
| id: prctx | |
| if: steps.policy.outputs.notify == 'yes' | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| REPO: ${{ github.repository }} | |
| HEAD_SHA: ${{ github.event.workflow_run.head_sha }} | |
| RUN_EVENT: ${{ github.event.workflow_run.event }} | |
| RUN_PULL_REQUESTS_JSON: ${{ steps.runmeta.outputs.pull_requests_json }} | |
| run: | | |
| set -euo pipefail | |
# Start from the list recorded on the run itself; unset or empty becomes "[]".
| PRS_JSON="${RUN_PULL_REQUESTS_JSON:-[]}" | |
| if [[ -z "$PRS_JSON" ]]; then | |
| PRS_JSON="[]" | |
| fi | |
# Fallback for pull_request runs whose metadata lists no PR: look the PR up by head
# commit. API errors are suppressed and treated as "no pull requests".
| if [[ "$PRS_JSON" == "[]" && "$RUN_EVENT" == "pull_request" && -n "${HEAD_SHA:-}" ]]; then | |
| PRS_JSON="$(gh api -H "Accept: application/vnd.github+json" "repos/$REPO/commits/$HEAD_SHA/pulls" 2>/dev/null || echo '[]')" | |
| fi | |
| { | |
| echo "pull_requests_json<<EOF" | |
| echo "$PRS_JSON" | |
| echo "EOF" | |
| } >> "$GITHUB_OUTPUT" | |
# Step: build the DingTalk markdown message with an inline Python script and POST it
# to the webhook. All inputs reach the script through the environment variables below.
| - name: Send DingTalk notification | |
| if: steps.policy.outputs.notify == 'yes' | |
| env: | |
| DINGTALK_WEBHOOK: ${{ secrets.DINGTALK_WEBHOOK }} | |
| RUN_STYLE: ${{ steps.policy.outputs.style }} | |
| REPOSITORY: ${{ github.repository }} | |
| RUN_ID: ${{ github.event.workflow_run.id }} | |
| RUN_URL: ${{ github.event.workflow_run.html_url }} | |
| RUN_EVENT: ${{ github.event.workflow_run.event }} | |
| RUN_CONCLUSION: ${{ github.event.workflow_run.conclusion }} | |
| HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }} | |
| HEAD_SHA: ${{ github.event.workflow_run.head_sha }} | |
| SUMMARY_FILE: ${{ steps.summary.outputs.summary_file }} | |
| SUMMARY_JSON: ${{ steps.summary.outputs.summary_json }} | |
| JOBS_FILE: ${{ steps.jobs.outputs.jobs_file }} | |
| RETRY_FILE: ${{ steps.jobs.outputs.retry_file }} | |
| ARTIFACTS_FILE: ${{ steps.artifacts.outputs.artifacts_file }} | |
| HEAD_REPO: ${{ github.event.workflow_run.head_repository.full_name }} | |
| PULL_REQUESTS_JSON: ${{ steps.prctx.outputs.pull_requests_json }} | |
| run: | | |
| set -euo pipefail | |
# Without a webhook the step only emits a warning and still succeeds.
| if [[ -z "${DINGTALK_WEBHOOK:-}" ]]; then | |
| echo "::warning::DINGTALK_WEBHOOK is not configured; skip notification." | |
| exit 0 | |
| fi | |
# The Python script below prints the request body as JSON on stdout; the command
# substitution captures it in PAYLOAD. The quoted 'PY' delimiter disables shell expansion
# inside the script.
| PAYLOAD="$(python3 - <<'PY' | |
| import json | |
| import os | |
| import re | |
| from datetime import datetime | |
| from pathlib import Path | |
def parse_timestamp(value):
    """Parse an ISO-8601 timestamp string into a ``datetime``.

    "Z" is rewritten to "+00:00" before parsing so that UTC timestamps such as
    "2024-01-02T03:04:05Z" are accepted by ``datetime.fromisoformat``.

    Returns:
        The parsed ``datetime``, or ``None`` when ``value`` is empty, is not a
        string, or cannot be parsed.
    """
    # Non-string input would otherwise fail on .replace() with AttributeError.
    if not value or not isinstance(value, str):
        return None
    try:
        return datetime.fromisoformat(value.replace("Z", "+00:00"))
    except ValueError:
        return None
def format_duration(seconds):
    """Render a duration in seconds as hours/minutes/seconds text.

    Args:
        seconds: Duration in seconds (int or float), or ``None``.

    Returns:
        "N/A" for ``None``; otherwise the non-zero components joined without a
        separator, e.g. "1小时1分钟1秒". Negative values are clamped to zero and
        fractions are truncated; a zero duration renders as "0秒".
    """
    if seconds is None:
        return "N/A"
    total = max(0, int(seconds))
    hours, remainder = divmod(total, 3600)
    minutes, secs = divmod(remainder, 60)
    parts = []
    if hours:
        parts.append(f"{hours}小时")
    if minutes:
        parts.append(f"{minutes}分钟")
    # Always emit at least one component, so 0 becomes "0秒" rather than "".
    if secs or not parts:
        parts.append(f"{secs}秒")
    return "".join(parts)
def find_value(lines, prefix, default="n/a"):
    """Return the text following ``prefix`` on the first line that starts with it.

    Args:
        lines: Iterable of text lines to search.
        prefix: Exact line prefix to look for.
        default: Value returned when no line matches or the matched value is empty.

    Returns:
        The stripped remainder of the first matching line, or ``default``.
    """
    for line in lines:
        if line.startswith(prefix):
            # Only the first match is considered, even when its value is empty.
            return line[len(prefix):].strip() or default
    return default
def normalize_text(value):
    """Return ``value`` as a string with whitespace runs collapsed to single spaces.

    Falsy values (``None``, "", 0, ...) become the empty string.
    """
    return re.sub(r"\s+", " ", str(value or "").strip())


def normalize_accuracy(value):
    """Reduce an accuracy result to "PASSED"/"FAILED" when one of them appears.

    Returns the first standalone PASSED or FAILED word found in the text;
    otherwise the normalized text itself, or "n/a" when that is empty.
    """
    text = normalize_text(value)
    match = re.search(r"\b(PASSED|FAILED)\b", text)
    if match:
        return match.group(1)
    return text or "n/a"


def format_value(value):
    """Format a metric value: "n/a" for ``None``, four decimals for floats, ``str`` otherwise."""
    if value is None:
        return "n/a"
    if isinstance(value, float):
        return f"{value:.4f}"
    return str(value)


def format_metric_lines(metrics):
    """Render summary metrics as markdown bullet lines.

    Args:
        metrics: Iterable of dicts. ``kind`` selects the layout:
            "single" gives one line with baseline/current/threshold in ms,
            "batch_compare" gives one line per entry of ``rows``,
            "accuracy" gives one line with the normalized current result.
            Entries with any other ``kind`` are skipped.

    Returns:
        List of "- ..." strings, in input order.
    """
    lines = []
    for metric in metrics:
        name = metric.get("name", "Unknown Metric")
        kind = metric.get("kind", "unknown")
        if kind == "single":
            lines.append(
                f"- {name}: baseline={metric.get('baseline_ms', 'n/a')} ms, "
                f"current={metric.get('current_ms', 'n/a')} ms, "
                f"threshold={metric.get('threshold_ms', 'n/a')} ms, "
                f"result={metric.get('status', 'unknown')}"
            )
        elif kind == "batch_compare":
            for row in metric.get("rows", []):
                lines.append(
                    f"- {name} / bs{row.get('batch_size', 'n/a')}: "
                    f"baseline={format_value(row.get('baseline_ms'))}, "
                    f"current={format_value(row.get('current_ms'))}, "
                    f"threshold={format_value(row.get('threshold_ms'))}, "
                    f"result={row.get('result', 'unknown')}"
                )
        elif kind == "accuracy":
            lines.append(
                f"- {name}: current={normalize_accuracy(metric.get('current', 'n/a'))}, "
                f"result={metric.get('status', 'unknown')}"
            )
    return lines
# --- Inputs from the step environment -------------------------------------------------
# The os.environ[...] lookups are required; the .get(...) lookups are optional.
| repo = os.environ["REPOSITORY"] | |
| run_style = os.environ["RUN_STYLE"] | |
| run_id = os.environ["RUN_ID"] | |
| run_url = os.environ["RUN_URL"] | |
| run_event = os.environ["RUN_EVENT"] | |
| run_conclusion = os.environ["RUN_CONCLUSION"] | |
# The branch is displayed as "<head repo owner>/<branch>"; the head repo defaults to
# this repository when HEAD_REPO is empty.
| head_repo = os.environ.get("HEAD_REPO") or repo | |
| head_owner = head_repo.split("/", 1)[0] if "/" in head_repo else head_repo | |
| head_branch_raw = os.environ.get("HEAD_BRANCH") or "unknown" | |
| head_branch = f"{head_owner}/{head_branch_raw}" | |
| head_sha = os.environ.get("HEAD_SHA") or "" | |
| short_sha = head_sha[:8] if head_sha else "unknown" | |
# Only the first listed pull request is used for the PR link.
| pull_requests = json.loads(os.environ.get("PULL_REQUESTS_JSON", "[]") or "[]") | |
| pr_number = pull_requests[0]["number"] if pull_requests else None | |
| pr_url = f"https://github.com/{repo}/pull/{pr_number}" if pr_number else "n/a" | |
# --- Final summary: structured JSON first, markdown lines as fallback ------------------
| summary_json_payload = {} | |
| summary_json_file = os.environ.get("SUMMARY_JSON", "") | |
| if summary_json_file and Path(summary_json_file).is_file(): | |
| summary_json_payload = json.loads(Path(summary_json_file).read_text(encoding="utf-8")) | |
| summary_lines = [] | |
| summary_file = os.environ.get("SUMMARY_FILE", "") | |
| if summary_file and Path(summary_file).is_file(): | |
| summary_lines = Path(summary_file).read_text(encoding="utf-8", errors="ignore").splitlines() | |
# Each field comes from the JSON when present, otherwise from the markdown line with
# the given prefix, otherwise "unavailable".
| host = summary_json_payload.get("host", {}) | |
| host_name = host.get("name") or find_value(summary_lines, "- Host name: ", "unavailable") | |
| host_ip = host.get("ip") or find_value(summary_lines, "- Host IP: ", "unavailable") | |
| baseline_commit = summary_json_payload.get("baseline_commit") or find_value(summary_lines, "- Baseline commit: ", "unavailable") | |
# A real commit id is shortened to six characters; the placeholder is kept as is.
| baseline_commit_short = baseline_commit[:6] if baseline_commit not in {"", "unavailable"} else baseline_commit | |
| metrics = summary_json_payload.get("metrics", []) | |
# --- Optional JSON files written by earlier steps; a missing file leaves an empty dict -
| jobs_payload = {} | |
| jobs_file = os.environ.get("JOBS_FILE", "") | |
| if jobs_file and Path(jobs_file).is_file(): | |
| jobs_payload = json.loads(Path(jobs_file).read_text(encoding="utf-8")) | |
| retry_payload = {} | |
| retry_file = os.environ.get("RETRY_FILE", "") | |
| if retry_file and Path(retry_file).is_file(): | |
| retry_payload = json.loads(Path(retry_file).read_text(encoding="utf-8")) | |
| artifacts_payload = {} | |
| artifacts_file = os.environ.get("ARTIFACTS_FILE", "") | |
| if artifacts_file and Path(artifacts_file).is_file(): | |
| artifacts_payload = json.loads(Path(artifacts_file).read_text(encoding="utf-8")) | |
# Display order of the known jobs; jobs not listed here sort after all of them.
job_order = [
    "Prepare Metadata",
    "Format Check",
    "Build Current",
    "Integration Test",
    "Build Baseline",
    "T Performance",
    "T Accuracy",
    "BD Model 1",
    "BD Model 2",
    "BD Model 3",
    "Training",
    "Final Summary",
]
# Name -> position, built once so each lookup avoids scanning the list.
_JOB_RANK = {job_name: position for position, job_name in enumerate(job_order)}


def sort_job(job):
    """Sort key for a job dict: its position in ``job_order``.

    Jobs whose ``name`` is missing or not in ``job_order`` get ``len(job_order)``,
    which places them after every known job.
    """
    return _JOB_RANK.get(job.get("name", ""), len(job_order))
# --- Per-job status lines ---------------------------------------------------------------
| jobs = sorted(jobs_payload.get("jobs", []), key=sort_job) | |
| display_jobs = [] | |
| failed_jobs = [] | |
| for job in jobs: | |
| name = job.get("name", "unknown") | |
# The "Final Summary" job is left out of the job list.
| if name == "Final Summary": | |
| continue | |
# Jobs without a conclusion fall back to their status.
| conclusion = (job.get("conclusion") or job.get("status") or "unknown").lower() | |
| started = parse_timestamp(job.get("started_at")) | |
| completed = parse_timestamp(job.get("completed_at")) | |
| duration = (completed - started).total_seconds() if started and completed else None | |
# NOTE(review): duration_text is computed but not used in the job line below - confirm
# whether the duration was meant to be shown.
| duration_text = format_duration(duration) | |
| status_map = { | |
| "success": "成功", | |
| "failure": "失败", | |
| "cancelled": "取消", | |
| "skipped": "跳过", | |
| "queued": "排队中", | |
| "pending": "等待中", | |
| "in_progress": "运行中", | |
| "action_required": "需处理", | |
| } | |
# Unmapped values are shown upper-cased.
| display_jobs.append(f"- {name}: {status_map.get(conclusion, conclusion.upper())}") | |
# Everything other than success/skipped is listed under the failed jobs.
| if conclusion not in {"success", "skipped"}: | |
| failed_jobs.append(name) | |
# --- Artifact links -----------------------------------------------------------------------
# Only the names in artifact_order are linked, in this order.
| artifact_order = [ | |
| "summary-final", | |
| "logs-format", | |
| "logs-build-current", | |
| "logs-integration", | |
| "logs-build-baseline", | |
| "logs-t-perf", | |
| "logs-t-accuracy", | |
| "logs-bd-model1", | |
| "logs-bd-model2", | |
| "logs-bd-model3", | |
| "logs-training", | |
| ] | |
# When several artifacts share a name, the last one in the listing wins.
| artifact_by_name = { | |
| item.get("name"): item | |
| for item in artifacts_payload.get("artifacts", []) | |
| if item.get("name") == "summary-final" or str(item.get("name", "")).startswith("logs-") | |
| } | |
| artifact_lines = [] | |
| for artifact_name in artifact_order: | |
| artifact = artifact_by_name.get(artifact_name) | |
| if not artifact: | |
| continue | |
| artifact_url = f"https://github.com/{repo}/actions/runs/{run_id}/artifacts/{artifact['id']}" | |
| artifact_lines.append(f"- [{artifact_name}]({artifact_url})") | |
| metric_lines = format_metric_lines(metrics) | |
# --- Message assembly ---------------------------------------------------------------------
# Any conclusion other than "success" is reported as failed.
| if run_conclusion == "success": | |
| status_text = "成功" | |
| else: | |
| status_text = "失败" | |
# Lines shared by all three message styles.
| common_lines = [ | |
| f"- 分支: {head_branch}", | |
| f"- Commit: `{short_sha}`", | |
| f"- 主机名: {host_name}", | |
| f"- 主机IP: {host_ip}", | |
| f"- Baseline commit: `{baseline_commit_short}`", | |
| f"- 运行详情: [GitHub Actions]({run_url})", | |
| ] | |
# Header per style: "daily", "pr", and everything else (the manual layout).
| if run_style == "daily": | |
| title = f"Daily CI {status_text}" | |
| lines = [ | |
| "### CI通知: Daily 回归", | |
| f"- 总体状态: {status_text}", | |
| f"- 仓库: {repo}", | |
| ] | |
| lines.extend(common_lines) | |
| elif run_style == "pr": | |
| title = f"PR CI {status_text}" | |
| lines = [ | |
| f"### CI通知: PR {status_text}", | |
| f"- 仓库: {repo}", | |
| f"- PR: [#{pr_number}]({pr_url})" if pr_number else "- PR: n/a", | |
| ] | |
| lines.extend(common_lines) | |
| else: | |
| title = f"Manual CI {status_text}" | |
| lines = [ | |
| f"### CI通知: 手动执行{status_text}", | |
| f"- 仓库: {repo}", | |
| ] | |
| lines.extend(common_lines) | |
# Sections after the header; the optional ones are added only when they have content.
| lines.extend(["", "#### Job 状态"]) | |
| lines.extend(display_jobs or ["- 暂无 job 信息"]) | |
| if failed_jobs: | |
| lines.extend(["", "#### 失败任务"]) | |
| lines.extend([f"- {name}" for name in failed_jobs]) | |
| if metric_lines: | |
| lines.extend(["", "#### 关键摘要"]) | |
| lines.extend(metric_lines) | |
| if artifact_lines: | |
| lines.extend(["", "#### Artifact 下载"]) | |
| lines.extend(artifact_lines) | |
# Markdown message body; ensure_ascii=False keeps the Chinese text unescaped.
| payload = { | |
| "msgtype": "markdown", | |
| "markdown": { | |
| "title": title, | |
| "text": "\n".join(lines), | |
| }, | |
| } | |
| print(json.dumps(payload, ensure_ascii=False)) | |
| PY | |
| )" | |
# -f makes curl exit non-zero on an HTTP error status, which fails the step.
# NOTE(review): an error reported inside a 2xx response body would not be detected - confirm.
| curl -fsSL \ | |
| -H 'Content-Type: application/json' \ | |
| -d "$PAYLOAD" \ | |
| "$DINGTALK_WEBHOOK" |