Skip to content

Commit 798cb34

Browse files
authored
Merge branch 'master' into wwb_trnsf5
2 parents ba70e23 + 39a0f2c commit 798cb34

269 files changed

Lines changed: 1391 additions & 521 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/aw/actions-lock.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55
"version": "v8",
66
"sha": "ed597411d8f924073f98dfc5c65a23a2325f34cd"
77
},
8-
"github/gh-aw/actions/setup@v0.46.5": {
8+
"github/gh-aw/actions/setup@v0.58.3": {
99
"repo": "github/gh-aw/actions/setup",
10-
"version": "v0.46.5",
11-
"sha": "5a79466d65414632d47c7869b27170ade5b9404e"
10+
"version": "v0.58.3",
11+
"sha": "08a903b1fb2e493a84a57577778fe5dd711f9468"
1212
}
1313
}
1414
}
Lines changed: 310 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,310 @@
1+
# Copyright (C) 2026 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from pathlib import Path
5+
from zipfile import ZipFile
6+
import tempfile
7+
8+
import requests
9+
from github.WorkflowRun import WorkflowRun
10+
from urllib3.util.retry import Retry
11+
import argparse
12+
from requests.adapters import HTTPAdapter
13+
from github import Github, Auth
14+
15+
import os
16+
import re
17+
import logging
18+
19+
20+
def init_logger():
    """Configure root logging from the LOGLEVEL env var (defaults to INFO)."""
    level = os.environ.get("LOGLEVEL", "INFO").upper()
    logging.basicConfig(
        format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s",
        datefmt="%m-%d-%Y %H:%M:%S",
        level=level,
    )
25+
26+
27+
# Configure logging once at import time so all module-level code can log.
init_logger()

# Module-wide logger used by every function in this script.
LOGGER = logging.getLogger("ci-doctor-preanalysis")

# Root directory where per-run artifacts (logs, hints, summary) are written.
CI_DOCTOR_DIR = Path("/tmp/ci-doctor/")
32+
33+
34+
def get_arguments() -> argparse.Namespace:
    """Parse command-line arguments: target repository and workflow run ID."""

    # NOTE: argparse embeds each type-callable's __name__ in its error
    # messages, so these validator names double as user-facing labels.
    def repository_name(value: str) -> str:
        if re.match(r"^[A-Za-z0-9._-]+/[A-Za-z0-9._-]+$", value) is None:
            raise argparse.ArgumentTypeError(f"Invalid format (expected 'owner/name'): {value}")
        return value

    def run_id(value: str) -> int:
        if re.match(r"^[0-9]+$", value) is None:
            raise argparse.ArgumentTypeError(f"Run ID must be a positive integer: {value}")
        return int(value)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-r",
        "--repository-name",
        required=True,
        type=repository_name,
        help="Repository name in the OWNER/REPOSITORY format",
    )
    parser.add_argument("--run-id", required=True, type=run_id, help="Workflow Run ID")
    parsed = parser.parse_args()
    return parsed
55+
56+
57+
def _safe_extract(archive_path: Path, dest_dir: Path) -> None:
58+
"""Extract a zip archive, rejecting entries that would escape dest_dir."""
59+
dest_dir = dest_dir.resolve()
60+
with ZipFile(file=archive_path, mode="r") as zip_file:
61+
for member in zip_file.namelist():
62+
member_path = (dest_dir / member).resolve()
63+
if not str(member_path).startswith(str(dest_dir) + os.sep):
64+
raise ValueError(f"Zip entry escapes target directory: {member}")
65+
zip_file.extractall(dest_dir)
66+
67+
68+
def collect_logs_for_run(run: WorkflowRun, logs_dir: Path, GITHUB_TOKEN: str, session: requests.Session) -> None:
    """
    Downloads the log archive of a given Workflow Run and moves the log
    files belonging to its failed/cancelled jobs into ``logs_dir``.

    We don't need successful job logs, so we remove them.
    We could've just downloaded logs for failed jobs only,
    but when you download all logs from a workflow run,
    GitHub includes "system.txt" files for each job, which can also
    contain errors on which we might want to trigger rerun.

    Example log archive structure:
    .
    ├── 10_Pytorch Layer Tests _ PyTorch Layer Tests.txt
    ├── 11_CPU functional tests _ CPU functional tests.txt
    ├── 12_C++ unit tests _ C++ unit tests.txt
    ├── 13_OpenVINO tokenizers extension _ OpenVINO tokenizers extension.txt
    ├── C++ unit tests _ C++ unit tests
    │   └── system.txt
    ├── CPU functional tests _ CPU functional tests
    │   └── system.txt
    ├── OpenVINO tokenizers extension _ OpenVINO tokenizers extension
    │   └── system.txt
    ├── Pytorch Layer Tests _ PyTorch Layer Tests
        └── system.txt

    Sometimes though, directories contain log files for each individual step,
    IN ADDITION to the full log in root of the directory:
    .
    ├── 1_Build.txt
    └── Build
        ├── 13_Upload build logs.txt
        ├── 1_Set up job.txt
        ├── 24_Post Clone vcpkg.txt
        ├── 25_Post Clone OpenVINO.txt
        ├── 26_Stop containers.txt
        ├── 27_Complete job.txt
        ├── 2_Initialize containers.txt
        ├── 3_Clone OpenVINO.txt
        ├── 4_Get VCPKG version and put it into GitHub ENV.txt
        ├── 5_Init submodules for non vcpkg dependencies.txt
        ├── 6_Clone vcpkg.txt
        ├── 7_System info.txt
        ├── 8_Build vcpkg.txt
        ├── 9_CMake - configure.txt
        └── system.txt

    In that case, we need only 'system.txt' file from each directory
    """
    # Get failed jobs (cancelled jobs are treated the same as failures).
    failed_jobs = [job for job in run.jobs() if job.conclusion in ("failure", "cancelled")]
    LOGGER.info(f"FAILED JOBS: {[job.name for job in failed_jobs]}")

    # NOTE(review): the archive is re-opened by name below while the
    # NamedTemporaryFile handle is still open; this works on POSIX but would
    # fail on Windows — confirm the target runners are Linux-only.
    with tempfile.NamedTemporaryFile(suffix=".zip") as temp_file:
        log_archive_path = Path(temp_file.name)

        # Download logs archive
        with open(file=log_archive_path, mode="wb") as log_archive:
            LOGGER.info(f"DOWNLOADING LOGS FOR RUN ID {run.id}")
            # PyGitHub does not expose the "/repos/{owner}/{repo}/actions/runs/{run_id}/logs" endpoint so we have to use requests
            LOGGER.debug(f"Downloading logs from {run.logs_url}")
            response = session.get(url=run.logs_url, headers={"Authorization": f"Bearer {GITHUB_TOKEN}"})
            response.raise_for_status()
            # response.content buffers the whole archive in memory before the
            # single write; fine for typical log archives.
            log_archive.write(response.content)

        # Unpack it into a throwaway directory that is removed on exit.
        with tempfile.TemporaryDirectory() as temp_dir:
            logs_temp_dir = Path(temp_dir).resolve()
            _safe_extract(log_archive_path, logs_temp_dir)

            # Traverse the unpacked logs to find the ones of failed jobs
            for job in failed_jobs:
                # GitHub replaces "/" in job names when building archive entry
                # names; mirror that to match files on disk.
                job_filename = job.name.replace("/", "_")
                LOGGER.debug(f"Looking for failed job logs with filename: {job_filename}")

                for p in logs_temp_dir.iterdir():
                    # Move failed jobs' logs to the final destination
                    if p.is_dir() and p.name == job_filename:
                        # Per-job directory: keep only its system.txt (see docstring).
                        system_log_path = p / "system.txt"
                        if system_log_path.is_file():
                            LOGGER.debug(f"Keeping system.txt from directory {p} for failed job {job.name}")
                            system_log_path.rename(logs_dir / f"{job_filename}__system.txt")
                    elif p.is_file() and p.name.endswith(f"{job_filename}.txt"):
                        # Top-level "<N>_<job name>.txt" full-log file.
                        LOGGER.debug(f"Keeping file {p} for failed job {job.name}")
                        p.rename(logs_dir / p.name)

    LOGGER.info(f"COLLECTED LOGS FOR {run.id} IN {logs_dir}")
155+
156+
157+
# Lines that match ERROR_PATTERN but are known false positives.
NOISE_PATTERN = re.compile(
    r"(-o pipefail|xfail|XFAIL|Defaulting to unsafe serialization|SCCACHE_IGNORE_SERVER_IO_ERROR)",
)

# Case-insensitive pattern matching common CI error indicators.
ERROR_PATTERN = re.compile(
    r"("
    r"\berror[\s:\[)]"
    r"|\bfail(?:ed|ure|ing|s)?\b"
    r"|panic:"
    r"|\bfatal[\s:]"
    r"|\bundefined[\s:]"
    r"|\bexception\b"
    r"|exit status [^0]"
    r")",
    re.IGNORECASE,
)

# Cap on hints per log file, so hint files stay small enough for the agent.
MAX_HINT_LINES = 30


def extract_hints(logs_dir: Path, hints_dir: Path) -> None:
    """Extracts lines matching ERROR_PATTERN from log files, writes them to separate hint files.

    Each hint is prefixed with its 1-based line number in the source log.
    At most MAX_HINT_LINES hints are collected per file, and a hint file is
    written only when at least one hint was found.

    :param logs_dir: Directory containing ``*.txt`` log files to scan.
    :param hints_dir: Directory where ``<log name>-hints.txt`` files are written.
    """
    for log_file in logs_dir.iterdir():
        if not log_file.is_file() or not log_file.name.endswith(".txt"):
            continue
        hints: list[str] = []
        # CI logs are not guaranteed to be valid UTF-8 (tools can emit raw
        # bytes), and the default open() encoding is locale-dependent, so
        # decode permissively instead of crashing mid-scan.
        with log_file.open(encoding="utf-8", errors="replace") as f:
            for lineno, line in enumerate(f, start=1):
                if NOISE_PATTERN.search(line) or not ERROR_PATTERN.search(line):
                    continue
                hints.append(f"{lineno}:{line.strip()}")
                if len(hints) >= MAX_HINT_LINES:
                    break

        hints_file_path = hints_dir / f"{log_file.name}-hints.txt"
        if hints:
            hints_file_path.write_text("\n".join(hints), encoding="utf-8")
197+
198+
199+
def count_lines(file_path: Path) -> int:
    """Return the number of lines in *file_path*."""
    total = 0
    with file_path.open() as handle:
        for _ in handle:
            total += 1
    return total
202+
203+
204+
def write_summary(run: WorkflowRun, logs_dir: Path, hints_dir: Path) -> None:
    """Write a consolidated summary file for the CI Doctor agent.

    Lists failed/cancelled jobs with their failed steps, the downloaded log
    files, and non-empty hint files (with a short preview), then writes the
    result to ``<logs_dir parent>/summary.txt`` and echoes it to stdout.
    """
    summary_lines: list[str] = ["=== Failed Jobs Summary ===", f"Run ID: {run.id}", ""]

    # One entry per failed/cancelled job, with the steps that broke it.
    for job in (j for j in run.jobs() if j.conclusion in ("failure", "cancelled")):
        failed_steps = ", ".join(step.name for step in job.steps if step.conclusion in ("failure", "cancelled"))
        summary_lines.append(f" Job {job.id} {job.name} {job.url}:")
        summary_lines.append(f" Failed steps: {failed_steps if failed_steps else '(none)'}")

    summary_lines.extend(["", f"Downloaded log files ({logs_dir}):"])
    summary_lines.extend(f" {log_file}" for log_file in sorted(logs_dir.glob("*.txt")))

    summary_lines.extend(["", f"Hint files ({hints_dir}):"])
    for hints_file in sorted(hints_dir.glob("*-hints.txt")):
        if hints_file.stat().st_size == 0:
            continue
        summary_lines.append(f" {hints_file} ({count_lines(hints_file)} matches)")
        # Show first 3 hint lines as preview.
        try:
            with hints_file.open() as handle:
                for index, hint_line in enumerate(handle):
                    if index >= 3:
                        break
                    summary_lines.append(f" {hint_line.rstrip()}")
        except OSError:
            # Best-effort preview; the file listing above is still useful.
            pass

    summary_text = "\n".join(summary_lines) + "\n"

    SUMMARY_FILE = logs_dir.parent / "summary.txt"
    SUMMARY_FILE.write_text(summary_text)
    print(summary_text)
    print(f"Pre-analysis complete. Agent should start with {SUMMARY_FILE}")
246+
247+
248+
# High-volume, zero-signal log lines stripped before analysis.
PATTERNS_TO_FILTER_OUT = [
    # 2026-03-13T13:42:55.9786288Z Received 35870 data chunks (chunk size: 16384 bytes), time passed: 30784ms
    re.compile(r"Received \d+ data chunks \(chunk size: \d+ bytes\), time passed: \d+ms"),
]


def filter_logs(job_logs_dir: Path) -> None:
    """Remove lines matching patterns in PATTERNS_TO_FILTER_OUT from log files in job_logs_dir.

    Each ``*.txt`` file is rewritten in place with matching lines dropped and
    trailing whitespace stripped from the remaining lines.
    """
    for log_file in job_logs_dir.glob("*.txt"):
        filtered_lines: list[str] = []
        # CI logs may contain bytes that are not valid UTF-8 and the default
        # encoding is locale-dependent; decode permissively to avoid crashing.
        with log_file.open(encoding="utf-8", errors="replace") as f:
            for line in f:
                if any(pattern.search(line) for pattern in PATTERNS_TO_FILTER_OUT):
                    continue
                filtered_lines.append(line.rstrip())

        # Guard the "+ \n" so an empty file is not rewritten as a lone newline.
        content = "\n".join(filtered_lines) + "\n" if filtered_lines else ""
        log_file.write_text(content, encoding="utf-8")
265+
266+
267+
def main():
    """Entry point: download, filter, and pre-analyze logs for one workflow run."""
    args = get_arguments()
    run_id = args.run_id
    repository_name = args.repository_name

    GITHUB_TOKEN = os.environ["GITHUB_TOKEN"]

    # Retry transient API/log-download failures with exponential backoff.
    session = requests.Session()
    retry_strategy = Retry(total=5, backoff_factor=3, backoff_jitter=1, status_forcelist=[429, 500, 502, 503, 504])
    for host_prefix in ("https://api.github.com", "https://results-receiver.actions.githubusercontent.com"):
        session.mount(host_prefix, HTTPAdapter(max_retries=retry_strategy))

    github = Github(auth=Auth.Token(token=GITHUB_TOKEN))
    run = github.get_repo(full_name_or_id=repository_name).get_workflow_run(id_=run_id)

    # Nothing to diagnose for successful/other runs.
    if run.conclusion not in ("failure", "cancelled"):
        LOGGER.warning(
            f"Run {run_id} in {repository_name} has conclusion '{run.conclusion}'. Expected conclusion is 'failure' or 'cancelled'. No logs will be collected."
        )
        return

    RUN_DIR = CI_DOCTOR_DIR / f"run_{run_id}"

    # Refuse to mix artifacts with a previous invocation's output.
    if RUN_DIR.exists() and any(RUN_DIR.iterdir()):
        raise RuntimeError(f"Run directory {RUN_DIR} is not empty. Clean it up before running the script.")

    LOGS_DIR = RUN_DIR / "logs"
    LOGS_DIR.mkdir(parents=True, exist_ok=True)

    collect_logs_for_run(run=run, logs_dir=LOGS_DIR, GITHUB_TOKEN=GITHUB_TOKEN, session=session)
    filter_logs(job_logs_dir=LOGS_DIR)

    HINTS_DIR = RUN_DIR / "hints"
    HINTS_DIR.mkdir(exist_ok=True, parents=True)

    extract_hints(logs_dir=LOGS_DIR, hints_dir=HINTS_DIR)

    write_summary(run=run, logs_dir=LOGS_DIR, hints_dir=HINTS_DIR)
307+
308+
309+
# Allow importing this module (e.g. in tests) without triggering the script.
if __name__ == "__main__":
    main()
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
PyGithub==2.8.1
2+
requests==2.32.5

0 commit comments

Comments
 (0)