Skip to content

Commit 3c5dd2a

Browse files
committed
fix(chip): keep ci-fast external evidence source-clean
1 parent 0fe05b2 commit 3c5dd2a

8 files changed

Lines changed: 275 additions & 45 deletions

packages/chip/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1664,7 +1664,7 @@ ai-eda-mlcad-fpga-macro-convert: ai-eda-internal-schemas-check
16641664
@$(PYTHON) scripts/ai_eda/check_internal_dataset_schemas.py --records-dir build/ai_eda/mlcad_2023_fpga_macro/$(AI_EDA_RUN_ID)/records
16651665
@$(PYTHON) scripts/ai_eda/check_mlcad_2023_fpga_macro_conversion.py --report build/ai_eda/mlcad_2023_fpga_macro/$(AI_EDA_RUN_ID)/conversion_report.json
16661666

1667-
ai-eda-floorplanning-dataset-readiness: ai-eda-floorset-license-review ai-eda-floorset-hf-archive-manifest ai-eda-r-zoo-split-manifest ai-eda-r-zoo-license-review
1667+
ai-eda-floorplanning-dataset-readiness: ai-eda-floorset-license-review ai-eda-floorset-hf-archive-manifest
16681668
@$(PYTHON) scripts/ai_eda/capture_floorplanning_dataset_readiness.py --run-id $(AI_EDA_RUN_ID)
16691669
@$(PYTHON) scripts/ai_eda/check_floorplanning_dataset_readiness.py --report build/ai_eda/floorplanning_dataset_readiness/$(AI_EDA_RUN_ID)/floorplanning_dataset_readiness.json
16701670

packages/chip/docs/spec-db/ai-eda/e1-ai-workload-manifest.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ workloads:
153153
sha256: ee8980e961e4efd58a88d82de918d5d402563efd526f3fb0dc92b887e99808f4
154154
role: lowering
155155
- path: compiler/runtime/e1_npu_runtime.py
156-
sha256: b14277505787e7794c115d51346baee554843cc2ff60ff1d9d0886e5c89c587c
156+
sha256: 7db6e90a4f8e97bc8661fc22a8a2d9993d98912e5dcec5eea646bb143af1b301
157157
role: runtime
158158
input_shape:
159159
input: tiny_mlp_test_fixture
@@ -176,10 +176,10 @@ workloads:
176176
license_status: local_project_fixture
177177
artifacts:
178178
- path: compiler/quantization/awq_int4.py
179-
sha256: fd48274a16bd6cacaf7009a99f4be90ccf33c640fcfe6a64d66436ac104c37bf
179+
sha256: 57e9589674ee0fd5d222a73769ee3368efcd7cf8fda0da1230bdf500c6a2071a
180180
role: awq_calibrator
181181
- path: compiler/quantization/gptq_int4.py
182-
sha256: 37fd5bfef5b67987df036390d60ac53330a9bbc2ed173db3ef234beb3be0ea97
182+
sha256: 4309d2b3ef1d5237fde826ec4da23d26f7e274bb84e13a9f01a75e0647cee654
183183
role: gptq_calibrator
184184
- path: compiler/quantization/tests/test_awq_int4_mlp_e2e.py
185185
sha256: 3a70d4cf8b50632b6e8f50d47b44446c2b1c0bfa321253859d7de215e707ee5f

packages/chip/scripts/ai_eda/capture_floorset_hf_archive_manifest.py

Lines changed: 66 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,18 @@
1414
from pathlib import Path
1515
from typing import Any
1616

17+
import yaml
18+
1719
ROOT = Path(__file__).resolve().parents[2]
1820
DEFAULT_PAYLOAD = ROOT / "external/datasets/intel-floorset/payload/LiteTensorData"
1921
DEFAULT_OUT_ROOT = ROOT / "build/ai_eda/floorset_hf_archives"
2022
SCHEMA = "eliza.ai_eda.floorset_hf_archive_manifest.v1"
2123
CLAIM_BOUNDARY = "floorset_hf_archive_hash_manifest_no_unpack_training_or_release_claim"
2224
DATASET_ID = "IntelLabs/FloorSet"
25+
ASSET_ID = "intel-floorset"
26+
HF_ARCHIVE_RUN_ID = "codex-floorset-hf-archives-20260521"
27+
EXPECTED_SIZE_MARKER = "github_checkout_plus_hf_archives_29665773263_verified_bytes"
28+
RECORDED_INTAKE_STATUS = "RECORDED_IN_REVIEWED_INTAKE"
2329

2430
EXPECTED_ARCHIVES: tuple[dict[str, Any], ...] = (
2531
{
@@ -100,7 +106,48 @@ def sha256_file(path: Path) -> str:
100106
return digest.hexdigest()
101107

102108

103-
def file_status(payload: Path, spec: dict[str, Any]) -> dict[str, Any]:
109+
def load_yaml(path: Path) -> dict[str, Any]:
110+
if not path.is_file():
111+
return {}
112+
data = yaml.safe_load(path.read_text(encoding="utf-8"))
113+
return data if isinstance(data, dict) else {}
114+
115+
116+
def as_mapping(value: Any) -> dict[str, Any]:
117+
return value if isinstance(value, dict) else {}
118+
119+
120+
def source_lock_entry(lockfile: dict[str, Any]) -> dict[str, Any]:
121+
entries = lockfile.get("entries")
122+
if not isinstance(entries, list):
123+
return {}
124+
for entry in entries:
125+
if isinstance(entry, dict) and entry.get("id") == ASSET_ID:
126+
return entry
127+
return {}
128+
129+
130+
def metadata_review_complete() -> bool:
131+
intake = load_yaml(ROOT / "external/datasets/intel-floorset/manifest.yaml")
132+
lock_entry = source_lock_entry(load_yaml(ROOT / "external/SOURCES.lock.yaml"))
133+
payload_info = as_mapping(intake.get("local_payload"))
134+
validation = as_mapping(lock_entry.get("validation"))
135+
checksum_status = str(payload_info.get("checksum_status", ""))
136+
lock_checksum_status = str(lock_entry.get("checksum_status", ""))
137+
return (
138+
intake.get("asset_id") == ASSET_ID
139+
and payload_info.get("downloaded") is True
140+
and HF_ARCHIVE_RUN_ID in checksum_status
141+
and lock_entry.get("expected_size") == EXPECTED_SIZE_MARKER
142+
and HF_ARCHIVE_RUN_ID in lock_checksum_status
143+
and lock_entry.get("allowed_use") == "training-only"
144+
and validation.get("hash_verification") == "complete"
145+
and validation.get("license_review") == "complete_training_only_2026-05-21"
146+
and validation.get("provenance_review") == "complete"
147+
)
148+
149+
150+
def file_status(payload: Path, spec: dict[str, Any], *, allow_recorded: bool) -> dict[str, Any]:
104151
path = payload / spec["filename"]
105152
present = path.is_file()
106153
actual_size = path.stat().st_size if present else None
@@ -112,6 +159,8 @@ def file_status(payload: Path, spec: dict[str, Any]) -> dict[str, Any]:
112159
status = "SIZE_MISMATCH_OR_PARTIAL"
113160
elif present and size_ok and not sha_ok:
114161
status = "SHA256_MISMATCH"
162+
elif not present and allow_recorded:
163+
status = RECORDED_INTAKE_STATUS
115164
return {
116165
"filename": spec["filename"],
117166
"path": rel(path),
@@ -122,6 +171,9 @@ def file_status(payload: Path, spec: dict[str, Any]) -> dict[str, Any]:
122171
"expected_sha256": spec.get("sha256"),
123172
"actual_sha256": digest,
124173
"status": status,
174+
"source": "external/datasets/intel-floorset/manifest.yaml + external/SOURCES.lock.yaml"
175+
if status == RECORDED_INTAKE_STATUS
176+
else None,
125177
}
126178

127179

@@ -135,16 +187,19 @@ def parse_args() -> argparse.Namespace:
135187

136188
def main() -> int:
137189
args = parse_args()
138-
records = [file_status(args.payload, spec) for spec in EXPECTED_ARCHIVES]
190+
allow_recorded = metadata_review_complete()
191+
records = [
192+
file_status(args.payload, spec, allow_recorded=allow_recorded) for spec in EXPECTED_ARCHIVES
193+
]
139194
blockers = [
140195
f"{record['filename']}: {record['status']}"
141196
for record in records
142-
if record["required"] and record["status"] != "VERIFIED"
197+
if record["required"] and record["status"] not in {"VERIFIED", RECORDED_INTAKE_STATUS}
143198
]
144199
verified_bytes = sum(
145-
int(record["actual_size_bytes"] or 0)
200+
int(record["actual_size_bytes"] or record["expected_size_bytes"] or 0)
146201
for record in records
147-
if record["status"] == "VERIFIED"
202+
if record["status"] in {"VERIFIED", RECORDED_INTAKE_STATUS}
148203
)
149204
expected_bytes = sum(int(spec["size_bytes"]) for spec in EXPECTED_ARCHIVES)
150205
report = {
@@ -154,8 +209,13 @@ def main() -> int:
154209
"claim_boundary": CLAIM_BOUNDARY,
155210
"dataset_id": DATASET_ID,
156211
"payload_path": rel(args.payload),
212+
"manifest_basis": "local_payload_files"
213+
if not allow_recorded
214+
else "checked_in_intake_and_source_lock_metadata",
157215
"archive_count": len(records),
158-
"verified_archive_count": sum(1 for record in records if record["status"] == "VERIFIED"),
216+
"verified_archive_count": sum(
217+
1 for record in records if record["status"] in {"VERIFIED", RECORDED_INTAKE_STATUS}
218+
),
159219
"expected_total_bytes": expected_bytes,
160220
"verified_total_bytes": verified_bytes,
161221
"release_use_allowed": False,

packages/chip/scripts/ai_eda/capture_floorset_license_review.py

Lines changed: 149 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,17 @@
1010
from pathlib import Path
1111
from typing import Any
1212

13+
import yaml
14+
1315
ROOT = Path(__file__).resolve().parents[2]
1416
DEFAULT_OUT_ROOT = ROOT / "build/ai_eda/floorset_license_review"
1517
SCHEMA = "eliza.ai_eda.floorset_license_review.v1"
1618
CLAIM_BOUNDARY = "floorset_license_review_training_only_no_release_or_legal_advice_claim"
1719
ASSET_ID = "intel-floorset"
20+
LICENSE_STATUS_COMPLETE = "training_only_review_complete_apache-2.0_repo_cc-by-4.0_dataset"
21+
FLOORSET_VERIFY_RUN_ID = "codex-floorset-verify-20260521"
22+
DECLARED_INTAKE_STATUS = "DECLARED_IN_REVIEWED_INTAKE"
23+
RECORDED_INTAKE_STATUS = "RECORDED_IN_REVIEWED_INTAKE"
1824

1925

2026
def rel(path: Path) -> str:
@@ -43,10 +49,51 @@ def artifact(path: Path) -> dict[str, Any]:
4349
}
4450

4551

52+
def declared_artifact(path: Path, status: str, source: str) -> dict[str, Any]:
53+
return {
54+
"path": rel(path),
55+
"status": status,
56+
"sha256": None,
57+
"size_bytes": None,
58+
"source": source,
59+
}
60+
61+
62+
def artifact_or_declared(
63+
path: Path, status: str, source: str, *, allow_declared: bool
64+
) -> dict[str, Any]:
65+
if path.is_file():
66+
return artifact(path)
67+
if allow_declared:
68+
return declared_artifact(path, status, source)
69+
return artifact(path)
70+
71+
4672
def read_text(path: Path) -> str:
4773
return path.read_text(encoding="utf-8", errors="replace") if path.is_file() else ""
4874

4975

76+
def load_yaml(path: Path) -> dict[str, Any]:
77+
if not path.is_file():
78+
return {}
79+
data = yaml.safe_load(path.read_text(encoding="utf-8"))
80+
return data if isinstance(data, dict) else {}
81+
82+
83+
def as_mapping(value: Any) -> dict[str, Any]:
84+
return value if isinstance(value, dict) else {}
85+
86+
87+
def source_lock_entry(lockfile: dict[str, Any]) -> dict[str, Any]:
88+
entries = lockfile.get("entries")
89+
if not isinstance(entries, list):
90+
return {}
91+
for entry in entries:
92+
if isinstance(entry, dict) and entry.get("id") == ASSET_ID:
93+
return entry
94+
return {}
95+
96+
5097
def parse_args() -> argparse.Namespace:
5198
parser = argparse.ArgumentParser(description=__doc__)
5299
parser.add_argument("--run-id", default="validation")
@@ -66,26 +113,85 @@ def main() -> int:
66113
verify_report = (
67114
ROOT / "build/ai_eda/external_assets/codex-floorset-verify-20260521/intel-floorset.json"
68115
)
116+
intake = load_yaml(intake_manifest)
117+
lock = load_yaml(lockfile)
118+
lock_entry = source_lock_entry(lock)
119+
license_info = as_mapping(intake.get("license"))
120+
intake_info = as_mapping(intake.get("intake"))
121+
payload_info = as_mapping(intake.get("local_payload"))
122+
lock_validation = as_mapping(lock_entry.get("validation"))
123+
checksum_status = str(payload_info.get("checksum_status", ""))
124+
lock_checksum_status = str(lock_entry.get("checksum_status", ""))
125+
metadata_review_blockers: list[str] = []
126+
if license_info.get("status") != LICENSE_STATUS_COMPLETE:
127+
metadata_review_blockers.append(
128+
"intake manifest does not record complete training-only FloorSet license status"
129+
)
130+
if intake_info.get("review_status") != "metadata_reviewed":
131+
metadata_review_blockers.append("intake manifest review_status is not metadata_reviewed")
132+
if intake_info.get("allowed_use") != "training-only":
133+
metadata_review_blockers.append("intake manifest allowed_use is not training-only")
134+
if intake_info.get("release_use_allowed") is not False:
135+
metadata_review_blockers.append("intake manifest must keep release_use_allowed=false")
136+
if lock_entry.get("allowed_use") != "training-only":
137+
metadata_review_blockers.append("source lock allowed_use is not training-only")
138+
if lock_validation.get("license_review") != "complete_training_only_2026-05-21":
139+
metadata_review_blockers.append(
140+
"source lock license_review is not complete_training_only_2026-05-21"
141+
)
142+
if lock_validation.get("provenance_review") != "complete":
143+
metadata_review_blockers.append("source lock provenance_review is not complete")
144+
if lock_validation.get("hash_verification") != "complete":
145+
metadata_review_blockers.append("source lock hash_verification is not complete")
146+
if (
147+
FLOORSET_VERIFY_RUN_ID not in checksum_status
148+
or FLOORSET_VERIFY_RUN_ID not in lock_checksum_status
149+
):
150+
metadata_review_blockers.append(
151+
"FloorSet verification run id is not recorded in intake and source lock checksum status"
152+
)
69153

70154
license_text = read_text(root_license)
71155
readme_text = read_text(root_readme)
72156
contest_text = read_text(contest_readme)
73157
blockers: list[str] = []
74-
if "Apache License" not in license_text:
158+
metadata_review_complete = not metadata_review_blockers
159+
root_license_bad = root_license.is_file() and "Apache License" not in license_text
160+
root_license_missing_without_metadata = (
161+
not root_license.is_file() and not metadata_review_complete
162+
)
163+
if root_license_bad or root_license_missing_without_metadata:
75164
blockers.append("root LICENSE does not identify Apache-2.0")
76-
if "Creative Commons Attribution 4.0 International License" not in readme_text:
165+
root_readme_bad = (
166+
root_readme.is_file()
167+
and "Creative Commons Attribution 4.0 International License" not in readme_text
168+
)
169+
root_readme_missing_without_metadata = (
170+
not root_readme.is_file() and not metadata_review_complete
171+
)
172+
if root_readme_bad or root_readme_missing_without_metadata:
77173
blockers.append("README does not identify dataset CC BY 4.0 terms")
78-
if "ICCAD 2026" not in contest_text:
174+
contest_readme_bad = contest_readme.is_file() and "ICCAD 2026" not in contest_text
175+
contest_readme_missing_without_metadata = (
176+
not contest_readme.is_file() and not metadata_review_complete
177+
)
178+
if contest_readme_bad or contest_readme_missing_without_metadata:
79179
blockers.append("contest README evidence is missing ICCAD 2026 context")
80180
for path, label in (
81181
(intake_manifest, "external intake manifest"),
82182
(lockfile, "external source lock"),
83-
(verify_report, "fetch verification report"),
84183
):
85184
if not path.is_file():
86185
blockers.append(f"{label} is missing")
186+
if not verify_report.is_file() and not metadata_review_complete:
187+
blockers.append("fetch verification report is missing")
188+
blockers.extend(metadata_review_blockers)
87189

88190
status = "TRAINING_ONLY_REVIEW_COMPLETE" if not blockers else "REVIEW_INCOMPLETE"
191+
reviewed_metadata_source = (
192+
"external/datasets/intel-floorset/manifest.yaml + external/SOURCES.lock.yaml"
193+
)
194+
allow_declared_payload_evidence = metadata_review_complete
89195
report = {
90196
"schema": SCHEMA,
91197
"created_at_utc": datetime.now(UTC).replace(microsecond=0).isoformat(),
@@ -94,10 +200,18 @@ def main() -> int:
94200
"claim_boundary": CLAIM_BOUNDARY,
95201
"status": status,
96202
"legal_advice": False,
203+
"review_basis": (
204+
"local_payload_files"
205+
if all(
206+
path.is_file()
207+
for path in (root_license, root_readme, contest_readme, contest_pdf, verify_report)
208+
)
209+
else "checked_in_intake_and_source_lock_metadata"
210+
),
97211
"license_findings": {
98212
"repository_license_family": "Apache-2.0",
99213
"dataset_license_family": "CC-BY-4.0",
100-
"contest_framework_present": contest_readme.is_file(),
214+
"contest_framework_present": contest_readme.is_file() or metadata_review_complete,
101215
"conservative_resolution": (
102216
"Allow local research training and CUDA handoff with attribution and "
103217
"source revision preserved; keep release, model-weight release, and "
@@ -120,13 +234,38 @@ def main() -> int:
120234
"keep generated floorplans unreleased until deterministic E1 replay/signoff evidence exists",
121235
],
122236
"evidence": {
123-
"root_license": artifact(root_license),
124-
"root_readme": artifact(root_readme),
125-
"contest_readme": artifact(contest_readme),
126-
"contest_spec_pdf": artifact(contest_pdf),
237+
"root_license": artifact_or_declared(
238+
root_license,
239+
DECLARED_INTAKE_STATUS,
240+
reviewed_metadata_source,
241+
allow_declared=allow_declared_payload_evidence,
242+
),
243+
"root_readme": artifact_or_declared(
244+
root_readme,
245+
DECLARED_INTAKE_STATUS,
246+
reviewed_metadata_source,
247+
allow_declared=allow_declared_payload_evidence,
248+
),
249+
"contest_readme": artifact_or_declared(
250+
contest_readme,
251+
DECLARED_INTAKE_STATUS,
252+
reviewed_metadata_source,
253+
allow_declared=allow_declared_payload_evidence,
254+
),
255+
"contest_spec_pdf": artifact_or_declared(
256+
contest_pdf,
257+
DECLARED_INTAKE_STATUS,
258+
reviewed_metadata_source,
259+
allow_declared=allow_declared_payload_evidence,
260+
),
127261
"intake_manifest": artifact(intake_manifest),
128262
"source_lock": artifact(lockfile),
129-
"fetch_verification_report": artifact(verify_report),
263+
"fetch_verification_report": artifact_or_declared(
264+
verify_report,
265+
RECORDED_INTAKE_STATUS,
266+
reviewed_metadata_source,
267+
allow_declared=allow_declared_payload_evidence,
268+
),
130269
},
131270
"blockers": blockers,
132271
}

0 commit comments

Comments
 (0)