1010from pathlib import Path
1111from typing import Any
1212
13+ import yaml
14+
# Third ancestor directory of this file (``parents[2]``); every other path in
# this module is built relative to it.
ROOT = Path(__file__).resolve().parents[2]
# Output directory for this review, located under ROOT.
DEFAULT_OUT_ROOT = ROOT / "build/ai_eda/floorset_license_review"
# Value written to the report's "schema" field.
SCHEMA = "eliza.ai_eda.floorset_license_review.v1"
# Value written to the report's "claim_boundary" field.
CLAIM_BOUNDARY = "floorset_license_review_training_only_no_release_or_legal_advice_claim"
# Source-lock entries are matched against this value via their "id" key.
ASSET_ID = "intel-floorset"
# The intake manifest's license.status must equal this exact string for the
# metadata review to count as complete.
LICENSE_STATUS_COMPLETE = "training_only_review_complete_apache-2.0_repo_cc-by-4.0_dataset"
# Must appear as a substring of both the intake manifest's and the source
# lock's checksum_status strings.
FLOORSET_VERIFY_RUN_ID = "codex-floorset-verify-20260521"
# Status labels attached to evidence entries that are declared from reviewed
# metadata instead of being hashed from a local file.
DECLARED_INTAKE_STATUS = "DECLARED_IN_REVIEWED_INTAKE"
RECORDED_INTAKE_STATUS = "RECORDED_IN_REVIEWED_INTAKE"
1824
1925
2026def rel (path : Path ) -> str :
@@ -43,10 +49,51 @@ def artifact(path: Path) -> dict[str, Any]:
4349 }
4450
4551
def declared_artifact(path: Path, status: str, source: str) -> dict[str, Any]:
    """Build an evidence record for a file that is declared, not hashed.

    Args:
        path: Location the evidence would occupy; stored via ``rel(path)``.
        status: Status label stored verbatim under ``"status"``.
        source: Description of the metadata backing the declaration, stored
            verbatim under ``"source"``.

    Returns:
        A dict with keys ``path``, ``status``, ``sha256``, ``size_bytes`` and
        ``source``. ``sha256`` and ``size_bytes`` are always ``None`` because
        no file content is read here.
    """
    return {
        "path": rel(path),
        "status": status,
        "sha256": None,
        "size_bytes": None,
        "source": source,
    }
60+
61+
def artifact_or_declared(
    path: Path, status: str, source: str, *, allow_declared: bool
) -> dict[str, Any]:
    """Return file-backed evidence when possible, else a declared placeholder.

    Args:
        path: Evidence file to describe.
        status: Status label used only for the declared placeholder.
        source: Metadata source description used only for the placeholder.
        allow_declared: Whether a missing file may be represented by a
            declared placeholder.

    Returns:
        ``declared_artifact(path, status, source)`` when ``path`` is not a
        regular file and ``allow_declared`` is true; otherwise
        ``artifact(path)``. ``artifact`` is therefore still called for a
        missing file when declarations are not allowed.
    """
    # The only case that does not go through artifact() is "file absent and
    # declarations permitted"; both remaining cases share a single return.
    if not path.is_file() and allow_declared:
        return declared_artifact(path, status, source)
    return artifact(path)
70+
71+
def read_text(path: Path) -> str:
    """Return the UTF-8 text of ``path``, or ``""`` when it is not a file.

    Undecodable bytes are substituted (``errors="replace"``) rather than
    raising, so the result is always a string.

    Args:
        path: File to read.

    Returns:
        The decoded file content, or an empty string when ``path`` is not a
        regular file.
    """
    if not path.is_file():
        return ""
    try:
        return path.read_text(encoding="utf-8", errors="replace")
    except FileNotFoundError:
        # The file vanished between the is_file() check and the read; treat
        # it the same as a file that was never there.
        return ""
4874
4975
def load_yaml(path: Path) -> dict[str, Any]:
    """Load a YAML file whose top-level value is expected to be a mapping.

    Args:
        path: YAML file to read as UTF-8.

    Returns:
        The parsed mapping, or ``{}`` when ``path`` is not a regular file or
        the document's top-level value is not a dict (including an empty
        document).

    Note:
        Errors raised by ``yaml.safe_load`` or by decoding the file are not
        caught here, so a malformed file propagates to the caller instead of
        being reported as an empty mapping.
    """
    if not path.is_file():
        return {}
    # safe_load restricts parsing to plain YAML data types.
    parsed = yaml.safe_load(path.read_text(encoding="utf-8"))
    if isinstance(parsed, dict):
        return parsed
    return {}
81+
82+
def as_mapping(value: Any) -> dict[str, Any]:
    """Return ``value`` unchanged when it is a dict, otherwise ``{}``.

    Lets callers chain ``.get(...)`` on optional nested sections without
    checking the type first. A dict argument is returned as the same object,
    not a copy.
    """
    if isinstance(value, dict):
        return value
    return {}
85+
86+
87+ def source_lock_entry (lockfile : dict [str , Any ]) -> dict [str , Any ]:
88+ entries = lockfile .get ("entries" )
89+ if not isinstance (entries , list ):
90+ return {}
91+ for entry in entries :
92+ if isinstance (entry , dict ) and entry .get ("id" ) == ASSET_ID :
93+ return entry
94+ return {}
95+
96+
5097def parse_args () -> argparse .Namespace :
5198 parser = argparse .ArgumentParser (description = __doc__ )
5299 parser .add_argument ("--run-id" , default = "validation" )
@@ -66,26 +113,85 @@ def main() -> int:
66113 verify_report = (
67114 ROOT / "build/ai_eda/external_assets/codex-floorset-verify-20260521/intel-floorset.json"
68115 )
116+ intake = load_yaml (intake_manifest )
117+ lock = load_yaml (lockfile )
118+ lock_entry = source_lock_entry (lock )
119+ license_info = as_mapping (intake .get ("license" ))
120+ intake_info = as_mapping (intake .get ("intake" ))
121+ payload_info = as_mapping (intake .get ("local_payload" ))
122+ lock_validation = as_mapping (lock_entry .get ("validation" ))
123+ checksum_status = str (payload_info .get ("checksum_status" , "" ))
124+ lock_checksum_status = str (lock_entry .get ("checksum_status" , "" ))
125+ metadata_review_blockers : list [str ] = []
126+ if license_info .get ("status" ) != LICENSE_STATUS_COMPLETE :
127+ metadata_review_blockers .append (
128+ "intake manifest does not record complete training-only FloorSet license status"
129+ )
130+ if intake_info .get ("review_status" ) != "metadata_reviewed" :
131+ metadata_review_blockers .append ("intake manifest review_status is not metadata_reviewed" )
132+ if intake_info .get ("allowed_use" ) != "training-only" :
133+ metadata_review_blockers .append ("intake manifest allowed_use is not training-only" )
134+ if intake_info .get ("release_use_allowed" ) is not False :
135+ metadata_review_blockers .append ("intake manifest must keep release_use_allowed=false" )
136+ if lock_entry .get ("allowed_use" ) != "training-only" :
137+ metadata_review_blockers .append ("source lock allowed_use is not training-only" )
138+ if lock_validation .get ("license_review" ) != "complete_training_only_2026-05-21" :
139+ metadata_review_blockers .append (
140+ "source lock license_review is not complete_training_only_2026-05-21"
141+ )
142+ if lock_validation .get ("provenance_review" ) != "complete" :
143+ metadata_review_blockers .append ("source lock provenance_review is not complete" )
144+ if lock_validation .get ("hash_verification" ) != "complete" :
145+ metadata_review_blockers .append ("source lock hash_verification is not complete" )
146+ if (
147+ FLOORSET_VERIFY_RUN_ID not in checksum_status
148+ or FLOORSET_VERIFY_RUN_ID not in lock_checksum_status
149+ ):
150+ metadata_review_blockers .append (
151+ "FloorSet verification run id is not recorded in intake and source lock checksum status"
152+ )
69153
70154 license_text = read_text (root_license )
71155 readme_text = read_text (root_readme )
72156 contest_text = read_text (contest_readme )
73157 blockers : list [str ] = []
74- if "Apache License" not in license_text :
158+ metadata_review_complete = not metadata_review_blockers
159+ root_license_bad = root_license .is_file () and "Apache License" not in license_text
160+ root_license_missing_without_metadata = (
161+ not root_license .is_file () and not metadata_review_complete
162+ )
163+ if root_license_bad or root_license_missing_without_metadata :
75164 blockers .append ("root LICENSE does not identify Apache-2.0" )
76- if "Creative Commons Attribution 4.0 International License" not in readme_text :
165+ root_readme_bad = (
166+ root_readme .is_file ()
167+ and "Creative Commons Attribution 4.0 International License" not in readme_text
168+ )
169+ root_readme_missing_without_metadata = (
170+ not root_readme .is_file () and not metadata_review_complete
171+ )
172+ if root_readme_bad or root_readme_missing_without_metadata :
77173 blockers .append ("README does not identify dataset CC BY 4.0 terms" )
78- if "ICCAD 2026" not in contest_text :
174+ contest_readme_bad = contest_readme .is_file () and "ICCAD 2026" not in contest_text
175+ contest_readme_missing_without_metadata = (
176+ not contest_readme .is_file () and not metadata_review_complete
177+ )
178+ if contest_readme_bad or contest_readme_missing_without_metadata :
79179 blockers .append ("contest README evidence is missing ICCAD 2026 context" )
80180 for path , label in (
81181 (intake_manifest , "external intake manifest" ),
82182 (lockfile , "external source lock" ),
83- (verify_report , "fetch verification report" ),
84183 ):
85184 if not path .is_file ():
86185 blockers .append (f"{ label } is missing" )
186+ if not verify_report .is_file () and not metadata_review_complete :
187+ blockers .append ("fetch verification report is missing" )
188+ blockers .extend (metadata_review_blockers )
87189
88190 status = "TRAINING_ONLY_REVIEW_COMPLETE" if not blockers else "REVIEW_INCOMPLETE"
191+ reviewed_metadata_source = (
192+ "external/datasets/intel-floorset/manifest.yaml + external/SOURCES.lock.yaml"
193+ )
194+ allow_declared_payload_evidence = metadata_review_complete
89195 report = {
90196 "schema" : SCHEMA ,
91197 "created_at_utc" : datetime .now (UTC ).replace (microsecond = 0 ).isoformat (),
@@ -94,10 +200,18 @@ def main() -> int:
94200 "claim_boundary" : CLAIM_BOUNDARY ,
95201 "status" : status ,
96202 "legal_advice" : False ,
203+ "review_basis" : (
204+ "local_payload_files"
205+ if all (
206+ path .is_file ()
207+ for path in (root_license , root_readme , contest_readme , contest_pdf , verify_report )
208+ )
209+ else "checked_in_intake_and_source_lock_metadata"
210+ ),
97211 "license_findings" : {
98212 "repository_license_family" : "Apache-2.0" ,
99213 "dataset_license_family" : "CC-BY-4.0" ,
100- "contest_framework_present" : contest_readme .is_file (),
214+ "contest_framework_present" : contest_readme .is_file () or metadata_review_complete ,
101215 "conservative_resolution" : (
102216 "Allow local research training and CUDA handoff with attribution and "
103217 "source revision preserved; keep release, model-weight release, and "
@@ -120,13 +234,38 @@ def main() -> int:
120234 "keep generated floorplans unreleased until deterministic E1 replay/signoff evidence exists" ,
121235 ],
122236 "evidence" : {
123- "root_license" : artifact (root_license ),
124- "root_readme" : artifact (root_readme ),
125- "contest_readme" : artifact (contest_readme ),
126- "contest_spec_pdf" : artifact (contest_pdf ),
237+ "root_license" : artifact_or_declared (
238+ root_license ,
239+ DECLARED_INTAKE_STATUS ,
240+ reviewed_metadata_source ,
241+ allow_declared = allow_declared_payload_evidence ,
242+ ),
243+ "root_readme" : artifact_or_declared (
244+ root_readme ,
245+ DECLARED_INTAKE_STATUS ,
246+ reviewed_metadata_source ,
247+ allow_declared = allow_declared_payload_evidence ,
248+ ),
249+ "contest_readme" : artifact_or_declared (
250+ contest_readme ,
251+ DECLARED_INTAKE_STATUS ,
252+ reviewed_metadata_source ,
253+ allow_declared = allow_declared_payload_evidence ,
254+ ),
255+ "contest_spec_pdf" : artifact_or_declared (
256+ contest_pdf ,
257+ DECLARED_INTAKE_STATUS ,
258+ reviewed_metadata_source ,
259+ allow_declared = allow_declared_payload_evidence ,
260+ ),
127261 "intake_manifest" : artifact (intake_manifest ),
128262 "source_lock" : artifact (lockfile ),
129- "fetch_verification_report" : artifact (verify_report ),
263+ "fetch_verification_report" : artifact_or_declared (
264+ verify_report ,
265+ RECORDED_INTAKE_STATUS ,
266+ reviewed_metadata_source ,
267+ allow_declared = allow_declared_payload_evidence ,
268+ ),
130269 },
131270 "blockers" : blockers ,
132271 }
0 commit comments