Skip to content

Commit 8c33add

Browse files
Fix GATE-004 duplicate detection logic
- Fixed namespace ID pattern regex (removed line anchors ^ and $)
- Fixed duplicate detection to check scanned entries before registration
- Fixed report generation to use tracked conflicts
- Fixed output display to show conflicts from report
- Tested with duplicate and non-duplicate scenarios
- Exit code 0 for pass, 1 for fail with duplicates

Co-authored-by: AmedeoPelliccia <164860269+AmedeoPelliccia@users.noreply.github.com>
1 parent a471302 commit 8c33add

File tree

1 file changed

+58
-12
lines changed

1 file changed

+58
-12
lines changed

scripts/check_ata99_registry.py

Lines changed: 58 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,10 @@ class ATA99RegistryChecker:
6161

6262
# Namespace ID patterns for validation
6363
NAMESPACE_PATTERNS = {
64-
'ata99_namespace': r'^NS-ATA99-[A-Z0-9-]+$',
65-
'schema_namespace': r'^NS-SCH-[A-Z0-9-]+$',
66-
'trace_namespace': r'^NS-TR-[A-Z0-9-]+$',
67-
'identifier_namespace': r'^NS-ID-[A-Z0-9-]+$',
64+
'ata99_namespace': r'NS-ATA99-[A-Z0-9-]+',
65+
'schema_namespace': r'NS-SCH-[A-Z0-9-]+',
66+
'trace_namespace': r'NS-TR-[A-Z0-9-]+',
67+
'identifier_namespace': r'NS-ID-[A-Z0-9-]+',
6868
}
6969

7070
def __init__(self, db_path: str = "plc_ontology.db"):
@@ -213,12 +213,17 @@ def _compute_file_hash(self, file_path: Path) -> str:
213213

214214
def generate_report(self) -> Dict[str, Any]:
215215
"""Generate deduplication report."""
216-
duplicates = self.db.check_namespace_duplicates()
217-
218216
report = {
219-
'total_duplicates': len(duplicates),
217+
'total_duplicates': len(self.conflicts),
220218
'conflicts': self.conflicts,
221-
'duplicates_detail': duplicates
219+
'duplicates_detail': [
220+
{
221+
'namespace_id': c['namespace_id'],
222+
'count': c['count'],
223+
'paths': ', '.join(c['paths'])
224+
}
225+
for c in self.conflicts
226+
]
222227
}
223228

224229
return report
@@ -248,18 +253,46 @@ def run_gate_004(db_path: str = "plc_ontology.db", directory: Path = Path('.'))
248253
(passed, report)
249254
"""
250255
import time
256+
from collections import defaultdict
251257
start_time = time.time()
252258

253259
checker = ATA99RegistryChecker(db_path)
254260

255261
# Scan and register namespaces
256262
entries = checker.scan_repository(directory)
263+
264+
# Check for duplicates in scanned entries BEFORE registering
265+
namespace_to_paths = defaultdict(list)
266+
for entry in entries:
267+
namespace_to_paths[entry.namespace_id].append(entry.artifact_path)
268+
269+
conflicts = []
270+
for namespace_id, paths in namespace_to_paths.items():
271+
if len(paths) > 1:
272+
# Record conflict
273+
conflict_id = checker.db.record_namespace_conflict(
274+
namespace_id=namespace_id,
275+
artifact_path_1=paths[0],
276+
artifact_path_2=paths[1],
277+
conflict_type='DUPLICATE_ID'
278+
)
279+
conflicts.append({
280+
'conflict_id': conflict_id,
281+
'namespace_id': namespace_id,
282+
'count': len(paths),
283+
'paths': paths
284+
})
285+
checker.conflicts.append({
286+
'conflict_id': conflict_id,
287+
'namespace_id': namespace_id,
288+
'count': len(paths),
289+
'paths': paths
290+
})
291+
292+
# Register namespaces (INSERT OR REPLACE will update existing ones)
257293
if entries:
258294
checker.register_namespaces(entries)
259295

260-
# Check for duplicates
261-
conflicts = checker.check_duplicates()
262-
263296
# Generate report
264297
report = checker.generate_report()
265298

@@ -388,7 +421,20 @@ def main():
388421
if args.json:
389422
print(json.dumps(report, indent=2))
390423
else:
391-
checker.print_conflicts()
424+
# Print conflicts from report
425+
conflicts = report.get('conflicts', [])
426+
if not conflicts:
427+
print("\n✅ No namespace conflicts detected")
428+
else:
429+
print(f"\n❌ Found {len(conflicts)} namespace conflict(s):\n")
430+
431+
for conflict in conflicts:
432+
print(f"Namespace ID: {conflict['namespace_id']}")
433+
print(f" Occurrences: {conflict['count']}")
434+
print(f" Conflicting files:")
435+
for path in conflict['paths']:
436+
print(f" - {path}")
437+
print()
392438

393439
print(f"\n{'═'*70}")
394440
print(f"GATE-004: Namespace Deduplication Check")

0 commit comments

Comments
 (0)