Skip to content

Commit e2b25ca

Browse files
authored
Merge pull request #58 from redhat-performance/feat/RPOPC-1347-uuid-fields
RPOPC-1347: Add UUID tracking fields to Chronicler schema
2 parents 1e1ae48 + 122482a commit e2b25ca

2 files changed

Lines changed: 101 additions & 6 deletions

File tree

src/chronicler/schema.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@ class Metadata:
4646
# URL to the Performance CoPilot (PCP) archive for this test run (e.g. internal server, S3).
4747
pcp_archive_url: Optional[str] = None
4848

49+
# UUIDs for tracking (Zathras integration)
50+
project_uuid: Optional[str] = None
51+
run_uuid: Optional[str] = None
52+
result_uuid: Optional[str] = None
53+
4954
def to_dict(self) -> Dict[str, Any]:
5055
return {k: v for k, v in asdict(self).items() if v is not None}
5156

@@ -389,6 +394,7 @@ def calculate_content_hash(self, exclude_processing_timestamp: bool = True) -> s
389394
The hash excludes:
390395
- All timestamps (test_timestamp, processing_timestamp, collection_timestamp), but only when exclude_processing_timestamp is True
391396
- Document ID (computed from hash) and pcp_archive_url (storage location, not part of result identity)
397+
- UUIDs (project_uuid, run_uuid, result_uuid - identifiers, not content)
392398
- Timeseries data (stored separately, often has synthetic timestamps)
393399
394400
Args:
@@ -402,15 +408,21 @@ def calculate_content_hash(self, exclude_processing_timestamp: bool = True) -> s
402408
doc_dict = copy.deepcopy(self.to_dict_summary_only())
403409

404410
# Remove fields that change on re-processing or are metadata-only
405-
if exclude_processing_timestamp and 'metadata' in doc_dict:
406-
# Remove ALL timestamps - they're metadata, not test results
407-
doc_dict['metadata'].pop('processing_timestamp', None)
408-
doc_dict['metadata'].pop('test_timestamp', None)
409-
doc_dict['metadata'].pop('collection_timestamp', None)
410-
# Also remove document_id as we're computing it
411+
if 'metadata' in doc_dict:
412+
if exclude_processing_timestamp:
413+
# Remove ALL timestamps - they're metadata, not test results
414+
doc_dict['metadata'].pop('processing_timestamp', None)
415+
doc_dict['metadata'].pop('test_timestamp', None)
416+
doc_dict['metadata'].pop('collection_timestamp', None)
417+
# Always remove these fields regardless of timestamp flag
418+
# Document ID is computed from hash, so it shouldn't be in the hash
411419
doc_dict['metadata'].pop('document_id', None)
412420
# PCP archive URL is storage location, not part of result identity
413421
doc_dict['metadata'].pop('pcp_archive_url', None)
422+
# UUIDs are identifiers, not content - always exclude from hash
423+
doc_dict['metadata'].pop('project_uuid', None)
424+
doc_dict['metadata'].pop('run_uuid', None)
425+
doc_dict['metadata'].pop('result_uuid', None)
414426

415427
# Sort keys for deterministic ordering
416428
sorted_json = json.dumps(doc_dict, sort_keys=True, separators=(',', ':'))

tests/test_schema.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,27 @@ def test_to_dict_includes_optional_when_set(self):
7272
assert d["instance_type"] == "m5.large"
7373
assert d["iteration"] == 1
7474

75+
def test_to_dict_includes_uuids_when_set(self):
76+
"""Test that UUID fields are included in to_dict() when present."""
77+
meta = Metadata(
78+
document_id="doc123",
79+
project_uuid="550e8400-e29b-41d4-a716-446655440000",
80+
run_uuid="660e8400-e29b-41d4-a716-446655440001",
81+
result_uuid="770e8400-e29b-41d4-a716-446655440002",
82+
)
83+
d = meta.to_dict()
84+
assert d["project_uuid"] == "550e8400-e29b-41d4-a716-446655440000"
85+
assert d["run_uuid"] == "660e8400-e29b-41d4-a716-446655440001"
86+
assert d["result_uuid"] == "770e8400-e29b-41d4-a716-446655440002"
87+
88+
def test_to_dict_excludes_uuids_when_not_set(self):
89+
"""Test backward compatibility: UUID fields are excluded when None."""
90+
meta = Metadata(document_id="doc123")
91+
d = meta.to_dict()
92+
assert "project_uuid" not in d
93+
assert "run_uuid" not in d
94+
assert "result_uuid" not in d
95+
7596

7697
class TestTestInfo:
7798
"""Tests for TestInfo dataclass."""
@@ -521,6 +542,68 @@ def test_calculate_content_hash_different_for_different_content(self):
521542
)
522543
assert doc1.calculate_content_hash() != doc2.calculate_content_hash()
523544

545+
def test_calculate_content_hash_excludes_uuids(self):
546+
"""Test that UUID fields don't affect content hash (they're identifiers, not content)."""
547+
doc1 = ZathrasDocument(
548+
metadata=Metadata(
549+
document_id="doc123",
550+
project_uuid="550e8400-e29b-41d4-a716-446655440000",
551+
run_uuid="660e8400-e29b-41d4-a716-446655440001",
552+
result_uuid="770e8400-e29b-41d4-a716-446655440002",
553+
),
554+
test=TestInfo(name="test", version="1.0"),
555+
system_under_test=SystemUnderTest(),
556+
test_configuration=TestConfiguration(),
557+
results=Results(status="PASS"),
558+
)
559+
doc2 = ZathrasDocument(
560+
metadata=Metadata(
561+
document_id="doc456",
562+
project_uuid="880e8400-e29b-41d4-a716-446655440003",
563+
run_uuid="990e8400-e29b-41d4-a716-446655440004",
564+
result_uuid="aa0e8400-e29b-41d4-a716-446655440005",
565+
),
566+
test=TestInfo(name="test", version="1.0"),
567+
system_under_test=SystemUnderTest(),
568+
test_configuration=TestConfiguration(),
569+
results=Results(status="PASS"),
570+
)
571+
# Same content, different UUIDs - should produce same hash
572+
assert doc1.calculate_content_hash() == doc2.calculate_content_hash()
573+
574+
def test_calculate_content_hash_excludes_uuids_regardless_of_timestamp_flag(self):
575+
"""Test that UUIDs are excluded even when exclude_processing_timestamp=False."""
576+
doc1 = ZathrasDocument(
577+
metadata=Metadata(
578+
document_id="doc123",
579+
test_timestamp="2026-03-17T10:00:00Z",
580+
project_uuid="550e8400-e29b-41d4-a716-446655440000",
581+
run_uuid="660e8400-e29b-41d4-a716-446655440001",
582+
result_uuid="770e8400-e29b-41d4-a716-446655440002",
583+
),
584+
test=TestInfo(name="test", version="1.0"),
585+
system_under_test=SystemUnderTest(),
586+
test_configuration=TestConfiguration(),
587+
results=Results(status="PASS"),
588+
)
589+
doc2 = ZathrasDocument(
590+
metadata=Metadata(
591+
document_id="doc456",
592+
test_timestamp="2026-03-17T10:00:00Z",
593+
project_uuid="880e8400-e29b-41d4-a716-446655440003",
594+
run_uuid="990e8400-e29b-41d4-a716-446655440004",
595+
result_uuid="aa0e8400-e29b-41d4-a716-446655440005",
596+
),
597+
test=TestInfo(name="test", version="1.0"),
598+
system_under_test=SystemUnderTest(),
599+
test_configuration=TestConfiguration(),
600+
results=Results(status="PASS"),
601+
)
602+
# Same content, different UUIDs, timestamp flag False - UUIDs still excluded
603+
hash1 = doc1.calculate_content_hash(exclude_processing_timestamp=False)
604+
hash2 = doc2.calculate_content_hash(exclude_processing_timestamp=False)
605+
assert hash1 == hash2
606+
524607
def test_extract_timeseries_documents(self, full_document):
525608
ts_docs = full_document.extract_timeseries_documents()
526609
assert len(ts_docs) == 2

0 commit comments

Comments
 (0)