Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/chronicler/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ class Metadata:
# URL to the Performance CoPilot (PCP) archive for this test run (e.g. internal server, S3).
pcp_archive_url: Optional[str] = None

# UUIDs for tracking (Zathras integration)
project_uuid: Optional[str] = None
run_uuid: Optional[str] = None
result_uuid: Optional[str] = None

def to_dict(self) -> Dict[str, Any]:
    """Return this dataclass instance as a dict, omitting unset fields.

    Top-level fields whose value is ``None`` are dropped, so optional
    attributes only appear in the output once they have been given a value.
    """
    populated: Dict[str, Any] = {}
    for field_name, field_value in asdict(self).items():
        if field_value is None:
            continue
        populated[field_name] = field_value
    return populated

Expand Down Expand Up @@ -389,6 +394,7 @@ def calculate_content_hash(self, exclude_processing_timestamp: bool = True) -> s
The hash excludes:
- All timestamps (test_timestamp, processing_timestamp, collection_timestamp)
- Document ID (computed from hash)
- UUIDs (project_uuid, run_uuid, result_uuid - identifiers, not content)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
- Timeseries data (stored separately, often has synthetic timestamps)

Args:
Expand All @@ -411,6 +417,10 @@ def calculate_content_hash(self, exclude_processing_timestamp: bool = True) -> s
doc_dict['metadata'].pop('document_id', None)
# PCP archive URL is storage location, not part of result identity
doc_dict['metadata'].pop('pcp_archive_url', None)
# UUIDs are identifiers, not content - exclude from hash
doc_dict['metadata'].pop('project_uuid', None)
doc_dict['metadata'].pop('run_uuid', None)
doc_dict['metadata'].pop('result_uuid', None)

# Sort keys for deterministic ordering
sorted_json = json.dumps(doc_dict, sort_keys=True, separators=(',', ':'))
Expand Down
50 changes: 50 additions & 0 deletions tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,27 @@ def test_to_dict_includes_optional_when_set(self):
assert d["instance_type"] == "m5.large"
assert d["iteration"] == 1

def test_to_dict_includes_uuids_when_set(self):
    """UUID fields that were given a value must show up in to_dict()."""
    expected_uuids = {
        "project_uuid": "550e8400-e29b-41d4-a716-446655440000",
        "run_uuid": "660e8400-e29b-41d4-a716-446655440001",
        "result_uuid": "770e8400-e29b-41d4-a716-446655440002",
    }
    serialized = Metadata(document_id="doc123", **expected_uuids).to_dict()
    # Each UUID must round-trip unchanged under its own key.
    for field_name, uuid_value in expected_uuids.items():
        assert serialized[field_name] == uuid_value

def test_to_dict_excludes_uuids_when_not_set(self):
    """Backward compatibility: UUID fields left as None are omitted."""
    serialized = Metadata(document_id="doc123").to_dict()
    for uuid_field in ("project_uuid", "run_uuid", "result_uuid"):
        assert uuid_field not in serialized


class TestTestInfo:
"""Tests for TestInfo dataclass."""
Expand Down Expand Up @@ -521,6 +542,35 @@ def test_calculate_content_hash_different_for_different_content(self):
)
assert doc1.calculate_content_hash() != doc2.calculate_content_hash()

def test_calculate_content_hash_excludes_uuids(self):
    """Documents that differ only in identifiers must hash identically."""

    def build_document(document_id, project_uuid, run_uuid, result_uuid):
        # Everything except the metadata identifiers is held constant.
        return ZathrasDocument(
            metadata=Metadata(
                document_id=document_id,
                project_uuid=project_uuid,
                run_uuid=run_uuid,
                result_uuid=result_uuid,
            ),
            test=TestInfo(name="test", version="1.0"),
            system_under_test=SystemUnderTest(),
            test_configuration=TestConfiguration(),
            results=Results(status="PASS"),
        )

    first = build_document(
        "doc123",
        "550e8400-e29b-41d4-a716-446655440000",
        "660e8400-e29b-41d4-a716-446655440001",
        "770e8400-e29b-41d4-a716-446655440002",
    )
    second = build_document(
        "doc456",
        "880e8400-e29b-41d4-a716-446655440003",
        "990e8400-e29b-41d4-a716-446655440004",
        "aa0e8400-e29b-41d4-a716-446655440005",
    )

    # Same content, different UUIDs - the hashes must match.
    first_hash = first.calculate_content_hash()
    second_hash = second.calculate_content_hash()
    assert first_hash == second_hash

def test_extract_timeseries_documents(self, full_document):
ts_docs = full_document.extract_timeseries_documents()
assert len(ts_docs) == 2
Expand Down
Loading