diff --git a/src/chronicler/schema.py b/src/chronicler/schema.py
index 0341492..dd3b9aa 100644
--- a/src/chronicler/schema.py
+++ b/src/chronicler/schema.py
@@ -46,6 +46,11 @@ class Metadata:
     # URL to the Performance CoPilot (PCP) archive for this test run (e.g. internal server, S3).
     pcp_archive_url: Optional[str] = None
 
+    # UUIDs for tracking (Zathras integration)
+    project_uuid: Optional[str] = None
+    run_uuid: Optional[str] = None
+    result_uuid: Optional[str] = None
+
     def to_dict(self) -> Dict[str, Any]:
         return {k: v for k, v in asdict(self).items() if v is not None}
 
@@ -389,6 +394,7 @@ def calculate_content_hash(self, exclude_processing_timestamp: bool = True) -> s
         The hash excludes:
         - All timestamps (test_timestamp, processing_timestamp, collection_timestamp)
         - Document ID (computed from hash)
+        - UUIDs (project_uuid, run_uuid, result_uuid - identifiers, not content)
         - Timeseries data (stored separately, often has synthetic timestamps)
 
         Args:
@@ -402,15 +408,21 @@ def calculate_content_hash(self, exclude_processing_timestamp: bool = True) -> s
         doc_dict = copy.deepcopy(self.to_dict_summary_only())
 
         # Remove fields that change on re-processing or are metadata-only
-        if exclude_processing_timestamp and 'metadata' in doc_dict:
-            # Remove ALL timestamps - they're metadata, not test results
-            doc_dict['metadata'].pop('processing_timestamp', None)
-            doc_dict['metadata'].pop('test_timestamp', None)
-            doc_dict['metadata'].pop('collection_timestamp', None)
-            # Also remove document_id as we're computing it
+        if 'metadata' in doc_dict:
+            if exclude_processing_timestamp:
+                # Remove ALL timestamps - they're metadata, not test results
+                doc_dict['metadata'].pop('processing_timestamp', None)
+                doc_dict['metadata'].pop('test_timestamp', None)
+                doc_dict['metadata'].pop('collection_timestamp', None)
+            # Always remove these fields regardless of timestamp flag
+            # Document ID is computed from hash, so it shouldn't be in the hash
             doc_dict['metadata'].pop('document_id', None)
             # PCP archive URL is storage location, not part of result identity
             doc_dict['metadata'].pop('pcp_archive_url', None)
+            # UUIDs are identifiers, not content - always exclude from hash
+            doc_dict['metadata'].pop('project_uuid', None)
+            doc_dict['metadata'].pop('run_uuid', None)
+            doc_dict['metadata'].pop('result_uuid', None)
 
         # Sort keys for deterministic ordering
         sorted_json = json.dumps(doc_dict, sort_keys=True, separators=(',', ':'))
diff --git a/tests/test_schema.py b/tests/test_schema.py
index efc71df..a36e987 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -72,6 +72,27 @@ def test_to_dict_includes_optional_when_set(self):
         assert d["instance_type"] == "m5.large"
         assert d["iteration"] == 1
 
+    def test_to_dict_includes_uuids_when_set(self):
+        """Test that UUID fields are included in to_dict() when present."""
+        meta = Metadata(
+            document_id="doc123",
+            project_uuid="550e8400-e29b-41d4-a716-446655440000",
+            run_uuid="660e8400-e29b-41d4-a716-446655440001",
+            result_uuid="770e8400-e29b-41d4-a716-446655440002",
+        )
+        d = meta.to_dict()
+        assert d["project_uuid"] == "550e8400-e29b-41d4-a716-446655440000"
+        assert d["run_uuid"] == "660e8400-e29b-41d4-a716-446655440001"
+        assert d["result_uuid"] == "770e8400-e29b-41d4-a716-446655440002"
+
+    def test_to_dict_excludes_uuids_when_not_set(self):
+        """Test backward compatibility: UUID fields are excluded when None."""
+        meta = Metadata(document_id="doc123")
+        d = meta.to_dict()
+        assert "project_uuid" not in d
+        assert "run_uuid" not in d
+        assert "result_uuid" not in d
+
 
 class TestTestInfo:
     """Tests for TestInfo dataclass."""
@@ -521,6 +542,68 @@ def test_calculate_content_hash_different_for_different_content(self):
         )
         assert doc1.calculate_content_hash() != doc2.calculate_content_hash()
 
+    def test_calculate_content_hash_excludes_uuids(self):
+        """Test that UUID fields don't affect content hash (they're identifiers, not content)."""
+        doc1 = ZathrasDocument(
+            metadata=Metadata(
+                document_id="doc123",
+                project_uuid="550e8400-e29b-41d4-a716-446655440000",
+                run_uuid="660e8400-e29b-41d4-a716-446655440001",
+                result_uuid="770e8400-e29b-41d4-a716-446655440002",
+            ),
+            test=TestInfo(name="test", version="1.0"),
+            system_under_test=SystemUnderTest(),
+            test_configuration=TestConfiguration(),
+            results=Results(status="PASS"),
+        )
+        doc2 = ZathrasDocument(
+            metadata=Metadata(
+                document_id="doc456",
+                project_uuid="880e8400-e29b-41d4-a716-446655440003",
+                run_uuid="990e8400-e29b-41d4-a716-446655440004",
+                result_uuid="aa0e8400-e29b-41d4-a716-446655440005",
+            ),
+            test=TestInfo(name="test", version="1.0"),
+            system_under_test=SystemUnderTest(),
+            test_configuration=TestConfiguration(),
+            results=Results(status="PASS"),
+        )
+        # Same content, different UUIDs - should produce same hash
+        assert doc1.calculate_content_hash() == doc2.calculate_content_hash()
+
+    def test_calculate_content_hash_excludes_uuids_regardless_of_timestamp_flag(self):
+        """Test that UUIDs are excluded even when exclude_processing_timestamp=False."""
+        doc1 = ZathrasDocument(
+            metadata=Metadata(
+                document_id="doc123",
+                test_timestamp="2026-03-17T10:00:00Z",
+                project_uuid="550e8400-e29b-41d4-a716-446655440000",
+                run_uuid="660e8400-e29b-41d4-a716-446655440001",
+                result_uuid="770e8400-e29b-41d4-a716-446655440002",
+            ),
+            test=TestInfo(name="test", version="1.0"),
+            system_under_test=SystemUnderTest(),
+            test_configuration=TestConfiguration(),
+            results=Results(status="PASS"),
+        )
+        doc2 = ZathrasDocument(
+            metadata=Metadata(
+                document_id="doc456",
+                test_timestamp="2026-03-17T10:00:00Z",
+                project_uuid="880e8400-e29b-41d4-a716-446655440003",
+                run_uuid="990e8400-e29b-41d4-a716-446655440004",
+                result_uuid="aa0e8400-e29b-41d4-a716-446655440005",
+            ),
+            test=TestInfo(name="test", version="1.0"),
+            system_under_test=SystemUnderTest(),
+            test_configuration=TestConfiguration(),
+            results=Results(status="PASS"),
+        )
+        # Same content, different UUIDs, timestamp flag False - UUIDs still excluded
+        hash1 = doc1.calculate_content_hash(exclude_processing_timestamp=False)
+        hash2 = doc2.calculate_content_hash(exclude_processing_timestamp=False)
+        assert hash1 == hash2
+
     def test_extract_timeseries_documents(self, full_document):
         ts_docs = full_document.extract_timeseries_documents()
         assert len(ts_docs) == 2