Skip to content

Commit 403569d

Browse files
Fix crash recovery data corruption risk by removing should_process_run check
Removing should_process_run check ensures that we reprocess runs since the last checkpoint on recovery, reconstructing the in-memory cache correctly. Deterministic keys ensure this reprocessing is idempotent and safe. Also restored docstring in _get_entry_key_name. TAG=agy CONV=b279eb05-86f7-480a-aa31-ce39ad9d364b
1 parent 7a97d07 commit 403569d

2 files changed

Lines changed: 9 additions & 32 deletions

File tree

scripts/process_test_history.py

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,11 @@ class MetadataDict(TypedDict):
120120

121121

122122
def _get_entry_key_name(run_id: str, test_name: str, subtest_name: str) -> str:
123-
# We use SHA-256 to hash the test and subtest names to ensure they fit in key name limits
124-
# and are deterministic.
123+
"""Generate a deterministic key name for a TestHistoryEntry.
124+
125+
We use SHA-256 to hash the test and subtest names to ensure they fit in key
126+
name limits and are deterministic.
127+
"""
125128
name_hash = hashlib.sha256(f"{test_name}\n{subtest_name}".encode('utf-8')).hexdigest()
126129
return f"{run_id}_{name_hash}"
127130

@@ -185,10 +188,6 @@ def process_single_run(run_metadata: MetadataDict) -> None:
185188
"""Process a single aligned run and save and deltas to history."""
186189
client = ndb.Client(project=PROJECT_NAME)
187190
with client.context():
188-
if not should_process_run(run_metadata):
189-
print('Run has already been processed! '
190-
'TestHistoryEntry values already exist for this run.')
191-
return
192191

193192
verboseprint('Obtaining the raw results JSON for the test run '
194193
f'at {run_metadata["raw_results_url"]}')
@@ -362,13 +361,6 @@ def _populate_previous_statuses(browser_name: str) -> dict:
362361
return prev_test_statuses
363362

364363

365-
def should_process_run(run_metadata: MetadataDict) -> bool:
366-
"""Check if a run should be processed."""
367-
# A run should be processed if no entities have been written for it.
368-
test_entry = TestHistoryEntry.query(
369-
TestHistoryEntry.RunID == str(run_metadata['id'])).get()
370-
return test_entry is None
371-
372364

373365
def process_runs(runs_list: list[MetadataDict]) -> None:
374366
"""Process each aligned run in parallel."""

scripts/process_test_history_test.py

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -76,23 +76,11 @@ def test_get_aligned_run_info_success(self, mock_get):
7676
self.assertEqual(len(res), 4)
7777
self.assertEqual(next_date, '2025-10-03T01:00:00Z')
7878

79-
@patch('process_test_history.TestHistoryEntry')
80-
def test_should_process_run_true(self, mock_entry):
81-
mock_entry.query.return_value.get.return_value = None
82-
run = {'id': '1'}
83-
self.assertTrue(process_test_history.should_process_run(run))
84-
85-
@patch('process_test_history.TestHistoryEntry')
86-
def test_should_process_run_false(self, mock_entry):
87-
mock_entry.query.return_value.get.return_value = MagicMock()
88-
run = {'id': '1'}
89-
self.assertFalse(process_test_history.should_process_run(run))
90-
91-
@patch('process_test_history.should_process_run', return_value=True)
79+
9280
@patch('process_test_history.ndb')
9381
@patch('process_test_history.storage.Client')
9482
@patch('process_test_history.requests.get')
95-
def test_process_single_run(self, mock_get, mock_storage, mock_ndb, mock_should_process):
83+
def test_process_single_run(self, mock_get, mock_storage, mock_ndb):
9684
# Mock GCS
9785
mock_bucket = mock_storage.return_value.bucket.return_value
9886
mock_blob = mock_bucket.blob.return_value
@@ -208,11 +196,10 @@ def test_build_new_test_history_entry_uses_deterministic_key(self):
208196
self.assertEqual(res.key.id(), expected_key)
209197

210198

211-
@patch('process_test_history.should_process_run', return_value=True)
212199
@patch('process_test_history.ndb')
213200
@patch('process_test_history.storage.Client')
214201
@patch('process_test_history.requests.get')
215-
def test_process_single_run_batching(self, mock_get, mock_storage, mock_ndb, mock_should_process):
202+
def test_process_single_run_batching(self, mock_get, mock_storage, mock_ndb):
216203
# Mock GCS
217204
mock_bucket = mock_storage.return_value.bucket.return_value
218205
mock_blob = mock_bucket.blob.return_value
@@ -257,15 +244,13 @@ def fake_put_multi(ents):
257244

258245
@patch('process_test_history.ThreadPoolExecutor')
259246
@patch('process_test_history.process_single_run')
260-
@patch('process_test_history.should_process_run')
261-
def test_process_runs_parallel(self, mock_should_process, mock_process_single, mock_executor):
247+
def test_process_runs_parallel(self, mock_process_single, mock_executor):
262248
mock_exec_instance = mock_executor.return_value.__enter__.return_value
263249

264250
runs = [
265251
{'id': 1, 'browser_name': 'chrome', 'time_start': '2025-10-03T00:00:00Z'},
266252
{'id': 2, 'browser_name': 'firefox', 'time_start': '2025-10-03T00:00:00Z'},
267253
]
268-
mock_should_process.return_value = True
269254

270255
mock_future = MagicMock()
271256
mock_exec_instance.submit.return_value = mock_future

0 commit comments

Comments
 (0)