From bebe52dc262adeb36ae8d692b9072958b34dcef0 Mon Sep 17 00:00:00 2001 From: Domenico DiNicola Date: Tue, 14 Apr 2026 16:03:26 +0200 Subject: [PATCH] fix-aurora-clean-task --- src/hope/contrib/aurora/celery_tasks.py | 33 +++++++++---- .../test_clean_old_record_files_task.py | 46 +++++++++++++++++++ 2 files changed, 71 insertions(+), 8 deletions(-) diff --git a/src/hope/contrib/aurora/celery_tasks.py b/src/hope/contrib/aurora/celery_tasks.py index 56a4ae70727..84c20c3b85a 100644 --- a/src/hope/contrib/aurora/celery_tasks.py +++ b/src/hope/contrib/aurora/celery_tasks.py @@ -1,3 +1,4 @@ +from datetime import timedelta import logging from typing import TYPE_CHECKING, Any, Optional @@ -125,14 +126,30 @@ def automate_rdi_creation_task( @app.task @log_start_and_end @sentry_tags -def clean_old_record_files_task() -> None: - """Task to clean (sets to null) Record's files field.""" - from datetime import timedelta - +def clean_old_record_files_task(batch_size: int = 100) -> None: + """Task to remove old imported aurora records.""" try: time_threshold = timezone.now() - timedelta(config.CLEARING_RECORD_FILES_TIMEDELTA) - Record.objects.filter(timestamp__lt=time_threshold, status=Record.STATUS_IMPORTED).delete() - logger.info("Record's files have benn successfully cleared") - except Exception as e: - logger.warning(e) + qs = Record.objects.filter(timestamp__lt=time_threshold, status=Record.STATUS_IMPORTED) + total = qs.count() + total_batches = (total + batch_size - 1) // batch_size + + batch_num = 0 + total_deleted = 0 + + while True: + batch = list(qs.values_list("pk", flat=True)[:batch_size]) + if not batch: + break + + deleted_count, _ = Record.objects.filter(pk__in=batch).delete() + batch_num += 1 + total_deleted += deleted_count + + logger.info(f"Batch {batch_num}/{total_batches} ({deleted_count} deleted, total: {total_deleted}/{total})") + + logger.info("Record files have been successfully cleared") + + except Exception: # noqa + logger.exception("Error cleaning old record files") raise diff --git a/tests/unit/apps/aurora/test_clean_old_record_files_task.py b/tests/unit/apps/aurora/test_clean_old_record_files_task.py index d6d70be9361..ceeebb46c15 100644 --- a/tests/unit/apps/aurora/test_clean_old_record_files_task.py +++ b/tests/unit/apps/aurora/test_clean_old_record_files_task.py @@ -1,4 +1,5 @@ from datetime import timedelta +from typing import TYPE_CHECKING from django.utils import timezone import pytest @@ -7,6 +8,9 @@ from hope.contrib.aurora.celery_tasks import clean_old_record_files_task from hope.contrib.aurora.models import Record +if TYPE_CHECKING: + from pytest_mock import MockerFixture + pytestmark = pytest.mark.django_db @@ -50,3 +54,45 @@ def test_clean_old_record_files_task(record_set: dict[str, Record]) -> None: assert record_set["recent_imported"].id in remaining_ids assert record_set["old_error"].id in remaining_ids assert record_set["old_to_import"].id in remaining_ids + + +def test_clean_old_record_files_task_empty() -> None: + # No records at all + clean_old_record_files_task() + assert Record.objects.count() == 0 + + +def test_clean_old_record_files_task_batching(mocker: "MockerFixture") -> None: + now = timezone.now() + # Create 5 records that should be deleted + RecordFactory.create_batch(5, status=Record.STATUS_IMPORTED, timestamp=now - timedelta(days=100)) + + # Run with batch_size=2, should take 3 batches (2+2+1) + clean_old_record_files_task(batch_size=2) + + assert Record.objects.count() == 0 + + +def test_clean_old_record_files_task_logging(mocker: "MockerFixture") -> None: + now = timezone.now() + RecordFactory.create_batch(3, status=Record.STATUS_IMPORTED, timestamp=now - timedelta(days=100)) + mock_logger = mocker.patch("hope.contrib.aurora.celery_tasks.logger") + + clean_old_record_files_task(batch_size=2) + + # Should log 2 batches and one final message + assert mock_logger.info.call_count == 3 + mock_logger.info.assert_any_call("Batch 1/2 (2 deleted, total: 2/3)") + mock_logger.info.assert_any_call("Batch 2/2 (1 deleted, total: 3/3)") + mock_logger.info.assert_any_call("Record files have been successfully cleared") + + +def test_clean_old_record_files_task_error_handling(mocker: "MockerFixture") -> None: + mock_logger = mocker.patch("hope.contrib.aurora.celery_tasks.logger") + # Force an exception by mocking timezone.now to raise something + mocker.patch("hope.contrib.aurora.celery_tasks.timezone.now", side_effect=Exception("Database error")) + + with pytest.raises(Exception, match="Database error"): + clean_old_record_files_task() + + mock_logger.exception.assert_called_once_with("Error cleaning old record files")