Skip to content

Commit 7523adf

Browse files
authored
cleaner fix delete from db (#2573)
1 parent 5042d9d commit 7523adf

File tree

3 files changed

+129
-34
lines changed

3 files changed

+129
-34
lines changed

lib/cuckoo/common/cleaners_utils.py

+10-9
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656
mongo_update_one,
5757
mongo_update_many,
5858
mongo_delete_calls_by_task_id_in_range,
59-
mongo_delete_data_range
59+
mongo_delete_data_range,
6060
)
6161
elif repconf.elasticsearchdb.enabled:
6262
from dev_utils.elasticsearchdb import all_docs, delete_analysis_and_related_calls, get_analysis_index
@@ -238,7 +238,10 @@ def delete_bulk_tasks_n_folders(ids: list, delete_mongo: bool, delete_db_tasks=F
238238
sys.exit()
239239
mongo_delete_data(ids_tmp)
240240
if delete_db_tasks:
241-
db.delete_tasks(ids_tmp)
241+
try:
242+
db.delete_tasks(task_ids=ids_tmp)
243+
except Exception as e:
244+
log.error("Failed to delete tasks from DB: %s", str(e))
242245

243246

244247
def fail_job(tid):
@@ -352,9 +355,7 @@ def cuckoo_clean_failed_tasks():
352355
# ToDo rewrite for bulk delete
353356
ids = [task.id for task in tasks_list]
354357
delete_bulk_tasks_n_folders(ids, delete_mongo=True)
355-
tasks_list = db.list_tasks(
356-
status=f"{TASK_FAILED_ANALYSIS}|{TASK_FAILED_PROCESSING}|{TASK_FAILED_REPORTING}|{TASK_RECOVERED}", delete=True
357-
)
358+
tasks_list = db.delete_tasks(status=f"{TASK_FAILED_ANALYSIS}|{TASK_FAILED_PROCESSING}|{TASK_FAILED_REPORTING}|{TASK_RECOVERED}")
358359

359360

360361
def cuckoo_clean_bson_suri_logs():
@@ -441,7 +442,7 @@ def cuckoo_clean_lower_score(malscore: int):
441442
log.info("number of matching records %s", len(id_arr))
442443
# resolver_pool.map(lambda tid: delete_data(tid), id_arr)
443444
if id_arr:
444-
delete_bulk_tasks_n_folders(id_arr, delete_mongo=True)
445+
delete_bulk_tasks_n_folders(id_arr, delete_mongo=True, delete_db_tasks=True)
445446

446447

447448
def tmp_clean_before(timerange: str):
@@ -537,7 +538,7 @@ def cuckoo_clean_before(args: dict):
537538
mongo_delete_data_range(range_end=highest_id)
538539
# cleanup_files_collection_by_id(highest_id)
539540

540-
db.list_tasks(added_before=added_before, category=category, delete=True)
541+
db.delete_tasks(added_before=added_before, category=category)
541542

542543

543544
def cuckoo_clean_sorted_pcap_dump():
@@ -618,7 +619,7 @@ def cuckoo_clean_pending_tasks(timerange: str = None, delete: bool = False):
618619
# clean_handler = delete_data if delete else fail_job
619620
# resolver_pool.map(lambda tid: clean_handler(pending_tasks), pending_tasks)
620621
if delete:
621-
db.list_tasks(status=TASK_PENDING, added_before=before_time, delete=True)
622+
db.delete_tasks(status=TASK_PENDING, added_before=before_time)
622623
else:
623624
resolver_pool.map(lambda tid: fail_job(pending_tasks), pending_tasks)
624625

@@ -639,7 +640,7 @@ def cuckoo_clean_range_tasks(range_: str):
639640
ids: list[int] = [task.id for task in pending_tasks]
640641
delete_bulk_tasks_n_folders(ids, delete_mongo=False)
641642
mongo_delete_data(ids)
642-
db.list_tasks(id_after=(start - 1), id_before=(end + 1), delete=True)
643+
db.delete_tasks(id_after=(start - 1), id_before=(end + 1))
643644

644645

645646
def delete_unused_file_data_in_mongo():

lib/cuckoo/core/database.py

+116-22
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@
125125
if repconf.mongodb.enabled:
126126
from dev_utils.mongodb import mongo_find
127127
if repconf.elasticsearchdb.enabled:
128-
from dev_utils.elasticsearchdb import elastic_handler # , get_analysis_index
128+
from dev_utils.elasticsearchdb import elastic_handler # , get_analysis_index
129129

130130
es = elastic_handler
131131

@@ -2020,7 +2020,6 @@ def list_tasks(
20202020
include_hashes=False,
20212021
user_id=None,
20222022
for_update=False,
2023-
delete=False,
20242023
) -> List[Task]:
20252024
"""Retrieve list of task.
20262025
@param limit: specify a limit of entries.
@@ -2042,7 +2041,6 @@ def list_tasks(
20422041
@param include_hashes: return task+samples details
20432042
@param user_id: list of tasks submitted by user X
20442043
@param for_update: If True, use "SELECT FOR UPDATE" in order to create a row-level lock on the selected tasks.
2045-
@param delete: delete selected tasks
20462044
@return: list of tasks.
20472045
"""
20482046
tasks: List[Task] = []
@@ -2090,10 +2088,6 @@ def list_tasks(
20902088
if user_id is not None:
20912089
search = search.filter(Task.user_id == user_id)
20922090

2093-
if delete:
2094-
search.delete()
2095-
return []
2096-
20972091
if order_by is not None and isinstance(order_by, tuple):
20982092
search = search.order_by(*order_by)
20992093
elif order_by is not None:
@@ -2108,6 +2102,121 @@ def list_tasks(
21082102

21092103
return tasks
21102104

2105+
def delete_task(self, task_id):
2106+
"""Delete information on a task.
2107+
@param task_id: ID of the task to query.
2108+
@return: operation status.
2109+
"""
2110+
task = self.session.get(Task, task_id)
2111+
if task is None:
2112+
return False
2113+
self.session.delete(task)
2114+
return True
2115+
2116+
def delete_tasks(
2117+
self,
2118+
category=None,
2119+
status=None,
2120+
sample_id=None,
2121+
not_status=None,
2122+
completed_after=None,
2123+
added_before=None,
2124+
id_before=None,
2125+
id_after=None,
2126+
options_like=False,
2127+
options_not_like=False,
2128+
tags_tasks_like=False,
2129+
task_ids=False,
2130+
user_id=None,
2131+
):
2132+
"""Delete tasks based on parameters. If no filters are provided, no tasks will be deleted.
2133+
2134+
Args:
2135+
category: filter by category
2136+
status: filter by task status
2137+
sample_id: filter tasks for a sample
2138+
not_status: exclude this task status from filter
2139+
completed_after: only list tasks completed after this timestamp
2140+
added_before: tasks added before a specific timestamp
2141+
id_before: filter by tasks which is less than this value
2142+
id_after: filter by tasks which is greater than this value
2143+
options_like: filter tasks by specific option inside of the options
2144+
options_not_like: filter tasks by specific option not inside of the options
2145+
tags_tasks_like: filter tasks by specific tag
2146+
task_ids: list of task_id
2147+
user_id: list of tasks submitted by user X
2148+
2149+
Returns:
2150+
bool: True if the operation was successful (including no tasks to delete), False otherwise.
2151+
"""
2152+
filters_applied = False
2153+
search = self.session.query(Task)
2154+
2155+
if status:
2156+
if "|" in status:
2157+
search = search.filter(Task.status.in_(status.split("|")))
2158+
else:
2159+
search = search.filter(Task.status == status)
2160+
filters_applied = True
2161+
if not_status:
2162+
search = search.filter(Task.status != not_status)
2163+
filters_applied = True
2164+
if category:
2165+
search = search.filter(Task.category.in_([category] if isinstance(category, str) else category))
2166+
filters_applied = True
2167+
if sample_id is not None:
2168+
search = search.filter(Task.sample_id == sample_id)
2169+
filters_applied = True
2170+
if id_before is not None:
2171+
search = search.filter(Task.id < id_before)
2172+
filters_applied = True
2173+
if id_after is not None:
2174+
search = search.filter(Task.id > id_after)
2175+
filters_applied = True
2176+
if completed_after:
2177+
search = search.filter(Task.completed_on > completed_after)
2178+
filters_applied = True
2179+
if added_before:
2180+
search = search.filter(Task.added_on < added_before)
2181+
filters_applied = True
2182+
if options_like:
2183+
# Replace '*' wildcards with wildcard for sql
2184+
options_like = options_like.replace("*", "%")
2185+
search = search.filter(Task.options.like(f"%{options_like}%"))
2186+
filters_applied = True
2187+
if options_not_like:
2188+
# Replace '*' wildcards with wildcard for sql
2189+
options_not_like = options_not_like.replace("*", "%")
2190+
search = search.filter(Task.options.notlike(f"%{options_not_like}%"))
2191+
filters_applied = True
2192+
if tags_tasks_like:
2193+
search = search.filter(Task.tags_tasks.like(f"%{tags_tasks_like}%"))
2194+
filters_applied = True
2195+
if task_ids:
2196+
search = search.filter(Task.id.in_(task_ids))
2197+
filters_applied = True
2198+
if user_id is not None:
2199+
search = search.filter(Task.user_id == user_id)
2200+
filters_applied = True
2201+
2202+
if not filters_applied:
2203+
log.warning("No filters provided for delete_tasks. No tasks will be deleted.")
2204+
return True # Indicate success as no deletion was requested/needed
2205+
2206+
try:
2207+
# Perform the deletion and get the count of deleted rows
2208+
deleted_count = search.delete(synchronize_session=False)
2209+
log.info("Deleted %d tasks matching the criteria.", deleted_count)
2210+
# The commit is handled by the calling context (e.g., `with db.session.begin():`)
2211+
return True
2212+
except Exception as e:
2213+
log.error("Error deleting tasks: %s", str(e))
2214+
# Rollback might be needed if this function is called outside a `with db.session.begin():`
2215+
# but typically it should be called within one.
2216+
# self.session.rollback()
2217+
return False
2218+
2219+
21112220
def check_tasks_timeout(self, timeout):
21122221
"""Find tasks which were added_on more than timeout ago and clean"""
21132222
tasks: List[Task] = []
@@ -2212,21 +2321,6 @@ def add_statistics_to_task(self, task_id, details): # pragma: no cover
22122321
task.anti_issues = details["anti_issues"]
22132322
return True
22142323

2215-
def delete_task(self, task_id):
2216-
"""Delete information on a task.
2217-
@param task_id: ID of the task to query.
2218-
@return: operation status.
2219-
"""
2220-
task = self.session.get(Task, task_id)
2221-
if task is None:
2222-
return False
2223-
self.session.delete(task)
2224-
return True
2225-
2226-
def delete_tasks(self, ids):
2227-
self.session.query(Task).filter(Task.id.in_(ids)).delete(synchronize_session=False)
2228-
return True
2229-
22302324
def view_sample(self, sample_id):
22312325
"""Retrieve information on a sample given a sample id.
22322326
@param sample_id: ID of the sample to query.

tests/test_database.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -909,12 +909,12 @@ def test_delete_tasks(self, db: _Database, temp_filename):
909909
t2 = db.add_path(temp_filename, tags="x86")
910910
t3 = db.add_url("https://3.com")
911911
with db.session.begin():
912-
assert db.delete_tasks([])
913-
assert db.delete_tasks([t1, t2, t3 + 1])
912+
assert db.delete_tasks(task_ids=[])
913+
assert db.delete_tasks(task_ids=[t1, t2, t3 + 1])
914914
tasks = db.session.query(Task).all()
915915
assert len(tasks) == 1
916916
assert tasks[0].id == t3
917-
assert db.delete_tasks([t1, t2])
917+
assert db.delete_tasks(task_ids=[t1, t2])
918918
tasks = db.session.query(Task).all()
919919
assert len(tasks) == 1
920920
assert tasks[0].id == t3

0 commit comments

Comments
 (0)