Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
c7e25d8
create recalculate_descriptors kwarg in tile.delete, re #12015
whatisgalen May 3, 2025
cf72cc9
pass recalculate_descriptors=False to tile.delete in bulk_deletion, r…
whatisgalen May 3, 2025
c825647
use recalc_desc kwarg to selectively resrce.save_descriptors in tile.…
whatisgalen May 8, 2025
e945f5d
create prefetch for resource relations in index_db optimize_resource,…
whatisgalen May 8, 2025
dd06b2c
create resource proxy model fromrelations, torelations props, re #12015
whatisgalen May 8, 2025
d52fab0
iterate through prefetch rels in resource.delete, accept fetch_relati…
whatisgalen May 8, 2025
5a41c04
leverage optimize_resource_iteration in bulk.delete_resources, re #12015
whatisgalen May 8, 2025
bdb93df
only save tile with non-empty array else pass recalc_desc=False to ti…
whatisgalen May 8, 2025
fcc1993
access resourceXresource.node_id instead of .node if .node not pre-se…
whatisgalen May 8, 2025
f402c71
create release file for 8.1.0, note for #12015
whatisgalen May 8, 2025
bf8ced7
bifurcate ResourceXResource.delete on directionality, re #12015
whatisgalen May 8, 2025
0ee21eb
resource.save calls save_descriptors, passes recalc_descriptors=False…
whatisgalen May 8, 2025
49da04a
arches primary_desc fn only gets tile from context if ctxt not None, …
whatisgalen May 8, 2025
58b60e5
fix tests setting name=str to name=json_str, re #12015
whatisgalen May 8, 2025
617e35c
optimize_resource_iteration, indexing in transaction editlog-based de…
whatisgalen May 10, 2025
65670cb
fix casting logic, outsource index_tile_deletion, re #12015
whatisgalen May 23, 2025
20ff95d
create index_tile_deletion_by_transaction(), re #12015
whatisgalen May 23, 2025
bb9616d
pass user to reverse_edit_log_entries in reverse_load, re #12015
whatisgalen May 23, 2025
81f5a92
pass user to reverse_edit_log_entries in base_import.reverse_load, re…
whatisgalen May 23, 2025
bc62f08
use admin user by default in CLI reverse, re #12015
whatisgalen May 23, 2025
ab3ffd3
pass request.user to reverse_edit_log_entries in ReverseTransaction v…
whatisgalen May 23, 2025
2afb4bb
minor cleanup in bulk_data_deletion, re #12015
whatisgalen May 23, 2025
0dacbdc
accept user kwarg in tile.delete, re #12015
whatisgalen May 23, 2025
b65c70c
pass popped kwargs to tile.delete, tile.save, re #12015
whatisgalen May 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions arches/app/etl_modules/base_data_editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
import logging
from urllib.parse import urlsplit, parse_qs
import uuid
from django.contrib.auth.models import User
from django.core.exceptions import ValidationError
from django.core.validators import URLValidator
from django.db import connection
from django.http import HttpRequest
from django.utils.decorators import method_decorator
from django.utils.translation import gettext as _
from arches.app.datatypes.datatypes import DataTypeFactory
from arches.app.models.models import GraphModel, Node, ETLModule, LoadStaging
from arches.app.models.models import GraphModel, Node, ETLModule, LoadStaging, LoadEvent
from arches.app.models.system_settings import settings
from arches.app.search.elasticsearch_dsl_builder import (
Bool,
Expand Down Expand Up @@ -51,12 +52,17 @@ def __init__(self, request=None, loadid=None):
self.node_lookup = {}

def reverse_load(self, loadid):
user = (
User.objects.get(id=self.userid)
if self.userid
else LoadEvent.objects.get(loadid=loadid).user
)
with connection.cursor() as cursor:
cursor.execute(
"""UPDATE load_event SET status = %s WHERE loadid = %s""",
("reversing", loadid),
)
resources_changed_count = reverse_edit_log_entries(loadid)
resources_changed_count = reverse_edit_log_entries(loadid, user=user)
cursor.execute(
"""UPDATE load_event SET status = %s, load_details = load_details::jsonb || ('{"resources_removed":' || %s || '}')::jsonb WHERE loadid = %s""",
("unloaded", resources_changed_count, loadid),
Expand Down
28 changes: 23 additions & 5 deletions arches/app/etl_modules/base_import_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import zipfile
from openpyxl import load_workbook

from django.contrib.auth.models import User
from django.core.files import File
from django.core.files.storage import default_storage
from django.core.files.uploadedfile import InMemoryUploadedFile
Expand All @@ -17,7 +18,7 @@
from arches.app.datatypes.datatypes import DataTypeFactory
from arches.app.etl_modules.decorators import load_data_async
from arches.app.etl_modules.save import save_to_tiles
from arches.app.models.models import ETLModule, Node
from arches.app.models.models import ETLModule, Node, LoadEvent
from arches.app.models.system_settings import settings
from arches.app.utils.decorators import user_created_transaction_match
from arches.app.utils.file_validator import FileValidator
Expand Down Expand Up @@ -62,15 +63,32 @@ def filesize_format(self, bytes):

def reverse_load(self, loadid):
with connection.cursor() as cursor:
cursor.execute(
"""SELECT status FROM load_event WHERE loadid = %s""",
[loadid],
)
original_status = cursor.fetchone()[0]
cursor.execute(
"""UPDATE load_event SET status = %s WHERE loadid = %s""",
("reversing", loadid),
)
resources_changed_count = reverse_edit_log_entries(loadid)
cursor.execute(
"""UPDATE load_event SET status = %s, load_details = load_details::jsonb || ('{"resources_removed":' || %s || '}')::jsonb WHERE loadid = %s""",
("unloaded", resources_changed_count, loadid),
user = (
User.objects.get(id=self.userid)
if self.userid
else LoadEvent.objects.get(loadid=loadid).user
)
try:
resources_changed_count = reverse_edit_log_entries(loadid, user=user)
cursor.execute(
"""UPDATE load_event SET status = %s, load_details = load_details::jsonb || ('{"resources_removed":' || %s || '}')::jsonb WHERE loadid = %s""",
("unloaded", resources_changed_count, loadid),
)
except Exception as e:
cursor.execute(
"""UPDATE load_event SET status = %s WHERE loadid = %s""",
(original_status, loadid),
)
raise e

@method_decorator(user_created_transaction_match, name="dispatch")
def reverse(self, request, **kwargs):
Expand Down
26 changes: 17 additions & 9 deletions arches/app/etl_modules/bulk_data_deletion.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from django.contrib.auth.models import User
from django.core.exceptions import ValidationError
from django.db import connection
from django.http import HttpRequest
from django.utils.translation import gettext as _
from arches.app.utils import task_management
from arches.app.etl_modules.base_data_editor import BaseBulkEditor
from arches.app.etl_modules.decorators import load_data_async
from arches.app.etl_modules.save import get_resourceids_from_search_url
Expand All @@ -16,7 +16,10 @@
from arches.app.models.system_settings import settings
from arches.app.models.tile import Tile
import arches.app.tasks as tasks
from arches.app.utils.index_database import index_resources_by_transaction
from arches.app.utils.index_database import (
index_resources_by_transaction,
optimize_resource_iteration,
)
from arches.app.utils.label_based_graph_v2 import LabelBasedGraph as LabelBasedGraphV2

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -176,8 +179,10 @@ def delete_resources(

if verbose is True:
bar = pyprind.ProgBar(deleted_count)
for resource in resources.iterator(chunk_size=2000):
resource.delete(user=user, index=False, transaction_id=loadid)
for resource in optimize_resource_iteration(resources, chunk_size=2000):
resource.delete(
user=user, index=False, transaction_id=loadid, fetch_relations=False
)
if verbose is True:
bar.update()

Expand Down Expand Up @@ -206,9 +211,12 @@ def delete_tiles(self, userid, loadid, nodegroupid, resourceids):
else:
tiles = Tile.objects.filter(nodegroup_id=nodegroupid)
for tile in tiles.iterator(chunk_size=2000):
request = HttpRequest()
request.user = user
tile.delete(request=request, index=False, transaction_id=loadid)
tile.delete(
user=user,
index=False,
transaction_id=loadid,
recalculate_descriptors=False,
)
result["success"] = True
except Exception as e:
logger.exception(e)
Expand Down Expand Up @@ -302,7 +310,7 @@ def delete(self, request):
},
}

use_celery_bulk_delete = True
celery_worker_running = task_management.check_if_celery_available()

load_details = {
"graph": graph_name,
Expand All @@ -313,7 +321,7 @@ def delete(self, request):
with connection.cursor() as cursor:
event_created = self.create_load_event(cursor, load_details)
if event_created["success"]:
if use_celery_bulk_delete:
if celery_worker_running:
response = self.run_bulk_task_async(request, self.loadid)
else:
response = self.run_bulk_task(
Expand Down
2 changes: 1 addition & 1 deletion arches/app/functions/primary_descriptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def get_primary_descriptor_from_nodes(
and config["nodegroup_id"] != ""
and config["nodegroup_id"] is not None
):
tile = context.get("tile")
tile = context.get("tile") if context else None

if not tile or tile.sortorder:
tile = (
Expand Down
27 changes: 16 additions & 11 deletions arches/app/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1303,17 +1303,22 @@ class ResourceXResource(SaveSupportsBlindOverwriteMixin, models.Model):
def delete(self, *args, **kwargs):
# update the resource-instance tile by removing any references to a deleted resource
deletedResourceId = kwargs.pop("deletedResourceId", None)
if deletedResourceId and self.tile and self.node:
newTileData = []
data = self.tile.data[str(self.node_id)]
if type(data) != list:
data = [data]
for relatedresourceItem in data:
if relatedresourceItem:
if relatedresourceItem["resourceId"] != str(deletedResourceId):
newTileData.append(relatedresourceItem)
self.tile.data[str(self.node_id)] = newTileData
self.tile.save()
if deletedResourceId and self.tile and self.node_id:
if self.from_resource_id == deletedResourceId:
self.tile.delete(index=False, recalculate_descriptors=False)
elif self.to_resource_id == deletedResourceId:
newTileData = []
data = self.tile.data[str(self.node_id)]
if type(data) != list:
data = [data]
newTileData = list(
filter(lambda x: x["resourceId"] != str(deletedResourceId), data)
)
if len(newTileData):
self.tile.data[str(self.node_id)] = newTileData
self.tile.save()
else:
self.tile.delete()

super(ResourceXResource, self).delete()

Expand Down
20 changes: 14 additions & 6 deletions arches/app/models/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ def __init__(self, *args, **kwargs):
# self.resourceinstancesecurity
# end from models.ResourceInstance
self.tiles = []
self.fromrelations = []
self.torelations = []
self.descriptor_function = None
self.serialized_graph = None
self.node_datatypes = None
Expand Down Expand Up @@ -306,8 +308,10 @@ def save(self, **kwargs):
index=False,
resource_creation=True,
transaction_id=transaction_id,
recalculate_descriptors=False,
context=context,
)
self.save_descriptors()

if index is True:
self.index(context)
Expand Down Expand Up @@ -675,7 +679,7 @@ def get_documents_to_index(

return document, terms

def delete(self, user={}, index=True, transaction_id=None):
def delete(self, user={}, index=True, transaction_id=None, fetch_relations=True):
"""
Deletes a single resource and any related indexed data

Expand Down Expand Up @@ -712,11 +716,15 @@ def delete(self, user={}, index=True, transaction_id=None):
permit_deletion = True

if permit_deletion is True:
for related_resource in models.ResourceXResource.objects.filter(
Q(from_resource_id=self.resourceinstanceid)
| Q(to_resource_id=self.resourceinstanceid)
):
related_resource.delete(deletedResourceId=self.resourceinstanceid)
if fetch_relations:
for related_resource in models.ResourceXResource.objects.filter(
Q(from_resource_id=self.resourceinstanceid)
| Q(to_resource_id=self.resourceinstanceid)
).select_related("tile"):
related_resource.delete(deletedResourceId=self.resourceinstanceid)
else:
for related_resource in self.fromrelations + self.torelations:
related_resource.delete(deletedResourceId=self.resourceinstanceid)

if index:
self.delete_index()
Expand Down
41 changes: 29 additions & 12 deletions arches/app/models/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,7 @@ def datatype_post_save_actions(self, request=None):
def save(self, **kwargs):
request = kwargs.pop("request", None)
index = kwargs.pop("index", True)
recalculate_descriptors = kwargs.pop("recalculate_descriptors", True)
user = kwargs.pop("user", None)
new_resource_created = kwargs.pop("new_resource_created", False)
resource_creation = kwargs.pop("resource_creation", False)
Expand Down Expand Up @@ -544,16 +545,20 @@ def save(self, **kwargs):
tile.parenttile = self
tile.save(
request=request,
user=user,
resource_creation=resource_creation,
index=False,
recalculate_descriptors=recalculate_descriptors,
transaction_id=transaction_id,
**kwargs,
)

resource = Resource.objects.get(pk=self.resourceinstance_id)
resource.save_descriptors(context={"tile": self})

if index:
self.index(resource=resource)
if index or recalculate_descriptors:
resource = Resource.objects.get(pk=self.resourceinstance_id)
if recalculate_descriptors:
resource.save_descriptors(context={"tile": self})
if index:
self.index(resource=resource)

def populate_missing_nodes(self):
first_node = next(iter(self.data.items()), None)
Expand All @@ -567,13 +572,24 @@ def populate_missing_nodes(self):
def delete(self, *args, **kwargs):
se = SearchEngineFactory().create()
request = kwargs.pop("request", None)
user = kwargs.pop("user", None)
index = kwargs.pop("index", True)
recalculate_descriptors = kwargs.pop("recalculate_descriptors", True)
transaction_id = kwargs.pop("transaction_id", None)
provisional_edit_log_details = kwargs.pop("provisional_edit_log_details", None)
for tile in self.tiles:
tile.delete(*args, request=request, **kwargs)
tile.delete(
*args,
request=request,
user=user,
recalculate_descriptors=recalculate_descriptors,
index=index,
transaction_id=transaction_id,
**kwargs,
)
try:
user = request.user
if user is None and request is not None:
user = request.user
user_is_reviewer = user_is_resource_reviewer(user)
except AttributeError: # no user
user = None
Expand Down Expand Up @@ -615,11 +631,12 @@ def delete(self, *args, **kwargs):
datatype = self.datatype_factory.get_instance(node.datatype)
datatype.post_tile_delete(self, nodeid, index=index)

resource = Resource.objects.get(pk=self.resourceinstance_id)
resource.save_descriptors()

if index:
self.index(resource=resource)
if index or recalculate_descriptors:
resource = Resource.objects.get(pk=self.resourceinstance_id)
if recalculate_descriptors:
resource.save_descriptors()
if index:
self.index(resource=resource)
except IntegrityError as e:
logger.error(e)

Expand Down
Loading
Loading