Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
825ab46
Add a new treeDefault import feature
CarolineDenis Apr 18, 2025
069cd26
Remove import
CarolineDenis Apr 18, 2025
c22a999
init create_default_trees_view
acwhite211 May 27, 2025
8360e83
create_default_trees_view api change
acwhite211 May 29, 2025
33f73d3
add create_default_trees_view fixes
acwhite211 May 30, 2025
435e9ac
Add api to onClick fct
CarolineDenis May 30, 2025
bae30d6
init create_default_trees_task
acwhite211 Jun 12, 2025
a0f9f9e
create_default_trees_task fix
acwhite211 Jun 12, 2025
df5003c
bypass tree validation to speedup default tree upload
acwhite211 Jun 12, 2025
d4d2c5d
Merge branch 'main' into issue-6294
acwhite211 Jun 13, 2025
93617ac
Lint code with ESLint and Prettier
acwhite211 Jun 13, 2025
99d9de5
Merge branch 'main' into issue-6294
acwhite211 Jun 13, 2025
5fc1df6
Lint code with ESLint and Prettier
acwhite211 Jun 13, 2025
1b79f85
Merge branch 'main' into issue-6294
acwhite211 Jun 13, 2025
5b84226
Lint code with ESLint and Prettier
acwhite211 Jun 13, 2025
7401c57
Merge remote-tracking branch 'origin/main' into issue-6294
CarolineDenis Aug 19, 2025
9f9994f
test
CarolineDenis Aug 19, 2025
821053c
Lint code with ESLint and Prettier
CarolineDenis Aug 19, 2025
832b665
Merge remote-tracking branch 'origin/main' into issue-6294
CarolineDenis Sep 2, 2025
2be475e
Lint code with ESLint and Prettier
CarolineDenis Sep 2, 2025
441d301
Use frontend collection name
CarolineDenis Sep 3, 2025
ab79af9
Lint code with ESLint and Prettier
CarolineDenis Sep 3, 2025
2a31190
Merge branch 'main' into issue-6294
acwhite211 Oct 27, 2025
43b2c81
Merge branch 'main' into issue-6294
alesan99 Nov 10, 2025
05df6d9
Add notification formatting to frontend
alesan99 Nov 10, 2025
c141cc4
Update API schema and name
alesan99 Nov 13, 2025
d8cc3bf
WIP Poll for tree creation progress
alesan99 Nov 13, 2025
3e78902
Add label to progress bar
alesan99 Nov 13, 2025
efd5425
Fix task error handling
alesan99 Nov 17, 2025
f3e26b6
Fix continous progress polling after errors
alesan99 Nov 17, 2025
c5e55af
Generalize initial taxon tree creation code
alesan99 Nov 17, 2025
4da498d
Generalize add_default_taxon function
alesan99 Nov 24, 2025
366e9c5
WIP Add Geography and GeologicTimePeriod schemas
alesan99 Nov 25, 2025
317a0df
Merge branch 'main' into issue-6294
alesan99 Nov 26, 2025
f98d6ed
Fix geography and geologictimeperiod mapping
alesan99 Nov 26, 2025
5e718f1
Fix: Always get treedef by name
alesan99 Dec 1, 2025
58720bf
WIP retry failed rows
alesan99 Dec 1, 2025
5ac489f
Improve readability
alesan99 Dec 1, 2025
2b5ff77
WIP accept row_count and mapping_url
alesan99 Dec 1, 2025
e4122c5
Improve success notification
alesan99 Dec 2, 2025
269f1c0
Add localization
alesan99 Dec 2, 2025
85e43e5
Fetch tree mapping from url
alesan99 Dec 3, 2025
7271e8b
Lint code with ESLint and Prettier
alesan99 Dec 3, 2025
452a7e5
Remove non-background execution
alesan99 Dec 3, 2025
f9cea3f
Merge branch 'issue-6294' of https://github.com/specify/specify7 into…
alesan99 Dec 3, 2025
6d36cf3
Merge branch 'main' into issue-6294
alesan99 Dec 3, 2025
a817c4d
Fix provided colleciton name not being used
alesan99 Dec 9, 2025
43368ed
Add stop button
alesan99 Dec 17, 2025
c17ba87
Lint code with ESLint and Prettier
alesan99 Dec 17, 2025
96422c0
Fix task_id in notifications
alesan99 Dec 17, 2025
7736f98
Merge branch 'issue-6294' of https://github.com/specify/specify7 into…
alesan99 Dec 17, 2025
10a1df7
Lint code with ESLint and Prettier
alesan99 Dec 17, 2025
6b4a5a1
Use tree title as tree name
alesan99 Dec 22, 2025
4048cb2
Merge branch 'issue-6294' of https://github.com/specify/specify7 into…
alesan99 Dec 22, 2025
d2f110d
Merge branch 'main' into issue-6294
alesan99 Dec 22, 2025
2c74aed
Lint code with ESLint and Prettier
alesan99 Dec 22, 2025
d6acbcb
Add comments for lines_iter
alesan99 Dec 22, 2025
5329412
Merge branch 'issue-6294' of https://github.com/specify/specify7 into…
alesan99 Dec 22, 2025
ae1690e
Use tree mapping for rank count
alesan99 Dec 23, 2025
eeee9d4
Update tree rank debug logs
alesan99 Dec 23, 2025
4f4921a
Get rank by column name
alesan99 Dec 23, 2025
fbfc75e
Add default isenforced and isinfullname values
alesan99 Dec 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions specifyweb/backend/trees/tree_mutations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@

from specifyweb.backend.permissions.permissions import PermissionTarget, PermissionTargetAction


class TaxonMutationPT(PermissionTarget):
    """Permission target for the "/tree/edit/taxon" resource.

    Each attribute after ``resource`` declares one action on that resource
    (merge, move, synonymize, desynonymize, repair) as a
    ``PermissionTargetAction``.
    """
    resource = "/tree/edit/taxon"
    merge = PermissionTargetAction()
    move = PermissionTargetAction()
    synonymize = PermissionTargetAction()
    desynonymize = PermissionTargetAction()
    repair = PermissionTargetAction()


class GeographyMutationPT(PermissionTarget):
    """Permission target for the "/tree/edit/geography" resource.

    Each attribute after ``resource`` declares one action on that resource
    (merge, move, synonymize, desynonymize, repair) as a
    ``PermissionTargetAction``.
    """
    resource = "/tree/edit/geography"
    merge = PermissionTargetAction()
    move = PermissionTargetAction()
    synonymize = PermissionTargetAction()
    desynonymize = PermissionTargetAction()
    repair = PermissionTargetAction()


class StorageMutationPT(PermissionTarget):
    """Permission target for the "/tree/edit/storage" resource.

    Each attribute after ``resource`` declares one action on that resource
    as a ``PermissionTargetAction``. Unlike the other tree permission
    targets in this file, storage also declares ``bulk_move``.
    """
    resource = "/tree/edit/storage"
    merge = PermissionTargetAction()
    move = PermissionTargetAction()
    bulk_move = PermissionTargetAction()
    synonymize = PermissionTargetAction()
    desynonymize = PermissionTargetAction()
    repair = PermissionTargetAction()


class GeologictimeperiodMutationPT(PermissionTarget):
    """Permission target for the "/tree/edit/geologictimeperiod" resource.

    Each attribute after ``resource`` declares one action on that resource
    (merge, move, synonymize, desynonymize, repair) as a
    ``PermissionTargetAction``.
    """
    resource = "/tree/edit/geologictimeperiod"
    merge = PermissionTargetAction()
    move = PermissionTargetAction()
    synonymize = PermissionTargetAction()
    desynonymize = PermissionTargetAction()
    repair = PermissionTargetAction()


class LithostratMutationPT(PermissionTarget):
    """Permission target for the "/tree/edit/lithostrat" resource.

    Each attribute after ``resource`` declares one action on that resource
    (merge, move, synonymize, desynonymize, repair) as a
    ``PermissionTargetAction``.
    """
    resource = "/tree/edit/lithostrat"
    merge = PermissionTargetAction()
    move = PermissionTargetAction()
    synonymize = PermissionTargetAction()
    desynonymize = PermissionTargetAction()
    repair = PermissionTargetAction()

class TectonicunitMutationPT(PermissionTarget):
    """Permission target for the "/tree/edit/tectonicunit" resource.

    Each attribute after ``resource`` declares one action on that resource
    (merge, move, synonymize, desynonymize, repair) as a
    ``PermissionTargetAction``.
    """
    resource = "/tree/edit/tectonicunit"
    merge = PermissionTargetAction()
    move = PermissionTargetAction()
    synonymize = PermissionTargetAction()
    desynonymize = PermissionTargetAction()
    repair = PermissionTargetAction()

def perm_target(tree):
    """Return the mutation permission-target class for the given tree name.

    ``tree`` must be one of the lower-case keys listed below; any other
    value raises ``KeyError``.
    """
    targets_by_tree = {
        'taxon': TaxonMutationPT,
        'geography': GeographyMutationPT,
        'storage': StorageMutationPT,
        'geologictimeperiod': GeologictimeperiodMutationPT,
        'lithostrat': LithostratMutationPT,
        'tectonicunit': TectonicunitMutationPT,
    }
    return targets_by_tree[tree]
5 changes: 5 additions & 0 deletions specifyweb/backend/trees/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,9 @@
re_path(r'^(?P<treedef>\d+)/(?P<parentid>\w+)/(?P<sortfield>\w+)/$', views.tree_view),
path('repair/', views.repair_tree),
])),

# Create new trees
path('create_default_tree/', views.create_default_tree_view),
re_path(r'^create_default_tree/status/(?P<task_id>[^/]+)/$', views.default_tree_upload_status),
re_path(r'^create_default_tree/abort/(?P<task_id>[^/]+)/$', views.abort_default_tree_creation),
]
272 changes: 270 additions & 2 deletions specifyweb/backend/trees/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,21 @@
from typing import Tuple, List
from typing import Any, Callable, Dict, Iterator, Optional
import json
import requests
import csv
import time
from requests.exceptions import ChunkedEncodingError, ConnectionError

from django.db import transaction
from django.db.models import Q, Count, Model

from specifyweb.backend.notifications.models import Message
from specifyweb.celery_tasks import LogErrorsTask, app
import specifyweb.specify.models as spmodels
from specifyweb.specify.datamodel import datamodel, Table

import logging
logger = logging.getLogger(__name__)

def lookup(tree: str) -> str:
    """Return the lower-cased tree name with ``'treedef'`` appended.

    Example: ``lookup("Taxon")`` returns ``"taxontreedef"``.
    """
    return tree.lower() + 'treedef'

# Lower-case tree type names. create_default_tree_task checks membership in
# this set to decide whether a requested name is used as the tree type.
SPECIFY_TREES = {"taxon", "storage", "geography", "geologictimeperiod", "lithostrat", 'tectonicunit'}
Expand Down Expand Up @@ -84,4 +96,260 @@ def get_models(name: str):
tree_rank_model = get_treedefitem_model(name)
tree_node_model = getattr(spmodels, name.lower().title())

return tree_def_model, tree_rank_model, tree_node_model
return tree_def_model, tree_rank_model, tree_node_model

def initialize_default_tree(tree_type: str, discipline, tree_name: str, rank_names_lst: list):
    """Create a new, empty tree: its definition, its ranks and a "Root" node.

    The requested ``tree_name`` is suffixed with ``_1``, ``_2``, ... until no
    tree definition with that name exists. One rank is created per entry of
    ``rank_names_lst``, with rank ids 0, 10, 20, ... in list order.

    Returns the name the tree definition was actually stored under.
    """
    with transaction.atomic():
        tree_def_model, tree_rank_model, tree_node_model = get_models(tree_type)

        # Uniquify name: probe the plain name first, then numbered variants.
        candidate_name = tree_name
        suffix = 0
        while tree_def_model.objects.filter(name=candidate_name).exists():
            suffix += 1
            candidate_name = f"{tree_name}_{suffix}"

        # Create tree definition
        tree_def, _created = tree_def_model.objects.get_or_create(
            name=candidate_name,
            discipline=discipline,
        )

        # Create tree ranks. Only the first rank (id 0) is enforced, and only
        # the last three ranks are flagged as part of the full name.
        full_name_start = len(rank_names_lst) - 3
        rank_rows = [
            tree_rank_model(
                treedef=tree_def,
                name=rank_name,
                title=rank_name.capitalize(),  # TODO: allow rank name configuration
                rankid=position * 10,
                isenforced=(position == 0),
                isinfullname=(position >= full_name_start),
            )
            for position, rank_name in enumerate(rank_names_lst)
        ]
        if rank_rows:
            tree_rank_model.objects.bulk_create(rank_rows, ignore_conflicts=False)

        # Create root node
        # TODO: Avoid having duplicated code from add_root endpoint
        root_rank = tree_rank_model.objects.get(treedef=tree_def, rankid=0)
        tree_node_model.objects.get_or_create(
            name="Root",
            fullname="Root",
            nodenumber=1,
            definition=tree_def,
            definitionitem=root_rank,
            parent=None,
        )

        return tree_def.name

def add_default_tree_record(tree_type: str, discipline, row: dict, tree_name: str, tree_cfg: dict[str, Any]):
    """
    Insert the tree nodes described by one CSV row.

    Walks ``tree_cfg['ranks']`` in order. Each entry is a one-key mapping
    ``{rank_column: {csv_column: model_field, ...}}``. For every rank whose
    column has a non-empty value in ``row``, the node with that name under the
    current parent is looked up, or created when missing, and then becomes the
    parent for the next rank. Ranks with an empty cell are skipped, so the next
    populated rank attaches to the last populated one.

    Parameters:
        tree_type: tree name passed to ``get_models``.
        discipline: unused here; kept for signature compatibility.
        row: CSV row keyed by column name.
        tree_name: name of the tree definition to insert into.
        tree_cfg: mapping with a ``'ranks'`` list as described above.
    """
    tree_def_model, tree_rank_model, tree_node_model = get_models(tree_type)
    tree_def = tree_def_model.objects.get(name=tree_name)
    parent = tree_node_model.objects.get(name='Root', fullname='Root', definition=tree_def)

    # The fallback rank id is derived from the rank's position in the mapping
    # (10, 20, 30, ...), not from how many ranks had a value in this row.
    # Otherwise a blank cell would shift every later fallback id down and
    # could reuse the id of an earlier rank.
    for position, rank_map in enumerate(tree_cfg['ranks'], start=1):
        rank = next(iter(rank_map))
        fields_map = rank_map[rank]

        value = row.get(rank)
        if not value:
            continue

        # Extra model fields for this node, taken from the other mapped
        # columns that have a value in this row.
        defaults = {
            model_field: row[csv_col]
            for csv_col, model_field in fields_map.items()
            if csv_col != rank and row.get(csv_col)
        }

        # Get the rank by the column name.
        # It should already exist by this point, but worst case it will be generated here.
        treedef_item, _ = tree_rank_model.objects.get_or_create(
            name=rank,
            treedef=tree_def,
            defaults={
                'title': rank.capitalize(),
                'rankid': position * 10,
            }
        )

        obj = tree_node_model.objects.filter(
            name=value,
            fullname=value,
            definition=tree_def,
            definitionitem=treedef_item,
            parent=parent,
        ).first()
        if obj is None:
            data = {
                'name': value,
                'fullname': value,
                'definition': tree_def,
                'definitionitem': treedef_item,
                'parent': parent,
                'rankid': treedef_item.rankid,
                **defaults
            }
            obj = tree_node_model(**data)
            obj.save(skip_tree_extras=True)

        parent = obj

@app.task(base=LogErrorsTask, bind=True)
def create_default_tree_task(self, url: str, discipline_id: int, tree_discipline_name: str, specify_collection_id: int,
                             specify_user_id: int, tree_cfg: dict, row_count: Optional[int], tree_name: str):
    """Background task: build a default tree from the CSV at ``url``.

    Posts a "running" notification to the user, streams the CSV and inserts
    every row inside one transaction while reporting progress through the
    task state, then posts a "completed" notification. On any exception a
    "failed" notification is posted and the exception is re-raised.
    """
    logger.info(f'starting task {str(self.request.id)}')

    specify_user = spmodels.Specifyuser.objects.get(id=specify_user_id)
    discipline = spmodels.Discipline.objects.get(id=discipline_id)
    task_id = str(self.request.id)

    def notify(message_type: str) -> None:
        # Reads tree_name at call time, so later notifications carry the
        # final (uniquified) name once set_tree has run.
        Message.objects.create(
            user=specify_user,
            content=json.dumps({
                'type': message_type,
                'name': tree_name,
                'taskid': task_id,
                'collection_id': specify_collection_id,
            })
        )

    notify('create-default-tree-running')

    current = 0
    total = 1

    def progress(cur: int, additional_total: int=0) -> None:
        # Advance the counters (current never exceeds total) and publish them.
        nonlocal current, total
        total += additional_total
        current = min(current + cur, total)
        self.update_state(state='RUNNING', meta={'current': current, 'total': total})

    def set_tree(name: str) -> None:
        # Final tree name after being made unique.
        nonlocal tree_name
        tree_name = name

    try:
        # Anything that is not a known tree name is treated as a taxon tree.
        tree_type = tree_discipline_name if tree_discipline_name in SPECIFY_TREES else 'taxon'

        rank_count = len(tree_cfg['ranks'])

        total_rows = row_count - 2 if row_count else 0
        progress(0, total_rows)
        with transaction.atomic():
            rows = stream_csv_from_url(url, discipline, rank_count, tree_type, tree_name, set_tree)
            for row in rows:
                add_default_tree_record(tree_type, discipline, row, tree_name, tree_cfg)
                progress(1, 0)
    except Exception:
        notify('create-default-tree-failed')
        raise
    else:
        notify('create-default-tree-completed')

def stream_csv_from_url(url: str, discipline, rank_count: int, tree_type: str, initial_tree_name: str, set_tree: Callable[[str], None]) -> Iterator[Dict[str, str]]:
    """
    Stream a tree CSV from a URL, create the empty tree, then yield each row.

    This is a generator, so nothing runs until it is iterated. On first
    iteration it reads the CSV header, takes the first ``rank_count`` column
    names as rank names (with "Root" prepended), creates the tree through
    ``initialize_default_tree`` and passes the resulting tree name to
    ``set_tree``. After that every CSV row is yielded as a dict.

    The download is resumed with an HTTP ``Range`` request, up to
    ``max_retries`` times, when the connection drops mid-stream.

    Raises:
        ValueError: the CSV has no header row.
        requests.exceptions.RequestException: HTTP error, or the connection
            kept failing after all retries.
    """
    chunk_size = 8192
    max_retries = 5

    def lines_iter() -> Iterator[str]:
        # Streams data from the server in -chunks-, yields -lines-.
        buffer = b""
        bytes_downloaded = 0
        retries = 0

        # Ask for the un-encoded body. bytes_downloaded counts the bytes
        # handed out by iter_content (which are content-decoded), while Range
        # offsets refer to the bytes as transferred. The two only agree when
        # no content encoding (gzip/deflate) is applied.
        headers = {'Accept-Encoding': 'identity'}
        while True:
            # Request data starting from the last downloaded bytes
            resuming = bytes_downloaded > 0
            if resuming:
                headers['Range'] = f'bytes={bytes_downloaded}-'

            try:
                with requests.get(url, stream=True, timeout=(5, 30), headers=headers) as resp:
                    resp.raise_for_status()
                    # 206 means the server honoured the Range header. Any
                    # other success status means it restarted from byte 0, so
                    # the part already consumed must be discarded or rows
                    # would be duplicated.
                    bytes_to_skip = bytes_downloaded if resuming and resp.status_code != 206 else 0
                    for chunk in resp.iter_content(chunk_size=chunk_size):
                        if bytes_to_skip:
                            discard = min(bytes_to_skip, len(chunk))
                            chunk = chunk[discard:]
                            bytes_to_skip -= discard
                        if not chunk:
                            continue
                        buffer += chunk
                        bytes_downloaded += len(chunk)

                        # Extract all complete lines; keep the unterminated
                        # tail in the buffer for the next chunk.
                        *complete_lines, buffer = buffer.split(b'\n')
                        for line in complete_lines:
                            yield (line + b'\n').decode('utf-8-sig', errors='replace')

                    if buffer:
                        # yield last line
                        yield buffer.decode('utf-8-sig', errors='replace')
                    return
            except (ChunkedEncodingError, ConnectionError):
                # Trigger retry with a linearly growing pause.
                if retries < max_retries:
                    retries += 1
                    logger.warning(
                        "Download of %s interrupted after %s bytes; retry %s of %s",
                        url, bytes_downloaded, retries, max_retries,
                    )
                    time.sleep(0.5*retries)
                    continue
                raise

    reader = csv.DictReader(lines_iter())

    fieldnames = reader.fieldnames
    if fieldnames is None:
        # DictReader reports None when the input has no rows at all.
        raise ValueError(f"CSV at {url} is empty: no header row found")

    rank_names_lst = ["Root", *fieldnames[:rank_count]]  # Add Root rank

    logger.debug("Creating default tree with the following %s ranks:", rank_count)
    logger.debug(rank_names_lst)

    tree_name = initialize_default_tree(tree_type, discipline, initial_tree_name, rank_names_lst)
    set_tree(tree_name)

    for row in reader:
        yield row
Loading