Skip to content
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
6fec1b6
fix autonumbering locking
acwhite211 Sep 26, 2025
fe388a8
Merge branch 'main' into issue-6490-1
acwhite211 Oct 6, 2025
ccfed12
Lint code with ESLint and Prettier
acwhite211 Oct 6, 2025
04512f6
Merge branch 'main' into issue-6490-1
acwhite211 Oct 17, 2025
d612278
Lint code with ESLint and Prettier
acwhite211 Oct 17, 2025
e604dec
Merge branch 'main' into issue-6490-1
acwhite211 Oct 22, 2025
a95ad63
Lint code with ESLint and Prettier
acwhite211 Oct 22, 2025
494dcd1
change autonumbering locking naming key to use db_name and table_name
acwhite211 Oct 23, 2025
e222a02
try row level locking with django
acwhite211 Oct 23, 2025
f5160aa
Add missing autonumbering tables to django model
acwhite211 Oct 24, 2025
411ea24
implement lock_ge_range
acwhite211 Oct 24, 2025
f1dddc6
add db indexes to help autonumbering range locking
acwhite211 Oct 24, 2025
719de2f
Merge branch 'main' into issue-6490-1
acwhite211 Nov 3, 2025
bc815be
fix migration after merge
acwhite211 Nov 4, 2025
61ddb00
Merge branch 'main' into issue-6490-1
acwhite211 Nov 18, 2025
359c152
add PR comment links to lock_tables warning note
acwhite211 Nov 20, 2025
3c2437c
remove unneeded autonum tables
acwhite211 Nov 20, 2025
5effe3d
remove unused function get_tables_to_lock_strict
acwhite211 Nov 20, 2025
a47537e
add more composite indexes for autonumbering
acwhite211 Nov 25, 2025
a9a5ea8
fix migration issues
acwhite211 Dec 17, 2025
02ab2c3
Remove duplicate invoicenumber index
acwhite211 Dec 17, 2025
fb28674
Remove duplicate indexes from migration
acwhite211 Dec 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,6 @@ const containsSystemTables = (queryFieldSpec: QueryFieldSpec) => {
return Boolean(baseIsBlocked || pathHasBlockedSystem);
};


const hasHierarchyBaseTable = (queryFieldSpec: QueryFieldSpec) =>
Object.keys(schema.domainLevelIds).includes(
queryFieldSpec.baseTable.name.toLowerCase() as 'collection'
Expand Down
30 changes: 30 additions & 0 deletions specifyweb/specify/migrations/0001_initial.py
Original file line number Diff line number Diff line change
Expand Up @@ -5762,6 +5762,36 @@ class Migration(migrations.Migration):
'ordering': (),
},
),
migrations.CreateModel(
name='AutonumschColl',
fields=[
('collectionid', models.ForeignKey(db_column='CollectionID', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, serialize=False, to='specify.collection')),
],
options={
'db_table': 'autonumsch_coll',
'managed': False,
},
),
migrations.CreateModel(
name='AutonumschDiv',
fields=[
('divisionid', models.ForeignKey(db_column='DivisionID', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, serialize=False, to='specify.division')),
],
options={
'db_table': 'autonumsch_div',
'managed': False,
},
),
migrations.CreateModel(
name='AutonumschDsp',
fields=[
('disciplineid', models.ForeignKey(db_column='DisciplineID', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, serialize=False, to='specify.discipline')),
],
options={
'db_table': 'autonumsch_dsp',
'managed': False,
},
),
migrations.CreateModel(
name='Author',
fields=[
Expand Down
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(just a note/consideration)
Very nice, glad we're considering some more indices here! 🏆

I would love to see other indices (like those proposed in Add missing field indexes (#7482)) if the benefit of adding them would outweight the cost, although that could definitely be a future PR.

I will warn that indexes can result in a performance loss for write, insert, and delete operations, as each index on a table needs to be maintained (i.e., the B-Tree or similar data structure updated).

In the case of Specify for example, this could slow down (non-negligibly or perhaps even significantly, depending on the index data structure and data being inserted/updated) WorkBench and BatchEdit operations along with common DataEntry operations.

If this is a fine trade-off (such as the case were there will be overall significantly more reads than write and the index is well-structured for the intended optimization), I say shoot for the stars 🚀

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ya, sounds good. I'm looking through the issue, and creating a separate PR for create those specific indexes. The difference for this PR, is that the indexes are always on two values (scoping_field, autonumbering_field). So far we added these two indexes:

operations = [
    migrations.AddIndex(
        model_name='accession',
        index=models.Index(fields=['division_id', 'accessionnumber'], name='AccScopeAccessionsnumberIDX'),
    ),
    migrations.AddIndex(
        model_name='collectionobject',
        index=models.Index(fields=['collectionmemberid', 'catalognumber'], name='ColObjScopeCatalognumberIDX'),
    ),
]

For this PR, I'm going through all the proposed indexes and picking out the ones that are common for autonumbering. Let me know if there are any specific ones you want to include?

Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from django.db import migrations, models

class Migration(migrations.Migration):

dependencies = [
('specify', '0041_add_missing_schema_after_reorganization'),
]

operations = [
migrations.AddIndex(
model_name='accession',
index=models.Index(fields=['division_id', 'accessionnumber'], name='AccScopeAccessionsnumberIDX'),
),
migrations.AddIndex(
model_name='collectionobject',
index=models.Index(fields=['collectionmemberid', 'catalognumber'], name='ColObjScopeCatalognumberIDX'),
),
]
32 changes: 30 additions & 2 deletions specifyweb/specify/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ class Meta:
ordering = ()
indexes = [
models.Index(fields=['accessionnumber'], name='AccessionNumberIDX'),
models.Index(fields=['dateaccessioned'], name='AccessionDateIDX')
models.Index(fields=['dateaccessioned'], name='AccessionDateIDX'),
models.Index(fields=['division_id', 'accessionnumber'], name='AccScopeAccessionsnumberIDX'), # composite index for autonumbering range/gap locks
]


Expand Down Expand Up @@ -751,6 +752,32 @@ class Meta:

save = partialmethod(custom_save)

class AutonumschColl(models.Model):
collectionid = models.ForeignKey('Collection', models.DO_NOTHING, db_column='CollectionID', primary_key=True)
autonumberingschemeid = models.ForeignKey(Autonumberingscheme, models.DO_NOTHING, db_column='AutoNumberingSchemeID')

class Meta:
managed = False
db_table = 'autonumsch_coll'
unique_together = (('collectionid', 'autonumberingschemeid'))

class AutonumschDiv(models.Model):
divisionid = models.ForeignKey('Division', models.DO_NOTHING, db_column='DivisionID', primary_key=True)
autonumberingschemeid = models.ForeignKey(Autonumberingscheme, models.DO_NOTHING, db_column='AutoNumberingSchemeID')
class Meta:
managed = False
db_table = 'autonumsch_div'
unique_together = (('divisionid', 'autonumberingschemeid'))


class AutonumschDsp(models.Model):
disciplineid = models.ForeignKey('Discipline', models.DO_NOTHING, db_column='DisciplineID', primary_key=True)
autonumberingschemeid = models.ForeignKey(Autonumberingscheme, models.DO_NOTHING, db_column='AutoNumberingSchemeID')
class Meta:
managed = False
db_table = 'autonumsch_dsp'
unique_together = (('disciplineid', 'autonumberingschemeid'))

class Borrow(models.Model):
specify_model = datamodel.get_table_strict('borrow')

Expand Down Expand Up @@ -1490,7 +1517,8 @@ class Meta:
models.Index(fields=['uniqueidentifier'], name='COUniqueIdentifierIDX'),
models.Index(fields=['altcatalognumber'], name='AltCatalogNumberIDX'),
models.Index(fields=['guid'], name='ColObjGuidIDX'),
models.Index(fields=['collectionmemberid'], name='COColMemIDX')
models.Index(fields=['collectionmemberid'], name='COColMemIDX'),
models.Index(fields=['collectionmemberid', 'catalognumber'], name='ColObjScopeCatalognumberIDX'), # composite index for autonumbering range/gap locks
]


Expand Down
54 changes: 52 additions & 2 deletions specifyweb/specify/models_utils/lock_tables.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from django.db import connection
from django.db import connection, transaction
from contextlib import contextmanager
import logging

logger = logging.getLogger(__name__)


@contextmanager
def lock_tables(*tables):
cursor = connection.cursor()
Expand All @@ -13,8 +12,59 @@ def lock_tables(*tables):
yield
else:
try:
# NOTE: Should not do within a transaction.atomic() block
cursor.execute('lock tables %s' %
' write, '.join(tables) + ' write')
yield
finally:
cursor.execute('unlock tables')

@contextmanager
def mysql_named_lock(lock_name: str, timeout: int = 10):
"""
Connection-scoped mutex using MySQL GET_LOCK/RELEASE_LOCK.
Safe to use inside transaction.atomic().
"""
if connection.vendor != "mysql":
yield
return

with connection.cursor() as cur:
cur.execute("SELECT GET_LOCK(%s, %s)", [lock_name, timeout])
got = cur.fetchone()[0]

if not got:
with connection.cursor() as cur:
cur.execute("SELECT IS_USED_LOCK(%s)", [lock_name])
locking_db_process_id = cur.fetchone()[0]
if locking_db_process_id is not None:
logger.warning(
"Failed to acquire lock %r. It is currently held by DB Process %d.",
lock_name,
locking_db_process_id
)
else:
logger.error(
"Failed to acquire lock %r after timeout, but IS_USED_LOCK() reports it is FREE. Check for internal MySQL/connection issues.",
lock_name
)
raise TimeoutError(f"Could not acquire MySQL named lock {lock_name!r}")

try:
yield
finally:
try:
with connection.cursor() as cur:
cur.execute("SELECT RELEASE_LOCK(%s)", [lock_name])
except Exception:
logger.info("Failed to release MySQL named lock %r", lock_name, exc_info=True)

def get_autonumbering_lock_name(db_name, table_name):
return f"autonumbering:{db_name.lower()}:{table_name.lower()}"

@contextmanager
def autonumbering_lock_table(db_name, table_name):
lock_name = get_autonumbering_lock_name(db_name, table_name)
with mysql_named_lock(lock_name):
yield

76 changes: 72 additions & 4 deletions specifyweb/specify/utils/autonumbering.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@


from .uiformatters import UIFormatter, get_uiformatters
from ..models_utils.lock_tables import lock_tables
from ..models_utils.lock_tables import mysql_named_lock, autonumbering_lock_table
import logging
from typing import List, Tuple, Set
from collections.abc import Sequence
from django.db import transaction
from django.apps import apps

from specifyweb.specify.utils.scoping import Scoping
from specifyweb.specify.datamodel import datamodel
from specifyweb.backend.businessrules.models import UniquenessRule, UniquenessRuleField

logger = logging.getLogger(__name__)

Expand All @@ -32,7 +35,9 @@ def autonumber_and_save(collection, user, obj) -> None:
obj.save()


def do_autonumbering(collection, obj, fields: list[tuple[UIFormatter, Sequence[str]]]) -> None:
def do_autonumbering_old(collection, obj, fields: list[tuple[UIFormatter, Sequence[str]]]) -> None:
# THis is the old implementation of autonumbering involving lock mysql table explicitly.
# Fall back to using this implementation if race-conditions are found.
logger.debug("autonumbering %s fields: %s", obj, fields)

# The autonumber action is prepared and thunked outside the locked table
Expand All @@ -43,12 +48,61 @@ def do_autonumbering(collection, obj, fields: list[tuple[UIFormatter, Sequence[s
for formatter, vals in fields
]

with lock_tables(*get_tables_to_lock(collection, obj, [formatter.field_name for formatter, _ in fields])):
# Serialize the autonumbering critical section without table locks.
db_name = transaction.get_connection().alias
table_name = obj._meta.db_table
with autonumbering_lock_table(db_name, table_name):
for apply_autonumbering_to in thunks:
apply_autonumbering_to(obj)

obj.save()

def do_autonumbering(collection, obj, fields: list[tuple[UIFormatter, Sequence[str]]]) -> None:
logger.debug("autonumbering %s fields: %s", obj, fields)

# Prepare the thunks/queries (ok to prep outside transaction)
prepared = []
for formatter, vals in fields:
with_year = formatter.fillin_year(vals, None)
fieldname = formatter.field_name.lower()
prepared.append((formatter, fieldname, with_year))

db_name = transaction.get_connection().alias
table_name = obj._meta.db_table
with autonumbering_lock_table(db_name, table_name):
with transaction.atomic():
for formatter, fieldname, with_year in prepared:
# Build the exact queryset that limits by regex and scope
# Use django's select_for_update() to lock the current max row itself
qs_max = formatter._autonumber_queryset(collection, obj.__class__, fieldname, with_year)
biggest_obj = (qs_max
.select_for_update()
.order_by('-' + fieldname)
.first())

if biggest_obj is None:
filled_vals = formatter.fill_vals_no_prior(with_year)
setattr(obj, fieldname, ''.join(filled_vals))
continue

# Lock the range of all rows in-scope with field >= biggest_value
biggest_value = getattr(biggest_obj, fieldname)
formatter.lock_ge_range(
collection=collection,
model=obj.__class__,
fieldname=fieldname,
with_year=with_year,
biggest_value=biggest_value,
)

# Get next value off the locked biggest and assign
filled_vals = formatter.fill_vals_after(biggest_value)
setattr(obj, fieldname, ''.join(filled_vals))

# Save once after all fields are assigned
obj.save()

def verify_autonumbering(collection, obj, fields):
pass

def get_tables_to_lock(collection, obj, field_names) -> set[str]:
# TODO: Include the fix for https://github.com/specify/specify7/issues/4148
Expand Down Expand Up @@ -77,6 +131,20 @@ def get_tables_to_lock(collection, obj, field_names) -> set[str]:

return tables

def get_tables_to_lock_strict(obj) -> list[str]:
obj_table = obj._meta.db_table

needed = {obj_table}
needed |= {"autonumberingscheme", "autonumsch_coll"} # or _dsp / _div
needed |= {UniquenessRule._meta.db_table, UniquenessRuleField._meta.db_table}

Discipline = apps.get_model("specify", "Discipline")
needed.add(Discipline._meta.db_table)

if obj_table == "component":
needed.add("collectionobject")

return sorted(needed)

def get_tables_from_field_path(model: str, field_path: str) -> list[str]:
tables = []
Expand Down
29 changes: 28 additions & 1 deletion specifyweb/specify/utils/uiformatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from xml.sax.saxutils import quoteattr

from django.core.exceptions import ObjectDoesNotExist
from django.db import connection
from django.db import connection, transaction

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -206,6 +206,33 @@ def parser(table: Table, value: str) -> str:
return canonicalized
return parser

def lock_ge_range(
self,
collection,
model,
fieldname: str,
with_year: list[str],
biggest_value: str,
) -> None:
assert transaction.get_connection().in_atomic_block, \
"lock_ge_range() must be used inside transaction.atomic()"

# Apply the scope logic used by _autonumber_queryset
group_filter = get_autonumber_group_filter(model, collection, self.format_name)

# Filter by scope, then filter and lock the >= range
base_qs = model.objects.all()
scoped_qs = group_filter(base_qs)

# Lock rows needed for autonumbering with select_for_update().
# Avoid deadlocks in select_for_update() with nowait.
# Rely on the sql named lock for avoiding autonumbering race conditions.
ge_filter = {f"{fieldname}__gte": biggest_value}
qs = scoped_qs.filter(**ge_filter).select_for_update(nowait=True)

# Force evaluation so the locks are actually taken
list(qs.values_list("pk", flat=True))

class Field(NamedTuple):
size: int
value: str
Expand Down