Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
6ae876f
Add search on synonyms in the QB
CarolineDenis Jul 24, 2024
40d4916
Add search synonym param to build query def
CarolineDenis Jul 24, 2024
c147466
Add attribute
CarolineDenis Jul 31, 2024
8349014
Merge remote-tracking branch 'origin/production' into issue-752
CarolineDenis Sep 6, 2024
61d307c
Merge remote-tracking branch 'origin/production' into issue-752
CarolineDenis Dec 6, 2024
ef6d8b1
Merge remote-tracking branch 'origin/production' into issue-752
CarolineDenis Dec 19, 2024
beaa4f1
Merge branch 'main' into issue-752
acwhite211 Oct 24, 2025
3a29237
cleanup searchsynonymy merge on QB execution
acwhite211 Oct 24, 2025
f1ca8b3
create synonymize_taxon_query function
acwhite211 Oct 31, 2025
a053e5d
Merge branch 'main' into issue-752
acwhite211 Oct 31, 2025
222b8b5
remove unused function
acwhite211 Oct 31, 2025
8c952d2
add search_synonymy to get_simple_query
acwhite211 Nov 3, 2025
c1a9a0a
fix batch edit unit tests
acwhite211 Nov 3, 2025
8450777
add search_synonymy to TestExecute calls to qb execute function
acwhite211 Nov 3, 2025
fe6bcf0
fix sp_query sqlalchemy getter call to searchSynonymy
acwhite211 Nov 3, 2025
e88aa08
Merge branch 'main' into issue-752
acwhite211 Nov 3, 2025
4d90687
fix searchsynonymy in front-end test
acwhite211 Nov 3, 2025
62b85d0
add missing searchsynonymy in front-end unit test
acwhite211 Nov 3, 2025
5e2cfaf
add synonymize_by_expanding_accepted_taxon_query
acwhite211 Nov 13, 2025
daeeb8b
synonymize_tree_query on all tree types
acwhite211 Nov 13, 2025
d2a62b1
Merge branch 'main' into issue-752
acwhite211 Nov 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions specifyweb/backend/stored_queries/batch_edit.py
Original file line number Diff line number Diff line change
Expand Up @@ -1014,6 +1014,7 @@ def run_batch_edit_query(props: BatchEditProps):
tableid=tableid,
distinct=True,
series=False,
search_synonymy=False,
count_only=False,
field_specs=query_with_hidden,
limit=limit,
Expand Down
13 changes: 13 additions & 0 deletions specifyweb/backend/stored_queries/execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from specifyweb.backend.workbench.upload.auditlog import auditlog
from specifyweb.backend.stored_queries.group_concat import group_by_displayed_fields
from specifyweb.backend.stored_queries.queryfield import fields_from_json, QUREYFIELD_SORT_T
from specifyweb.backend.stored_queries.synonomy import synonymize_tree_query

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -67,6 +68,7 @@ class BuildQueryProps(NamedTuple):
formatauditobjs: bool = False
distinct: bool = False
series: bool = False
search_synonymy: bool = False
implicit_or: bool = True
formatter_props: ObjectFormatterProps = DefaultQueryFormatterProps()

Expand Down Expand Up @@ -586,6 +588,7 @@ def run_ephemeral_query(collection, user, spquery):
recordsetid = spquery.get("recordsetid", None)
distinct = spquery["selectdistinct"]
series = spquery.get('smushed', None)
search_synonymy = spquery['searchsynonymy']
tableid = spquery["contexttableid"]
count_only = spquery["countonly"]
format_audits = spquery.get("formatauditrecids", False)
Expand All @@ -599,6 +602,7 @@ def run_ephemeral_query(collection, user, spquery):
tableid=tableid,
distinct=distinct,
series=series,
search_synonymy=search_synonymy,
count_only=count_only,
field_specs=field_specs,
limit=limit,
Expand Down Expand Up @@ -787,6 +791,7 @@ def execute(
tableid,
distinct,
series,
search_synonymy,
count_only,
field_specs,
limit,
Expand All @@ -812,6 +817,7 @@ def execute(
formatauditobjs=formatauditobjs,
distinct=distinct,
series=series,
search_synonymy=search_synonymy,
formatter_props=formatter_props,
),
)
Expand Down Expand Up @@ -898,6 +904,8 @@ def build_query(
series = (only for CO) if True, group by all display fields.
Group catalog numbers that fall within the same range together.
Return all record IDs associated with a row.

search_synonymy = if True, search synonym nodes as well, and return all record IDs associated with parent node
"""
model = models.models_by_tableid[tableid]
id_field = model._id
Expand Down Expand Up @@ -1015,6 +1023,11 @@ def build_query(
query = group_by_displayed_fields(query, selected_fields, ignore_cat_num=True)
elif props.distinct:
query = group_by_displayed_fields(query, selected_fields)

if props.search_synonymy:
log_sqlalchemy_query(query.query)
synonymized_query = synonymize_tree_query(query.query, table)
query = query._replace(query=synonymized_query)

internal_predicate = query.get_internal_filters()
query = query.filter(internal_predicate)
Expand Down
6 changes: 6 additions & 0 deletions specifyweb/backend/stored_queries/query_construct.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,12 @@ def build_join(self, table, model, join_path):
# To make things "simpler", it doesn't apply any filters, but returns a single predicate
# @model is an input parameter, because cannot guess if it is aliased or not (callers are supposed to know that)
def get_internal_filters(self):
    """
    Combine the accumulated internal filters into a single predicate.

    Returns a SQL TRUE literal when there are no internal filters (so the
    caller can always pass the result to a filter/where call), the lone
    predicate when there is exactly one, and an OR of all predicates
    otherwise.
    """
    filters = self.internal_filters
    if filters:
        if len(filters) == 1:
            # A single predicate needs no OR wrapper.
            return filters[0]
        return sql.or_(*filters)
    # Nothing to filter on: hand back TRUE so the caller's filter is a no-op.
    return sql.true()

def add_proxy_method(name):
Expand Down
232 changes: 232 additions & 0 deletions specifyweb/backend/stored_queries/synonomy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
from typing import Optional, Tuple, List
from sqlalchemy import select, union
from sqlalchemy.sql import Select
from sqlalchemy.orm import Query
from sqlalchemy.sql.selectable import FromClause, Alias, Join
from sqlalchemy.sql.schema import Table
from specifyweb.specify.models_utils.load_datamodel import Table as SpecifyTable

def synonymize_tree_query(
    query: Query,
    table: "SpecifyTable",
    expand_from_accepted: bool = True,
) -> Query:
    """
    Rewrite a tree query so that its result set is widened along synonymy links.

    The rewrite proceeds in three steps:

    1. ``target_taxon`` (CTE): the original FROM/JOINs and filters, projected
       down to ``(id column, AcceptedID)`` of the tree table.
    2. ``ids`` (subquery): a UNION of node ids derived from ``target_taxon``.
       - ``expand_from_accepted=True`` (default): the matched ids, plus the id
         of every row in the underlying tree table whose ``AcceptedID`` is one
         of the matched ids.
       - ``expand_from_accepted=False``: the matched ids, plus the non-NULL
         ``AcceptedID`` values of the matched rows.
    3. The original select list and FROM/JOINs are reissued without the
       original WHERE, filtered instead by ``tree id column IN (ids)``.

    Parameters:
        query: the query to expand. Its ``statement`` is used when it is an
            ORM ``Query``; otherwise the object itself is treated as the select.
        table: datamodel table whose ``table`` (SQL table name) and
            ``idColumn`` attributes identify the tree table in the query.
        expand_from_accepted: selects the expansion direction described above.

    Returns:
        The rebuilt query produced by ``_rebuild_query_with_ids``.

    Raises:
        ValueError: if the tree table cannot be found in the query's FROMs.
    """
    if isinstance(query, Query):
        base_sel: Select = query.statement
    else:
        base_sel = query

    tree_table_name = table.table
    id_col_name = table.idColumn

    # Resolve both the real tree table and the FROM element (table or alias)
    # under which the original query refers to it.
    taxon_table, taxon_from = _find_tree_table_and_from(base_sel, tree_table_name)
    if taxon_from is None or taxon_table is None:
        raise ValueError(
            f"synonymize_tree_query: couldn't locate '{tree_table_name}' in the query FROMs."
        )

    # CTE holding the (id, AcceptedID) pairs matched by the original query.
    matched = _build_target_tree_cte(
        base_sel,
        taxon_from,
        id_col_name=id_col_name,
        cte_name="target_taxon",
    )
    matched_id = matched.c.TaxonID

    if not expand_from_accepted:
        # Matched ids plus the accepted ids those matched rows point at.
        accepted_id = matched.c.AcceptedID
        id_selects = (
            select(matched_id.label("id")),
            select(accepted_id.label("id")).where(accepted_id.isnot(None)),
        )
    else:
        # Matched ids plus every row pointing at a matched id via AcceptedID.
        # The synonym lookup goes through the underlying table rather than the
        # query's alias so the original WHERE is not carried over.
        id_selects = (
            select(matched_id.label("id")),
            select(taxon_table.c[id_col_name].label("id")).where(
                taxon_table.c.AcceptedID.in_(select(matched_id))
            ),
        )

    ids = union(*id_selects).subquery("ids")

    # Reissue the original query shape, restricted to the expanded id set.
    return _rebuild_query_with_ids(
        query=query,
        base_sel=base_sel,
        taxon_from=taxon_from,
        ids_subquery=ids,
        id_col_name=id_col_name,
    )

def _build_target_tree_cte(
    base_sel: Select,
    taxon_from: FromClause,
    id_col_name: str,
    cte_name: str = "target_taxon",
):
    """
    Build a CTE projecting ``(id column, AcceptedID)`` of the tree table while
    keeping the original query's FROM / WHERE / GROUP BY / HAVING.

    ORDER BY is intentionally not copied.

    Parameters:
        base_sel: the original select statement.
        taxon_from: the FROM element (table or alias) through which the
            original statement refers to the tree table.
        id_col_name: name of the tree table's id column on ``taxon_from``.
        cte_name: name given to the resulting CTE.

    Returns:
        A CTE with two columns: the id column, always labeled ``TaxonID``
        (even when the tree table is not taxon) so downstream code can use a
        fixed name, and ``AcceptedID``.
    """
    target_taxon = select(
        taxon_from.c[id_col_name].label("TaxonID"),
        taxon_from.c.AcceptedID.label("AcceptedID"),
    )

    # Re-attach the original FROM roots (they carry the join conditions).
    for from_root in base_sel.get_final_froms():
        target_taxon = target_taxon.select_from(from_root)

    # Re-apply WHERE and GROUP BY.
    for criterion in getattr(base_sel, "_where_criteria", ()) or ():
        target_taxon = target_taxon.where(criterion)
    for clause in getattr(base_sel, "_group_by_clauses", ()) or ():
        target_taxon = target_taxon.group_by(clause)

    # Re-apply HAVING. SQLAlchemy versions that provide get_final_froms()
    # keep HAVING criteria in the private tuple `_having_criteria`; the
    # older single-expression `_having` attribute is only consulted as a
    # fallback. Reading `_having` alone silently dropped HAVING.
    having_criteria = getattr(base_sel, "_having_criteria", None) or ()
    if not having_criteria:
        legacy_having = getattr(base_sel, "_having", None)
        if legacy_having is not None:
            having_criteria = (legacy_having,)
    for criterion in having_criteria:
        target_taxon = target_taxon.having(criterion)

    return target_taxon.cte(cte_name)

def _rebuild_query_with_ids(
    query: Query,
    base_sel: Select,
    taxon_from: FromClause,
    ids_subquery: FromClause,
    id_col_name: str,
) -> Query:
    """
    Rebuild a chainable ORM Query from the original query and its select.

    The new query has:
      - the same selected columns,
      - the same FROM roots (joins included),
      - the same GROUP BY / HAVING / ORDER BY,
      - none of the original WHERE criteria,
      - a new filter: ``tree id column IN (SELECT id FROM ids_subquery)``.

    Only the clauses listed above are copied from ``base_sel``.

    Parameters:
        query: the original ORM query; only its ``session`` is used.
        base_sel: the select statement underlying ``query``.
        taxon_from: the FROM element (table or alias) for the tree table.
        ids_subquery: subquery exposing an ``id`` column with the node ids
            to keep.
        id_col_name: name of the tree table's id column on ``taxon_from``.

    Returns:
        A new Query bound to the same session as ``query``.
    """
    sess = query.session
    original_cols: List = list(base_sel.selected_columns)

    new_query = sess.query(*original_cols)

    # Attach the same FROM roots as the original query; these already carry
    # the join conditions.
    for from_root in base_sel.get_final_froms():
        new_query = new_query.select_from(from_root)

    # Preserve GROUP BY from the original select (WHERE is deliberately not
    # copied).
    for clause in getattr(base_sel, "_group_by_clauses", ()) or ():
        new_query = new_query.group_by(clause)

    # Preserve HAVING. SQLAlchemy versions that provide get_final_froms()
    # keep HAVING criteria in the private tuple `_having_criteria`; the
    # older single-expression `_having` attribute is only consulted as a
    # fallback. Reading `_having` alone silently dropped HAVING.
    having_criteria = getattr(base_sel, "_having_criteria", None) or ()
    if not having_criteria:
        legacy_having = getattr(base_sel, "_having", None)
        if legacy_having is not None:
            having_criteria = (legacy_having,)
    for criterion in having_criteria:
        new_query = new_query.having(criterion)

    # Preserve ORDER BY.
    order_by_clauses = getattr(base_sel, "_order_by_clauses", None)
    if order_by_clauses:
        new_query = new_query.order_by(*order_by_clauses)

    # Restrict the result to the expanded id set.
    new_query = new_query.filter(
        taxon_from.c[id_col_name].in_(select(ids_subquery.c.id))
    )
    return new_query

def _find_tree_table_and_from(
    sel: Select,
    tree_table_name: str = "taxon",
) -> Tuple[Optional[Table], Optional[FromClause]]:
    """
    Locate the tree table inside a select statement.

    Parameters:
        sel: the select statement to inspect.
        tree_table_name: SQL name of the tree table, compared
            case-insensitively against each candidate ``Table.name``.

    Returns:
        ``(table, from_clause)`` where ``table`` is the underlying ``Table``
        and ``from_clause`` is the element through which ``sel`` refers to it
        (the table itself or an alias of it). ``(None, None)`` when the table
        is not found.

    Shapes searched in the FROM roots: ``Table``, ``Alias(Table)``, ``Join``
    trees (both sides, recursively) and ``Alias(Join(...))``. If nothing is
    found there, the tables attached to the selected columns are checked.
    """
    target_name = tree_table_name.lower()

    def is_taxon_table(t: Table) -> bool:
        # Case-insensitive comparison on the table's name. Anything without a
        # string name is simply not a match, so no exception handling needed.
        name = getattr(t, "name", None)
        return isinstance(name, str) and name.lower() == target_name

    def walk(fc: FromClause) -> Tuple[Optional[Table], Optional[FromClause]]:
        # Plain Table
        if isinstance(fc, Table) and is_taxon_table(fc):
            return fc, fc

        # Alias of something
        el = getattr(fc, "element", None)
        if isinstance(fc, Alias) and el is not None:
            # Alias(Table): report the alias as the FROM element to use
            if isinstance(el, Table) and is_taxon_table(el):
                return el, fc
            # Alias(Join): recurse into the aliased join
            if isinstance(el, Join):
                t, frm = walk(el)
                if t is not None:
                    return t, frm

        # Join: recurse into both sides, left first
        if isinstance(fc, Join):
            for side in (fc.left, fc.right):
                t, frm = walk(side)
                if t is not None:
                    return t, frm

        # Unknown / composite
        return None, None

    # Try walking all final FROM roots
    for from_root in sel.get_final_froms():
        t, frm = walk(from_root)
        if t is not None and frm is not None:
            return t, frm

    # Fallback: deduce the table/alias from the selected columns. Attribute
    # lookups use getattr defaults instead of a blanket `except Exception`,
    # so columns without a table are skipped while real errors still surface.
    for col in sel.selected_columns:
        tbl = getattr(col, "table", None)
        el = getattr(tbl, "element", None)
        if isinstance(tbl, Table) and is_taxon_table(tbl):
            return tbl, tbl
        if isinstance(el, Table) and is_taxon_table(el):
            return el, tbl  # tbl is the alias here

    return None, None
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def test_simple_query(self):
table.tableId,
distinct=False,
series=False,
search_synonymy=False,
count_only=False,
field_specs=query_fields,
limit=0,
Expand Down Expand Up @@ -74,6 +75,7 @@ def test_simple_query_count(self):
table.tableId,
distinct=False,
series=False,
search_synonymy=False,
count_only=True,
field_specs=query_fields,
limit=0,
Expand All @@ -95,6 +97,7 @@ def test_simple_query_distinct(self):
table.tableId,
distinct=True,
series=False,
search_synonymy=False,
count_only=False,
field_specs=query_fields,
limit=0,
Expand Down Expand Up @@ -130,6 +133,7 @@ def test_simple_query_distinct_count(self):
table.tableId,
distinct=True,
series=False,
search_synonymy=False,
count_only=True,
field_specs=query_fields,
limit=0,
Expand Down Expand Up @@ -164,6 +168,7 @@ def test_simple_query_recordset_limit(self):
self.specifyuser,
table.tableId,
series=False,
search_synonymy=False,
count_only=False,
field_specs=query_fields,
limit=3,
Expand All @@ -179,6 +184,7 @@ def test_simple_query_recordset_limit(self):
table.tableId,
distinct=False,
series=False,
search_synonymy=False,
count_only=True,
field_specs=query_fields,
limit=3,
Expand Down Expand Up @@ -212,6 +218,7 @@ def test_simple_query_series(self):
table.tableId,
distinct=False,
series=True,
search_synonymy=False,
count_only=False,
field_specs=query_fields,
limit=0,
Expand All @@ -225,6 +232,7 @@ def test_simple_query_series(self):
table.tableId,
distinct=False,
series=True,
search_synonymy=False,
count_only=True,
field_specs=query_fields,
limit=0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"contexttableid": 1,
"selectdistinct": False,
"smushed": False,
"searchsynonymy": False,
"countonly": False,
"formatauditrecids": False,
"specifyuser": f"/api/specify/specifyuser/{specifyuser.id}/",
Expand Down
Loading