Skip to content

Commit 7514900

Browse files
committed
enable case insensitive homonym lookups
1 parent b4e93e3 commit 7514900

File tree

7 files changed

+70
-8
lines changed

7 files changed

+70
-8
lines changed

src/kotobuki/mapping_updater/cli.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,14 @@
3939
"try to find it through concepts with the same concept name. It is "
4040
"strongly recommended to index the concept_name column when using this.",
4141
)
42+
@click.option(
43+
"--case-insensitive-homonyms",
44+
is_flag=True,
45+
default=False,
46+
help="When searching for homonyms, do so in a case-insensitive way. "
47+
"It is strongly recommended to create a functional index on the "
48+
"concept table for this.",
49+
)
4250
@click.option(
4351
"-m",
4452
"--write-map-paths",
@@ -73,6 +81,7 @@ def _update_usagi_cli(
7381
schema: str,
7482
usagi_file: Path,
7583
allow_homonyms: bool,
84+
case_insensitive_homonyms: bool,
7685
write_map_paths: bool,
7786
inspect_only: bool,
7887
overwrite: bool,
@@ -87,6 +96,7 @@ def _update_usagi_cli(
8796
schema,
8897
usagi_file,
8998
allow_homonyms,
99+
case_insensitive_homonyms,
90100
write_map_paths,
91101
inspect_only,
92102
overwrite,

src/kotobuki/mapping_updater/db.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from collections.abc import Sequence
22

33
from omop_cdm.regular.cdm54 import Concept, ConceptRelationship
4-
from sqlalchemy import and_, select
4+
from sqlalchemy import and_, func, select
55
from sqlalchemy.orm import Session
66

77
from .relationship import (
@@ -65,8 +65,14 @@ def find_standard_concepts(
6565
return None
6666

6767

68-
def find_all_homonyms(concept_name: str, session: Session) -> Sequence[Concept]:
68+
def find_all_homonyms(
69+
concept_name: str, case_insensitive: bool, session: Session
70+
) -> Sequence[Concept]:
6971
"""Get Concept ORM objects that match the concept_name."""
72+
if case_insensitive:
73+
return session.scalars(
74+
select(Concept).where(func.lower(Concept.concept_name) == concept_name.lower())
75+
).all()
7076
return session.scalars(select(Concept).where(Concept.concept_name == concept_name)).all()
7177

7278

@@ -75,7 +81,9 @@ def find_suitable_homonym(
7581
) -> NewMap | None:
7682
"""Return first homonym concept that maps to a standard concept."""
7783
start_path = [MapLink(concept)]
78-
for h in homonyms:
84+
# Exclude the concept for which we are trying to find a mapping
85+
other_homonyms = [h for h in homonyms if h.concept_id != concept.concept_id]
86+
for h in other_homonyms:
7987
path = start_path.copy()
8088
path.append(MapLink(h, Relationship.HOMONYM))
8189
new_map = find_standard_concepts(h.concept_id, session, path)
@@ -84,11 +92,16 @@ def find_suitable_homonym(
8492
return None
8593

8694

87-
def find_new_mapping(concept: Concept, search_homonyms: bool, session: Session) -> NewMap | None:
95+
def find_new_mapping(
96+
concept: Concept,
97+
search_homonyms: bool,
98+
case_insensitive_homonyms: bool,
99+
session: Session,
100+
) -> NewMap | None:
88101
# Try to find standard concepts via concept relationships
89102
new_map = find_standard_concepts(concept.concept_id, session, [MapLink(concept)])
90103
# Alternatively via concepts with the same name (optionally ignoring case)
91104
if search_homonyms and new_map is None:
92-
homonyms = find_all_homonyms(concept.concept_name, session)
105+
homonyms = find_all_homonyms(concept.concept_name, case_insensitive_homonyms, session)
93106
new_map = find_suitable_homonym(homonyms, session, concept)
94107
return new_map

src/kotobuki/mapping_updater/update_usagi.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ def update_usagi_file(
3030
vocab_schema: str,
3131
usagi_file: Path,
3232
allow_homonyms: bool = False,
33+
case_insensitive_homonyms: bool = False,
3334
write_map_paths: bool = False,
3435
inspect_only: bool = False,
3536
overwrite: bool = False,
@@ -87,7 +88,7 @@ def update_usagi_file(
8788

8889
logger.info("Querying database for standard concepts...")
8990
for concept in non_standard:
90-
new_map = find_new_mapping(concept, allow_homonyms, session)
91+
new_map = find_new_mapping(concept, allow_homonyms, case_insensitive_homonyms, session)
9192
new_mappings[concept.concept_id] = new_map
9293
log_remapped_concepts(new_mappings)
9394

tests/python/mapping_updater/db_setup/vocab_data/CONCEPT.csv

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,5 @@ concept_id concept_name domain_id vocabulary_id concept_class_id standard_concep
1818
16 val2 0 0 0 S v2 2017-01-01 2099-12-31
1919
17 qwerty Condition 0 0 S qwerty 2017-01-01 2099-12-31
2020
18 azerty Condition 0 0 azerty 2017-01-01 2099-12-31
21+
19 Qzerty Condition 0 0 Qzerty 2017-01-01 2099-12-31
22+
20 Dvorak Condition 0 0 S Dvorak 2017-01-01 2099-12-31

tests/python/mapping_updater/db_setup/vocab_data/CONCEPT_RELATIONSHIP.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ concept_id_1 concept_id_2 relationship_id valid_start_date valid_end_date invali
1212
13 14 Maps to 1970-01-01 2099-12-31
1313
13 15 Maps to value 1970-01-01 2099-12-31
1414
13 16 Maps to value 1970-01-01 2099-12-31
15+
19 20 Maps to 2017-01-01 2099-12-31
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import pytest
2+
from omop_cdm.regular.cdm54 import Concept
3+
from sqlalchemy import Engine
4+
5+
from kotobuki.mapping_updater.db import find_new_mapping
6+
from kotobuki.mapping_updater.relationship import MapLink, Relationship
7+
from tests.python.mapping_updater.test_usagi_mappings import get_new_map
8+
9+
pytestmark = pytest.mark.usefixtures("create_vocab_tables")
10+
11+
12+
def test_find_standard_concept_via_case_insensitive_homonym(pg_db_engine: Engine):
13+
"""
14+
The concept is non-standard and has a 'Maps to' relationship to a
15+
standard concept.
16+
"""
17+
result = get_new_map(
18+
concept_id=19,
19+
engine=pg_db_engine,
20+
homonyms=True,
21+
case_insensitive_homonyms=True,
22+
)
23+
assert len(result.concepts) == 1
24+
assert result.concepts[0].concept_id == 20
25+
assert not result.value_as_concept

tests/python/mapping_updater/test_usagi_mappings.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,20 @@ def get_concept_by_id(session: Session, concept_id: int) -> Concept | None:
1313
return session.scalars(select(Concept).filter(Concept.concept_id == concept_id)).one_or_none()
1414

1515

16-
def get_new_map(concept_id: int, engine: Engine, homonyms: bool = False) -> NewMap | None:
16+
def get_new_map(
17+
concept_id: int,
18+
engine: Engine,
19+
homonyms: bool = False,
20+
case_insensitive_homonyms: bool = False,
21+
) -> NewMap | None:
1722
with Session(engine, expire_on_commit=False) as session, session.begin():
1823
concept = get_concept_by_id(session, concept_id=concept_id)
19-
return find_new_mapping(concept=concept, search_homonyms=homonyms, session=session)
24+
return find_new_mapping(
25+
concept=concept,
26+
search_homonyms=homonyms,
27+
case_insensitive_homonyms=case_insensitive_homonyms,
28+
session=session,
29+
)
2030

2131

2232
def test_no_map(pg_db_engine: Engine):

0 commit comments

Comments
 (0)