Skip to content

Commit 3a72a35

Browse files
authored
Update with new SeMRA API (#17)
1 parent c5a7373 commit 3a72a35

8 files changed

Lines changed: 17 additions & 160 deletions

File tree

lexica/anatomy/generate.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,6 @@
3333
GILDA_PATH = HERE.joinpath("terms.tsv.gz")
3434
SUMMARY_PATH = HERE.joinpath("summary.json")
3535

36-
ANATOMY_CONFIGURATION.add_mapping_caches(HERE)
37-
3836

3937
@click.command()
4038
def _main() -> None:

lexica/cell/generate.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@
3131
GILDA_PATH = HERE.joinpath("terms.tsv.gz")
3232
SUMMARY_PATH = HERE.joinpath("summary.json")
3333

34-
CELL_CONFIGURATION.add_mapping_caches(HERE)
35-
3634

3735
def _main() -> None:
3836
"""Generate a lexical index for cell resources."""

lexica/phenotype/generate.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@
3131
GILDA_PATH = HERE.joinpath("terms.tsv.gz")
3232
SUMMARY_PATH = HERE.joinpath("summary.json")
3333

34-
PHENOTYPE_CONFIGURATION.add_mapping_caches(HERE)
35-
3634

3735
def _main() -> None:
3836
"""Generate a lexical index for phenotype resources."""

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ dependencies = [
5959
"semra",
6060
"ssslm>=0.0.10",
6161
"bioregistry",
62-
"pyobo @ git+https://github.com/biopragmatics/pyobo",
62+
"pyobo",
6363
"bioontologies>=0.6.0",
6464
"biosynonyms",
6565
"pandas",

src/biolexica/api.py

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -57,18 +57,6 @@ class Configuration(BaseModel):
5757
)
5858
mapping_configuration: semra.Configuration | None = None
5959

60-
def add_mapping_caches(self, directory: Path) -> None:
61-
"""Add a cache to the directory for raw, processed, and priority mappings."""
62-
if self.mapping_configuration is None:
63-
raise ValueError
64-
self.mapping_configuration.raw_pickle_path = directory.joinpath("mappings_raw.pkl.gz")
65-
self.mapping_configuration.processed_pickle_path = directory.joinpath(
66-
"mappings_processed.pkl.gz"
67-
)
68-
self.mapping_configuration.priority_pickle_path = directory.joinpath(
69-
"mappings_prioritized.pkl"
70-
)
71-
7260

7361
PREDEFINED: TypeAlias = Literal["cell", "anatomy", "phenotype", "obo"]
7462
URL_FMT = "https://github.com/biopragmatics/biolexica/raw/main/lexica/{key}/{key}.ssslm.tsv.gz"
@@ -146,9 +134,15 @@ def assemble_terms( # noqa:C901
146134
logger.info("Writing %d raw literal mappings to %s", len(terms), raw_path)
147135
ssslm.write_literal_mappings(terms, raw_path)
148136

149-
_mappings = []
150-
if configuration.mapping_configuration:
151-
_mappings.extend(configuration.mapping_configuration.get_mappings())
137+
_mappings: list[semra.Mapping] = []
138+
if configuration.mapping_configuration is not None:
139+
from semra.pipeline import AssembleReturnType
140+
141+
_mappings.extend(
142+
configuration.mapping_configuration.get_mappings(
143+
return_type=AssembleReturnType.priority
144+
)
145+
)
152146
if mappings is not None:
153147
_mappings.extend(mappings)
154148

@@ -158,7 +152,7 @@ def assemble_terms( # noqa:C901
158152
assert_projection(_mappings)
159153
terms = ssslm.remap_literal_mappings(
160154
literal_mappings=terms,
161-
mappings=[(mapping.s, mapping.o) for mapping in _mappings],
155+
mappings=[(mapping.subject, mapping.object) for mapping in _mappings],
162156
)
163157

164158
if configuration.excludes:

src/biolexica/configs/anatomy.py

Lines changed: 2 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,12 @@
11
"""Configuration for an anatomy lexical index."""
22

3-
import semra
3+
import semra.landscape.anatomy
44
from pyobo.sources.mesh import get_mesh_category_curies
55

66
import biolexica
77

88
__all__ = ["ANATOMY_CONFIGURATION"]
99

10-
PRIORITY = [
11-
"uberon",
12-
"mesh",
13-
"bto",
14-
"caro",
15-
"ncit",
16-
"umls",
17-
]
18-
19-
SEMRA_CONFIG = semra.Configuration(
20-
name="Anatomy mappings",
21-
inputs=[
22-
semra.Input(source="biomappings"),
23-
semra.Input(source="gilda"),
24-
semra.Input(prefix="uberon", source="pyobo", confidence=0.99),
25-
semra.Input(prefix="bto", source="pyobo", confidence=0.99),
26-
semra.Input(prefix="caro", source="pyobo", confidence=0.99),
27-
semra.Input(prefix="mesh", source="pyobo", confidence=0.99),
28-
semra.Input(prefix="ncit", source="pyobo", confidence=0.99),
29-
semra.Input(prefix="umls", source="pyobo", confidence=0.99),
30-
],
31-
add_labels=False,
32-
priority=PRIORITY,
33-
keep_prefixes=PRIORITY,
34-
remove_imprecise=False,
35-
mutations=[
36-
semra.Mutation(source="uberon", confidence=0.8),
37-
semra.Mutation(source="bto", confidence=0.65),
38-
semra.Mutation(source="caro", confidence=0.8),
39-
semra.Mutation(source="ncit", confidence=0.7),
40-
semra.Mutation(source="umls", confidence=0.7),
41-
],
42-
)
43-
4410
ANATOMY_CONFIGURATION = biolexica.Configuration(
4511
inputs=[
4612
biolexica.Input(source="uberon", processor="pyobo"),
@@ -63,5 +29,5 @@
6329
source="umls", processor="pyobo", ancestors=["umls:C0700276", "umls:C1515976"]
6430
),
6531
],
66-
mapping_configuration=SEMRA_CONFIG,
32+
mapping_configuration=semra.landscape.anatomy.ANATOMY_CONFIGURATION,
6733
)

src/biolexica/configs/cell.py

Lines changed: 2 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,69 +1,12 @@
11
"""Configuration for a cell lexical index."""
22

33
import semra
4+
import semra.landscape.cell
45

56
import biolexica
67

78
__all__ = ["CELL_CONFIGURATION"]
89

9-
10-
PRIORITY = [
11-
"cl",
12-
"cellosaurus",
13-
"bto",
14-
"clo",
15-
"efo",
16-
"mesh",
17-
"ccle",
18-
"depmap",
19-
]
20-
21-
22-
SEMRA_CONFIG = semra.Configuration(
23-
name="Cell and Cell Line Mappings",
24-
description="Originally a reproduction of the EFO/Cellosaurus/DepMap/CCLE scenario "
25-
"posed in the Biomappings paper, this configuration imports several different cell and "
26-
"cell line resources and identifies mappings between them.",
27-
inputs=[
28-
semra.Input(source="biomappings"),
29-
semra.Input(source="gilda"),
30-
semra.Input(prefix="cellosaurus", source="pyobo", confidence=0.99),
31-
semra.Input(prefix="bto", source="bioontologies", confidence=0.99),
32-
semra.Input(prefix="cl", source="bioontologies", confidence=0.99),
33-
semra.Input(prefix="clo", source="custom", confidence=0.99),
34-
semra.Input(prefix="efo", source="pyobo", confidence=0.99),
35-
semra.Input(
36-
prefix="depmap",
37-
source="pyobo",
38-
confidence=0.99,
39-
extras={"version": "22Q4", "standardize": True, "license": "CC-BY-4.0"},
40-
),
41-
semra.Input(
42-
prefix="ccle",
43-
source="pyobo",
44-
confidence=0.99,
45-
extras={"version": "2019"},
46-
),
47-
semra.Input(prefix="ncit", source="pyobo", confidence=0.99),
48-
semra.Input(prefix="umls", source="pyobo", confidence=0.99),
49-
],
50-
add_labels=False,
51-
priority=PRIORITY,
52-
keep_prefixes=PRIORITY,
53-
remove_imprecise=False,
54-
mutations=[
55-
semra.Mutation(source="efo", confidence=0.7),
56-
semra.Mutation(source="bto", confidence=0.7),
57-
semra.Mutation(source="cl", confidence=0.7),
58-
semra.Mutation(source="clo", confidence=0.7),
59-
semra.Mutation(source="depmap", confidence=0.7),
60-
semra.Mutation(source="ccle", confidence=0.7),
61-
semra.Mutation(source="cellosaurus", confidence=0.7),
62-
semra.Mutation(source="ncit", confidence=0.7),
63-
semra.Mutation(source="umls", confidence=0.7),
64-
],
65-
)
66-
6710
CELL_CONFIGURATION = biolexica.Configuration(
6811
inputs=[
6912
biolexica.Input(
@@ -87,5 +30,5 @@
8730
],
8831
),
8932
],
90-
mapping_configuration=SEMRA_CONFIG,
33+
mapping_configuration=semra.landscape.cell.CELL_CONFIGURATION,
9134
)

src/biolexica/configs/phenotype.py

Lines changed: 2 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,12 @@
11
"""Configuration for a phenotype lexical index."""
22

3-
import semra
3+
import semra.landscape.disease
44
from pyobo.sources.mesh import get_mesh_category_curies
55

66
import biolexica
77

88
__all__ = ["PHENOTYPE_CONFIGURATION"]
99

10-
PRIORITY = [
11-
"doid",
12-
"mondo",
13-
"hp",
14-
"symp",
15-
"mesh",
16-
"efo",
17-
]
18-
19-
SEMRA_CONFIG = semra.Configuration(
20-
name="Cell and Cell Line Mappings",
21-
description="Originally a reproduction of the EFO/Cellosaurus/DepMap/CCLE scenario "
22-
"posed in the Biomappings paper, this configuration imports several different cell and "
23-
"cell line resources and identifies mappings between them.",
24-
inputs=[
25-
semra.Input(source="biomappings"),
26-
semra.Input(source="gilda"),
27-
semra.Input(prefix="doid", source="pyobo", confidence=0.99),
28-
semra.Input(prefix="mondo", source="pyobo", confidence=0.99),
29-
semra.Input(prefix="hp", source="pyobo", confidence=0.99),
30-
semra.Input(prefix="symp", source="pyobo", confidence=0.99),
31-
semra.Input(prefix="mesh", source="pyobo", confidence=0.99),
32-
semra.Input(prefix="efo", source="pyobo", confidence=0.99),
33-
semra.Input(prefix="umls", source="pyobo", confidence=0.99),
34-
semra.Input(prefix="ncit", source="pyobo", confidence=0.99),
35-
],
36-
add_labels=False,
37-
priority=PRIORITY,
38-
keep_prefixes=PRIORITY,
39-
remove_imprecise=False,
40-
mutations=[
41-
semra.Mutation(source="doid", confidence=0.7),
42-
semra.Mutation(source="mondo", confidence=0.7),
43-
semra.Mutation(source="hp", confidence=0.7),
44-
semra.Mutation(source="symp", confidence=0.7),
45-
semra.Mutation(source="umls", confidence=0.7),
46-
semra.Mutation(source="ncit", confidence=0.7),
47-
],
48-
)
49-
5010
PHENOTYPE_CONFIGURATION = biolexica.Configuration(
5111
inputs=[
5212
biolexica.Input(source="doid", processor="pyobo"),
@@ -67,5 +27,5 @@
6727
biolexica.Input(source="umls", processor="pyobo", ancestors=["umls:C0012634"]),
6828
],
6929
excludes=["doid:4"],
70-
mapping_configuration=SEMRA_CONFIG,
30+
mapping_configuration=semra.landscape.disease.DISEASE_CONFIGURATION,
7131
)

0 commit comments

Comments
 (0)