|
1 |
| -from typing import List, Dict, Tuple, NamedTuple, Type |
| 1 | +from typing import Optional, List, Dict, Tuple, NamedTuple, Type |
2 | 2 | import json
|
3 | 3 | import datetime
|
4 | 4 | from collections import defaultdict
|
@@ -41,38 +41,38 @@ class LinkerPaths(NamedTuple):
|
41 | 41 |
|
42 | 42 |
|
43 | 43 | UmlsLinkerPaths = LinkerPaths(
|
44 |
| - ann_index="https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/data/linkers/2020-10-09/umls/nmslib_index.bin", # noqa |
45 |
| - tfidf_vectorizer="https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/data/linkers/2020-10-09/umls/tfidf_vectorizer.joblib", # noqa |
46 |
| - tfidf_vectors="https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/data/linkers/2020-10-09/umls/tfidf_vectors_sparse.npz", # noqa |
47 |
| - concept_aliases_list="https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/data/linkers/2020-10-09/umls/concept_aliases.json", # noqa |
| 44 | + ann_index="https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/data/linkers/2023-04-23/umls/nmslib_index.bin", # noqa |
| 45 | + tfidf_vectorizer="https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/data/linkers/2023-04-23/umls/tfidf_vectorizer.joblib", # noqa |
| 46 | + tfidf_vectors="https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/data/linkers/2023-04-23/umls/tfidf_vectors_sparse.npz", # noqa |
| 47 | + concept_aliases_list="https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/data/linkers/2023-04-23/umls/concept_aliases.json", # noqa |
48 | 48 | )
|
49 | 49 |
|
50 | 50 | MeshLinkerPaths = LinkerPaths(
|
51 |
| - ann_index="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2020-10-09/mesh/nmslib_index.bin", # noqa |
52 |
| - tfidf_vectorizer="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2020-10-09/mesh/tfidf_vectorizer.joblib", # noqa |
53 |
| - tfidf_vectors="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2020-10-09/mesh/tfidf_vectors_sparse.npz", # noqa |
54 |
| - concept_aliases_list="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2020-10-09/mesh/concept_aliases.json", # noqa |
| 51 | + ann_index="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/mesh/nmslib_index.bin", # noqa |
| 52 | + tfidf_vectorizer="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/mesh/tfidf_vectorizer.joblib", # noqa |
| 53 | + tfidf_vectors="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/mesh/tfidf_vectors_sparse.npz", # noqa |
| 54 | + concept_aliases_list="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/mesh/concept_aliases.json", # noqa |
55 | 55 | )
|
56 | 56 |
|
57 | 57 | GeneOntologyLinkerPaths = LinkerPaths(
|
58 |
| - ann_index="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2020-10-09/go/nmslib_index.bin", # noqa |
59 |
| - tfidf_vectorizer="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2020-10-09/go/tfidf_vectorizer.joblib", # noqa |
60 |
| - tfidf_vectors="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2020-10-09/go/tfidf_vectors_sparse.npz", # noqa |
61 |
| - concept_aliases_list="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2020-10-09/go/concept_aliases.json", # noqa |
| 58 | + ann_index="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/go/nmslib_index.bin", # noqa |
| 59 | + tfidf_vectorizer="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/go/tfidf_vectorizer.joblib", # noqa |
| 60 | + tfidf_vectors="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/go/tfidf_vectors_sparse.npz", # noqa |
| 61 | + concept_aliases_list="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/go/concept_aliases.json", # noqa |
62 | 62 | )
|
63 | 63 |
|
64 | 64 | HumanPhenotypeOntologyLinkerPaths = LinkerPaths(
|
65 |
| - ann_index="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2020-10-09/hpo/nmslib_index.bin", # noqa |
66 |
| - tfidf_vectorizer="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2020-10-09/hpo/tfidf_vectorizer.joblib", # noqa |
67 |
| - tfidf_vectors="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2020-10-09/hpo/tfidf_vectors_sparse.npz", # noqa |
68 |
| - concept_aliases_list="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2020-10-09/hpo/concept_aliases.json", # noqa |
| 65 | + ann_index="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/hpo/nmslib_index.bin", # noqa |
| 66 | + tfidf_vectorizer="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/hpo/tfidf_vectorizer.joblib", # noqa |
| 67 | + tfidf_vectors="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/hpo/tfidf_vectors_sparse.npz", # noqa |
| 68 | + concept_aliases_list="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/hpo/concept_aliases.json", # noqa |
69 | 69 | )
|
70 | 70 |
|
71 | 71 | RxNormLinkerPaths = LinkerPaths(
|
72 |
| - ann_index="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2020-10-09/rxnorm/nmslib_index.bin", # noqa |
73 |
| - tfidf_vectorizer="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2020-10-09/rxnorm/tfidf_vectorizer.joblib", # noqa |
74 |
| - tfidf_vectors="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2020-10-09/rxnorm/tfidf_vectors_sparse.npz", # noqa |
75 |
| - concept_aliases_list="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2020-10-09/rxnorm/concept_aliases.json", # noqa |
| 72 | + ann_index="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/rxnorm/nmslib_index.bin", # noqa |
| 73 | + tfidf_vectorizer="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/rxnorm/tfidf_vectorizer.joblib", # noqa |
| 74 | + tfidf_vectors="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/rxnorm/tfidf_vectors_sparse.npz", # noqa |
| 75 | + concept_aliases_list="https://ai2-s2-scispacy.s3-us-west-2.amazonaws.com/data/linkers/2023-04-23/rxnorm/concept_aliases.json", # noqa |
76 | 76 | )
|
77 | 77 |
|
78 | 78 |
|
@@ -196,15 +196,14 @@ class CandidateGenerator:
|
196 | 196 |
|
197 | 197 | def __init__(
|
198 | 198 | self,
|
199 |
| - ann_index: FloatIndex = None, |
200 |
| - tfidf_vectorizer: TfidfVectorizer = None, |
201 |
| - ann_concept_aliases_list: List[str] = None, |
202 |
| - kb: KnowledgeBase = None, |
| 199 | + ann_index: Optional[FloatIndex] = None, |
| 200 | + tfidf_vectorizer: Optional[TfidfVectorizer] = None, |
| 201 | + ann_concept_aliases_list: Optional[List[str]] = None, |
| 202 | + kb: Optional[KnowledgeBase] = None, |
203 | 203 | verbose: bool = False,
|
204 | 204 | ef_search: int = 200,
|
205 |
| - name: str = None, |
| 205 | + name: Optional[str] = None, |
206 | 206 | ) -> None:
|
207 |
| - |
208 | 207 | if name is not None and any(
|
209 | 208 | [ann_index, tfidf_vectorizer, ann_concept_aliases_list, kb]
|
210 | 209 | ):
|
@@ -363,7 +362,7 @@ def __call__(
|
363 | 362 |
|
364 | 363 |
|
365 | 364 | def create_tfidf_ann_index(
|
366 |
| - out_path: str, kb: KnowledgeBase = None |
| 365 | + out_path: str, kb: Optional[KnowledgeBase] = None |
367 | 366 | ) -> Tuple[List[str], TfidfVectorizer, FloatIndex]:
|
368 | 367 | """
|
369 | 368 | Build tfidf vectorizer and ann index.
|
|
0 commit comments