Skip to content

Commit 2fd928f

Browse files
committed
added support for preferred term
1 parent d540ee6 commit 2fd928f

File tree

2 files changed: 13 additions (+13) and 8 deletions (-8).

2 files changed

+13
-8
lines changed

quickumls.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -205,7 +205,7 @@ def _get_all_matches(self, ngrams):
205205
for match in ngram_cands:
206206
cuisem_match = sorted(self.cuisem_db.get(match))
207207

208-
for cui, semtypes in cuisem_match:
208+
for cui, semtypes, preferred in cuisem_match:
209209
match_similarity = toolbox.get_similarity(
210210
x=ngram_normalized.lower(),
211211
y=match.lower(),
@@ -232,15 +232,16 @@ def _get_all_matches(self, ngrams):
232232
'term': toolbox.safe_unicode(match),
233233
'cui': cui,
234234
'similarity': match_similarity,
235-
'semtypes': semtypes
235+
'semtypes': semtypes,
236+
'preferred': preferred
236237
}
237238
)
238239

239240
if len(ngram_matches) > 0:
240241
matches.append(
241242
sorted(
242243
ngram_matches,
243-
key=lambda m: m['similarity'],
244+
key=lambda m: m['similarity'] + m['preferred'],
244245
reverse=True
245246
)
246247
)

toolbox.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ def prepare_string_for_db_input(s):
6565

6666

6767
def make_ngrams(s, n):
68-
s = u'$${}$$'.format(safe_unicode(s))
68+
s = u'{t}{s}{t}'.format(s=safe_unicode(s), t=('$' * (n - 1)))
6969
return (s[i:i+n] for i in xrange3(len(s) - n + 1))
7070

7171

@@ -236,7 +236,7 @@ def has_term(self, term):
236236
except KeyError:
237237
return
238238

239-
def insert(self, term, cui, semtypes):
239+
def insert(self, term, cui, semtypes, is_preferred):
240240
term = prepare_string_for_db_input(safe_unicode(term))
241241
cui = prepare_string_for_db_input(safe_unicode(cui))
242242

@@ -247,7 +247,7 @@ def insert(self, term, cui, semtypes):
247247
except KeyError:
248248
cuis = set()
249249

250-
cuis.add(cui)
250+
cuis.add((cui, is_preferred))
251251
self.cui_db.Put(db_key_encode(term), pickle.dumps(cuis))
252252

253253
try:
@@ -262,7 +262,11 @@ def get(self, term):
262262

263263
cuis = pickle.loads(self.cui_db.Get(db_key_encode(term)))
264264
matches = (
265-
(cui, pickle.loads(self.semtypes_db.Get(db_key_encode(cui))))
266-
for cui in cuis
265+
(
266+
cui,
267+
pickle.loads(self.semtypes_db.Get(db_key_encode(cui))),
268+
is_preferred
269+
)
270+
for cui, is_preferred in cuis
267271
)
268272
return matches

0 commit comments

Comments
 (0)