@@ -40,7 +40,7 @@ def get_mrconso_iterator(path, headers):
4040
4141
4242def extract_from_mrconso (
43- mrconso_path , mrsty_kpath , opts ,
43+ mrconso_path , mrsty_path , opts ,
4444 mrconso_header = HEADERS_MRCONSO , mrsty_header = HEADERS_MRSTY ):
4545
4646 start = time .time ()
@@ -69,6 +69,7 @@ def extract_from_mrconso(
6969
7070 concept_text = content ['str' ].strip ()
7171 cui = content ['cui' ]
72+ preferred = 1 if content ['ispref' ] else 0
7273
7374 if opts .lowercase :
7475 concept_text = concept_text .lower ()
@@ -81,7 +82,7 @@ def extract_from_mrconso(
8182 else :
8283 processed .add ((cui , concept_text ))
8384
84- yield (concept_text , cui , sem_types [cui ])
85+ yield (concept_text , cui , sem_types [cui ], preferred )
8586
8687
8788def parse_and_encode_ngrams (extracted_it , simstring_dir , cuisty_dir ):
@@ -93,9 +94,9 @@ def parse_and_encode_ngrams(extracted_it, simstring_dir, cuisty_dir):
9394
9495 cuisty_db = CuiSemTypesDB (cuisty_dir )
9596
96- for i , (term , cui , stys ) in enumerate (extracted_it , start = 1 ):
97+ for i , (term , cui , stys , preferred ) in enumerate (extracted_it , start = 1 ):
9798 ss_db .insert (term )
98- cuisty_db .insert (term , cui , stys )
99+ cuisty_db .insert (term , cui , stys , preferred )
99100
100101
101102def driver (opts ):
0 commit comments