11import logging
22import subprocess as sp
3- import sqlite3
43
54from concurrent .futures import ThreadPoolExecutor
65from typing import Sequence , Tuple
76
87import bakta .config as cfg
98import bakta .constants as bc
109import bakta .features .orf as orf
10+ import bakta .utils as bu
1111
1212
1313############################################################################
@@ -96,25 +96,22 @@ def lookup(features: Sequence[dict], pseudo: bool = False):
9696 no_pscc_lookups = 0
9797 try :
9898 rec_futures = []
99- with sqlite3 .connect (f"file:{ cfg .db_path .joinpath ('bakta.db' )} ?mode=ro&nolock=1&cache=shared" , uri = True , check_same_thread = False ) as conn :
100- conn .execute ('PRAGMA omit_readlock;' )
101- conn .row_factory = sqlite3 .Row
102- with ThreadPoolExecutor (max_workers = max (10 , cfg .threads )) as tpe : # use min 10 threads for IO bound non-CPU lookups
103- for feature in features :
104- uniref50_id = None
105- if (pseudo ): # if pseudogene use pseudogene info
106- if ('psc' in feature [bc .PSEUDOGENE ]):
107- uniref50_id = feature [bc .PSEUDOGENE ]['psc' ].get (DB_PSCC_COL_UNIREF50 , None )
108- else :
109- if ('psc' in feature ):
110- uniref50_id = feature ['psc' ].get (DB_PSCC_COL_UNIREF50 , None )
111- elif ('pscc' in feature ):
112- uniref50_id = feature ['pscc' ].get (DB_PSCC_COL_UNIREF50 , None )
113- if (uniref50_id is not None ):
114- if (bc .DB_PREFIX_UNIREF_50 in uniref50_id ):
115- uniref50_id = uniref50_id [9 :] # remove 'UniRef50_' prefix
116- future = tpe .submit (fetch_db_pscc_result , conn , uniref50_id )
117- rec_futures .append ((feature , future ))
99+ with ThreadPoolExecutor (max_workers = max (10 , cfg .threads )) as tpe : # use min 10 threads for IO bound non-CPU lookups
100+ for feature in features :
101+ uniref50_id = None
102+ if (pseudo ): # if pseudogene use pseudogene info
103+ if ('psc' in feature [bc .PSEUDOGENE ]):
104+ uniref50_id = feature [bc .PSEUDOGENE ]['psc' ].get (DB_PSCC_COL_UNIREF50 , None )
105+ else :
106+ if ('psc' in feature ):
107+ uniref50_id = feature ['psc' ].get (DB_PSCC_COL_UNIREF50 , None )
108+ elif ('pscc' in feature ):
109+ uniref50_id = feature ['pscc' ].get (DB_PSCC_COL_UNIREF50 , None )
110+ if (uniref50_id is not None ):
111+ if (bc .DB_PREFIX_UNIREF_50 in uniref50_id ):
112+ uniref50_id = uniref50_id [9 :] # remove 'UniRef50_' prefix
113+ future = tpe .submit (fetch_db_pscc_result , uniref50_id )
114+ rec_futures .append ((feature , future ))
118115
119116 for (feature , future ) in rec_futures :
120117 rec = future .result ()
@@ -140,12 +137,13 @@ def lookup(features: Sequence[dict], pseudo: bool = False):
140137 log .info ('looked-up=%i' , no_pscc_lookups )
141138
142139
143- def fetch_db_pscc_result (conn : sqlite3 .Connection , uniref50_id : str ):
144- c = conn .cursor ()
145- c .execute ('select * from pscc where uniref50_id=?' , (uniref50_id ,))
146- rec = c .fetchone ()
147- c .close ()
148- return rec
def fetch_db_pscc_result(uniref50_id: str):
    """Fetch the first ``pscc`` row whose ``uniref50_id`` matches.

    A connection is obtained from ``bu.get_db_connection()`` for this single
    lookup, so the function can be submitted to a thread pool without sharing
    a connection object between tasks.

    :param uniref50_id: value bound to the ``uniref50_id`` column of the
        parameterized query (the visible caller strips a leading
        'UniRef50_' prefix before submitting it).
    :return: the result of ``cursor.fetchone()`` - the first matching row, or
        ``None`` if the query yields no row (standard DB-API behaviour).
    """
    # NOTE(review): if get_db_connection() returns a plain sqlite3.Connection,
    # the `with` block only commits/rolls back and does NOT close the
    # connection - confirm the helper (or its return type) takes care of that.
    with bu.get_db_connection() as conn:
        c = conn.cursor()
        try:
            c.execute('select * from pscc where uniref50_id=?', (uniref50_id,))
            return c.fetchone()
        finally:
            # release the cursor even when execute()/fetchone() raises
            c.close()
149147
150148
151149def parse_annotation (rec ) -> dict :
0 commit comments