1+ import logging
12import os
2- from sqlmodel import SQLModel , create_engine , Session
3+ from typing import Any , Dict , List , Tuple , Union
4+
35from sqlalchemy import text
4- from typing import List , Dict , Any , Union , Tuple
5- import logging
6+ from sqlmodel import Session , create_engine
67
# Configure logging
logger = logging.getLogger(__name__)

# Database configuration.
# SECURITY: the connection string is taken from the DATABASE_URL environment
# variable only. A previous revision shipped a real username/password as the
# fallback value; credentials must never live in source control, and the ones
# that were committed should be rotated.
DATABASE_URL = os.getenv("DATABASE_URL")
if not DATABASE_URL:
    # Fail early with an explicit message instead of letting create_engine()
    # raise an obscure error on a missing URL.
    raise RuntimeError(
        "DATABASE_URL environment variable is not set; "
        "export a PostgreSQL connection URL before importing this module"
    )

# Create database engine. pool_pre_ping=True makes the pool test each
# connection on checkout so stale connections are replaced transparently.
db_engine = create_engine(DATABASE_URL, pool_pre_ping=True)
1819
20+
1921class DatabaseClient :
2022 """SQL client for connecting to the main PostgreSQL database using SQLModel/SQLAlchemy"""
21-
23+
def __init__(self, engine=None):
    """
    Set up the client with the engine its queries will run against.

    Args:
        engine: SQLAlchemy engine to use. Any falsy value (including the
            default ``None``) selects the module-level ``db_engine``.
    """
    if engine:
        self.engine = engine
    else:
        self.engine = db_engine
30-
31- def execute_query (self , query : str , params : Union [Dict , Tuple , None ] = None ) -> List [Dict [str , Any ]]:
32+
33+ def execute_query (
34+ self , query : str , params : Union [Dict , Tuple , None ] = None
35+ ) -> List [Dict [str , Any ]]:
3236 """
3337 Execute a SQL query and return results as a list of dictionaries
34-
38+
3539 Args:
3640 query: SQL query string
3741 params: Query parameters (dict for named params, tuple for positional)
38-
42+
3943 Returns:
4044 List of dictionaries containing query results
4145 """
4246 try :
4347 with Session (self .engine ) as session :
4448 # Execute the query using SQLAlchemy text()
4549 result = session .execute (text (query ), params or {})
46-
50+
4751 # Convert results to list of dictionaries
4852 if result .returns_rows :
4953 columns = result .keys ()
@@ -53,11 +57,11 @@ def execute_query(self, query: str, params: Union[Dict, Tuple, None] = None) ->
5357 except Exception as e :
5458 logger .error (f"Error executing query: { e } " )
5559 raise
56-
60+
5761 def list_tables (self ) -> List [str ]:
5862 """
5963 List all tables in the database
60-
64+
6165 Returns:
6266 List of table names
6367 """
@@ -67,17 +71,17 @@ def list_tables(self) -> List[str]:
6771 WHERE table_schema = 'public'
6872 ORDER BY table_name;
6973 """
70-
74+
7175 result = self .execute_query (query )
72- return [row [' table_name' ] for row in result ]
76+ return [row [" table_name" ] for row in result ]
7377
7478 def get_table_info (self , table_name : str ) -> Dict [str , Any ]:
7579 """
7680 Get information about a table structure
77-
81+
7882 Args:
7983 table_name: Name of the table
80-
84+
8185 Returns:
8286 Dictionary containing table information
8387 """
@@ -91,29 +95,27 @@ def get_table_info(self, table_name: str) -> Dict[str, Any]:
9195 WHERE table_name = :table_name
9296 ORDER BY ordinal_position;
9397 """
94-
98+
9599 columns = self .execute_query (query , {"table_name" : table_name })
96-
100+
97101 # Get row count
98102 count_query = f"SELECT COUNT(*) as count FROM { table_name } "
99103 count_result = self .execute_query (count_query )
100- row_count = count_result [0 ]['count' ] if count_result else 0
101-
102- return {
103- 'table_name' : table_name ,
104- 'columns' : columns ,
105- 'row_count' : row_count
106- }
107-
108- def query_table (self , table_name : str , limit : int = 10 , offset : int = 0 ) -> List [Dict [str , Any ]]:
104+ row_count = count_result [0 ]["count" ] if count_result else 0
105+
106+ return {"table_name" : table_name , "columns" : columns , "row_count" : row_count }
107+
108+ def query_table (
109+ self , table_name : str , limit : int = 10 , offset : int = 0
110+ ) -> List [Dict [str , Any ]]:
109111 """
110112 Query any table in the database
111-
113+
112114 Args:
113115 table_name: Name of the table to query
114116 limit: Number of rows to return
115117 offset: Number of rows to skip
116-
118+
117119 Returns:
118120 List of dictionaries containing table data
119121 """
@@ -122,50 +124,59 @@ def query_table(self, table_name: str, limit: int = 10, offset: int = 0) -> List
122124 ORDER BY 1
123125 LIMIT :limit OFFSET :offset
124126 """
125-
127+
126128 return self .execute_query (query , {"limit" : limit , "offset" : offset })
127-
def search_table(
    self, table_name: str, search_term: str, limit: int = 10
) -> List[Dict[str, Any]]:
    """
    Search any table by text content in string columns

    Every column whose data type contains "char" or "text" is compared
    case-insensitively (ILIKE) against ``%search_term%``. Rows matching any
    of those columns are returned, ordered by the first column.

    Args:
        table_name: Name of the table to search. Must be a bare identifier
            (letters, digits, underscores; not starting with a digit)
            because it is interpolated into the SQL text.
        search_term: Text to search for. It is bound as a parameter; ``%``
            and ``_`` inside it keep their LIKE wildcard meaning.
        limit: Number of results to return

    Returns:
        List of dictionaries containing matching data, or an empty list when
        the table has no text columns

    Raises:
        ValueError: If table_name is not a bare identifier
    """
    # table_name cannot be sent as a bound parameter and ends up inside SQL
    # text (both below and in get_table_info's row count), so reject anything
    # that could smuggle in extra SQL before touching the database.
    if not isinstance(table_name, str) or not table_name.isidentifier():
        raise ValueError(f"Invalid table name: {table_name!r}")

    # First get column info to find text columns
    table_info = self.get_table_info(table_name)
    text_columns = []
    for col in table_info["columns"]:
        data_type = col["data_type"].lower()
        if "char" in data_type or "text" in data_type:
            text_columns.append(col["column_name"])

    if not text_columns:
        logger.warning("No text columns found in table %s", table_name)
        return []

    # Build dynamic search query. Column names come from the catalog with
    # their exact spelling, so double-quote them (escaping embedded quotes)
    # to keep mixed-case or reserved-word columns working.
    search_conditions = " OR ".join(
        '"{}" ILIKE :search_pattern'.format(col.replace('"', '""'))
        for col in text_columns
    )
    query = f"""
        SELECT * FROM {table_name}
        WHERE {search_conditions}
        ORDER BY 1
        LIMIT :limit
    """

    search_pattern = f"%{search_term}%"
    return self.execute_query(
        query, {"search_pattern": search_pattern, "limit": limit}
    )
160169
161- def query_policies_abstracts_all (self , limit : int = 10 , offset : int = 0 ) -> List [Dict [str , Any ]]:
170+ def query_policies_abstracts_all (
171+ self , limit : int = 10 , offset : int = 0
172+ ) -> List [Dict [str , Any ]]:
162173 """
163174 Query the policies_abstracts_all table
164-
175+
165176 Args:
166177 limit: Number of rows to return
167178 offset: Number of rows to skip
168-
179+
169180 Returns:
170181 List of dictionaries containing policy data
171182 """
@@ -174,13 +185,15 @@ def query_policies_abstracts_all(self, limit: int = 10, offset: int = 0) -> List
174185 ORDER BY openalex_id
175186 LIMIT :limit OFFSET :offset
176187 """
177-
178- return self .execute_query (query , {"limit" : limit , "offset" : offset })
179-
180- def save_extraction_results (self , openalex_id : str , extraction_data : Dict [str , Any ], conclusion : str ) -> bool :
188+
189+ return self .execute_query (query , {"limit" : limit , "offset" : offset })
190+
191+ def save_extraction_results (
192+ self , openalex_id : str , extraction_data : Dict [str , Any ], conclusion : str
193+ ) -> bool :
181194 """
182195 Save extraction results to the database
183-
196+
184197 Args:
185198 openalex_id: OpenAlex ID of the policy
186199 extraction_data: Extracted data from AI system
@@ -195,17 +208,17 @@ def save_extraction_results(self, openalex_id: str, extraction_data: Dict[str, A
195208 conclusion = EXCLUDED.conclusion,
196209 updated_at = CURRENT_TIMESTAMP
197210 """
198-
211+
199212 # Execute the insert query
200213 result = self .execute_query (
201- insert_query ,
214+ insert_query ,
202215 {
203216 "openalex_id" : openalex_id ,
204217 "conclusion" : conclusion ,
205218 "extraction_data" : extraction_data ,
206- }
219+ },
207220 )
208-
221+
209222 def create_policy_extractions_table (self ):
210223 """
211224 Create the policy_extractions table
@@ -220,4 +233,4 @@ def create_policy_extractions_table(self):
220233 updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
221234 )
222235 """
223- self .execute_query (query )
236+ self .execute_query (query )
0 commit comments