44
55import flask_restless
66import gunicorn .app .base
7- from dbcat import Catalog
7+ from dbcat import Catalog , init_db
88from dbcat .catalog import CatColumn
99from dbcat .catalog .db import DbScanner
1010from dbcat .catalog .models import (
@@ -66,23 +66,24 @@ def get(self):
6666 edges = []
6767
6868 args = self ._parser .parse_args ()
69- column_edges = self ._catalog .get_column_lineages (args ["job_ids" ])
70- for edge in column_edges :
71- nodes .append (self ._column_info (edge .source ))
72- nodes .append (self ._column_info (edge .target ))
73- nodes .append (self ._job_info (edge .job_execution .job ))
74- edges .append (
75- {
76- "source" : "column:{}" .format (edge .source_id ),
77- "target" : "task:{}" .format (edge .job_execution .job_id ),
78- }
79- )
80- edges .append (
81- {
82- "source" : "task:{}" .format (edge .job_execution .job_id ),
83- "target" : "column:{}" .format (edge .target_id ),
84- }
85- )
69+ with self ._catalog .managed_session :
70+ column_edges = self ._catalog .get_column_lineages (args ["job_ids" ])
71+ for edge in column_edges :
72+ nodes .append (self ._column_info (edge .source ))
73+ nodes .append (self ._column_info (edge .target ))
74+ nodes .append (self ._job_info (edge .job_execution .job ))
75+ edges .append (
76+ {
77+ "source" : "column:{}" .format (edge .source_id ),
78+ "target" : "task:{}" .format (edge .job_execution .job_id ),
79+ }
80+ )
81+ edges .append (
82+ {
83+ "source" : "task:{}" .format (edge .job_execution .job_id ),
84+ "target" : "column:{}" .format (edge .target_id ),
85+ }
86+ )
8687
8788 return {"nodes" : nodes , "edges" : edges }
8889
@@ -106,14 +107,12 @@ def __init__(self, catalog: Catalog):
106107 self ._parser .add_argument ("id" , required = True , help = "ID of the resource" )
107108
108109 def post (self ):
109- try :
110- args = self . _parser . parse_args ( )
111- logging . debug ( "Args for scanning: {}" . format ( args ))
110+ args = self . _parser . parse_args ()
111+ logging . debug ( "Args for scanning: {}" . format ( args ) )
112+ with self . _catalog . managed_session :
112113 source = self ._catalog .get_source_by_id (int (args ["id" ]))
113114 DbScanner (self ._catalog , source ).scan ()
114115 return "Scanned {}" .format (source .fqdn ), 200
115- finally :
116- self ._catalog .scoped_session .remove ()
117116
118117
119118class Parse (Resource ):
@@ -134,27 +133,26 @@ def post(self):
134133 raise ParseErrorHTTP (description = str (error ))
135134
136135 try :
137- source = self ._catalog .get_source_by_id (args ["source_id" ])
138- logging .debug ("Parsing query for source {}" .format (source ))
139- binder = parse_dml_query (
140- catalog = self ._catalog , parsed = parsed , source = source
141- )
142-
143- return (
144- {
145- "select_tables" : [table .name for table in binder .tables ],
146- "select_columns" : [context .alias for context in binder .columns ],
147- },
148- 200 ,
149- )
136+ with self ._catalog .managed_session :
137+ source = self ._catalog .get_source_by_id (args ["source_id" ])
138+ logging .debug ("Parsing query for source {}" .format (source ))
139+ binder = parse_dml_query (
140+ catalog = self ._catalog , parsed = parsed , source = source
141+ )
142+
143+ return (
144+ {
145+ "select_tables" : [table .name for table in binder .tables ],
146+ "select_columns" : [context .alias for context in binder .columns ],
147+ },
148+ 200 ,
149+ )
150150 except TableNotFound as table_error :
151151 raise TableNotFoundHTTP (description = str (table_error ))
152152 except ColumnNotFound as column_error :
153153 raise ColumnNotFoundHTTP (description = str (column_error ))
154154 except SemanticError as semantic_error :
155155 raise SemanticErrorHTTP (description = str (semantic_error ))
156- finally :
157- self ._catalog .scoped_session .remove ()
158156
159157
160158class Analyze (Resource ):
@@ -182,45 +180,44 @@ def post(self):
182180 raise ParseErrorHTTP (description = str (error ))
183181
184182 try :
185- source = self ._catalog .get_source_by_id (args ["source_id" ])
186- logging .debug ("Parsing query for source {}" .format (source ))
187- chosen_visitor = analyze_dml_query (self ._catalog , parsed , source )
188- job_execution = extract_lineage (
189- catalog = self ._catalog ,
190- visited_query = chosen_visitor ,
191- source = source ,
192- parsed = parsed ,
193- start_time = datetime .datetime .fromisoformat (args ["start_time" ]),
194- end_time = datetime .datetime .fromisoformat (args ["end_time" ]),
195- )
196-
197- return (
198- {
199- "data" : {
200- "id" : job_execution .id ,
201- "type" : "job_executions" ,
202- "attributes" : {
203- "job_id" : job_execution .job_id ,
204- "started_at" : job_execution .started_at .strftime (
205- "%Y-%m-%d %H:%M:%S"
206- ),
207- "ended_at" : job_execution .ended_at .strftime (
208- "%Y-%m-%d %H:%M:%S"
209- ),
210- "status" : job_execution .status .name ,
211- },
212- }
213- },
214- 200 ,
215- )
183+ with self ._catalog .managed_session :
184+ source = self ._catalog .get_source_by_id (args ["source_id" ])
185+ logging .debug ("Parsing query for source {}" .format (source ))
186+ chosen_visitor = analyze_dml_query (self ._catalog , parsed , source )
187+ job_execution = extract_lineage (
188+ catalog = self ._catalog ,
189+ visited_query = chosen_visitor ,
190+ source = source ,
191+ parsed = parsed ,
192+ start_time = datetime .datetime .fromisoformat (args ["start_time" ]),
193+ end_time = datetime .datetime .fromisoformat (args ["end_time" ]),
194+ )
195+
196+ return (
197+ {
198+ "data" : {
199+ "id" : job_execution .id ,
200+ "type" : "job_executions" ,
201+ "attributes" : {
202+ "job_id" : job_execution .job_id ,
203+ "started_at" : job_execution .started_at .strftime (
204+ "%Y-%m-%d %H:%M:%S"
205+ ),
206+ "ended_at" : job_execution .ended_at .strftime (
207+ "%Y-%m-%d %H:%M:%S"
208+ ),
209+ "status" : job_execution .status .name ,
210+ },
211+ }
212+ },
213+ 200 ,
214+ )
216215 except TableNotFound as table_error :
217216 raise TableNotFoundHTTP (description = str (table_error ))
218217 except ColumnNotFound as column_error :
219218 raise ColumnNotFoundHTTP (description = str (column_error ))
220219 except SemanticError as semantic_error :
221220 raise SemanticErrorHTTP (description = str (semantic_error ))
222- finally :
223- self ._catalog .scoped_session .remove ()
224221
225222
226223class Server (gunicorn .app .base .BaseApplication ):
@@ -289,6 +286,8 @@ def create_server(
289286 pool_pre_ping = True
290287 )
291288
289+ init_db (catalog )
290+
292291 restful_catalog = Catalog (
293292 ** catalog_options ,
294293 connect_args = {"application_name" : "data-lineage:restful" },
@@ -300,7 +299,7 @@ def create_server(
300299 # Create CRUD APIs
301300 methods = ["DELETE" , "GET" , "PATCH" , "POST" ]
302301 url_prefix = "/api/v1/catalog"
303- api_manager = flask_restless .APIManager (app , catalog .scoped_session )
302+ api_manager = flask_restless .APIManager (app , catalog .get_scoped_session () )
304303 api_manager .create_api (
305304 CatSource ,
306305 methods = methods ,
0 commit comments