Skip to content
This repository was archived by the owner on Jan 26, 2026. It is now read-only.

Commit fa63423

Browse files
committed
feature: Update dbcat version to 0.6.1
This dependency update provides SQL migrations using Alembic, as well as managed sessions to eliminate the chance of leaked database connections. Other minor improvements: * Remove .idea config files and setup.py * Fix path in README * Update version to 0.8.0 * Fix code in example.py to analyze queries; the wrong API was used.
1 parent 070557b commit fa63423

20 files changed

Lines changed: 297 additions & 382 deletions

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
66
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
77

8+
.idea
9+
810
# User-specific stuff
911
.idea/**/workspace.xml
1012
.idea/**/tasks.xml

.idea/.gitignore

Lines changed: 0 additions & 2 deletions
This file was deleted.

.idea/data-lineage.iml

Lines changed: 0 additions & 18 deletions
This file was deleted.

.idea/inspectionProfiles/profiles_settings.xml

Lines changed: 0 additions & 6 deletions
This file was deleted.

.idea/misc.xml

Lines changed: 0 additions & 7 deletions
This file was deleted.

.idea/modules.xml

Lines changed: 0 additions & 8 deletions
This file was deleted.

.idea/vcs.xml

Lines changed: 0 additions & 6 deletions
This file was deleted.

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ Download the docker-compose file from Github repository.
3535
# in a new directory run
3636
wget https://raw.githubusercontent.com/tokern/data-lineage/master/install-manifests/docker-compose/catalog-demo.yml
3737
# or run
38-
curl https://raw.githubusercontent.com/tokern/data-lineage/master/install-manifests/docker-compose/catalog-demo.yml -o docker-compose.yml
38+
curl https://raw.githubusercontent.com/tokern/data-lineage/master/install-manifests/docker-compose/tokern-lineage-engine.yml -o docker-compose.yml
3939

4040

4141
Run docker-compose

data_lineage/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# flake8: noqa
2-
__version__ = "0.7.8"
2+
__version__ = "0.8.0"
33

44
import datetime
55
import json

data_lineage/server.py

Lines changed: 71 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import flask_restless
66
import gunicorn.app.base
7-
from dbcat import Catalog
7+
from dbcat import Catalog, init_db
88
from dbcat.catalog import CatColumn
99
from dbcat.catalog.db import DbScanner
1010
from dbcat.catalog.models import (
@@ -66,23 +66,24 @@ def get(self):
6666
edges = []
6767

6868
args = self._parser.parse_args()
69-
column_edges = self._catalog.get_column_lineages(args["job_ids"])
70-
for edge in column_edges:
71-
nodes.append(self._column_info(edge.source))
72-
nodes.append(self._column_info(edge.target))
73-
nodes.append(self._job_info(edge.job_execution.job))
74-
edges.append(
75-
{
76-
"source": "column:{}".format(edge.source_id),
77-
"target": "task:{}".format(edge.job_execution.job_id),
78-
}
79-
)
80-
edges.append(
81-
{
82-
"source": "task:{}".format(edge.job_execution.job_id),
83-
"target": "column:{}".format(edge.target_id),
84-
}
85-
)
69+
with self._catalog.managed_session:
70+
column_edges = self._catalog.get_column_lineages(args["job_ids"])
71+
for edge in column_edges:
72+
nodes.append(self._column_info(edge.source))
73+
nodes.append(self._column_info(edge.target))
74+
nodes.append(self._job_info(edge.job_execution.job))
75+
edges.append(
76+
{
77+
"source": "column:{}".format(edge.source_id),
78+
"target": "task:{}".format(edge.job_execution.job_id),
79+
}
80+
)
81+
edges.append(
82+
{
83+
"source": "task:{}".format(edge.job_execution.job_id),
84+
"target": "column:{}".format(edge.target_id),
85+
}
86+
)
8687

8788
return {"nodes": nodes, "edges": edges}
8889

@@ -106,14 +107,12 @@ def __init__(self, catalog: Catalog):
106107
self._parser.add_argument("id", required=True, help="ID of the resource")
107108

108109
def post(self):
109-
try:
110-
args = self._parser.parse_args()
111-
logging.debug("Args for scanning: {}".format(args))
110+
args = self._parser.parse_args()
111+
logging.debug("Args for scanning: {}".format(args))
112+
with self._catalog.managed_session:
112113
source = self._catalog.get_source_by_id(int(args["id"]))
113114
DbScanner(self._catalog, source).scan()
114115
return "Scanned {}".format(source.fqdn), 200
115-
finally:
116-
self._catalog.scoped_session.remove()
117116

118117

119118
class Parse(Resource):
@@ -134,27 +133,26 @@ def post(self):
134133
raise ParseErrorHTTP(description=str(error))
135134

136135
try:
137-
source = self._catalog.get_source_by_id(args["source_id"])
138-
logging.debug("Parsing query for source {}".format(source))
139-
binder = parse_dml_query(
140-
catalog=self._catalog, parsed=parsed, source=source
141-
)
142-
143-
return (
144-
{
145-
"select_tables": [table.name for table in binder.tables],
146-
"select_columns": [context.alias for context in binder.columns],
147-
},
148-
200,
149-
)
136+
with self._catalog.managed_session:
137+
source = self._catalog.get_source_by_id(args["source_id"])
138+
logging.debug("Parsing query for source {}".format(source))
139+
binder = parse_dml_query(
140+
catalog=self._catalog, parsed=parsed, source=source
141+
)
142+
143+
return (
144+
{
145+
"select_tables": [table.name for table in binder.tables],
146+
"select_columns": [context.alias for context in binder.columns],
147+
},
148+
200,
149+
)
150150
except TableNotFound as table_error:
151151
raise TableNotFoundHTTP(description=str(table_error))
152152
except ColumnNotFound as column_error:
153153
raise ColumnNotFoundHTTP(description=str(column_error))
154154
except SemanticError as semantic_error:
155155
raise SemanticErrorHTTP(description=str(semantic_error))
156-
finally:
157-
self._catalog.scoped_session.remove()
158156

159157

160158
class Analyze(Resource):
@@ -182,45 +180,44 @@ def post(self):
182180
raise ParseErrorHTTP(description=str(error))
183181

184182
try:
185-
source = self._catalog.get_source_by_id(args["source_id"])
186-
logging.debug("Parsing query for source {}".format(source))
187-
chosen_visitor = analyze_dml_query(self._catalog, parsed, source)
188-
job_execution = extract_lineage(
189-
catalog=self._catalog,
190-
visited_query=chosen_visitor,
191-
source=source,
192-
parsed=parsed,
193-
start_time=datetime.datetime.fromisoformat(args["start_time"]),
194-
end_time=datetime.datetime.fromisoformat(args["end_time"]),
195-
)
196-
197-
return (
198-
{
199-
"data": {
200-
"id": job_execution.id,
201-
"type": "job_executions",
202-
"attributes": {
203-
"job_id": job_execution.job_id,
204-
"started_at": job_execution.started_at.strftime(
205-
"%Y-%m-%d %H:%M:%S"
206-
),
207-
"ended_at": job_execution.ended_at.strftime(
208-
"%Y-%m-%d %H:%M:%S"
209-
),
210-
"status": job_execution.status.name,
211-
},
212-
}
213-
},
214-
200,
215-
)
183+
with self._catalog.managed_session:
184+
source = self._catalog.get_source_by_id(args["source_id"])
185+
logging.debug("Parsing query for source {}".format(source))
186+
chosen_visitor = analyze_dml_query(self._catalog, parsed, source)
187+
job_execution = extract_lineage(
188+
catalog=self._catalog,
189+
visited_query=chosen_visitor,
190+
source=source,
191+
parsed=parsed,
192+
start_time=datetime.datetime.fromisoformat(args["start_time"]),
193+
end_time=datetime.datetime.fromisoformat(args["end_time"]),
194+
)
195+
196+
return (
197+
{
198+
"data": {
199+
"id": job_execution.id,
200+
"type": "job_executions",
201+
"attributes": {
202+
"job_id": job_execution.job_id,
203+
"started_at": job_execution.started_at.strftime(
204+
"%Y-%m-%d %H:%M:%S"
205+
),
206+
"ended_at": job_execution.ended_at.strftime(
207+
"%Y-%m-%d %H:%M:%S"
208+
),
209+
"status": job_execution.status.name,
210+
},
211+
}
212+
},
213+
200,
214+
)
216215
except TableNotFound as table_error:
217216
raise TableNotFoundHTTP(description=str(table_error))
218217
except ColumnNotFound as column_error:
219218
raise ColumnNotFoundHTTP(description=str(column_error))
220219
except SemanticError as semantic_error:
221220
raise SemanticErrorHTTP(description=str(semantic_error))
222-
finally:
223-
self._catalog.scoped_session.remove()
224221

225222

226223
class Server(gunicorn.app.base.BaseApplication):
@@ -289,6 +286,8 @@ def create_server(
289286
pool_pre_ping=True
290287
)
291288

289+
init_db(catalog)
290+
292291
restful_catalog = Catalog(
293292
**catalog_options,
294293
connect_args={"application_name": "data-lineage:restful"},
@@ -300,7 +299,7 @@ def create_server(
300299
# Create CRUD APIs
301300
methods = ["DELETE", "GET", "PATCH", "POST"]
302301
url_prefix = "/api/v1/catalog"
303-
api_manager = flask_restless.APIManager(app, catalog.scoped_session)
302+
api_manager = flask_restless.APIManager(app, catalog.get_scoped_session())
304303
api_manager.create_api(
305304
CatSource,
306305
methods=methods,

0 commit comments

Comments
 (0)