Skip to content
This repository was archived by the owner on Jan 19, 2023. It is now read-only.

Commit d3b9177

Browse files
Luca Florio authored
Merge pull request #1 from pleo-io/feat/ignore-undefined-metadata
2 parents b19ef96 + 790533c commit d3b9177

File tree

2 files changed: 53 additions and 40 deletions.

dbtmetabase/__init__.py

Lines changed: 14 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -5,11 +5,13 @@
55

66
__version__ = '0.5.2'
77

8-
def export(dbt_path: str,
8+
9+
def export(dbt_path: str,
910
mb_host: str, mb_user: str, mb_password: str,
1011
database: str, schema: str,
11-
mb_https = True, sync = True, sync_timeout = 30,
12-
includes = [], excludes = []):
12+
mb_https = True, sync = True, sync_timeout = 30,
13+
includes = [], excludes = [],
14+
ignore_undefined = False):
1315
"""Exports models from dbt to Metabase.
1416
1517
Arguments:
@@ -26,24 +28,26 @@ def export(dbt_path: str,
2628
sync_timeout {int} -- Synchronization timeout in seconds. (default: {30})
2729
includes {list} -- Model names to limit processing to. (default: {[]})
2830
excludes {list} -- Model names to exclude. (default: {[]})
31+
ignore_undefined {bool} -- Ignore properties not defined in dbt model. (default: {False})
2932
"""
3033

3134
mbc = MetabaseClient(mb_host, mb_user, mb_password, mb_https)
3235
models = DbtReader(dbt_path).read_models(
33-
includes=includes,
36+
includes=includes,
3437
excludes=excludes
3538
)
3639

3740
if sync:
3841
if not mbc.sync_and_wait(database, schema, models, sync_timeout):
3942
logging.critical("Sync timeout reached, models still not compatible")
4043
return
41-
42-
mbc.export_models(database, schema, models)
44+
45+
mbc.export_models(database, schema, models, ignore_undefined)
46+
4347

4448
def main(args: list = None):
4549
import argparse
46-
50+
4751
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
4852

4953
parser = argparse.ArgumentParser(
@@ -61,6 +65,7 @@ def main(args: list = None):
6165
parser.add_argument('--sync_timeout', metavar='SECS', type=int, default=30, help="synchronization timeout (in secs)")
6266
parser.add_argument('--includes', metavar='MODELS', nargs='*', default=[], help="model names to limit processing to")
6367
parser.add_argument('--excludes', metavar='MODELS', nargs='*', default=[], help="model names to exclude")
68+
parser.add_argument('--ignore_undefined', metavar='IGNORE_UNDEFINED', type=bool, default=False, help="ignore properties not defined in dbt models")
6469
parsed = parser.parse_args(args=args)
6570

6671
if parsed.command == 'export':
@@ -75,5 +80,6 @@ def main(args: list = None):
7580
sync=parsed.sync,
7681
sync_timeout=parsed.sync_timeout,
7782
includes=parsed.includes,
78-
excludes=parsed.excludes
83+
excludes=parsed.excludes,
84+
ignore_undefined=parsed.ignore_undefined
7985
)

dbtmetabase/metabase.py

Lines changed: 39 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def __init__(self, host: str, user: str, password: str, https = True):
2727
self.protocol = "https" if https else "http"
2828
self.session_id = self.get_session_id(user, password)
2929
logging.info("Session established successfully")
30-
30+
3131
def get_session_id(self, user: str, password: str) -> str:
3232
"""Obtains new session ID from API.
3333
@@ -43,7 +43,7 @@ def get_session_id(self, user: str, password: str) -> str:
4343
'username': user,
4444
'password': password
4545
})['id']
46-
46+
4747
def sync_and_wait(self, database: str, schema: str, models: list, timeout = 30) -> bool:
4848
"""Synchronize with the database and wait for schema compatibility.
4949
@@ -67,7 +67,7 @@ def sync_and_wait(self, database: str, schema: str, models: list, timeout = 30)
6767
if not database_id:
6868
logging.critical("Cannot find database by name %s", database)
6969
return
70-
70+
7171
self.api('post', f'/api/database/{database_id}/sync_schema')
7272

7373
deadline = int(time.time()) + timeout
@@ -108,35 +108,37 @@ def models_compatible(self, database_id: str, schema: str, models: list) -> bool
108108
if column_name not in table_lookup:
109109
logging.warn("Column %s not found in model %s", column_name, model_name)
110110
are_models_compatible = False
111-
111+
112112
return are_models_compatible
113113

114-
def export_models(self, database: str, schema: str, models: list):
114+
def export_models(self, database: str, schema: str, models: list, ignore_undefined: bool):
115115
"""Exports dbt models to Metabase database schema.
116116
117117
Arguments:
118118
database {str} -- Metabase database name.
119119
schema {str} -- Metabase schema name.
120120
models {list} -- List of dbt models read from project.
121+
ignore_undefined {bool} -- Ignore undefined properties.
121122
"""
122123

123124
database_id = self.find_database_id(database)
124125
if not database_id:
125126
logging.critical("Cannot find database by name %s", database)
126127
return
127-
128+
128129
table_lookup, field_lookup = self.build_metadata_lookups(database_id, schema)
129130

130131
for model in models:
131-
self.export_model(model, table_lookup, field_lookup)
132-
133-
def export_model(self, model: dict, table_lookup: dict, field_lookup: dict):
132+
self.export_model(model, table_lookup, field_lookup, ignore_undefined)
133+
134+
def export_model(self, model: dict, table_lookup: dict, field_lookup: dict, ignore_undefined: bool):
134135
"""Exports one dbt model to Metabase database schema.
135136
136137
Arguments:
137138
model {dict} -- One dbt model read from project.
138139
table_lookup {dict} -- Dictionary of Metabase tables indexed by name.
139140
field_lookup {dict} -- Dictionary of Metabase fields indexed by name, indexed by table name.
141+
ignore_undefined {bool} -- Ignore undefined properties.
140142
"""
141143

142144
model_name = model['name'].upper()
@@ -157,15 +159,16 @@ def export_model(self, model: dict, table_lookup: dict, field_lookup: dict):
157159
logging.info("Table %s is up-to-date", model_name)
158160

159161
for column in model.get('columns', []):
160-
self.export_column(model_name, column, field_lookup)
161-
162-
def export_column(self, model_name: str, column: dict, field_lookup: dict):
162+
self.export_column(model_name, column, field_lookup, ignore_undefined)
163+
164+
def export_column(self, model_name: str, column: dict, field_lookup: dict, ignore_undefined: bool):
163165
"""Exports one dbt column to Metabase database schema.
164166
165167
Arguments:
166168
model_name {str} -- One dbt model name read from project.
167169
column {dict} -- One dbt column read from project.
168170
field_lookup {dict} -- Dictionary of Metabase fields indexed by name, indexed by table name.
171+
ignore_undefined {bool} -- Ignore undefined properties.
169172
"""
170173

171174
column_name = column['name'].upper()
@@ -174,7 +177,7 @@ def export_column(self, model_name: str, column: dict, field_lookup: dict):
174177
if not field:
175178
logging.error('Field %s.%s does not exist in Metabase', model_name, column_name)
176179
return
177-
180+
178181
field_id = field['id']
179182
fk_target_field_id = None
180183
if column.get('special_type') == 'type/FK':
@@ -183,35 +186,39 @@ def export_column(self, model_name: str, column: dict, field_lookup: dict):
183186
fk_target_field_id = field_lookup.get(target_table, {}) \
184187
.get(target_field, {}) \
185188
.get('id')
186-
189+
187190
if fk_target_field_id:
188191
self.api('put', f'/api/field/{fk_target_field_id}', json={
189192
'special_type': 'type/PK'
190193
})
191194
else:
192195
logging.error("Unable to find foreign key target %s.%s", target_table, target_field)
193-
194-
# Nones are not accepted, default to normal
195-
if not column.get('visibility_type'):
196-
column['visibility_type'] = 'normal'
197196

198197
api_field = self.api('get', f'/api/field/{field_id}')
199198

200-
if api_field['description'] != column.get('description') or \
201-
api_field['special_type'] != column.get('special_type') or \
202-
api_field['visibility_type'] != column.get('visibility_type') or \
203-
api_field['fk_target_field_id'] != fk_target_field_id:
199+
payload = {}
200+
payload_fields = ['description', 'special_type', 'visibility_type']
201+
for name in payload_fields:
202+
mb_value = api_field[name]
203+
dbt_value = column.get(name)
204+
# Add null properties to payload only if they should not be ignored
205+
if mb_value != dbt_value and (dbt_value or not ignore_undefined):
206+
# Nones are not accepted, default to normal
207+
if name == 'visibility_type':
208+
payload[name] = 'normal'
209+
else:
210+
payload[name] = dbt_value
211+
212+
if api_field['fk_target_field_id'] != fk_target_field_id and (fk_target_field_id or not ignore_undefined):
213+
payload['fk_target_field_id'] = fk_target_field_id
214+
215+
if payload:
204216
# Update with new values
205-
self.api('put', f'/api/field/{field_id}', json={
206-
'description': column.get('description'),
207-
'special_type': column.get('special_type'),
208-
'visibility_type': column.get('visibility_type'),
209-
'fk_target_field_id': fk_target_field_id
210-
})
217+
self.api('put', f'/api/field/{field_id}', json=payload)
211218
logging.info("Updated field %s.%s successfully", model_name, column_name)
212219
else:
213220
logging.info("Field %s.%s is up-to-date", model_name, column_name)
214-
221+
215222
def find_database_id(self, name: str) -> str:
216223
"""Finds Metabase database ID by name.
217224
@@ -226,7 +233,7 @@ def find_database_id(self, name: str) -> str:
226233
if database['name'].upper() == name.upper():
227234
return database['id']
228235
return None
229-
236+
230237
def build_metadata_lookups(self, database_id: str, schema: str) -> (dict, dict):
231238
"""Builds table and field lookups.
232239
@@ -262,7 +269,7 @@ def build_metadata_lookups(self, database_id: str, schema: str) -> (dict, dict):
262269
table_field_lookup[field_name] = field
263270

264271
field_lookup[table_name] = table_field_lookup
265-
272+
266273
return table_lookup, field_lookup
267274

268275
def api(self, method: str, path: str, authenticated = True, critical = True, **kwargs) -> Any:
@@ -285,7 +292,7 @@ def api(self, method: str, path: str, authenticated = True, critical = True, **k
285292
kwargs['headers'] = headers
286293
else:
287294
headers = kwargs['headers'].copy()
288-
295+
289296
if authenticated:
290297
headers['X-Metabase-Session'] = self.session_id
291298

0 commit comments

Comments
 (0)