Skip to content

Commit d2cb20f

Browse files
Feat(table_diff): Add flag to warn when models lack grain and diff the remaining (#4449)
1 parent 4262061 commit d2cb20f

File tree

7 files changed

+160
-12
lines changed

7 files changed

+160
-12
lines changed

docs/guides/tablediff.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,10 @@ sqlmesh table_diff prod:dev -m "tag:finance" -m "metrics.*_daily"
169169

170170
When multiple selectors are provided, they are combined with OR logic, meaning a model matching any of the selectors will be included.
171171

172-
> Note: All models being compared must have their `grain` defined that is unique and not null, as this is used to perform the join between the tables in the two environments.
172+
!!! note
173+
All models being compared must define a `grain` that is unique and not null, because it is used to join the tables in the two environments.
174+
175+
If the `--warn-grain-check` option is used, this requirement is not enforced: instead of raising an error, SQLMesh displays a warning for the models without a defined grain and computes diffs for the remaining models.
173176

174177
## Diffing tables or views
175178

docs/reference/cli.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,8 @@ Options:
557557
floating point columns. Default: 3
558558
--skip-grain-check Disable the check for a primary key (grain) that is
559559
missing or is not unique.
560+
--warn-grain-check Warn if any selected model is missing a grain,
561+
and compute diffs for the remaining models.
560562
--temp-schema TEXT Schema used for temporary tables. It can be
561563
`CATALOG.SCHEMA` or `SCHEMA`. Default:
562564
`sqlmesh_temp`

docs/reference/notebook.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -300,8 +300,8 @@ Create a schema file containing external model schemas.
300300
%table_diff [--on [ON ...]] [--skip-columns [SKIP_COLUMNS ...]]
301301
[--model MODEL] [--where WHERE] [--limit LIMIT]
302302
[--show-sample] [--decimals DECIMALS] [--skip-grain-check]
303-
[--temp-schema SCHEMA] [--select-model [SELECT_MODEL ...]]
304-
SOURCE:TARGET
303+
[--warn-grain-check] [--temp-schema SCHEMA]
304+
[--select-model [SELECT_MODEL ...]] SOURCE:TARGET
305305
306306
Show the diff between two tables.
307307
@@ -326,6 +326,8 @@ options:
326326
floating point columns. Default: 3
327327
--skip-grain-check Disable the check for a primary key (grain) that is
328328
missing or is not unique.
329+
--warn-grain-check Warn if any selected model is missing a grain,
330+
and compute diffs for the remaining models.
329331
--temp-schema SCHEMA The schema to use for temporary tables.
330332
--select-model <[SELECT_MODEL ...]>
331333
Select specific models to diff using a pattern.

sqlmesh/cli/main.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -906,6 +906,11 @@ def create_external_models(obj: Context, **kwargs: t.Any) -> None:
906906
is_flag=True,
907907
help="Disable the check for a primary key (grain) that is missing or is not unique.",
908908
)
909+
@click.option(
910+
"--warn-grain-check",
911+
is_flag=True,
912+
help="Warn if any selected model is missing a grain, and compute diffs for the remaining models.",
913+
)
909914
@click.option(
910915
"--temp-schema",
911916
type=str,

sqlmesh/core/context.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1629,6 +1629,7 @@ def table_diff(
16291629
show_sample: bool = True,
16301630
decimals: int = 3,
16311631
skip_grain_check: bool = False,
1632+
warn_grain_check: bool = False,
16321633
temp_schema: t.Optional[str] = None,
16331634
) -> t.List[TableDiff]:
16341635
"""Show a diff between two tables.
@@ -1703,22 +1704,29 @@ def table_diff(
17031704
target_env.naming_info, adapter.dialect
17041705
)
17051706
model_on = on or model.on
1706-
models_to_diff.append((model, adapter, source, target, model_on))
17071707
if not model_on:
17081708
models_without_grain.append(model)
1709+
else:
1710+
models_to_diff.append((model, adapter, source, target, model_on))
1711+
1712+
if models_without_grain:
1713+
model_names = "\n".join(
1714+
f"─ {model.name} \n at '{model._path}'" for model in models_without_grain
1715+
)
1716+
message = (
1717+
"SQLMesh doesn't know how to join the tables for the following models:\n"
1718+
f"{model_names}\n\n"
1719+
"Please specify a `grain` in each model definition. It must be unique and not null."
1720+
)
1721+
if warn_grain_check:
1722+
self.console.log_warning(message)
1723+
else:
1724+
raise SQLMeshError(message)
17091725

17101726
if models_to_diff:
17111727
self.console.show_table_diff_details(
17121728
[model[0].name for model in models_to_diff],
17131729
)
1714-
if models_without_grain:
1715-
model_names = "\n".join(
1716-
f"─ {model.name} \n at '{model._path}'" for model in models_without_grain
1717-
)
1718-
raise SQLMeshError(
1719-
f"SQLMesh doesn't know how to join the tables for the following models:\n{model_names}\n"
1720-
"\nPlease specify the `grain` in each model definition. Must be unique and not null."
1721-
)
17221730

17231731
self.console.start_table_diff_progress(len(models_to_diff))
17241732
try:

sqlmesh/magics.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -702,6 +702,11 @@ def create_external_models(self, context: Context, line: str) -> None:
702702
action="store_true",
703703
help="Disable the check for a primary key (grain) that is missing or is not unique.",
704704
)
705+
@argument(
706+
"--warn-grain-check",
707+
action="store_true",
708+
help="Warn if any selected model is missing a grain, and compute diffs for the remaining models.",
709+
)
705710
@line_magic
706711
@pass_sqlmesh_context
707712
def table_diff(self, context: Context, line: str) -> None:
@@ -723,6 +728,7 @@ def table_diff(self, context: Context, line: str) -> None:
723728
show_sample=args.show_sample,
724729
decimals=args.decimals,
725730
skip_grain_check=args.skip_grain_check,
731+
warn_grain_check=args.warn_grain_check,
726732
)
727733

728734
@magic_arguments()

tests/core/test_table_diff.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
from sqlmesh.core.table_diff import TableDiff
1515
import numpy as np
1616

17+
from sqlmesh.utils.errors import SQLMeshError
18+
1719

1820
def create_test_console() -> t.Tuple[StringIO, TerminalConsole]:
1921
"""Creates a console and buffer for validating console output."""
@@ -829,3 +831,123 @@ def test_data_diff_empty_tables():
829831
assert (
830832
strip_ansi_codes(output) == "Neither the source nor the target table contained any records"
831833
)
834+
835+
836+
@pytest.mark.slow
837+
def test_data_diff_multiple_models_lacking_grain(sushi_context_fixed_date, capsys, caplog):
838+
# Create first model with grain
839+
expressions = d.parse(
840+
"""
841+
MODEL (name memory.sushi.grain_model, kind full, grain(key),);
842+
SELECT
843+
key,
844+
value,
845+
FROM
846+
(VALUES
847+
(1, 3),
848+
(2, 4),
849+
) AS t (key, value)
850+
"""
851+
)
852+
model_s = load_sql_based_model(expressions, dialect="snowflake")
853+
sushi_context_fixed_date.upsert_model(model_s)
854+
855+
# Create second model without grain
856+
expressions_2 = d.parse(
857+
"""
858+
MODEL (name memory.sushi.no_grain_model, kind full,);
859+
SELECT
860+
key,
861+
value as amount,
862+
FROM
863+
memory.sushi.grain_model
864+
"""
865+
)
866+
model_s2 = load_sql_based_model(expressions_2, dialect="snowflake")
867+
sushi_context_fixed_date.upsert_model(model_s2)
868+
869+
sushi_context_fixed_date.plan(
870+
"source_dev",
871+
no_prompts=True,
872+
auto_apply=True,
873+
skip_tests=True,
874+
start="2023-01-31",
875+
end="2023-01-31",
876+
)
877+
878+
# Modify first model
879+
model = sushi_context_fixed_date.models['"MEMORY"."SUSHI"."GRAIN_MODEL"']
880+
modified_model = model.dict()
881+
modified_model["query"] = (
882+
exp.select("*")
883+
.from_(model.query.subquery())
884+
.union("SELECT key, value FROM (VALUES (1, 6),(2,3),) AS t (key, value)")
885+
)
886+
modified_sqlmodel = SqlModel(**modified_model)
887+
sushi_context_fixed_date.upsert_model(modified_sqlmodel)
888+
889+
# Modify second model
890+
model2 = sushi_context_fixed_date.models['"MEMORY"."SUSHI"."NO_GRAIN_MODEL"']
891+
modified_model2 = model2.dict()
892+
modified_model2["query"] = (
893+
exp.select("*")
894+
.from_(model2.query.subquery())
895+
.union("SELECT key, amount FROM (VALUES (5, 150.2),(6,250.2),) AS t (key, amount)")
896+
)
897+
modified_sqlmodel2 = SqlModel(**modified_model2)
898+
sushi_context_fixed_date.upsert_model(modified_sqlmodel2)
899+
900+
sushi_context_fixed_date.auto_categorize_changes = CategorizerConfig(
901+
sql=AutoCategorizationMode.FULL
902+
)
903+
sushi_context_fixed_date.plan(
904+
"target_dev",
905+
create_from="source_dev",
906+
no_prompts=True,
907+
auto_apply=True,
908+
skip_tests=True,
909+
start="2023-01-31",
910+
end="2023-01-31",
911+
)
912+
913+
# By default erroring out when even one model lacks a grain
914+
with pytest.raises(
915+
SQLMeshError,
916+
match=r"SQLMesh doesn't know how to join the tables for the following models:*",
917+
):
918+
sushi_context_fixed_date.table_diff(
919+
source="source_dev",
920+
target="target_dev",
921+
select_models={"*"},
922+
skip_grain_check=False,
923+
)
924+
925+
# With warn_grain_check flag the diff will go ahead by warning
926+
diffs = sushi_context_fixed_date.table_diff(
927+
source="source_dev",
928+
target="target_dev",
929+
select_models={"*"},
930+
skip_grain_check=False,
931+
warn_grain_check=True,
932+
)
933+
934+
# Check that the diff was performed only for the model with a grain
935+
assert len(diffs) == 1
936+
diff1 = diffs[0]
937+
938+
# Check the table diff corresponds to the grain model
939+
row_diff1 = diff1.row_diff()
940+
assert row_diff1.full_match_count == 2.0
941+
assert row_diff1.full_match_pct == 50.0
942+
assert row_diff1.s_only_count == 0.0
943+
assert row_diff1.t_only_count == 0.0
944+
assert row_diff1.stats["join_count"] == 4.0
945+
assert row_diff1.stats["null_grain_count"] == 0.0
946+
assert row_diff1.stats["s_count"] == 4.0
947+
assert row_diff1.stats["distinct_count_s"] == 2.0
948+
assert row_diff1.stats["t_count"] == 4.0
949+
assert row_diff1.stats["distinct_count_t"] == 2.0
950+
assert row_diff1.s_sample.shape == (0, 2)
951+
assert row_diff1.t_sample.shape == (0, 2)
952+
assert row_diff1.joined_sample.shape == (2, 3)
953+
assert row_diff1.sample.shape == (2, 4)

0 commit comments

Comments
 (0)