|
14 | 14 | from sqlmesh.core.table_diff import TableDiff |
15 | 15 | import numpy as np |
16 | 16 |
|
| 17 | +from sqlmesh.utils.errors import SQLMeshError |
| 18 | + |
17 | 19 |
|
18 | 20 | def create_test_console() -> t.Tuple[StringIO, TerminalConsole]: |
19 | 21 | """Creates a console and buffer for validating console output.""" |
@@ -829,3 +831,123 @@ def test_data_diff_empty_tables(): |
829 | 831 | assert ( |
830 | 832 | strip_ansi_codes(output) == "Neither the source nor the target table contained any records" |
831 | 833 | ) |
| 834 | + |
| 835 | + |
@pytest.mark.slow
def test_data_diff_multiple_models_lacking_grain(sushi_context_fixed_date, capsys, caplog):
    """Diff two environments when one of the selected models has no grain.

    Two models are created (one declaring ``grain(key)``, one without a grain),
    planned into ``source_dev``, modified, and planned into ``target_dev``.
    The test then checks that:

    * ``table_diff`` raises ``SQLMeshError`` when ``warn_grain_check`` is not set;
    * with ``warn_grain_check=True`` exactly one diff is returned, and its row
      statistics match the expected values asserted below.
    """
    context = sushi_context_fixed_date

    # Keyword arguments shared by both plan invocations.
    plan_options = dict(
        no_prompts=True,
        auto_apply=True,
        skip_tests=True,
        start="2023-01-31",
        end="2023-01-31",
    )
    # Keyword arguments shared by both table_diff invocations.
    diff_options = dict(
        source="source_dev",
        target="target_dev",
        select_models={"*"},
        skip_grain_check=False,
    )

    # Create first model with grain
    grain_expressions = d.parse(
        """
        MODEL (name memory.sushi.grain_model, kind full, grain(key),);
        SELECT
        key,
        value,
        FROM
        (VALUES
        (1, 3),
        (2, 4),
        ) AS t (key, value)
        """
    )
    context.upsert_model(load_sql_based_model(grain_expressions, dialect="snowflake"))

    # Create second model without grain
    no_grain_expressions = d.parse(
        """
        MODEL (name memory.sushi.no_grain_model, kind full,);
        SELECT
        key,
        value as amount,
        FROM
        memory.sushi.grain_model
        """
    )
    context.upsert_model(load_sql_based_model(no_grain_expressions, dialect="snowflake"))

    context.plan("source_dev", **plan_options)

    # Modify first model
    grain_model = context.models['"MEMORY"."SUSHI"."GRAIN_MODEL"']
    grain_fields = grain_model.dict()
    grain_fields["query"] = (
        exp.select("*")
        .from_(grain_model.query.subquery())
        .union("SELECT key, value FROM (VALUES (1, 6),(2,3),) AS t (key, value)")
    )
    context.upsert_model(SqlModel(**grain_fields))

    # Modify second model
    no_grain_model = context.models['"MEMORY"."SUSHI"."NO_GRAIN_MODEL"']
    no_grain_fields = no_grain_model.dict()
    no_grain_fields["query"] = (
        exp.select("*")
        .from_(no_grain_model.query.subquery())
        .union("SELECT key, amount FROM (VALUES (5, 150.2),(6,250.2),) AS t (key, amount)")
    )
    context.upsert_model(SqlModel(**no_grain_fields))

    context.auto_categorize_changes = CategorizerConfig(sql=AutoCategorizationMode.FULL)
    context.plan("target_dev", create_from="source_dev", **plan_options)

    # By default erroring out when even one model lacks a grain.
    # pytest applies `match` with re.search, so the message prefix is enough;
    # the previous trailing ":*" only meant "zero or more colons".
    with pytest.raises(
        SQLMeshError,
        match=r"SQLMesh doesn't know how to join the tables for the following models",
    ):
        context.table_diff(**diff_options)

    # With warn_grain_check flag the diff will go ahead by warning
    diffs = context.table_diff(warn_grain_check=True, **diff_options)

    # Check that the diff was performed only for the model with a grain
    assert len(diffs) == 1

    # Check the table diff corresponds to the grain model
    row_diff = diffs[0].row_diff()
    assert row_diff.full_match_count == 2.0
    assert row_diff.full_match_pct == 50.0
    assert row_diff.s_only_count == 0.0
    assert row_diff.t_only_count == 0.0

    stats = row_diff.stats
    assert stats["join_count"] == 4.0
    assert stats["null_grain_count"] == 0.0
    assert stats["s_count"] == 4.0
    assert stats["distinct_count_s"] == 2.0
    assert stats["t_count"] == 4.0
    assert stats["distinct_count_t"] == 2.0

    assert row_diff.s_sample.shape == (0, 2)
    assert row_diff.t_sample.shape == (0, 2)
    assert row_diff.joined_sample.shape == (2, 3)
    assert row_diff.sample.shape == (2, 4)
0 commit comments