66from sqlmesh .utils import yaml
77import shutil
88import site
9+ import uuid
910
1011pytestmark = pytest .mark .slow
1112
@@ -16,6 +17,10 @@ def __call__(
1617 ) -> subprocess .CompletedProcess : ...
1718
1819
class CreateSitePackageType(t.Protocol):
    """Call signature of the factory yielded by the `create_site_package` fixture."""

    def __call__(self, name: str) -> t.Tuple[str, Path]:
        """Accept a base package name and return a `(package name, package path)` pair."""
        ...
22+
23+
1924@pytest .fixture
2025def invoke_cli (tmp_path : Path ) -> InvokeCliType :
2126 # Fetch the full path to the SQLMesh binary so that when we use `cwd` to run in the context of a test dir, the correct SQLMesh binary is executed
@@ -40,9 +45,8 @@ def _invoke(sqlmesh_args: t.List[str], **kwargs: t.Any) -> subprocess.CompletedP
4045 return _invoke
4146
4247
43- def test_load_snapshots_that_reference_nonexistent_python_libraries (
44- invoke_cli : InvokeCliType , tmp_path : Path
45- ) -> None :
48+ @pytest .fixture
49+ def duckdb_example_project (tmp_path : Path ) -> Path :
4650 init_example_project (tmp_path , dialect = "duckdb" )
4751 config_path = tmp_path / "config.yaml"
4852
@@ -54,11 +58,65 @@ def test_load_snapshots_that_reference_nonexistent_python_libraries(
5458 }
5559 config_path .write_text (yaml .dump (config_dict ))
5660
61+ return tmp_path
62+
63+
@pytest.fixture
def last_log_file_contents(tmp_path: Path) -> t.Callable[[], str]:
    """Provide a zero-argument callable that reads a log file from `tmp_path / "logs"`.

    The file chosen is the last entry of the directory listing after sorting
    the paths; its full text is returned.
    """

    def _fetch() -> str:
        # The directory is listed when the callable is invoked, not when the fixture is
        # set up, so files written after fixture creation are taken into account.
        last_log = sorted((tmp_path / "logs").iterdir())[-1]
        return last_log.read_text()

    return _fetch
71+
72+
@pytest.fixture
def create_site_package() -> t.Iterator[CreateSitePackageType]:
    """Yield a factory that creates uniquely named package directories in site-packages.

    The factory takes a base name, appends an 8-character random suffix, creates a
    directory with that name under the first entry of `site.getsitepackages()` and
    returns `(package_name, package_path)`.

    Every directory created through the factory is removed once the fixture is torn
    down, including when the factory is called more than once in a single test.
    """
    created_package_paths: t.List[Path] = []

    def _create(name: str) -> t.Tuple[str, Path]:
        # so that multiple tests using the same name don't clobber each other
        package_name = f"{name}_{uuid.uuid4().hex[:8]}"

        package_path = Path(site.getsitepackages()[0]) / package_name
        package_path.mkdir()

        # Record the path right after creation so teardown can always find it.
        created_package_paths.append(package_path)

        return package_name, package_path

    yield _create

    # cleanup; errors are ignored because a test may already have deleted the directory itself
    for package_path in created_package_paths:
        shutil.rmtree(package_path, ignore_errors=True)
96+
97+
98+ def test_load_snapshots_that_reference_nonexistent_python_libraries (
99+ invoke_cli : InvokeCliType ,
100+ duckdb_example_project : Path ,
101+ last_log_file_contents : t .Callable [[], str ],
102+ create_site_package : CreateSitePackageType ,
103+ ) -> None :
104+ """
105+ Scenario:
106+ - A model is created using a macro that is imported from an external package
107+ - That model is applied + snapshot committed to state
108+ - The external package is removed locally and the import macro import is changed to an inline definition
109+
110+ Outcome:
111+ - `sqlmesh plan` should not exit with an ImportError when it tries to render the query of the snapshot stored in state
112+ - Instead, it should log a warning and proceed with applying the new model version
113+ """
114+
115+ project_path = duckdb_example_project
116+
57117 # simulate a 3rd party library that provides a macro
58- site_packages = site .getsitepackages ()[0 ]
59- sqlmesh_test_macros_package_path = Path (site_packages ) / "sqlmesh_test_macros"
60- sqlmesh_test_macros_package_path .mkdir ()
61- (sqlmesh_test_macros_package_path / "macros.py" ).write_text ("""
118+ package_name , package_path = create_site_package ("sqlmesh_test_macros" )
119+ (package_path / "macros.py" ).write_text ("""
62120from sqlmesh import macro
63121
64122@macro()
@@ -67,11 +125,11 @@ def do_something(evaluator):
67125""" )
68126
69127 # reference the macro from site-packages
70- (tmp_path / "macros" / "__init__.py" ).write_text ("""
71- from sqlmesh_test_macros .macros import do_something
128+ (project_path / "macros" / "__init__.py" ).write_text (f """
129+ from { package_name } .macros import do_something
72130""" )
73131
74- (tmp_path / "models" / "example.sql" ).write_text ("""
132+ (project_path / "models" / "example.sql" ).write_text ("""
75133MODEL (
76134 name example.test_model,
77135 kind FULL
@@ -96,10 +154,10 @@ def do_something(evaluator):
96154
97155 # deleting this removes the 'do_something()' macro used by the version of the snapshot stored in state
98156 # when loading the old snapshot from state in the local python env, this will create an ImportError
99- shutil .rmtree (sqlmesh_test_macros_package_path )
157+ shutil .rmtree (package_path )
100158
101159 # Move the macro inline so its no longer being loaded from a library but still exists with the same signature
102- (tmp_path / "macros" / "__init__.py" ).write_text ("""
160+ (project_path / "macros" / "__init__.py" ).write_text ("""
103161from sqlmesh import macro
104162
105163@macro()
@@ -120,9 +178,8 @@ def do_something(evaluator):
120178 assert "Physical layer updated" in result .stdout
121179 assert "Virtual layer updated" in result .stdout
122180
123- log_file = sorted (list ((tmp_path / "logs" ).iterdir ()))[- 1 ]
124- log_file_contents = log_file .read_text ()
125- assert "ModuleNotFoundError: No module named 'sqlmesh_test_macros'" in log_file_contents
181+ log_file_contents = last_log_file_contents ()
182+ assert f"ModuleNotFoundError: No module named '{ package_name } '" in log_file_contents
126183 assert (
127184 "ERROR - Failed to cache optimized query for model 'example.test_model'"
128185 in log_file_contents
@@ -131,3 +188,194 @@ def do_something(evaluator):
131188 'ERROR - Failed to cache snapshot SnapshotId<"db"."example"."test_model"'
132189 in log_file_contents
133190 )
191+
192+
def test_model_selector_snapshot_references_nonexistent_python_libraries(
    invoke_cli: InvokeCliType,
    duckdb_example_project: Path,
    last_log_file_contents: t.Callable[[], str],
    create_site_package: CreateSitePackageType,
) -> None:
    """
    Scenario:
        - A model is created using a macro that is imported from an external package
        - That model is applied + snapshot committed to state
        - The external package is removed locally and the macro import is changed to an inline definition
        - Thus, local version of the model can be rendered but the remote version in state cannot

    Outcome:
        - `sqlmesh plan --select-model <this model>` should work as it picks up the local version
        - `sqlmesh plan --select-model <some other model>` should exit with an error, because the plan needs a valid DAG and the remote version is no longer valid locally
    """
    project_path = duckdb_example_project

    # simulate a 3rd party library that provides a macro
    package_name, package_path = create_site_package("sqlmesh_test_macros")
    (package_path / "macros.py").write_text("""
from sqlmesh import macro

@macro()
def do_something(evaluator):
    return "'value from site-packages'"
""")

    # reference the macro from site-packages
    (project_path / "macros" / "__init__.py").write_text(f"""
from {package_name}.macros import do_something
""")

    (project_path / "models" / "example.sql").write_text("""
MODEL (
name sqlmesh_example.test_model,
kind FULL
);

select @do_something() as a
""")

    result = invoke_cli(["plan", "--no-prompts", "--auto-apply", "--skip-tests"])

    assert result.returncode == 0
    assert "Physical layer updated" in result.stdout
    assert "Virtual layer updated" in result.stdout

    # clear cache to ensure we are forced to reload everything
    assert invoke_cli(["clean"]).returncode == 0

    # deleting this removes the 'do_something()' macro used by the version of the snapshot stored in state
    # when loading the old snapshot from state in the local python env, this will create an ImportError
    shutil.rmtree(package_path)

    # Move the macro inline so it's no longer being loaded from a library but still exists with the same signature
    (project_path / "macros" / "__init__.py").write_text("""
from sqlmesh import macro

@macro()
def do_something(evaluator):
    return "'some value not from site-packages'"
""")

    # the invalid snapshot is in state but is not preventing a plan
    result = invoke_cli(
        [
            "plan",
            "--no-prompts",
            "--skip-tests",
        ],
        input="n",  # for the apply backfill (y/n) prompt
    )
    assert result.returncode == 0
    assert "Apply - Backfill Tables [y/n]:" in result.stdout
    assert "Physical layer updated" not in result.stdout

    # the invalid snapshot in state should not prevent a plan if --select-model is used on it (since the local version can be rendered)
    result = invoke_cli(
        ["plan", "--select-model", "sqlmesh_example.test_model", "--no-prompts", "--skip-tests"],
        input="n",  # for the apply backfill (y/n) prompt
    )
    assert result.returncode == 0
    assert "ModuleNotFoundError" not in result.stdout
    assert "sqlmesh_example.test_model" in result.stdout
    assert "Apply - Backfill Tables" in result.stdout

    # the invalid snapshot in state should prevent a plan if --select-model is used on another model
    # (since this says to SQLMesh "source everything from state except this selected model" and the plan DAG must be valid to run the plan)
    result = invoke_cli(
        [
            "plan",
            "--select-model",
            "sqlmesh_example.full_model",
            "--no-prompts",
            "--skip-tests",
        ],
        input="n",  # for the apply backfill (y/n) prompt
    )
    assert result.returncode == 1
    assert (
        "Model 'sqlmesh_example.test_model' sourced from state cannot be rendered in the local environment"
        in result.stdout
    )
    assert f"No module named '{package_name}'" in result.stdout
    assert (
        "If the model has been fixed locally, please ensure that the --select-model expression includes it"
        in result.stdout
    )

    # verify the full stack trace was logged
    log_file_contents = last_log_file_contents()
    assert f"ModuleNotFoundError: No module named '{package_name}'" in log_file_contents
    assert (
        "The above exception was the direct cause of the following exception:" in log_file_contents
    )
310+
311+
def test_model_selector_tags_picks_up_both_remote_and_local(
    invoke_cli: InvokeCliType, duckdb_example_project: Path
) -> None:
    """
    Scenario:
        - A model that has already been applied to prod (so exists in state) has a tag added locally
        - A new model is created locally that has the same tag

    Outcome:
        - `sqlmesh plan --select-model tag:<tag>` should include both models
    """
    project_path = duckdb_example_project
    models_dir = project_path / "models"

    # default state of full_model
    (models_dir / "full_model.sql").write_text("""
MODEL (
name sqlmesh_example.full_model,
kind FULL,
cron '@daily',
grain item_id,
audits (assert_positive_order_ids),
);

SELECT
item_id,
COUNT(DISTINCT id) AS num_orders
FROM sqlmesh_example.incremental_model
GROUP BY item_id
""")

    # apply plan - starting point
    result = invoke_cli(["plan", "--no-prompts", "--auto-apply", "--skip-tests"])

    assert result.returncode == 0
    assert "Physical layer updated" in result.stdout
    assert "Virtual layer updated" in result.stdout

    # add a new model locally with tag:a
    (models_dir / "new_model.sql").write_text("""
MODEL (
name sqlmesh_example.new_model,
kind full,
tags (a)
);

SELECT 1;
""")

    # update full_model with tag:a
    (models_dir / "full_model.sql").write_text("""
MODEL (
name sqlmesh_example.full_model,
kind FULL,
tags (a)
);

SELECT
item_id,
COUNT(DISTINCT id) AS num_orders
FROM sqlmesh_example.incremental_model
GROUP BY item_id
""")

    result = invoke_cli(
        ["plan", "--select-model", "tag:a", "--no-prompts", "--skip-tests"],
        input="n",  # for the apply backfill (y/n) prompt
    )
    assert result.returncode == 0
    assert "sqlmesh_example.full_model" in result.stdout  # metadata update: tags
    assert "sqlmesh_example.new_model" in result.stdout  # added
0 commit comments