@@ -2721,7 +2721,9 @@ def _use_warehouse_as_state_connection(gateway_name: str, config: Config):
27212721
27222722 config .gateways [gateway_name ].state_schema = test_schema
27232723
2724- sqlmesh_context = ctx .create_context (config_mutator = _use_warehouse_as_state_connection )
2724+ sqlmesh_context = ctx .create_context (
2725+ config_mutator = _use_warehouse_as_state_connection , ephemeral_state_connection = False
2726+ )
27252727 assert sqlmesh_context .config .get_state_schema (ctx .gateway ) == test_schema
27262728
27272729 state_sync = (
@@ -2732,3 +2734,83 @@ def _use_warehouse_as_state_connection(gateway_name: str, config: Config):
27322734
27332735 # will throw if one of the migrations produces an error, which can happen if we forget to take quoting or normalization into account
27342736 sqlmesh_context .migrate ()
def test_python_model_column_order(ctx: TestContext, tmp_path: pathlib.Path):
    """Verify that a Python model's output columns follow the @model `columns`
    declaration even when the returned DataFrame orders them differently."""
    # Run the PySpark variant only on spark/databricks; otherwise run once per db.
    if ctx.test_type == "pyspark" and ctx.dialect in ("spark", "databricks"):
        # dont skip
        pass
    elif ctx.test_type != "df":
        pytest.skip("python model column order test only needs to be run once per db")

    test_schema = ctx.add_test_suffix(TEST_SCHEMA)

    (tmp_path / "models").mkdir()

    # note: this model deliberately defines the columns in the @model definition to be in a different order than what
    # is returned by the DataFrame within the model
    model_file = tmp_path / "models" / "python_model.py"
    if ctx.test_type == "pyspark":
        # python model that emits a PySpark dataframe
        model_source = """
from pyspark.sql import DataFrame, Row
import typing as t
from sqlmesh import ExecutionContext, model

@model(
    "TEST_SCHEMA.model",
    columns={
        "id": "int",
        "name": "varchar"
    }
)
def execute(
    context: ExecutionContext,
    **kwargs: t.Any,
) -> DataFrame:
    return context.spark.createDataFrame([
        Row(name="foo", id=1)
    ])
"""
    else:
        # python model that emits a Pandas DataFrame
        model_source = """
import pandas as pd
import typing as t
from sqlmesh import ExecutionContext, model

@model(
    "TEST_SCHEMA.model",
    columns={
        "id": "int",
        "name": "varchar"
    }
)
def execute(
    context: ExecutionContext,
    **kwargs: t.Any,
) -> pd.DataFrame:
    return pd.DataFrame([
        {"name": "foo", "id": 1}
    ])
"""
    # Substitute the suffixed schema into the model source before writing it out.
    model_file.write_text(model_source.replace("TEST_SCHEMA", test_schema))

    sqlmesh_ctx = ctx.create_context(path=tmp_path)

    assert len(sqlmesh_ctx.models) == 1

    applied_plan = sqlmesh_ctx.plan(auto_apply=True)
    assert len(applied_plan.new_snapshots) == 1

    adapter = sqlmesh_ctx.engine_adapter

    select_query = exp.select("*").from_(
        exp.to_table(f"{test_schema}.model", dialect=ctx.dialect), dialect=ctx.dialect
    )
    result = adapter.fetchdf(select_query, quote_identifiers=True)
    assert len(result) == 1
    # Column order must match the @model declaration, not the DataFrame's order.
    assert result.iloc[0].to_dict() == {"id": 1, "name": "foo"}
0 commit comments