
[MAINTENANCE] Databricks data type expectation tests #10734


Open
wants to merge 5 commits into develop
@@ -0,0 +1,164 @@
from datetime import datetime, timezone

import pandas as pd
import pytest

from great_expectations.compatibility.sqlalchemy import sqltypes
from great_expectations.expectations import (
    ExpectColumnDistinctValuesToContainSet,
    ExpectColumnSumToBeBetween,
    ExpectColumnValuesToBeBetween,
    ExpectColumnValuesToBeInSet,
    ExpectColumnValuesToBeOfType,
)
from tests.integration.test_utils.data_source_config import DatabricksDatasourceTestConfig
from tests.integration.test_utils.data_source_config.databricks import DatabricksBatchTestSetup

pytestmark = pytest.mark.databricks
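# A module-level `pytestmark` applies the `databricks` marker to every test
# collected from this module, so the whole suite can be selected with
# `pytest -m databricks`.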


class TestDatabricksDataTypes:
    """This set of tests ensures that we can run expectations against every data
    type supported by Databricks.

    https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html
    """

    BOOL_COL_NAME = "my_bool"
    DATE_COL_NAME = "my_date"
    NUMERIC_COL_NAME = "my_number"
    STRING_COL_NAME = "my_string"

    DATA_FRAME = pd.DataFrame(
        {
            BOOL_COL_NAME: [True, False, True, False],
            DATE_COL_NAME: [
                datetime(2021, 1, 1, tzinfo=timezone.utc).date(),
                datetime(2021, 1, 2, tzinfo=timezone.utc).date(),
                datetime(2021, 1, 3, tzinfo=timezone.utc).date(),
                datetime(2021, 1, 4, tzinfo=timezone.utc).date(),
            ],
            NUMERIC_COL_NAME: [1, 2, 3, 4],
            STRING_COL_NAME: ["a", "b", "c", "d"],
        }
    )

    @pytest.mark.parametrize(
        "column_type",
        [
            sqltypes.SMALLINT,
            sqltypes.INT,
            sqltypes.BIGINT,
            sqltypes.DECIMAL,
            sqltypes.FLOAT,
            sqltypes.REAL,
        ],
    )
    def test_number(self, column_type: sqltypes.TypeEngine):
        batch_setup = DatabricksBatchTestSetup(
            config=DatabricksDatasourceTestConfig(
                column_types={self.NUMERIC_COL_NAME: column_type}
            ),
            data=self.DATA_FRAME,
            extra_data={},
        )
        with batch_setup.batch_test_context() as batch:
            result = batch.validate(
                expect=ExpectColumnSumToBeBetween(
                    column=self.NUMERIC_COL_NAME,
                    min_value=9,
                    max_value=11,
                )
            )
        assert result.success

    def test_varchar(self):
        column_type = sqltypes.String
        batch_setup = DatabricksBatchTestSetup(
            config=DatabricksDatasourceTestConfig(column_types={self.STRING_COL_NAME: column_type}),
            data=self.DATA_FRAME,
            extra_data={},
        )
        with batch_setup.batch_test_context() as batch:
            result = batch.validate(
                expect=ExpectColumnDistinctValuesToContainSet(
                    column=self.STRING_COL_NAME,
                    value_set=[
                        "a",
                        "b",
                    ],
                )
            )
        assert result.success

    def test_boolean(self):
        column_type = sqltypes.BOOLEAN
        batch_setup = DatabricksBatchTestSetup(
            config=DatabricksDatasourceTestConfig(column_types={self.BOOL_COL_NAME: column_type}),
            data=self.DATA_FRAME,
            extra_data={},
        )
        with batch_setup.batch_test_context() as batch:
            result = batch.validate(
                expect=ExpectColumnValuesToBeInSet(
                    column=self.BOOL_COL_NAME, value_set=[True, False]
                )
            )
        assert result.success

Contributor: is there a ticket for this?

    @pytest.mark.xfail(
        strict=True,
        reason="Expectation fails despite the type_ param being the same as the observed value.",
    )
    def test_boolean__expect_column_values_to_be_of_type(self):
        column_type = sqltypes.BOOLEAN
        batch_setup = DatabricksBatchTestSetup(
            config=DatabricksDatasourceTestConfig(column_types={self.BOOL_COL_NAME: column_type}),
            data=self.DATA_FRAME,
            extra_data={},
        )
        with batch_setup.batch_test_context() as batch:
            result = batch.validate(
                expect=ExpectColumnValuesToBeOfType(column=self.BOOL_COL_NAME, type_="Boolean")
            )
        assert result.success

    def test_date(self):
        column_type = sqltypes.DATE
        batch_setup = DatabricksBatchTestSetup(
            config=DatabricksDatasourceTestConfig(column_types={self.DATE_COL_NAME: column_type}),
            data=self.DATA_FRAME,
            extra_data={},
        )
        with batch_setup.batch_test_context() as batch:
            result = batch.validate(
                expect=ExpectColumnValuesToBeBetween(
                    column=self.DATE_COL_NAME,
                    min_value=datetime(year=2021, month=1, day=1, tzinfo=timezone.utc).date(),
                    max_value=datetime(year=2024, month=1, day=1, tzinfo=timezone.utc).date(),
                )
            )
        assert result.success

    @pytest.mark.parametrize(
        "column_type",
        [
            sqltypes.TIMESTAMP(timezone=False),
            sqltypes.TIMESTAMP(timezone=True),
        ],
    )
    def test_timestamp(self, column_type: sqltypes.TIMESTAMP):
        batch_setup = DatabricksBatchTestSetup(
            config=DatabricksDatasourceTestConfig(column_types={self.DATE_COL_NAME: column_type}),
            data=self.DATA_FRAME,
            extra_data={},
        )
        with batch_setup.batch_test_context() as batch:
            result = batch.validate(
                expect=ExpectColumnValuesToBeBetween(
                    column=self.DATE_COL_NAME,
                    min_value=datetime(year=2021, month=1, day=1, tzinfo=timezone.utc),
                    max_value=datetime(year=2024, month=1, day=1, tzinfo=timezone.utc),
                )
            )
        assert result.success
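The Snowflake module below gets the same marker cleanup: the per-test @pytest.mark.snowflake decorators are removed in favor of a single module-level pytestmark. A minimal sketch of why the two forms are equivalent, assuming standard pytest marker behavior (the test names here are illustrative, not taken from the diff):

import pytest

# Old pattern: each test repeats the marker decorator.
@pytest.mark.snowflake
def test_number_old(): ...

# New pattern: pytest reads the module-level `pytestmark` attribute and applies
# it to every test collected from this module, so `pytest -m snowflake` still
# selects all of them.
pytestmark = pytest.mark.snowflake

def test_number_new(): ...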
@@ -14,6 +14,8 @@
from tests.integration.test_utils.data_source_config import SnowflakeDatasourceTestConfig
from tests.integration.test_utils.data_source_config.snowflake import SnowflakeBatchTestSetup

pytestmark = pytest.mark.snowflake
Contributor: that's the good stuff!



class TestSnowflakeDataTypes:
    """This set of tests ensures that we can run expectations against every data
@@ -24,7 +26,6 @@ class TestSnowflakeDataTypes:

    COLUMN = "col_a"

    @pytest.mark.snowflake
    def test_number(self):
        column_type = (
            SNOWFLAKE_TYPES.NUMBER
@@ -44,7 +45,6 @@ def test_number(self):
            )
        assert result.success

    @pytest.mark.snowflake
    def test_int(self):
        column_type = sqltypes.INT  # equivalent to INTEGER, BIGINT, SMALLINT, TINYINT, BYTEINT
        batch_setup = SnowflakeBatchTestSetup(
@@ -62,7 +62,6 @@ def test_int(self):
            )
        assert result.success

    @pytest.mark.snowflake
    def test_float(self):
        column_type = sqltypes.FLOAT  # equivalent to FLOAT4, FLOAT8, DOUBLE, DOUBLE PRECISION, REAL
        batch_setup = SnowflakeBatchTestSetup(
@@ -80,7 +79,6 @@ def test_float(self):
            )
        assert result.success

    @pytest.mark.snowflake
    def test_varchar(self):
        column_type = sqltypes.VARCHAR  # equivalent to STRING, TEXT
        batch_setup = SnowflakeBatchTestSetup(
@@ -100,7 +98,6 @@ def test_varchar(self):
            )
        assert result.success

    @pytest.mark.snowflake
    def test_char(self):
        column_type = sqltypes.CHAR  # length of 1, equivalent to CHARACTER
        batch_setup = SnowflakeBatchTestSetup(
@@ -120,7 +117,6 @@ def test_char(self):
            )
        assert result.success

    @pytest.mark.snowflake
    def test_boolean(self):
        column_type = sqltypes.BOOLEAN
        batch_setup = SnowflakeBatchTestSetup(
@@ -134,7 +130,6 @@ def test_boolean(self):
            )
        assert result.success

    @pytest.mark.snowflake
    def test_date(self):
        column_type = sqltypes.DATE
        batch_setup = SnowflakeBatchTestSetup(
@@ -160,7 +155,6 @@ def test_date(self):
            )
        assert result.success

    @pytest.mark.snowflake
    def test_datetime(self):
        column_type = sqltypes.DATETIME
        batch_setup = SnowflakeBatchTestSetup(
@@ -186,7 +180,6 @@ def test_datetime(self):
            )
        assert result.success

    @pytest.mark.snowflake
    def test_timestamp_tz(self):
        column_type = SNOWFLAKE_TYPES.TIMESTAMP_TZ
        batch_setup = SnowflakeBatchTestSetup(
@@ -212,7 +205,6 @@ def test_timestamp_tz(self):
            )
        assert result.success

    @pytest.mark.snowflake
    def test_timestamp_ntz(self):
        column_type = SNOWFLAKE_TYPES.TIMESTAMP_NTZ
        batch_setup = SnowflakeBatchTestSetup(
@@ -238,7 +230,6 @@ def test_timestamp_ntz(self):
            )
        assert result.success

    @pytest.mark.snowflake
    @pytest.mark.xfail(
        strict=True,
        reason="time is not an accepted min/max value parameter, and other date types "