Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 0 additions & 158 deletions docs/docusaurus/docs/snippets/aws_redshift_deployment_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,164 +59,6 @@
expected_existing_expectations_store_yaml
)

# adding expectations store
configured_expectations_store_yaml = """
# <snippet name="docs/docusaurus/docs/snippets/aws_redshift_deployment_patterns.py new_expectations_store">
stores:
expectations_S3_store:
class_name: ExpectationsStore
store_backend:
class_name: TupleS3StoreBackend
bucket: '<YOUR S3 BUCKET NAME>'
prefix: '<YOUR S3 PREFIX NAME>' # Bucket and prefix in combination must be unique across all stores

expectations_store_name: expectations_S3_store
# </snippet>
"""

# replace example code with integration test configuration
configured_expectations_store = yaml.load(configured_expectations_store_yaml)
configured_expectations_store["stores"]["expectations_S3_store"]["store_backend"][
"bucket"
] = "gx-golden-path-tests"
configured_expectations_store["stores"]["expectations_S3_store"]["store_backend"][
"prefix"
] = "metadata/expectations"

# add and set the new expectation store
context.add_store(
name=configured_expectations_store["expectations_store_name"],
config=configured_expectations_store["stores"]["expectations_S3_store"],
)
with open(great_expectations_yaml_file_path) as f:
great_expectations_yaml = yaml.load(f)
great_expectations_yaml["expectations_store_name"] = "expectations_S3_store"
great_expectations_yaml["stores"]["expectations_S3_store"]["store_backend"].pop(
"suppress_store_backend_id"
)
with open(great_expectations_yaml_file_path, "w") as f:
yaml.dump(great_expectations_yaml, f)

# adding validation results store
great_expectations_yaml_file_path = pathlib.Path(
context.root_directory, FileDataContext.GX_YML
)
with open(great_expectations_yaml_file_path) as f:
great_expectations_yaml = yaml.load(f)

stores = great_expectations_yaml["stores"]
# popping the rest out so that we can do the comparison. They aren't going anywhere, don't worry
pop_stores = [
"checkpoint_store",
"expectations_store",
"expectations_S3_store",
"validation_definition_store",
]
for store in pop_stores:
stores.pop(store)

actual_existing_validation_results_store = {}
actual_existing_validation_results_store["stores"] = stores
actual_existing_validation_results_store["validation_results_store_name"] = (
great_expectations_yaml["validation_results_store_name"]
)

expected_existing_validation_results_store_yaml = """
# <snippet name="docs/docusaurus/docs/snippets/aws_redshift_deployment_patterns.py existing_validation_results_store">
stores:
validation_results_store:
class_name: ValidationResultsStore
store_backend:
class_name: TupleFilesystemStoreBackend
base_directory: uncommitted/validations/

validation_results_store_name: validation_results_store
# </snippet>
"""

assert actual_existing_validation_results_store == yaml.load(
expected_existing_validation_results_store_yaml
)

# adding validations store
configured_validation_results_store_yaml = """
# <snippet name="docs/docusaurus/docs/snippets/aws_redshift_deployment_patterns.py new_validation_results_store">
stores:
validation_results_S3_store:
class_name: ValidationResultsStore
store_backend:
class_name: TupleS3StoreBackend
bucket: '<YOUR S3 BUCKET NAME>'
prefix: '<YOUR S3 PREFIX NAME>' # Bucket and prefix in combination must be unique across all stores
# </snippet>

# <snippet name="docs/docusaurus/docs/snippets/aws_redshift_deployment_patterns.py set_new_validation_results_store">
validation_results_store_name: validation_results_S3_store
# </snippet>
"""

# replace example code with integration test configuration
configured_validation_results_store = yaml.load(
configured_validation_results_store_yaml
)
configured_validation_results_store["stores"]["validation_results_S3_store"][
"store_backend"
]["bucket"] = "gx-golden-path-tests"
configured_validation_results_store["stores"]["validation_results_S3_store"][
"store_backend"
]["prefix"] = "metadata/validations"

# add and set the new validation store
context.add_store(
name=configured_validation_results_store["validation_results_store_name"],
config=configured_validation_results_store["stores"]["validation_results_S3_store"],
)
with open(great_expectations_yaml_file_path) as f:
great_expectations_yaml = yaml.load(f)
great_expectations_yaml["validation_results_store_name"] = "validation_results_S3_store"
great_expectations_yaml["stores"]["validation_results_S3_store"]["store_backend"].pop(
"suppress_store_backend_id"
)
with open(great_expectations_yaml_file_path, "w") as f:
yaml.dump(great_expectations_yaml, f)

# adding data docs store
data_docs_site_yaml = """
# <snippet name="docs/docusaurus/docs/snippets/aws_redshift_deployment_patterns.py add_data_docs_store">
data_docs_sites:
local_site:
class_name: SiteBuilder
show_how_to_buttons: true
store_backend:
class_name: TupleFilesystemStoreBackend
base_directory: uncommitted/data_docs/local_site/
site_index_builder:
class_name: DefaultSiteIndexBuilder
S3_site: # this is a user-selected name - you may select your own
class_name: SiteBuilder
store_backend:
class_name: TupleS3StoreBackend
bucket: <YOUR S3 BUCKET NAME>
site_index_builder:
class_name: DefaultSiteIndexBuilder
# </snippet>
"""

data_docs_site_yaml = data_docs_site_yaml.replace(
"<YOUR S3 BUCKET NAME>", "gx-demo-data-docs"
)
great_expectations_yaml_file_path = pathlib.Path(
context.root_directory, FileDataContext.GX_YML
)
with open(great_expectations_yaml_file_path) as f:
great_expectations_yaml = yaml.load(f)
great_expectations_yaml["data_docs_sites"] = yaml.load(data_docs_site_yaml)[
"data_docs_sites"
]
with open(great_expectations_yaml_file_path, "w") as f:
yaml.dump(great_expectations_yaml, f)


# <snippet name="docs/docusaurus/docs/snippets/aws_redshift_deployment_patterns.py vars">
datasource_name = "my_redshift_datasource"
connection_string = "redshift+psycopg2://<USER_NAME>:<PASSWORD>@<HOST>:<PORT>/<DATABASE>?sslmode=<SSLMODE>"
Expand Down
18 changes: 8 additions & 10 deletions tests/integration/test_script_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,23 +499,21 @@ def _check_for_skipped_tests( # noqa: C901, PLR0912 # FIXME CoP
integration_test_fixture,
) -> None:
"""Enable scripts to be skipped based on pytest invocation flags."""
TESTS_TO_SKIP_FOR_SQLA_2_0_AND_PANDAS_2_2 = [
# Pandas 2.2 dropped support for SQLAlchemy < 2 in to_sql/read_sql_table; this list
# captures the tests that rely on those code paths and therefore can't run under that
# combination. See https://github.com/great-expectations/great_expectations/pull/11417.
TESTS_TO_SKIP_UNDER_SQLA_LT_2_AND_PANDAS_GTE_2_2 = [
"expect_column_max_to_be_between_custom",
"partition_data_on_whole_table_snowflake",
"partition_data_on_whole_table_redshift",
"partition_data_on_datetime_redshift",
"partition_data_on_datetime_snowflake",
"deployment_patterns_redshift",
]
IS_RUNNING_SQLA_2_0_AND_PANDAS_2_2 = (
IS_RUNNING_SQLA_LT_2_AND_PANDAS_GTE_2_2 = (
sqlalchemy.__version__ < "2.0" and pandas.__version__ >= "2.2"
)
Comment on lines +508 to 510
Copy link

Copilot AI Apr 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Version gating uses plain string comparisons (e.g., sqlalchemy.__version__ < "2.0"), which can produce incorrect results for versions like 2.10.0 (lexicographic vs semantic ordering). Use packaging.version.parse / packaging.version.Version (or sqlalchemy.util.compat helpers) to compare parsed versions instead of raw strings.

Copilot uses AI. Check for mistakes.
if (
IS_RUNNING_SQLA_2_0_AND_PANDAS_2_2
and integration_test_fixture.name in TESTS_TO_SKIP_FOR_SQLA_2_0_AND_PANDAS_2_2
IS_RUNNING_SQLA_LT_2_AND_PANDAS_GTE_2_2
and integration_test_fixture.name in TESTS_TO_SKIP_UNDER_SQLA_LT_2_AND_PANDAS_GTE_2_2
):
pytest.skip(
"This test requires sqlalchemy version 2.0 or higher and pandas version 2.2 or higher"
"This test requires sqlalchemy version 2.0 or higher when running pandas >= 2.2"
)
dependencies = integration_test_fixture.backend_dependencies
if not dependencies:
Expand Down
49 changes: 47 additions & 2 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,35 @@ def get_snowflake_private_key() -> Optional[str]:
return os.environ.get("SNOWFLAKE_PRIVATE_KEY")


def _engine_kwargs_for(connection_string: str) -> dict:
    """Pick the extra ``sa.create_engine`` keyword arguments implied by a connection URL.

    Only URLs that begin with ``snowflake://`` receive extras (whatever
    ``get_snowflake_connection_kwargs`` returns, e.g. key-pair auth via connect_args);
    every other URL yields an empty dict.
    """
    targets_snowflake = connection_string.startswith("snowflake://")
    return get_snowflake_connection_kwargs() if targets_snowflake else {}


def get_snowflake_connection_kwargs() -> dict:
    """Build the extra keyword arguments for a Snowflake ``sa.create_engine`` call.

    Reads the ``SNOWFLAKE_PRIVATE_KEY`` environment variable. When it holds a non-empty
    value, that value is returned under ``connect_args["private_key"]`` so the Snowflake
    connector can use key-pair auth; the URL from :func:`get_snowflake_connection_url`
    omits the password in that case, so the key must travel separately. When the
    variable is unset or empty, an empty dict is returned.

    Typical call site::

        engine = sa.create_engine(connection_string, **get_snowflake_connection_kwargs())
    """
    private_key = os.environ.get("SNOWFLAKE_PRIVATE_KEY")
    if not private_key:
        # Unset or empty: nothing extra to forward.
        return {}
    return {"connect_args": {"private_key": private_key}}


def get_redshift_connection_url() -> str:
"""Get Amazon Redshift connection url from environment variables.

Expand Down Expand Up @@ -540,7 +569,7 @@ def load_data_into_test_database( # noqa: C901, PLR0912, PLR0915 # FIXME CoP
)
connection = None
if sa:
engine = sa.create_engine(connection_string)
engine = sa.create_engine(connection_string, **_engine_kwargs_for(connection_string))
else:
logger.debug(
"Attempting to load data in to tests SqlAlchemy database, but unable to load SqlAlchemy context; " # noqa: E501 # FIXME CoP
Expand Down Expand Up @@ -698,7 +727,7 @@ def clean_up_tables_with_prefix(connection_string: str, table_prefix: str) -> Li
List of deleted tables.
"""
execution_engine: SqlAlchemyExecutionEngine = SqlAlchemyExecutionEngine(
connection_string=connection_string
connection_string=connection_string, **_engine_kwargs_for(connection_string)
)
introspection_output = introspect_db(execution_engine=execution_engine)

Expand Down Expand Up @@ -991,6 +1020,22 @@ def add_datasource(

dialect: str = db_config["dialect"]
if dialect == "snowflake":
# When SNOWFLAKE_PRIVATE_KEY is set, the connection string emitted by
# get_snowflake_connection_url() omits the password, so the connection_string
# overload of add_snowflake() can't authenticate on its own. Switch to the
# explicit-fields overload so the private key is wired into connect_args.
sf_private_key = os.environ.get("SNOWFLAKE_PRIVATE_KEY")
if sf_private_key:
return context.data_sources.add_snowflake(
name=name,
account=os.environ["SNOWFLAKE_ACCOUNT"],
user=os.environ["SNOWFLAKE_USER"],
private_key=sf_private_key,
database=os.environ["SNOWFLAKE_DATABASE"],
schema=os.environ["SNOWFLAKE_SCHEMA"],
warehouse=os.environ["SNOWFLAKE_WAREHOUSE"],
role=os.environ.get("SNOWFLAKE_ROLE") or "PUBLIC",
)
return context.data_sources.add_snowflake(name=name, connection_string=connection_string)
elif dialect == "postgres":
return context.data_sources.add_postgres(name=name, connection_string=connection_string)
Expand Down
Loading