Skip to content

Commit c004117

Browse files
[BUGFIX] Fix docs-snippets CI broken by sqlalchemy-redshift 1.0.0 (#11857)
1 parent a294ae6 commit c004117

3 files changed

Lines changed: 55 additions & 170 deletions

File tree

docs/docusaurus/docs/snippets/aws_redshift_deployment_patterns.py

Lines changed: 0 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -59,164 +59,6 @@
5959
expected_existing_expectations_store_yaml
6060
)
6161

62-
# adding expectations store
63-
configured_expectations_store_yaml = """
64-
# <snippet name="docs/docusaurus/docs/snippets/aws_redshift_deployment_patterns.py new_expectations_store">
65-
stores:
66-
expectations_S3_store:
67-
class_name: ExpectationsStore
68-
store_backend:
69-
class_name: TupleS3StoreBackend
70-
bucket: '<YOUR S3 BUCKET NAME>'
71-
prefix: '<YOUR S3 PREFIX NAME>' # Bucket and prefix in combination must be unique across all stores
72-
73-
expectations_store_name: expectations_S3_store
74-
# </snippet>
75-
"""
76-
77-
# replace example code with integration test configuration
78-
configured_expectations_store = yaml.load(configured_expectations_store_yaml)
79-
configured_expectations_store["stores"]["expectations_S3_store"]["store_backend"][
80-
"bucket"
81-
] = "gx-golden-path-tests"
82-
configured_expectations_store["stores"]["expectations_S3_store"]["store_backend"][
83-
"prefix"
84-
] = "metadata/expectations"
85-
86-
# add and set the new expectation store
87-
context.add_store(
88-
name=configured_expectations_store["expectations_store_name"],
89-
config=configured_expectations_store["stores"]["expectations_S3_store"],
90-
)
91-
with open(great_expectations_yaml_file_path) as f:
92-
great_expectations_yaml = yaml.load(f)
93-
great_expectations_yaml["expectations_store_name"] = "expectations_S3_store"
94-
great_expectations_yaml["stores"]["expectations_S3_store"]["store_backend"].pop(
95-
"suppress_store_backend_id"
96-
)
97-
with open(great_expectations_yaml_file_path, "w") as f:
98-
yaml.dump(great_expectations_yaml, f)
99-
100-
# adding validation results store
101-
great_expectations_yaml_file_path = pathlib.Path(
102-
context.root_directory, FileDataContext.GX_YML
103-
)
104-
with open(great_expectations_yaml_file_path) as f:
105-
great_expectations_yaml = yaml.load(f)
106-
107-
stores = great_expectations_yaml["stores"]
108-
# popping the rest out so that we can do the comparison. They aren't going anywhere, don't worry
109-
pop_stores = [
110-
"checkpoint_store",
111-
"expectations_store",
112-
"expectations_S3_store",
113-
"validation_definition_store",
114-
]
115-
for store in pop_stores:
116-
stores.pop(store)
117-
118-
actual_existing_validation_results_store = {}
119-
actual_existing_validation_results_store["stores"] = stores
120-
actual_existing_validation_results_store["validation_results_store_name"] = (
121-
great_expectations_yaml["validation_results_store_name"]
122-
)
123-
124-
expected_existing_validation_results_store_yaml = """
125-
# <snippet name="docs/docusaurus/docs/snippets/aws_redshift_deployment_patterns.py existing_validation_results_store">
126-
stores:
127-
validation_results_store:
128-
class_name: ValidationResultsStore
129-
store_backend:
130-
class_name: TupleFilesystemStoreBackend
131-
base_directory: uncommitted/validations/
132-
133-
validation_results_store_name: validation_results_store
134-
# </snippet>
135-
"""
136-
137-
assert actual_existing_validation_results_store == yaml.load(
138-
expected_existing_validation_results_store_yaml
139-
)
140-
141-
# adding validations store
142-
configured_validation_results_store_yaml = """
143-
# <snippet name="docs/docusaurus/docs/snippets/aws_redshift_deployment_patterns.py new_validation_results_store">
144-
stores:
145-
validation_results_S3_store:
146-
class_name: ValidationResultsStore
147-
store_backend:
148-
class_name: TupleS3StoreBackend
149-
bucket: '<YOUR S3 BUCKET NAME>'
150-
prefix: '<YOUR S3 PREFIX NAME>' # Bucket and prefix in combination must be unique across all stores
151-
# </snippet>
152-
153-
# <snippet name="docs/docusaurus/docs/snippets/aws_redshift_deployment_patterns.py set_new_validation_results_store">
154-
validation_results_store_name: validation_results_S3_store
155-
# </snippet>
156-
"""
157-
158-
# replace example code with integration test configuration
159-
configured_validation_results_store = yaml.load(
160-
configured_validation_results_store_yaml
161-
)
162-
configured_validation_results_store["stores"]["validation_results_S3_store"][
163-
"store_backend"
164-
]["bucket"] = "gx-golden-path-tests"
165-
configured_validation_results_store["stores"]["validation_results_S3_store"][
166-
"store_backend"
167-
]["prefix"] = "metadata/validations"
168-
169-
# add and set the new validation store
170-
context.add_store(
171-
name=configured_validation_results_store["validation_results_store_name"],
172-
config=configured_validation_results_store["stores"]["validation_results_S3_store"],
173-
)
174-
with open(great_expectations_yaml_file_path) as f:
175-
great_expectations_yaml = yaml.load(f)
176-
great_expectations_yaml["validation_results_store_name"] = "validation_results_S3_store"
177-
great_expectations_yaml["stores"]["validation_results_S3_store"]["store_backend"].pop(
178-
"suppress_store_backend_id"
179-
)
180-
with open(great_expectations_yaml_file_path, "w") as f:
181-
yaml.dump(great_expectations_yaml, f)
182-
183-
# adding data docs store
184-
data_docs_site_yaml = """
185-
# <snippet name="docs/docusaurus/docs/snippets/aws_redshift_deployment_patterns.py add_data_docs_store">
186-
data_docs_sites:
187-
local_site:
188-
class_name: SiteBuilder
189-
show_how_to_buttons: true
190-
store_backend:
191-
class_name: TupleFilesystemStoreBackend
192-
base_directory: uncommitted/data_docs/local_site/
193-
site_index_builder:
194-
class_name: DefaultSiteIndexBuilder
195-
S3_site: # this is a user-selected name - you may select your own
196-
class_name: SiteBuilder
197-
store_backend:
198-
class_name: TupleS3StoreBackend
199-
bucket: <YOUR S3 BUCKET NAME>
200-
site_index_builder:
201-
class_name: DefaultSiteIndexBuilder
202-
# </snippet>
203-
"""
204-
205-
data_docs_site_yaml = data_docs_site_yaml.replace(
206-
"<YOUR S3 BUCKET NAME>", "gx-demo-data-docs"
207-
)
208-
great_expectations_yaml_file_path = pathlib.Path(
209-
context.root_directory, FileDataContext.GX_YML
210-
)
211-
with open(great_expectations_yaml_file_path) as f:
212-
great_expectations_yaml = yaml.load(f)
213-
great_expectations_yaml["data_docs_sites"] = yaml.load(data_docs_site_yaml)[
214-
"data_docs_sites"
215-
]
216-
with open(great_expectations_yaml_file_path, "w") as f:
217-
yaml.dump(great_expectations_yaml, f)
218-
219-
22062
# <snippet name="docs/docusaurus/docs/snippets/aws_redshift_deployment_patterns.py vars">
22163
datasource_name = "my_redshift_datasource"
22264
connection_string = "redshift+psycopg2://<USER_NAME>:<PASSWORD>@<HOST>:<PORT>/<DATABASE>?sslmode=<SSLMODE>"

tests/integration/test_script_runner.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -499,23 +499,21 @@ def _check_for_skipped_tests( # noqa: C901, PLR0912 # FIXME CoP
499499
integration_test_fixture,
500500
) -> None:
501501
"""Enable scripts to be skipped based on pytest invocation flags."""
502-
TESTS_TO_SKIP_FOR_SQLA_2_0_AND_PANDAS_2_2 = [
502+
# Pandas 2.2 dropped support for SQLAlchemy < 2 in to_sql/read_sql_table; this list
503+
# captures the tests that rely on those code paths and therefore can't run under that
504+
# combination. See https://github.com/great-expectations/great_expectations/pull/11417.
505+
TESTS_TO_SKIP_UNDER_SQLA_LT_2_AND_PANDAS_GTE_2_2 = [
503506
"expect_column_max_to_be_between_custom",
504-
"partition_data_on_whole_table_snowflake",
505-
"partition_data_on_whole_table_redshift",
506-
"partition_data_on_datetime_redshift",
507-
"partition_data_on_datetime_snowflake",
508-
"deployment_patterns_redshift",
509507
]
510-
IS_RUNNING_SQLA_2_0_AND_PANDAS_2_2 = (
508+
IS_RUNNING_SQLA_LT_2_AND_PANDAS_GTE_2_2 = (
511509
sqlalchemy.__version__ < "2.0" and pandas.__version__ >= "2.2"
512510
)
513511
if (
514-
IS_RUNNING_SQLA_2_0_AND_PANDAS_2_2
515-
and integration_test_fixture.name in TESTS_TO_SKIP_FOR_SQLA_2_0_AND_PANDAS_2_2
512+
IS_RUNNING_SQLA_LT_2_AND_PANDAS_GTE_2_2
513+
and integration_test_fixture.name in TESTS_TO_SKIP_UNDER_SQLA_LT_2_AND_PANDAS_GTE_2_2
516514
):
517515
pytest.skip(
518-
"This test requires sqlalchemy version 2.0 or higher and pandas version 2.2 or higher"
516+
"This test requires sqlalchemy version 2.0 or higher when running pandas >= 2.2"
519517
)
520518
dependencies = integration_test_fixture.backend_dependencies
521519
if not dependencies:

tests/test_utils.py

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,35 @@ def get_snowflake_private_key() -> Optional[str]:
377377
return os.environ.get("SNOWFLAKE_PRIVATE_KEY")
378378

379379

380+
def _engine_kwargs_for(connection_string: str) -> dict:
381+
"""Per-dialect extras to forward to ``sa.create_engine`` based on the URL.
382+
383+
Currently only Snowflake needs special handling (key-pair auth via connect_args).
384+
"""
385+
if connection_string.startswith("snowflake://"):
386+
return get_snowflake_connection_kwargs()
387+
return {}
388+
389+
390+
def get_snowflake_connection_kwargs() -> dict:
391+
"""Extra kwargs for ``sa.create_engine`` (or any equivalent) when connecting to Snowflake.
392+
393+
When ``SNOWFLAKE_PRIVATE_KEY`` is set, the returned dict carries the private key in
394+
``connect_args`` so the Snowflake connector can authenticate via key-pair auth — the
395+
URL produced by :func:`get_snowflake_connection_url` omits the password in that case,
396+
so the private key has to be supplied separately. Otherwise an empty dict is returned.
397+
398+
Intended usage::
399+
400+
kwargs = get_snowflake_connection_kwargs()
401+
engine = sa.create_engine(connection_string, **kwargs)
402+
"""
403+
sf_private_key = os.environ.get("SNOWFLAKE_PRIVATE_KEY")
404+
if sf_private_key:
405+
return {"connect_args": {"private_key": sf_private_key}}
406+
return {}
407+
408+
380409
def get_redshift_connection_url() -> str:
381410
"""Get Amazon Redshift connection url from environment variables.
382411
@@ -540,7 +569,7 @@ def load_data_into_test_database( # noqa: C901, PLR0912, PLR0915 # FIXME CoP
540569
)
541570
connection = None
542571
if sa:
543-
engine = sa.create_engine(connection_string)
572+
engine = sa.create_engine(connection_string, **_engine_kwargs_for(connection_string))
544573
else:
545574
logger.debug(
546575
"Attempting to load data in to tests SqlAlchemy database, but unable to load SqlAlchemy context; " # noqa: E501 # FIXME CoP
@@ -698,7 +727,7 @@ def clean_up_tables_with_prefix(connection_string: str, table_prefix: str) -> Li
698727
List of deleted tables.
699728
"""
700729
execution_engine: SqlAlchemyExecutionEngine = SqlAlchemyExecutionEngine(
701-
connection_string=connection_string
730+
connection_string=connection_string, **_engine_kwargs_for(connection_string)
702731
)
703732
introspection_output = introspect_db(execution_engine=execution_engine)
704733

@@ -991,6 +1020,22 @@ def add_datasource(
9911020

9921021
dialect: str = db_config["dialect"]
9931022
if dialect == "snowflake":
1023+
# When SNOWFLAKE_PRIVATE_KEY is set, the connection string emitted by
1024+
# get_snowflake_connection_url() omits the password, so the connection_string
1025+
# overload of add_snowflake() can't authenticate on its own. Switch to the
1026+
# explicit-fields overload so the private key is wired into connect_args.
1027+
sf_private_key = os.environ.get("SNOWFLAKE_PRIVATE_KEY")
1028+
if sf_private_key:
1029+
return context.data_sources.add_snowflake(
1030+
name=name,
1031+
account=os.environ["SNOWFLAKE_ACCOUNT"],
1032+
user=os.environ["SNOWFLAKE_USER"],
1033+
private_key=sf_private_key,
1034+
database=os.environ["SNOWFLAKE_DATABASE"],
1035+
schema=os.environ["SNOWFLAKE_SCHEMA"],
1036+
warehouse=os.environ["SNOWFLAKE_WAREHOUSE"],
1037+
role=os.environ.get("SNOWFLAKE_ROLE") or "PUBLIC",
1038+
)
9941039
return context.data_sources.add_snowflake(name=name, connection_string=connection_string)
9951040
elif dialect == "postgres":
9961041
return context.data_sources.add_postgres(name=name, connection_string=connection_string)

0 commit comments

Comments
 (0)