Skip to content

Commit 0d315e0

Browse files
authored
[MAINTENANCE] Add a sqlite execution engine and 1 sqlite specific metric. (#11042)
1 parent d35b0c5 commit 0d315e0

File tree

5 files changed

+88
-13
lines changed

5 files changed

+88
-13
lines changed

great_expectations/datasource/fluent/sqlite_datasource.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from great_expectations.datasource.fluent.sql_datasource import (
3030
TableAsset as SqlTableAsset,
3131
)
32+
from great_expectations.execution_engine.sqlite_execution_engine import SqliteExecutionEngine
3233

3334
if TYPE_CHECKING:
3435
# min version of typing_extension missing `Self`, so it can't be imported at runtime
@@ -41,6 +42,9 @@
4142
BatchParameters,
4243
DataAsset,
4344
)
45+
from great_expectations.execution_engine.sqlalchemy_execution_engine import (
46+
SqlAlchemyExecutionEngine,
47+
)
4448

4549
# This module serves as an example of how to extend _SQLAssets for specific backends. The steps are:
4650
# 1. Create a plain class with the extensions necessary for the specific backend.
@@ -161,6 +165,12 @@ class SqliteDatasource(SQLDatasource):
161165
_TableAsset: Type[SqlTableAsset] = pydantic.PrivateAttr(SqliteTableAsset)
162166
_QueryAsset: Type[SqlQueryAsset] = pydantic.PrivateAttr(SqliteQueryAsset)
163167

168+
@property
@override
def execution_engine_type(self) -> Type[SqlAlchemyExecutionEngine]:
    """Return the execution engine class this datasource uses by default.

    For SQLite datasources this is ``SqliteExecutionEngine``, which subclasses
    ``SqlAlchemyExecutionEngine``.
    """
    return SqliteExecutionEngine
173+
164174
@public_api
165175
@override
166176
def add_table_asset(
great_expectations/execution_engine/sqlite_execution_engine.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
from typing import Optional
2+
3+
from great_expectations.compatibility.sqlalchemy import sqlalchemy as sa
4+
from great_expectations.compatibility.typing_extensions import override
5+
from great_expectations.core.metric_function_types import SummarizationMetricNameSuffixes
6+
from great_expectations.execution_engine.execution_engine import ExecutionEngine
7+
from great_expectations.execution_engine.sqlalchemy_execution_engine import (
8+
SqlAlchemyExecutionEngine,
9+
)
10+
from great_expectations.expectations.expectation_configuration import ExpectationConfiguration
11+
from great_expectations.expectations.metrics.column_aggregate_metric_provider import (
12+
column_aggregate_partial,
13+
)
14+
from great_expectations.expectations.metrics.column_aggregate_metrics.column_standard_deviation import ( # noqa: E501
15+
ColumnStandardDeviation as BaseColumnStandardDeviation,
16+
)
17+
from great_expectations.validator.metric_configuration import MetricConfiguration
18+
19+
20+
class SqliteExecutionEngine(SqlAlchemyExecutionEngine):
    """SqlAlchemyExecutionEngine for SQLite databases.

    Adds no behavior of its own; everything is inherited from
    ``SqlAlchemyExecutionEngine``. The subclass gives SQLite a distinct engine type.
    """
24+
25+
26+
class ColumnStandardDeviation(BaseColumnStandardDeviation):
    """MetricProvider class for the aggregate standard deviation metric on SQLite databases.

    The SQL expression is assembled from two previously computed metrics (the column
    mean and a row count) rather than from a database-side standard deviation function.
    """

    # Register this provider for SqliteExecutionEngine only. Providers are looked up by
    # engine class name, so registering under the generic SqlAlchemyExecutionEngine would
    # presumably apply this SQLite-specific expression to every SQL backend.
    # To compute this metric in a completely different way, change this decorator.
    @column_aggregate_partial(engine=SqliteExecutionEngine)
    def _sqlalchemy(cls, column, _dialect, _metrics, **kwargs):
        """Build the SQLite expression for the sample standard deviation of ``column``.

        Args:
            column: SqlAlchemy column expression to aggregate.
            _dialect: SQL dialect in use (not consulted here).
            _metrics: Resolved dependency metrics; must contain ``column.mean`` and the
                ``column_values.null`` unexpected-count metric.
            **kwargs: Additional provider arguments (unused).

        Returns:
            A SqlAlchemy expression equal to
            ``sqrt(sum((column - mean) ** 2) / (count - 1))``.
        """
        mean = _metrics["column.mean"]
        # NOTE(review): presumably the number of non-null rows, i.e. the values the
        # null check treats as "unexpected" -- confirm against the metric definition.
        nonnull_row_count = _metrics[
            f"column_values.null.{SummarizationMetricNameSuffixes.UNEXPECTED_COUNT.value}"
        ]
        # The 1.0 factors presumably force floating-point arithmetic in SQL.
        standard_deviation = sa.func.sqrt(
            sa.func.sum((1.0 * column - mean) * (1.0 * column - mean))
            / ((1.0 * nonnull_row_count) - 1.0)
        )
        return standard_deviation

    @classmethod
    @override
    def _get_evaluation_dependencies(
        cls,
        metric: MetricConfiguration,
        configuration: Optional[ExpectationConfiguration] = None,
        execution_engine: Optional[ExecutionEngine] = None,
        runtime_configuration: Optional[dict] = None,
    ):
        """Return the metrics this metric depends on, as resolved by the parent class.

        Args:
            metric: Configuration of the metric being evaluated.
            configuration: Optional expectation configuration.
            execution_engine: Optional engine the metric will run on.
            runtime_configuration: Optional runtime settings.

        Returns:
            Whatever the parent implementation returns for the same arguments.
        """
        # We don't need to override this here but I wanted to show for completeness
        # If we are changing the decorator on the provider method or we are completely
        # implementing a new datasource, we'll want to override.
        return super()._get_evaluation_dependencies(
            metric=metric,
            configuration=configuration,
            execution_engine=execution_engine,
            runtime_configuration=runtime_configuration,
        )

great_expectations/expectations/metrics/column_aggregate_metrics/column_standard_deviation.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,6 @@ def _sqlalchemy(cls, column, _dialect, _metrics, **kwargs):
4848
"""SqlAlchemy Standard Deviation implementation"""
4949
if _dialect.name.lower() == GXSqlDialect.MSSQL:
5050
standard_deviation = sa.func.stdev(column)
51-
elif _dialect.name.lower() == GXSqlDialect.SQLITE:
52-
mean = _metrics["column.mean"]
53-
nonnull_row_count = _metrics[
54-
f"column_values.null.{SummarizationMetricNameSuffixes.UNEXPECTED_COUNT.value}"
55-
]
56-
standard_deviation = sa.func.sqrt(
57-
sa.func.sum((1.0 * column - mean) * (1.0 * column - mean))
58-
/ ((1.0 * nonnull_row_count) - 1.0)
59-
)
6051
else:
6152
standard_deviation = sa.func.stddev_samp(column)
6253

great_expectations/expectations/registry.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,8 +312,25 @@ def get_metric_provider(
312312
) -> Tuple[MetricProvider, Callable]:
313313
try:
314314
metric_definition = _registered_metrics[metric_name]
315+
except KeyError:
316+
raise gx_exceptions.MetricProviderError( # noqa: TRY003 # FIXME CoP
317+
f"No metric named {metric_name} found."
318+
)
319+
320+
try:
315321
return metric_definition["providers"][type(execution_engine).__name__]
316322
except KeyError:
323+
# Search up class hierarchy for a match. We skip the first entry since that's the
324+
# execution engine type itself, type(execution_engine), which we just checked and
325+
# resulted in the KeyError we're handling here.
326+
for cls in type(execution_engine).mro()[1:]:
327+
possible_key = cls.__name__
328+
if metric_definition["providers"].get(possible_key) is not None:
329+
metric_def = metric_definition["providers"][possible_key]
330+
# Register the metric definition for this engine so we don't have to search again
331+
metric_definition["providers"][type(execution_engine).__name__] = metric_def
332+
return metric_def
333+
# No match was found anywhere in the class hierarchy, so we raise.
317334
raise gx_exceptions.MetricProviderError( # noqa: TRY003 # FIXME CoP
318335
f"No provider found for {metric_name} using {type(execution_engine).__name__}"
319336
)

tests/validator/test_validation_graph.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -344,10 +344,7 @@ class PandasExecutionEngineStub:
344344
),
345345
)
346346

347-
assert (
348-
e.value.message
349-
== "No provider found for column_values.not_a_metric using PandasExecutionEngine"
350-
)
347+
assert e.value.message == "No metric named column_values.not_a_metric found."
351348

352349

353350
@pytest.mark.unit

0 commit comments

Comments
 (0)