11from collections .abc import Callable
22from pathlib import Path
3+ from logging import Logger
34from typing import TypeAlias
45import duckdb
56import pytest
67
7- from databricks .labs .lakebridge .assessments .pipeline import PipelineClass , DB_NAME , StepExecutionStatus
8+ from databricks .labs .lakebridge .assessments .pipeline import (
9+ PipelineClass ,
10+ DB_NAME ,
11+ StepExecutionStatus ,
12+ StepExecutionResult ,
13+ )
814from databricks .labs .lakebridge .assessments .profiler import Profiler
915
1016from databricks .labs .lakebridge .assessments .profiler_config import Step , PipelineConfig
@@ -45,7 +51,20 @@ def python_failure_config(pipeline_configuration_loader: _Loader) -> PipelineCon
4551 return pipeline_configuration_loader (Path ("pipeline_config_python_failure.yml" ))
4652
4753
48- def test_run_pipeline (sandbox_sqlserver , pipeline_config , get_logger ):
@pytest.fixture(scope="module")
def empty_result_config() -> PipelineConfig:
    """Provide the pipeline configuration used by the empty-resultset test.

    The configuration is read from ``pipeline_config_empty_result.yml`` under
    ``resources/assessments`` (two directories above this test module). Every
    step's ``extract_source`` is then re-anchored onto that same base directory,
    so the returned configuration does not rely on the working directory the
    tests are launched from.

    Returns:
        A copy of the loaded configuration whose steps carry the rewritten
        ``extract_source`` values.
    """
    prefix = Path(__file__).parent
    config_path = f"{prefix}/../../resources/assessments/pipeline_config_empty_result.yml"
    config: PipelineConfig = PipelineClass.load_config_from_yaml(config_path)
    # Work on copies of the steps; the loaded configuration object is left untouched.
    updated_steps = [step.copy(extract_source=f"{prefix}/../../{step.extract_source}") for step in config.steps]
    return config.copy(steps=updated_steps)
61+
62+
63+ def test_run_pipeline (
64+ sandbox_sqlserver : DatabaseManager ,
65+ pipeline_config : PipelineConfig ,
66+ get_logger : Logger ,
67+ ) -> None :
4968 pipeline = PipelineClass (config = pipeline_config , executor = sandbox_sqlserver )
5069 results = pipeline .execute ()
5170
@@ -56,10 +75,14 @@ def test_run_pipeline(sandbox_sqlserver, pipeline_config, get_logger):
5675 StepExecutionStatus .SKIPPED ,
5776 ), f"Step { result .step_name } failed with status { result .status } "
5877
59- assert verify_output (get_logger , pipeline_config .extract_folder )
78+ assert verify_output (get_logger , Path ( pipeline_config .extract_folder ) )
6079
6180
62- def test_run_sql_failure_pipeline (sandbox_sqlserver , sql_failure_config , get_logger ):
81+ def test_run_sql_failure_pipeline (
82+ sandbox_sqlserver : DatabaseManager ,
83+ sql_failure_config : PipelineConfig ,
84+ get_logger : Logger ,
85+ ) -> None :
6386 pipeline = PipelineClass (config = sql_failure_config , executor = sandbox_sqlserver )
6487 with pytest .raises (RuntimeError ) as e :
6588 pipeline .execute ()
@@ -68,7 +91,11 @@ def test_run_sql_failure_pipeline(sandbox_sqlserver, sql_failure_config, get_log
6891 assert "Pipeline execution failed due to errors in steps: invalid_sql_step" in str (e .value )
6992
7093
71- def test_run_python_failure_pipeline (sandbox_sqlserver , python_failure_config , get_logger ):
94+ def test_run_python_failure_pipeline (
95+ sandbox_sqlserver : DatabaseManager ,
96+ python_failure_config : PipelineConfig ,
97+ get_logger : Logger ,
98+ ) -> None :
7299 pipeline = PipelineClass (config = python_failure_config , executor = sandbox_sqlserver )
73100 with pytest .raises (RuntimeError ) as e :
74101 pipeline .execute ()
@@ -77,7 +104,11 @@ def test_run_python_failure_pipeline(sandbox_sqlserver, python_failure_config, g
77104 assert "Pipeline execution failed due to errors in steps: invalid_python_step" in str (e .value )
78105
79106
80- def test_run_python_dep_failure_pipeline (sandbox_sqlserver , pipeline_dep_failure_config , get_logger ):
107+ def test_run_python_dep_failure_pipeline (
108+ sandbox_sqlserver : DatabaseManager ,
109+ pipeline_dep_failure_config : PipelineConfig ,
110+ get_logger : Logger ,
111+ ):
81112 pipeline = PipelineClass (config = pipeline_dep_failure_config , executor = sandbox_sqlserver )
82113 with pytest .raises (RuntimeError ) as e :
83114 pipeline .execute ()
@@ -101,16 +132,16 @@ def test_skipped_steps(sandbox_sqlserver: DatabaseManager, pipeline_config: Pipe
101132 assert result .error_message is None , "Skipped steps should not have error messages"
102133
103134
104- def verify_output (get_logger , path ):
135+ def verify_output (get_logger : Logger , path : Path ):
105136 conn = duckdb .connect (str (Path (path )) + "/" + DB_NAME )
106137
107138 expected_tables = ["usage" , "inventory" , "random_data" ]
108139 logger = get_logger
109140 for table in expected_tables :
110141 try :
111142 result = conn .execute (f"SELECT COUNT(*) FROM { table } " ).fetchone ()
112- logger .info (f"Count for { table } : { result [ 0 ] } " )
113- if result [0 ] == 0 :
143+ logger .info (f"Count for { table } : { result } " )
144+ if result is None or result [0 ] == 0 :
114145 logger .debug (f"Table { table } is empty" )
115146 return False
116147 except duckdb .CatalogException :
@@ -122,7 +153,7 @@ def verify_output(get_logger, path):
122153 return True
123154
124155
125- def test_pipeline_config_comments ():
156+ def test_pipeline_config_comments () -> None :
126157 pipeline_w_comments = PipelineConfig (
127158 name = "warehouse_profiler" ,
128159 version = "1.0" ,
@@ -136,7 +167,7 @@ def test_pipeline_config_comments():
136167 assert pipeline_wo_comments .comment is None
137168
138169
139- def test_pipeline_step_comments ():
170+ def test_pipeline_step_comments () -> None :
140171 step_w_comment = Step (
141172 name = "step_w_comment" ,
142173 type = "sql" ,
@@ -156,3 +187,26 @@ def test_pipeline_step_comments():
156187 )
157188 assert step_w_comment .comment == "This is a step comment."
158189 assert step_wo_comment .comment is None
190+
191+
def test_run_empty_result_pipeline(
    sandbox_sqlserver: DatabaseManager,
    empty_result_config: PipelineConfig,
    get_logger: Logger,
) -> None:
    """Check that a step returning an empty resultset completes without creating a table.

    Runs the pipeline built from ``empty_result_config`` against the sandbox
    executor, asserts that the single step is reported as ``COMPLETE`` with no
    error message, and then inspects the DuckDB extract database to confirm that
    no ``empty_result_step`` table exists.
    """
    pipeline = PipelineClass(config=empty_result_config, executor=sandbox_sqlserver)
    results = pipeline.execute()

    # Verify step completed successfully despite empty results
    assert len(results) == 1
    assert results == [
        StepExecutionResult(step_name="empty_result_step", status=StepExecutionStatus.COMPLETE, error_message=None)
    ]

    # Verify that no table was created (processing was skipped for empty resultset)
    db_path = Path(empty_result_config.extract_folder) / DB_NAME
    with duckdb.connect(str(db_path)) as conn:
        # fetchall() materialises the rows, so the connection can be released right away.
        tables = conn.execute("SHOW TABLES").fetchall()
    table_names = [table[0] for table in tables]

    # Table should NOT be created when resultset is empty
    assert "empty_result_step" not in table_names, "Empty resultset should skip table creation"
0 commit comments