
Commit 55c56d4

Debug and fix acceptance runs (#2223)
* Fix recon tests after the `sqlglot` upgrade
* The pipe operator is now used in hash queries, so the test fixtures were updated to expect the same (see the sketch below)
* Use different Spark schemas for different `reconcile` tests to get rid of Spark write errors about an already-existing schema
* Fix the e2e recon test; we didn't see the issues before because it did not run in the CI environment
* Delete the `pytest` modifier that excluded `reconcile` tests and run all integration tests in acceptance CI
1 parent 25312ad commit 55c56d4
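
A minimal illustration of the pipe-operator bullet above. The column and table names are borrowed from the diamonds fixtures elsewhere in this commit; the actual SQL is produced by the reconcile query builder and is not shown here, so treat the strings as a sketch of the change in shape only.

```python
# Illustrative only: after the sqlglot upgrade the generated hash queries
# concatenate columns with the SQL pipe operator (||) instead of CONCAT(...),
# which is the shape the updated test fixtures now expect.
before = "SELECT SHA2(CONCAT(carat, cut, color, clarity), 256) AS hash_value FROM diamonds"
after = "SELECT SHA2(carat || cut || color || clarity, 256) AS hash_value FROM diamonds"
```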

File tree

9 files changed: +226 −153 lines changed


.codegen.json

Lines changed: 2 additions & 1 deletion
@@ -5,6 +5,7 @@
   "toolchain": {
     "required": ["hatch"],
     "pre_setup": ["hatch env create"],
-    "prepend_path": ".venv/bin"
+    "prepend_path": ".venv/bin",
+    "acceptance_path": "tests/integration"
   }
 }

.github/scripts/setup_spark_remote.sh

Lines changed: 2 additions & 0 deletions
@@ -91,6 +91,8 @@ else
   fi
 fi
 
+rm -rf "${HOME}"/spark/"${spark}"/spark-warehouse
+echo "Cleared old spark warehouse default directory"
 
 cd "${spark}" || exit 1
 ## check spark remote is running,if not start the spark remote
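
Why the cleanup matters, as a hedged sketch: with a local Spark session, managed tables written via `saveAsTable()` land under the default `spark-warehouse` directory, and those files outlive the session even though the in-memory catalog does not, so a later run re-creating an object with the same name can fail because its location already exists. The session settings below are assumptions for illustration, not taken from the setup script.

```python
from pyspark.sql import SparkSession

# Local session; managed tables are written into ./spark-warehouse by default.
spark = SparkSession.builder.master("local[*]").getOrCreate()
spark.createDataFrame([(1,)], ["id"]).write.saveAsTable("leftover")
# If the spark-warehouse directory is not wiped between CI runs, a later
# saveAsTable("leftover") can fail because the table location still exists.
```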

.github/workflows/acceptance.yml

Lines changed: 7 additions & 1 deletion
@@ -28,7 +28,7 @@ jobs:
       - name: Checkout Code
         uses: actions/checkout@v6
         with:
-          fetch-depth: 0
+          fetch-depth: 1
 
       - name: Install Python
         uses: actions/setup-python@v6
@@ -45,6 +45,12 @@ jobs:
           chmod +x $GITHUB_WORKSPACE/.github/scripts/setup_mssql_odbc.sh
           $GITHUB_WORKSPACE/.github/scripts/setup_mssql_odbc.sh
 
+      # TODO: Migrate tests to use Databricks clusters instead of Spark local mode
+      - name: Setup spark
+        run: |
+          chmod +x $GITHUB_WORKSPACE/.github/scripts/setup_spark_remote.sh
+          $GITHUB_WORKSPACE/.github/scripts/setup_spark_remote.sh
+
       - name: Run integration tests
         uses: databrickslabs/sandbox/acceptance@acceptance/v0.4.4
         with:
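
For context, a hedged sketch of what the new step provides to the integration tests: `setup_spark_remote.sh` starts a local Spark Connect server, and a PySpark session can attach to it remotely. The URL below assumes the Spark Connect default port; the workflow and script do not state it here.

```python
from pyspark.sql import SparkSession

# Attach to the locally running Spark Connect server started by the setup step.
spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
spark.sql("SELECT 1 AS ok").show()
```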

src/databricks/labs/lakebridge/reconcile/recon_capture.py

Lines changed: 4 additions & 2 deletions
@@ -95,7 +95,7 @@ def generate_final_reconcile_output(
     metadata_config: ReconcileMetadataConfig = ReconcileMetadataConfig(),
     local_test_run: bool = False,
 ) -> ReconcileOutput:
-    _db_prefix = "default" if local_test_run else f"{metadata_config.catalog}.{metadata_config.schema}"
+    _db_prefix = metadata_config.schema if local_test_run else f"{metadata_config.catalog}.{metadata_config.schema}"
     recon_df = spark.sql(
         f"""
         SELECT
@@ -237,7 +237,9 @@ def __init__(
         self.source_dialect = source_dialect
         self.ws = ws
         self.spark = spark
-        self._db_prefix = "default" if local_test_run else f"{metadata_config.catalog}.{metadata_config.schema}"
+        self._db_prefix = (
+            metadata_config.schema if local_test_run else f"{metadata_config.catalog}.{metadata_config.schema}"
+        )
 
     def _generate_recon_main_id(
         self,
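
A small sketch of why `"default"` was wrong once each test gets its own schema. The helper below is a rewrite for illustration, not the code in `recon_capture.py`, and the default catalog/schema names are placeholders: the prefix qualifies the recon metadata tables in the generated SQL, so local test runs must resolve to the per-test schema created by the fixtures.

```python
from dataclasses import dataclass


@dataclass
class MetadataConfig:  # stand-in for ReconcileMetadataConfig; defaults are placeholders
    catalog: str = "some_catalog"
    schema: str = "some_schema"


def db_prefix(cfg: MetadataConfig, local_test_run: bool) -> str:
    # Local runs use the bare per-test schema; everything else is catalog-qualified.
    return cfg.schema if local_test_run else f"{cfg.catalog}.{cfg.schema}"


assert db_prefix(MetadataConfig(schema="recon_schema_ab12cd"), local_test_run=True) == "recon_schema_ab12cd"
assert db_prefix(MetadataConfig(), local_test_run=False) == "some_catalog.some_schema"
```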

tests/integration/conftest.py

Lines changed: 0 additions & 17 deletions
@@ -1,4 +1,3 @@
-import os
 import logging
 from urllib.parse import urlparse
 
@@ -30,22 +29,6 @@ def get_logger():
     return logger
 
 
-def pytest_collection_modifyitems(config, items):
-    if os.getenv('TEST_ENV') != 'ACCEPTANCE':
-        return
-    selected_items = []
-    deselected_items = []
-    # Add only specific tests to run from acceptance.yml
-    inclusions = {'assessments', 'connections', 'config', 'discovery', 'helpers', 'transpile'}
-    for item in items:
-        if any(f"tests/integration/{inclusion}" in str(item.fspath) for inclusion in inclusions):
-            selected_items.append(item)
-        else:
-            deselected_items.append(item)
-    items[:] = selected_items
-    config.hook.pytest_deselected(items=deselected_items)
-
-
 @pytest.fixture(scope="session")
 def mock_spark() -> SparkSession:
     """

tests/integration/reconcile/conftest.py

Lines changed: 41 additions & 3 deletions
@@ -1,14 +1,24 @@
 import logging
+import uuid
+from collections.abc import Generator
 
 import pytest
 
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.errors.platform import PermissionDenied
 from databricks.sdk.service.catalog import TableInfo, SchemaInfo
+
+from databricks.labs.lakebridge.config import ReconcileMetadataConfig
 from tests.integration.debug_envgetter import TestEnvGetter
 
 logger = logging.getLogger(__name__)
 
+DIAMONDS_COLUMNS = [
+    ("carat", "DOUBLE"),
+    ("cut", "STRING"),
+    ("color", "STRING"),
+    ("clarity", "STRING"),
+]
 DIAMONDS_ROWS_SQL = """
 INSERT INTO {catalog}.{schema}.{table} (carat, cut, color, clarity) VALUES
 (0.23, 'Ideal', 'E', 'SI2'),
@@ -24,6 +34,7 @@
 def recon_catalog(make_catalog) -> str:
     try:
         catalog = make_catalog().name
+        logger.info(f"Created catalog {catalog} for recon tests")
     except PermissionDenied as e:
         logger.warning("Could not create catalog for recon tests, using 'sandbox' instead", exc_info=e)
         catalog = "sandbox"
@@ -34,14 +45,20 @@ def recon_catalog(make_catalog) -> str:
 @pytest.fixture
 def recon_schema(recon_catalog, make_schema) -> SchemaInfo:
     from_schema = make_schema(catalog_name=recon_catalog)
+    logger.info(f"Created schema {from_schema.name} in catalog {recon_catalog} for recon tests")
 
     return from_schema
 
 
 @pytest.fixture
 def recon_tables(ws: WorkspaceClient, recon_schema: SchemaInfo, make_table) -> tuple[TableInfo, TableInfo]:
-    src_table = make_table(catalog_name=recon_schema.catalog_name, schema_name=recon_schema.name)
-    tgt_table = make_table(catalog_name=recon_schema.catalog_name, schema_name=recon_schema.name)
+    src_table = make_table(
+        catalog_name=recon_schema.catalog_name, schema_name=recon_schema.name, columns=DIAMONDS_COLUMNS
+    )
+    tgt_table = make_table(
+        catalog_name=recon_schema.catalog_name, schema_name=recon_schema.name, columns=DIAMONDS_COLUMNS
+    )
+    logger.info(f"Created recon tables {src_table.name}, {tgt_table.name} in schema {recon_schema.name}")
 
     test_env = TestEnvGetter(True)
     warehouse = test_env.get("TEST_DEFAULT_WAREHOUSE_ID")
@@ -52,11 +69,32 @@ def recon_tables(ws: WorkspaceClient, recon_schema: SchemaInfo, make_table) -> t
             schema=recon_schema.name,
             table=tbl.name,
         )
-        ws.statement_execution.execute_statement(
+        exc_response = ws.statement_execution.execute_statement(
            warehouse_id=warehouse,
            catalog=recon_schema.catalog_name,
            schema=recon_schema.name,
            statement=sql,
        )
+        logger.info(f"Inserted data into table {tbl.name} and got response {exc_response.status}")
 
     return src_table, tgt_table
+
+
+@pytest.fixture
+def recon_metadata(mock_spark, report_tables_schema) -> Generator[ReconcileMetadataConfig, None, None]:
+    rand = uuid.uuid4().hex
+    schema = f"recon_schema_{rand}"
+    mock_spark.sql(f"CREATE SCHEMA {schema}")
+    main_schema, metrics_schema, details_schema = report_tables_schema
+
+    mock_spark.createDataFrame(data=[], schema=main_schema).write.saveAsTable(f"{schema}.MAIN")
+    mock_spark.createDataFrame(data=[], schema=metrics_schema).write.saveAsTable(f"{schema}.METRICS")
+    mock_spark.createDataFrame(data=[], schema=details_schema).write.saveAsTable(f"{schema}.DETAILS")
+
+    yield ReconcileMetadataConfig(
+        catalog=f"recon_catalog_{rand}",
+        schema=schema,
+        volume=f"recon_volume_{rand}",
+    )
+
+    mock_spark.sql(f"DROP SCHEMA {schema} CASCADE")
