Commit b8027cf

Merge branch 'main' into docs/remove-outdated-teradata-mention
2 parents 40be8a8 + a678bfe

File tree: 21 files changed (+155, -128 lines)

labs.yml

Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@
 name: lakebridge
 description: Code Transpiler and Data Reconciliation tool for Accelerating Data onboarding to Databricks from EDW, CDW and other ETL sources.
 install:
-  script: src/databricks/labs/lakebridge/base_install.py
+  script: src/databricks/labs/lakebridge/install.py
 uninstall:
   script: src/databricks/labs/lakebridge/uninstall.py
 entrypoint: src/databricks/labs/lakebridge/cli.py

src/databricks/labs/lakebridge/__init__.py

Lines changed: 17 additions & 0 deletions

@@ -1,9 +1,26 @@
+import logging
+
 from databricks.sdk.core import with_user_agent_extra, with_product
+from databricks.labs.blueprint.entrypoint import is_in_debug
 from databricks.labs.blueprint.logger import install_logger
 from databricks.labs.lakebridge.__about__ import __version__

+# Ensure that anything that imports this (or lower) submodules triggers setup of the blueprint logging.
 install_logger()

+
+def initialize_logging() -> None:
+    """Common logging initialisation for non-CLI entry-points."""
+    # This is intended to be used by all the non-CLI entry-points, such as install/uninstall hooks and pipeline tasks.
+    # It emulates the behaviour of the blueprint App() initialisation, except that we don't have handoff from the
+    # Databricks CLI. As such the policy is:
+    # - The root (and logging system in general) is left alone.
+    # - If running in the IDE debugger, databricks.* will be set to DEBUG.
+    # - Otherwise, databricks.* will be set to INFO.
+    databricks_log_level = logging.DEBUG if is_in_debug() else logging.INFO
+    logging.getLogger("databricks").setLevel(databricks_log_level)
+
+
 # Add lakebridge/<version> for projects depending on lakebridge as a library
 with_user_agent_extra("lakebridge", __version__)
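For reference, a minimal sketch of how a non-CLI entry point is expected to use this new helper (the task module and its body are hypothetical; only initialize_logging and its import path come from the diff above):

import logging

from databricks.labs.lakebridge import initialize_logging

logger = logging.getLogger(__name__)


def run_task() -> None:
    # Hypothetical task body: set the databricks.* log level first (DEBUG under a
    # debugger, INFO otherwise), then log through an ordinary module-level logger.
    initialize_logging()
    logger.info("task started")


if __name__ == "__main__":
    run_task()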

src/databricks/labs/lakebridge/assessments/configure_assessment.py

Lines changed: 0 additions & 1 deletion

@@ -16,7 +16,6 @@
 from databricks.labs.lakebridge.assessments import CONNECTOR_REQUIRED

 logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)


 def _save_to_disk(credential: dict, cred_file: Path) -> None:

src/databricks/labs/lakebridge/assessments/dashboards/execute.py

Lines changed: 5 additions & 1 deletion

@@ -19,11 +19,15 @@
     ExtractSchemaValidationCheck,
     build_validation_report_dataframe,
 )
+from databricks.labs.lakebridge import initialize_logging

 logger = logging.getLogger(__name__)


-def main(*argv) -> None:
+def main(*argv: str) -> None:
+    """Lakeview Jobs task entry point: profiler_dashboards"""
+    initialize_logging()
+
     logger.debug(f"Arguments received: {argv}")
     assert len(sys.argv) == 4, f"Invalid number of arguments: {len(sys.argv)}"
     catalog_name = sys.argv[0]

src/databricks/labs/lakebridge/assessments/pipeline.py

Lines changed: 0 additions & 1 deletion

@@ -18,7 +18,6 @@
 from databricks.labs.lakebridge.connections.database_manager import DatabaseManager, FetchResult

 logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)

 DB_NAME = "profiler_extract.db"
src/databricks/labs/lakebridge/base_install.py

Lines changed: 0 additions & 28 deletions
This file was deleted.

src/databricks/labs/lakebridge/cli.py

Lines changed: 24 additions & 14 deletions

@@ -16,7 +16,7 @@
 from databricks.sdk import WorkspaceClient

 from databricks.labs.blueprint.cli import App
-from databricks.labs.blueprint.entrypoint import is_in_debug
+from databricks.labs.blueprint.entrypoint import get_logger
 from databricks.labs.blueprint.installation import RootJsonValue, JsonObject, JsonValue
 from databricks.labs.blueprint.tui import Prompts

@@ -30,7 +30,6 @@
 from databricks.labs.lakebridge.connections.credential_manager import cred_file
 from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts
 from databricks.labs.lakebridge.helpers.telemetry_utils import make_alphanum_or_semver
-from databricks.labs.lakebridge.install import installer
 from databricks.labs.lakebridge.reconcile.runner import ReconcileRunner
 from databricks.labs.lakebridge.lineage import lineage_generator
 from databricks.labs.lakebridge.reconcile.recon_config import RECONCILE_OPERATION_NAME, AGG_RECONCILE_OPERATION_NAME

@@ -48,7 +47,6 @@

 # Subclass to allow controlled access to protected methods.
 class Lakebridge(App):
-    _logger_instance: logging.Logger | None = None

     def create_workspace_client(self) -> WorkspaceClient:
         """Create a workspace client, with the appropriate product and version information.

@@ -58,15 +56,25 @@ def create_workspace_client(self) -> WorkspaceClient:
         self._patch_databricks_host()
         return self._workspace_client()

-    def get_logger(self) -> logging.Logger:
-        if self._logger_instance is None:
-            self._logger_instance = self._logger
-            self._logger_instance.setLevel(logging.INFO)
-        return self._logger_instance
+    def _log_level(self, raw: str) -> int:
+        """Convert the log-level provided by the Databricks CLI into a logging level supported by Python."""
+        log_level = super()._log_level(raw)
+        # Due to an issue in the handoff of the intended logging level from the Databricks CLI to our
+        # application, we can't currently distinguish between --log-level=WARN and nothing at all, where we
+        # prefer (and the application logging expects) INFO.
+        #
+        # Rather than default to only have WARNING logs show, it's preferable to default to INFO and have
+        # --log-level=WARN not work for now.
+        #
+        # See: https://github.com/databrickslabs/lakebridge/issues/2167
+        # TODO: Remove this once #2167 has been resolved.
+        if log_level == logging.WARNING:
+            log_level = logging.INFO
+        return log_level


 lakebridge = Lakebridge(__file__)
-logger = lakebridge.get_logger()
+logger = get_logger(__file__)


 def raise_validation_exception(msg: str) -> NoReturn:

@@ -745,6 +753,9 @@ def install_transpile(
     transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
 ) -> None:
     """Install or upgrade the Lakebridge transpilers."""
+    # Avoid circular imports.
+    from databricks.labs.lakebridge.install import installer  # pylint: disable=cyclic-import, import-outside-toplevel
+
     is_interactive = interactive_mode(interactive)
     ctx = ApplicationContext(w)
     ctx.add_user_agent_extra("cmd", "install-transpile")

@@ -804,6 +815,9 @@ def configure_reconcile(
     transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
 ) -> None:
     """Configure the Lakebridge reconciliation module"""
+    # Avoid circular imports.
+    from databricks.labs.lakebridge.install import installer  # pylint: disable=cyclic-import, import-outside-toplevel
+
     ctx = ApplicationContext(w)
     ctx.add_user_agent_extra("cmd", "configure-reconcile")
     user = w.current_user

@@ -1017,8 +1031,4 @@ def create_profiler_dashboard(


 if __name__ == "__main__":
-    app = lakebridge
-    logger = app.get_logger()
-    if is_in_debug():
-        logger.setLevel(logging.DEBUG)
-    app()
+    lakebridge()
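A standalone sketch of the behaviour the _log_level override introduces, assuming (as the comment in the diff states) that the parent App._log_level returns logging.WARNING when the Databricks CLI passes no explicit level:

import logging


def adjusted_log_level(level_from_cli: int) -> int:
    # Hypothetical mirror of the override: WARNING is coerced to INFO because it cannot
    # currently be distinguished from "no --log-level given"; other levels pass through.
    return logging.INFO if level_from_cli == logging.WARNING else level_from_cli


assert adjusted_log_level(logging.WARNING) == logging.INFO
assert adjusted_log_level(logging.DEBUG) == logging.DEBUG
assert adjusted_log_level(logging.ERROR) == logging.ERROR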

src/databricks/labs/lakebridge/connections/database_manager.py

Lines changed: 0 additions & 1 deletion

@@ -12,7 +12,6 @@
 from sqlalchemy.orm.session import Session

 logger = logging.getLogger(__name__)
-logger.setLevel("INFO")


 @dataclasses.dataclass

src/databricks/labs/lakebridge/install.py

Lines changed: 24 additions & 0 deletions

@@ -1,19 +1,24 @@
 import dataclasses
 import logging
 import os
+import sys
 import webbrowser
 from collections.abc import Callable, Sequence, Set
 from pathlib import Path
 from typing import Any

+from databricks.labs.blueprint.entrypoint import get_logger
 from databricks.labs.blueprint.installation import Installation, JsonValue, SerdeError
 from databricks.labs.blueprint.installer import InstallState
 from databricks.labs.blueprint.tui import Prompts
 from databricks.labs.blueprint.wheels import ProductInfo
 from databricks.sdk import WorkspaceClient
+from databricks.sdk.core import with_user_agent_extra
 from databricks.sdk.errors import NotFound, PermissionDenied

+from databricks.labs.lakebridge import initialize_logging
 from databricks.labs.lakebridge.__about__ import __version__
+from databricks.labs.lakebridge.cli import lakebridge
 from databricks.labs.lakebridge.config import (
     DatabaseConfig,
     ReconcileConfig,

@@ -430,3 +435,22 @@ def _verify_workspace_client(ws: WorkspaceClient) -> WorkspaceClient:
     setattr(ws.config, '_product_info', ('lakebridge', __version__))

     return ws
+
+
+if __name__ == "__main__":
+    with_user_agent_extra("cmd", "install")
+    initialize_logging()
+
+    # Warning: ensures logger for this file is not __main__.
+    logger = get_logger(__file__)
+
+    app_installer = installer(
+        ws=lakebridge.create_workspace_client(),
+        transpiler_repository=TranspilerRepository.user_home(),
+        is_interactive=sys.stdin.isatty(),
+    )
+    if not app_installer.upgrade_installed_transpilers():
+        logger.debug("No existing Lakebridge transpilers detected; assuming fresh installation.")
+
+    logger.info("Successfully Setup Lakebridge Components Locally")
+    logger.info("For more information, please visit https://databrickslabs.github.io/lakebridge/")

src/databricks/labs/lakebridge/reconcile/execute.py

Lines changed: 8 additions & 12 deletions

@@ -6,24 +6,20 @@
 from databricks.labs.blueprint.installation import Installation
 from databricks.sdk import WorkspaceClient

-from databricks.labs.lakebridge.config import (
-    TableRecon,
-    ReconcileConfig,
-)
-from databricks.labs.lakebridge.reconcile.exception import (
-    ReconciliationException,
-)
+from databricks.labs.lakebridge import initialize_logging
+from databricks.labs.lakebridge.config import ReconcileConfig, TableRecon
+from databricks.labs.lakebridge.reconcile.exception import ReconciliationException
+from databricks.labs.lakebridge.reconcile.recon_config import AGG_RECONCILE_OPERATION_NAME, RECONCILE_OPERATION_NAME
 from databricks.labs.lakebridge.reconcile.trigger_recon_aggregate_service import TriggerReconAggregateService
-from databricks.labs.lakebridge.reconcile.recon_config import (
-    RECONCILE_OPERATION_NAME,
-    AGG_RECONCILE_OPERATION_NAME,
-)
 from databricks.labs.lakebridge.reconcile.trigger_recon_service import TriggerReconService

 logger = logging.getLogger(__name__)


-def main(*argv) -> None:
+def main(*argv: str) -> None:
+    """Lakeview Jobs task entry point: reconcile"""
+    initialize_logging()
+
     logger.debug(f"Arguments received: {argv}")

     assert len(sys.argv) == 2, f"Invalid number of arguments: {len(sys.argv)}," f" Operation name must be specified."
