-
Notifications
You must be signed in to change notification settings - Fork 78
Add llm-transpile command with Switch runner and integration tests
#2078
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 74 commits
dd726b5
febb62d
fa26b4c
6511e20
33ea7de
d0c63c3
7cb9ea9
467dea9
57298b0
09c0eb8
8439314
5f66f3f
fae9880
2ee157f
9dc4b04
7637234
b736965
bacd5f6
729cb0d
21b6629
42ce0df
81c32e5
13bcc15
8dcf8f3
83678b8
f698470
22cadc9
b3d2441
ac7e2a4
ee5c892
f0426e1
934c2e8
74923cc
084f90f
61f796f
0c1d1d5
6aeea25
468f8de
6a57570
2c3d153
f41dee8
950c1b8
6ca78ed
ba65df4
2e2abcb
bcbe4df
486250f
fc1ddca
42c9c4e
1831076
01a0c87
ccce0f2
ac382d6
4600583
7f0eaa4
0e22abe
43cc0f5
bb7c3d6
bd70638
0eb1570
eae5997
9823201
eb46f24
34c9f8f
c54e68f
23df37b
4a0bf49
c49c5b3
fab0e87
078a0bc
1e10b60
394ad9d
69f93b2
aeff475
7a336f9
2602577
b5bcbcd
6132f04
fb2ae71
53077ec
186ab59
508d6c1
363a31d
c01d605
ebf969a
eb1d497
52d6613
11903f9
a62194d
24ad1b3
4083dd6
59fe56e
ab3851e
5a26f86
efc3c64
1a64612
6ac2e68
9799a23
ddf7f1a
0323ae0
cff62b1
a5e06e3
3ce1ffd
df7b15c
a659992
459d941
1aab9bf
c9bfda0
8ec00ca
eefda07
e0814c3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,3 +22,4 @@ remorph_transpile/ | |
| /linter/src/main/antlr4/library/gen/ | ||
| .databricks-login.json | ||
| .mypy_cache | ||
| .env | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -36,6 +36,7 @@ | |
| from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPEngine | ||
| from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository | ||
| from databricks.labs.lakebridge.transpiler.sqlglot.sqlglot_engine import SqlglotEngine | ||
| from databricks.labs.lakebridge.transpiler.switch_runner import SwitchRunner | ||
| from databricks.labs.lakebridge.transpiler.transpile_engine import TranspileEngine | ||
|
|
||
| from databricks.labs.lakebridge.transpiler.transpile_status import ErrorSeverity | ||
|
|
@@ -92,7 +93,7 @@ def _remove_warehouse(ws: WorkspaceClient, warehouse_id: str): | |
|
|
||
|
|
||
| @lakebridge.command | ||
| def transpile( # pylint: disable=too-many-arguments | ||
| def transpile( | ||
| *, | ||
| w: WorkspaceClient, | ||
| transpiler_config_path: str | None = None, | ||
|
|
@@ -240,6 +241,14 @@ def use_transpiler_config_path(self, transpiler_config_path: str | None) -> None | |
| ) | ||
| self._config = dataclasses.replace(self._config, transpiler_config_path=transpiler_config_path) | ||
|
|
||
| # Switch is installed inside "/Users/<user>/.lakebridge/transpilers/Switch/lsp/config.yml" | ||
| if ( | ||
| self._config.transpiler_config_path is not None | ||
| and Path(self._config.transpiler_config_path).parent.parent.name == "Switch" | ||
| ): | ||
| msg = "Switch transpiler is not supported through `transpile` run `llm-transpile` instead." | ||
| raise RuntimeError(msg) | ||
|
|
||
sundarshankar89 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| def use_source_dialect(self, source_dialect: str | None) -> None: | ||
| if source_dialect is not None: | ||
| # Defer validation: depends on the transpiler config path, we'll deal with this later. | ||
|
|
@@ -637,6 +646,77 @@ def _override_workspace_client_config(ctx: ApplicationContext, overrides: dict[s | |
| ctx.connect_config.cluster_id = cluster_id | ||
|
|
||
|
|
||
@lakebridge.command
def llm_transpile(
    *,
    w: WorkspaceClient,
    input_source: str,
    output_ws_folder: str,
    source_dialect: str,
    ctx: ApplicationContext | None = None,
) -> None:
    """Transpile source code to Databricks using LLM Transpiler (Switch)"""
    # Overview:
    #   1. Look up the "Switch" job ID recorded in the install state.
    #   2. Read catalog/schema/volume from the transpile config's transpiler options.
    #   3. Upload `input_source` to that volume and run the job through SwitchRunner,
    #      passing `output_ws_folder` and `source_dialect` (as `source_tech`).
    #   4. Dump the runner's response to stdout as indented JSON.
    # Every failure path surfaces as a RuntimeError.
    if ctx is None:
        ctx = ApplicationContext(w)
    # From here on only the application context is used; drop the raw client reference.
    del w
    ctx.add_user_agent_extra("cmd", "transpile-switch")

    user = ctx.current_user
    logger.debug(f"User: {user}")

    job_list = ctx.install_state.jobs
    if "Switch" not in job_list:
        raise RuntimeError(
            "Switch Job ID not found. "
            "Please run 'databricks labs lakebridge install-transpile --include-llm-transpiler true' first."
        )
    logger.debug("Switch job ID found in InstallState")
    job_id = int(job_list["Switch"])

    # The upload target (catalog/schema/volume) comes from the installed transpile configuration.
    transpile_config = ctx.transpile_config
    if transpile_config is None or transpile_config.transpiler_options is None:
        # Trailing space matters: adjacent literals are concatenated verbatim.
        raise RuntimeError(
            "Transpile configuration config.yml not found in workspace. "
            "Please run 'databricks labs lakebridge install-transpile --include-llm-transpiler true' first."
        )

    transpile_options = transpile_config.transpiler_options
    logger.debug(f"Transpiler options: {transpile_options}")
    if not isinstance(transpile_options, Mapping):
        raise RuntimeError(
            "Invalid transpile configuration: transpiler_options must be a mapping. "
            "Please run 'databricks labs lakebridge install-transpile --include-llm-transpiler true' first."
        )
    catalog = transpile_options.get("catalog", None)
    schema = transpile_options.get("schema", None)
    volume = transpile_options.get("volume", None)

    if catalog is None or schema is None or volume is None:
        raise RuntimeError(
            "Invalid transpile configuration: catalog, schema or volume is missing. "
            "Please run 'databricks labs lakebridge install-transpile --include-llm-transpiler true' first."
        )
    # Explicit check rather than `assert`: assertions are removed under `python -O`,
    # which would let non-string values through to the upload call.
    if not (isinstance(catalog, str) and isinstance(schema, str) and isinstance(volume, str)):
        raise RuntimeError(
            "Invalid transpile configuration: catalog, schema and volume must be strings. "
            "Please run 'databricks labs lakebridge install-transpile --include-llm-transpiler true' first."
        )

    try:
        job_runner = SwitchRunner(ctx.workspace_client, ctx.installation)
        volume_input_path = job_runner.upload_to_volume(
            local_path=Path(input_source), catalog=catalog, schema=schema, volume=volume
        )

        response = job_runner.run(
            volume_input_path=volume_input_path,
            output_ws_folder=output_ws_folder,
            source_tech=source_dialect,
            job_id=job_id,
        )
        json.dump(response, sys.stdout, indent=2)
    except Exception as ex:
        # Any upload/run/serialisation failure is re-raised as RuntimeError, chained to the cause.
        raise RuntimeError(ex) from ex
|
|
||
|
|
||
| @lakebridge.command | ||
| def reconcile(*, w: WorkspaceClient) -> None: | ||
| """[EXPERIMENTAL] Reconciles source to Databricks datasets""" | ||
|
|
@@ -730,6 +810,7 @@ def install_transpile( | |
| w: WorkspaceClient, | ||
| artifact: str | None = None, | ||
| interactive: str | None = None, | ||
| include_llm_transpiler: bool = False, | ||
| transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(), | ||
| ) -> None: | ||
| """Install or upgrade the Lakebridge transpilers.""" | ||
|
|
@@ -738,9 +819,13 @@ def install_transpile( | |
| ctx.add_user_agent_extra("cmd", "install-transpile") | ||
| if artifact: | ||
| ctx.add_user_agent_extra("artifact-overload", Path(artifact).name) | ||
| if include_llm_transpiler: | ||
| ctx.add_user_agent_extra("include-llm-transpiler", "true") | ||
| user = w.current_user | ||
| logger.debug(f"User: {user}") | ||
| transpile_installer = installer(w, transpiler_repository, is_interactive=is_interactive) | ||
| transpile_installer = installer( | ||
| w, transpiler_repository, is_interactive=is_interactive, include_llm=include_llm_transpiler | ||
| ) | ||
| transpile_installer.run(module="transpile", artifact=artifact) | ||
|
|
||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.