|
17 | 17 | from databricks.labs.blueprint.cli import App |
18 | 18 | from databricks.labs.blueprint.entrypoint import get_logger, is_in_debug |
19 | 19 | from databricks.labs.blueprint.installation import RootJsonValue |
| 20 | +from databricks.labs.blueprint.installer import InstallState |
20 | 21 | from databricks.labs.blueprint.tui import Prompts |
21 | 22 |
|
22 | 23 |
|
|
35 | 36 | from databricks.labs.lakebridge.reconcile.recon_config import RECONCILE_OPERATION_NAME, AGG_RECONCILE_OPERATION_NAME |
36 | 37 | from databricks.labs.lakebridge.transpiler.describe import TranspilersDescription |
37 | 38 | from databricks.labs.lakebridge.transpiler.execute import transpile as do_transpile |
38 | | -from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPEngine |
| 39 | +from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPConfig, LSPEngine |
39 | 40 | from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository |
40 | 41 | from databricks.labs.lakebridge.transpiler.sqlglot.sqlglot_engine import SqlglotEngine |
| 42 | +from databricks.labs.lakebridge.transpiler.switch_runner import SwitchConfig, SwitchRunner |
41 | 43 | from databricks.labs.lakebridge.transpiler.transpile_engine import TranspileEngine |
42 | 44 |
|
43 | 45 | from databricks.labs.lakebridge.transpiler.transpile_status import ErrorSeverity |
@@ -534,6 +536,234 @@ def _override_workspace_client_config(ctx: ApplicationContext, overrides: dict[s |
534 | 536 | ctx.connect_config.cluster_id = cluster_id |
535 | 537 |
|
536 | 538 |
|
@lakebridge.command
def llm_transpile(
    *,
    w: WorkspaceClient,
    input_source: str | None = None,
    output_ws_folder: str | None = None,
    source_dialect: str | None = None,
    transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
) -> None:
    """Transpile source code to Databricks using LLM Transpiler (Switch)"""
    # NOTE(review): the docstring above is presumably picked up by the
    # `lakebridge.command` decorator as the command description -- confirm
    # before rewording it.
    app_ctx = ApplicationContext(w)
    app_ctx.add_user_agent_extra("cmd", "llm-transpile")
    logger.debug(f"User: {app_ctx.current_user}")

    # Feed every CLI-supplied value to the checker; anything left as None is
    # resolved interactively when check() runs.
    config_checker = _LLMTranspileConfigChecker(
        transpile_config=app_ctx.transpile_config,
        prompts=app_ctx.prompts,
        install_state=app_ctx.install_state,
        transpiler_repository=transpiler_repository,
    )
    config_checker.use_input_source(input_source)
    config_checker.use_output_ws_folder(output_ws_folder)
    config_checker.use_source_dialect(source_dialect)
    validated_params = config_checker.check()

    # The outcome of the run is emitted on stdout as a single JSON document.
    outcome = _llm_transpile(app_ctx, validated_params)
    print(json.dumps(outcome))
| 562 | + |
| 563 | + |
class _LLMTranspileConfigChecker:
    """Collect and validate the parameters of the 'llm-transpile' command.

    Values given on the command line are registered through the ``use_*``
    methods, which validate them immediately. ``check()`` then prompts for
    whatever is still missing, validates the Switch options declared in the
    Switch ``config.yml`` and returns the parameters as a dict.
    """

    # Installed transpile configuration (may be absent); only its
    # ``input_source`` is read here, as the default offered when prompting.
    _transpile_config: TranspileConfig | None
    # Used for every interactive question/choice.
    _prompts: Prompts
    # Stored at construction time; not read by any method of this class.
    _install_state: InstallState
    # Used to locate the Switch transpiler's config file.
    _transpiler_repository: TranspilerRepository
    # Values resolved so far; None means "not provided yet".
    _input_source: str | None = None
    _output_ws_folder: str | None = None
    _source_dialect: str | None = None

    def __init__(
        self,
        transpile_config: TranspileConfig | None,
        prompts: Prompts,
        install_state: InstallState,
        transpiler_repository: TranspilerRepository,
    ):
        self._transpile_config = transpile_config
        self._prompts = prompts
        self._install_state = install_state
        self._transpiler_repository = transpiler_repository

    # ------------------------------------------------------------------ #
    # input source
    # ------------------------------------------------------------------ #

    @staticmethod
    def _validate_input_source_path(input_source: str, msg: str) -> None:
        """Validate the input source: it must be a non-blank path that exists.

        Reports failure through ``raise_validation_exception(msg)``.
        """
        # Path("") is the current directory, which always exists, so a blank
        # answer has to be rejected explicitly instead of being accepted as ".".
        if not input_source.strip() or not Path(input_source).exists():
            raise_validation_exception(msg)

    def use_input_source(self, input_source: str | None) -> None:
        """Register the '--input-source' value, if one was given."""
        if input_source is None:
            return
        logger.debug(f"Setting input_source to: {input_source!r}")
        self._validate_input_source_path(input_source, f"Invalid path for '--input-source': {input_source}")
        self._input_source = input_source

    def _prompt_input_source(self) -> None:
        """Ask for the input source, offering the configured one as default."""
        default_input = self._transpile_config.input_source if self._transpile_config else None

        if default_input:
            prompt_text = f"Enter input source path (press <enter> for default: {default_input})"
            # An empty answer selects the default.
            chosen = self._prompts.question(prompt_text).strip() or default_input
        else:
            chosen = self._prompts.question("Enter input source path (directory or file)").strip()

        logger.debug(f"Setting input_source to: {chosen!r}")
        # Validate before storing, like use_input_source(), so an invalid
        # value is never left behind on the instance.
        self._validate_input_source_path(chosen, f"Invalid input source: {chosen}")
        self._input_source = chosen

    def _check_input_source(self) -> None:
        """Prompt for the input source when it was not supplied."""
        if self._input_source is None:
            self._prompt_input_source()

    # ------------------------------------------------------------------ #
    # output workspace folder
    # ------------------------------------------------------------------ #

    @staticmethod
    def _validate_output_ws_folder_path(output_ws_folder: str, msg: str) -> None:
        """Validate output folder is a Workspace path (starts with '/Workspace/')."""
        if not output_ws_folder.startswith("/Workspace/"):
            raise_validation_exception(f"{msg}. Must start with /Workspace/")

    def use_output_ws_folder(self, output_ws_folder: str | None) -> None:
        """Register the '--output-ws-folder' value, if one was given."""
        if output_ws_folder is None:
            return
        logger.debug(f"Setting output_ws_folder to: {output_ws_folder!r}")
        self._validate_output_ws_folder_path(
            output_ws_folder, f"Invalid path for '--output-ws-folder': {output_ws_folder}"
        )
        self._output_ws_folder = output_ws_folder

    def _prompt_output_ws_folder(self) -> None:
        """Ask for the output folder and validate the answer."""
        prompted_output_ws_folder = self._prompts.question(
            "Enter output folder path (Databricks Workspace path starting with /Workspace/)"
        ).strip()
        logger.debug(f"Setting output_ws_folder to: {prompted_output_ws_folder!r}")
        self._validate_output_ws_folder_path(
            prompted_output_ws_folder, f"Invalid output folder: {prompted_output_ws_folder}"
        )
        self._output_ws_folder = prompted_output_ws_folder

    def _check_output_ws_folder(self) -> None:
        """Prompt for the output folder when it was not supplied."""
        if self._output_ws_folder is None:
            self._prompt_output_ws_folder()

    # ------------------------------------------------------------------ #
    # source dialect
    # ------------------------------------------------------------------ #

    def use_source_dialect(self, source_dialect: str | None) -> None:
        """Register the source dialect; it is validated later by check()."""
        if source_dialect is not None:
            logger.debug(f"Setting source_dialect to: {source_dialect!r}")
            self._source_dialect = source_dialect

    def _require_switch_dialects(self) -> set[str]:
        """Return the Switch dialects, failing with an install hint if there are none."""
        available_dialects = self._get_switch_dialects()
        if not available_dialects:
            raise_validation_exception(
                "No Switch dialects available. "
                "Install with: databricks labs lakebridge install-transpile --include-llm-transpiler"
            )
        return available_dialects

    def _prompt_source_dialect(self) -> None:
        """Prompt for source dialect from Switch dialects."""
        available_dialects = self._require_switch_dialects()
        logger.debug(f"Available dialects: {available_dialects!r}")
        self._source_dialect = self._prompts.choice("Select the source dialect:", sorted(available_dialects))

    def _check_source_dialect(self) -> None:
        """Validate and prompt for source dialect if not provided."""
        if self._source_dialect is None:
            # The prompt loads the dialects itself; no need to load them twice.
            self._prompt_source_dialect()
            return

        available_dialects = self._require_switch_dialects()
        if self._source_dialect not in available_dialects:
            supported = ", ".join(sorted(available_dialects))
            raise_validation_exception(f"Invalid source-dialect: '{self._source_dialect}'. Available: {supported}")

    # ------------------------------------------------------------------ #
    # Switch config.yml access
    # ------------------------------------------------------------------ #

    def _load_switch_config(self, action: str) -> LSPConfig | None:
        """Load the Switch config via LSPConfig, or return None if unavailable.

        ``action`` completes the warning "Failed to <action>: ..." that is
        logged when loading raises OSError or ValueError. A missing config
        file is not logged.
        """
        config_path = self._transpiler_repository.transpiler_config_path("Switch")
        if not config_path.exists():
            return None
        try:
            return LSPConfig.load(config_path)
        except (OSError, ValueError) as e:
            logger.warning(f"Failed to {action}: {e}")
            return None

    def _get_switch_dialects(self) -> set[str]:
        """Get Switch dialects from config.yml using LSPConfig (empty set if unavailable)."""
        lsp_config = self._load_switch_config("load Switch dialects")
        if lsp_config is None:
            return set()
        return set(lsp_config.remorph.dialects)

    def _get_switch_options_with_defaults(self) -> dict[str, str]:
        """Get default values for Switch options from config.yml.

        Only the options returned for dialect "all" are considered; options
        whose default is falsy or the "<none>" placeholder are left out.
        """
        lsp_config = self._load_switch_config("load Switch options")
        if lsp_config is None:
            return {}

        result = {
            option.flag: option.default
            for option in lsp_config.options_for_dialect("all")
            if option.default and option.default != "<none>"
        }
        logger.debug(f"Loaded {len(result)} Switch options with defaults from config.yml")
        return result

    def _validate_switch_options(self, options: dict[str, str]) -> None:
        """Validate options against config.yml choices.

        Options without declared choices, or absent from ``options``, are not
        checked. Nothing is validated when the config cannot be loaded.
        """
        lsp_config = self._load_switch_config("validate Switch options")
        if lsp_config is None:
            return

        for option in lsp_config.options_for_dialect("all"):
            if option.flag not in options or not option.choices:
                continue
            value = options[option.flag]
            if value not in option.choices:
                raise_validation_exception(
                    f"Invalid value for '{option.flag}': {value!r}. Must be one of: {', '.join(option.choices)}"
                )

    # ------------------------------------------------------------------ #
    # entry point
    # ------------------------------------------------------------------ #

    def check(self) -> dict:
        """Validate all parameters and return configuration dict.

        The returned dict has the keys ``input_source``, ``output_ws_folder``,
        ``source_dialect``, ``switch_options`` and ``wait_for_completion``.
        """
        logger.debug("Checking llm-transpile configuration")

        self._check_input_source()
        self._check_output_ws_folder()
        self._check_source_dialect()

        switch_options = self._get_switch_options_with_defaults()
        self._validate_switch_options(switch_options)

        # 'wait_for_completion' is lifted out of the option map and returned
        # as a real boolean; only the text "true" (any case) enables it.
        wait_for_completion = str(switch_options.pop("wait_for_completion", "false")).lower() == "true"

        return {
            "input_source": self._input_source,
            "output_ws_folder": self._output_ws_folder,
            "source_dialect": self._source_dialect,
            "switch_options": switch_options,
            "wait_for_completion": wait_for_completion,
        }
| 752 | + |
| 753 | + |
def _llm_transpile(ctx: ApplicationContext, params: dict) -> RootJsonValue:
    """Run the Switch job with the validated 'llm-transpile' parameters.

    The catalog, schema, volume and job id are read from the Switch
    configuration backed by the install state; every entry of ``params`` is
    forwarded to the runner as a keyword argument. Returns whatever the
    runner's ``run`` call returns.
    """
    switch_config = SwitchConfig(ctx.install_state)
    switch_resources = switch_config.get_resources()
    switch_job_id = switch_config.get_job_id()

    switch_runner = SwitchRunner(ctx.workspace_client, ctx.installation)

    return switch_runner.run(
        catalog=switch_resources["catalog"],
        schema=switch_resources["schema"],
        volume=switch_resources["volume"],
        job_id=switch_job_id,
        **params,
    )
| 765 | + |
| 766 | + |
537 | 767 | @lakebridge.command |
538 | 768 | def reconcile(*, w: WorkspaceClient) -> None: |
539 | 769 | """[EXPERIMENTAL] Reconciles source to Databricks datasets""" |
|
0 commit comments