Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/databricks/labs/lakebridge/assessments/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class PipelineClass:
def __init__(self, config: PipelineConfig, executor: DatabaseManager | None):
self.config = config
self.executor = executor
self.db_path_prefix = Path(config.extract_folder)
self.db_path_prefix = Path(config.extract_folder).expanduser()
self._create_dir(self.db_path_prefix)

def execute(self) -> list[StepExecutionResult]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def get_profiler_extract_path(pipeline_config_path: Path) -> Path:
the filesystem path to the profiler extract database
"""
pipeline_config = PipelineClass.load_config_from_yaml(pipeline_config_path)
normalized_db_path = os.path.normpath(pipeline_config.extract_folder)
normalized_db_path = os.path.normpath(os.path.expanduser(pipeline_config.extract_folder))
database_path = Path(normalized_db_path) / PROFILER_DB_NAME
return database_path

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
name: synapse_assessment
version: "1.0"
# TODO: This needs to be removed.
extract_folder: "/tmp/data/synapse_assessment"
extract_folder: "~/.databricks/labs/lakebridge_profilers/synapse_assessment"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, we'll need to add the run ID as a path prefix in a follow-up PR.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why at the folder level? Can we not do it at the table level inside the database?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't we make the profiler output configurable by the user?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having it in a central location will help us share state more easily across components; that was the principle behind a common central location.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can see the value of that. Maybe we need to think about how to manage the state for all modules and then standardize the optionality of outputs for all.

steps:
- name: workspace_info
type: python
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/assessments/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ def test_skipped_steps(sandbox_sqlserver: DatabaseManager, pipeline_config: Pipe
assert result.error_message is None, "Skipped steps should not have error messages"


def verify_output(get_logger: Logger, path: Path):
conn = duckdb.connect(str(Path(path)) + "/" + DB_NAME)
def verify_output(get_logger, path):
conn = duckdb.connect(str(Path(path).expanduser()) + "/" + DB_NAME)

expected_tables = ["usage", "inventory", "random_data"]
logger = get_logger
Expand Down
Loading