databricks asset bundle setup

DilmurodMak · DilmurodMak · commit 254ff18d4972 · 2024-12-02T11:10:56.000-05:00
diff --git a/single_tech_samples/databricks/databricks_terraform/databricks.yml b/single_tech_samples/databricks/databricks_terraform/databricks.yml
@@ -0,0 +1,53 @@
+# Bundle identity: the name below is what ${bundle.name} expands to elsewhere in this bundle configuration.
+bundle:
+  name: modern-data-warehouse-databricks-asset-bundles
+
+# Workflow (job) definitions that are part of this bundle. Deploying the bundle creates them in the Databricks workspace,
+# where a CI/CD pipeline can run them; here they are test workflows whose tests execute in the workspace.
+# NOTE(review): include globs are presumably resolved relative to this file's directory - confirm this repo-root-style path matches.
+include:
+  - single_tech_samples/databricks/databricks_terraform/workflows/*.yml
+
+# Target environment configuration: each environment has its own resources in Azure.
+# Exactly one target (dev) is the default; every other target must be selected explicitly, e.g. with --target.
+targets:
+  # Sandbox: explicit opt-in target, never the default
+  sandbox:
+    presets:
+      name_prefix: "sandbox_"
+    workspace:
+      host: <DATABRICKS_WORKSPACE_URL>
+      root_path: /Workspace/sandbox/${workspace.current_user.userName}/${bundle.name}/${bundle.target}
+    run_as:
+      service_principal_name: ${workspace.current_user.userName}
+
+  dev:  # Development: used when no target is given on the command line
+    presets:
+      name_prefix: "dev_"
+    default: true  # the only default target; a bundle with several defaults is rejected
+    workspace:
+      host: <DATABRICKS_WORKSPACE_URL>
+      root_path: /Workspace/dev/${workspace.current_user.userName}/${bundle.name}/${bundle.target}
+    run_as:
+      service_principal_name: ${workspace.current_user.userName}
+
+  stg:  # Staging
+    presets:
+      name_prefix: "stg_"
+    default: false  # must be selected explicitly
+    workspace:
+      host: <DATABRICKS_WORKSPACE_URL>
+      root_path: /Workspace/stg/${workspace.current_user.userName}/${bundle.name}/${bundle.target}
+    run_as:
+      service_principal_name: ${workspace.current_user.userName}
+
+  prod:  # Production
+    presets:
+      name_prefix: "prod_"
+    default: false  # must be selected explicitly so production is never deployed by accident
+    workspace:
+      host: <DATABRICKS_WORKSPACE_URL>
+      root_path: /Workspace/prod/${workspace.current_user.userName}/${bundle.name}/${bundle.target}
+    run_as:
+      service_principal_name: ${workspace.current_user.userName}
+
diff --git a/single_tech_samples/databricks/databricks_terraform/tests/hello_test.py b/single_tech_samples/databricks/databricks_terraform/tests/hello_test.py
@@ -0,0 +1,18 @@
+# Databricks notebook source
+# COMMAND ----------
+
+import unittest
+
+class Greeter:  # Minimal object under test: holds one fixed greeting string.
+    def __init__(self):  # Takes no arguments; only sets the message attribute.
+        self.message = "Hello Test Message from Dummy File!"
+
+class TestGreeter(unittest.TestCase):
+    def test_greeter_message(self):
+        greeter = Greeter()
+        self.assertEqual(greeter.message, "Hello Test Message from Dummy File!", "The message should be 'Hello world!'")
+
+if __name__ == "__main__":
+    unittest.main(argv=['first-arg-is-ignored'], exit=False)
+
+# COMMAND ----------
diff --git a/single_tech_samples/databricks/databricks_terraform/utils/generate-databricks-workflows.sh b/single_tech_samples/databricks/databricks_terraform/utils/generate-databricks-workflows.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+###
+# The Bellow Script is used to generate Databricks Job YAML files for each test file in the tests directory.
+# The script will iterate over all the test files and generate a YAML file for each test file under the workflows directory.
+###
+
+# Change to the root directory of the project
+cd "$(dirname "$0")/.." || exit
+
+# Set the directory where YAML files will be generated
+OUTPUT_DIR="workflows/"
+TEST_FOLDER_PATH="single_tech_samples/databricks/databricks_terraform/tests"
+
+mkdir -p "$OUTPUT_DIR"
+
+# Find all _test.py files from the root directory and iterate over them
+for test_file in $(find ./tests -type f -name "*_test.py"); do
+  # Extract the base filename without extension
+  base_name=$(basename "$test_file" .py)
+
+  # Define the path to the output YAML file
+  output_file="${OUTPUT_DIR}/${base_name}.job.yml"
+
+  # Generate the YAML content
+  cat <<EOF > "$output_file"
+resources:
+  jobs:
+    ${base_name}:
+      name: ${base_name}
+      tasks:
+        - task_key: ${base_name}
+          notebook_task:
+            notebook_path: ${TEST_FOLDER_PATH}/${base_name}
+            base_parameters:
+              env: \${bundle.target}
+            source: GIT
+
+      git_source:
+        git_url: https://github.com/Azure-Samples/modern-data-warehouse-dataops
+        git_provider: gitHub
+        git_branch: main
+      queue:
+        enabled: true
+
+      job_clusters:
+        - job_cluster_key: job_cluster
+          new_cluster:
+            spark_version: 15.4.x-scala2.12
+            node_type_id: Standard_D4ds_v5
+            autoscale:
+                min_workers: 1
+                max_workers: 4
+EOF
+
+  echo "Generated YAML job template for: $base_name -> $output_file"
+done
diff --git a/single_tech_samples/databricks/databricks_terraform/workflows/hello_test.job.yml b/single_tech_samples/databricks/databricks_terraform/workflows/hello_test.job.yml
@@ -0,0 +1,27 @@
+resources:  # Job definition for the hello_test notebook; keep in sync with the template in utils/generate-databricks-workflows.sh
+  jobs:
+    hello_test:
+      name: hello_test
+      tasks:
+        - task_key: hello_test
+          job_cluster_key: job_cluster  # run this task on the job cluster declared under job_clusters
+          notebook_task:
+            notebook_path: single_tech_samples/databricks/databricks_terraform/tests/hello_test
+            base_parameters:
+              env: ${bundle.target}
+            source: GIT  # the notebook is read from the git_source repository, not from the workspace
+      git_source:
+        git_url: https://github.com/Azure-Samples/modern-data-warehouse-dataops
+        git_provider: gitHub
+        git_branch: main
+      queue:
+        enabled: true
+
+      job_clusters:
+        - job_cluster_key: job_cluster
+          new_cluster:
+            spark_version: 15.4.x-scala2.12
+            node_type_id: Standard_D4ds_v5
+            autoscale:
+              min_workers: 1
+              max_workers: 4