Skip to content

Commit

Permalink
databricks asset bundle setup
Browse files Browse the repository at this point in the history
  • Loading branch information
DilmurodMak committed Dec 2, 2024
1 parent 3754af7 commit 254ff18
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 0 deletions.
53 changes: 53 additions & 0 deletions single_tech_samples/databricks/databricks_terraform/databricks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Name of the bundle
bundle:
  name: modern-data-warehouse-databricks-asset-bundles

# Workflow (job) definitions that are part of this bundle.
# Deploying the bundle deploys these workflows to the Databricks workspace so
# they can be run as part of a CI/CD pipeline. Here they are test workflows
# that execute the tests inside the Databricks workspace.
# NOTE(review): include globs are normally resolved relative to this file's
# directory - confirm this repository-root style path matches the location the
# bundle is deployed from.
include:
  - single_tech_samples/databricks/databricks_terraform/workflows/*.yml

# Target Environment Configuration
# Each environment has its own resources in Azure.
targets:
  # Sandbox
  sandbox:
    presets:
      name_prefix: "sandbox_"
    workspace:
      host: <DATABRICKS_WORKSPACE_URL>
      root_path: /Workspace/sandbox/${workspace.current_user.userName}/${bundle.name}/${bundle.target}
    run_as:
      service_principal_name: ${workspace.current_user.userName}

  # Development - the default target, used when no target is passed on the
  # command line. Only one target may be marked as default.
  dev:
    presets:
      name_prefix: "dev_"
    default: true
    workspace:
      host: <DATABRICKS_WORKSPACE_URL>
      root_path: /Workspace/dev/${workspace.current_user.userName}/${bundle.name}/${bundle.target}
    run_as:
      service_principal_name: ${workspace.current_user.userName}

  # Staging - must be selected explicitly.
  stg:
    presets:
      name_prefix: "stg_"
    workspace:
      host: <DATABRICKS_WORKSPACE_URL>
      root_path: /Workspace/stg/${workspace.current_user.userName}/${bundle.name}/${bundle.target}
    run_as:
      service_principal_name: ${workspace.current_user.userName}

  # Production - must be selected explicitly.
  prod:
    presets:
      name_prefix: "prod_"
    workspace:
      host: <DATABRICKS_WORKSPACE_URL>
      root_path: /Workspace/prod/${workspace.current_user.userName}/${bundle.name}/${bundle.target}
    run_as:
      service_principal_name: ${workspace.current_user.userName}

Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Databricks notebook source
# COMMAND ----------

import unittest

class Greeter:
    """Minimal sample class that exposes a fixed greeting via ``message``."""

    def __init__(self):
        # The greeting is a constant; the test in this notebook compares against it.
        greeting = "Hello Test Message from Dummy File!"
        self.message = greeting

class TestGreeter(unittest.TestCase):
    """Unit tests for the Greeter sample class."""

    def test_greeter_message(self):
        """A freshly constructed Greeter carries the expected message."""
        expected = "Hello Test Message from Dummy File!"
        greeter = Greeter()
        # Build the failure text from the expected value so the two cannot
        # drift apart (it previously referred to 'Hello world!').
        self.assertEqual(
            greeter.message,
            expected,
            f"The message should be '{expected}'",
        )

if __name__ == "__main__":
    # exit=False stops unittest from calling sys.exit() once the tests finish;
    # argv is overridden so unittest ignores the host process's arguments.
    test_program = unittest.main(argv=['first-arg-is-ignored'], exit=False)
    # With exit=False a failing test would otherwise go unnoticed: raise so the
    # caller (e.g. a job run in a CI/CD pipeline) sees a failure, not a pass.
    if not test_program.result.wasSuccessful():
        raise RuntimeError("Unit tests failed; see the test output above for details.")

# COMMAND ----------
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/bin/bash

###
# Generates a Databricks job YAML file for each test file in the tests directory.
# For every *_test.py file found under ./tests, one <name>.job.yml file is
# written to the workflows directory.
###

# Change to the parent of the directory this script lives in; every path below
# is relative to that directory.
cd "$(dirname "$0")/.." || exit 1

# Directory where the YAML files are generated (no trailing slash: it is joined
# with "/" when the output path is built).
OUTPUT_DIR="workflows"
# Folder prefix used for notebook_path in the generated jobs.
TEST_FOLDER_PATH="single_tech_samples/databricks/databricks_terraform/tests"

mkdir -p "$OUTPUT_DIR"

# Find all *_test.py files under ./tests and iterate over them.
# The list is NUL-delimited so file names containing spaces are not split.
# NOTE(review): notebook_path is built from the base name only, so a test file
# in a sub-folder of ./tests would get a path that omits the sub-folder.
while IFS= read -r -d '' test_file; do
  # Extract the base filename without extension
  base_name=$(basename "$test_file" .py)

  # Define the path to the output YAML file
  output_file="${OUTPUT_DIR}/${base_name}.job.yml"

  # Generate the YAML content. \${bundle.target} is escaped so it is written
  # literally instead of being expanded by the shell. The heredoc body starts
  # at column 0 because its indentation is part of the generated YAML.
  cat <<EOF > "$output_file"
resources:
  jobs:
    ${base_name}:
      name: ${base_name}
      tasks:
        - task_key: ${base_name}
          job_cluster_key: job_cluster
          notebook_task:
            notebook_path: ${TEST_FOLDER_PATH}/${base_name}
            base_parameters:
              env: \${bundle.target}
            source: GIT
      git_source:
        git_url: https://github.com/Azure-Samples/modern-data-warehouse-dataops
        git_provider: gitHub
        git_branch: main
      queue:
        enabled: true
      job_clusters:
        - job_cluster_key: job_cluster
          new_cluster:
            spark_version: 15.4.x-scala2.12
            node_type_id: Standard_D4ds_v5
            autoscale:
              min_workers: 1
              max_workers: 4
EOF

  echo "Generated YAML job template for: $base_name -> $output_file"
done < <(find ./tests -type f -name "*_test.py" -print0)
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Job that runs the hello_test notebook from the Git repository.
resources:
  jobs:
    hello_test:
      name: hello_test
      tasks:
        - task_key: hello_test
          # Run the task on the job cluster declared under job_clusters below;
          # without this reference that cluster definition is never used.
          job_cluster_key: job_cluster
          notebook_task:
            notebook_path: single_tech_samples/databricks/databricks_terraform/tests/hello_test
            base_parameters:
              # Passes the name of the selected bundle target to the notebook.
              env: ${bundle.target}
            # The notebook is read from git_source instead of the workspace.
            source: GIT

      git_source:
        git_url: https://github.com/Azure-Samples/modern-data-warehouse-dataops
        git_provider: gitHub
        git_branch: main
      queue:
        enabled: true

      job_clusters:
        - job_cluster_key: job_cluster
          new_cluster:
            spark_version: 15.4.x-scala2.12
            node_type_id: Standard_D4ds_v5
            autoscale:
              min_workers: 1
              max_workers: 4

0 comments on commit 254ff18

Please sign in to comment.