Skip to content

Latest commit

 

History

History
477 lines (362 loc) · 19.4 KB

File metadata and controls

477 lines (362 loc) · 19.4 KB
page_title dbtcloud_global_connection Resource - dbtcloud
subcategory
description This resource can be used to create global connections as introduced in dbt Cloud in August 2024. Those connections are not linked to a specific project and can be linked to environments from different projects by using the connection_id field in the dbtcloud_environment resource.

dbtcloud_global_connection (Resource)

This resource can be used to create global connections as introduced in dbt Cloud in August 2024.

Those connections are not linked to a specific project and can be linked to environments from different projects by using the connection_id field in the dbtcloud_environment resource.

Example Usage

resource "dbtcloud_global_connection" "apache_spark" {
  name = "My Apache Spark connection"

  apache_spark = {
    // required: target cluster, host and authentication method
    cluster = "my-cluster"
    host    = "my-spark-host.com"
    method  = "http"

    // optional: connection time out, in seconds
    connect_timeout = 100
  }
}

resource "dbtcloud_global_connection" "athena" {
  name = "My Athena connection"

  athena = {
    // required settings
    database       = "mydatabase"
    region_name    = "us-east-1"
    s3_staging_dir = "s3://my-bucket/my-staging-dir/"

    // optional settings
    s3_data_dir = "s3://my-bucket/my-data-dir/"
    work_group  = "my_work_group"
  }
}

// BigQuery connection with Service Account JSON authentication
resource "dbtcloud_global_connection" "bigquery" {
  name = "My BigQuery connection"

  bigquery = {
    gcp_project_id = "my-gcp-project-id"

    // optional: spell out the auth type; leaving it unset gives the same
    // service-account-json behavior
    deployment_env_auth_type = "service-account-json"

    // Service Account JSON key fields
    private_key_id              = "my-private-key-id"
    private_key                 = "ABCDEFGHIJKL"
    client_email                = "my_client_email"
    client_id                   = "my_client_id"
    auth_uri                    = "my_auth_uri"
    token_uri                   = "my_token_uri"
    auth_provider_x509_cert_url = "my_auth_provider_x509_cert_url"
    client_x509_cert_url        = "my_client_x509_cert_url"

    // query timeout, in seconds
    timeout_seconds = 1000
  }
}

// BigQuery connection with External OAuth (Workload Identity Federation)
// TODO: Currently the API still requires service account fields even with external-oauth-wif
resource "dbtcloud_global_connection" "bigquery_wif" {
  name = "My BigQuery WIF connection"

  bigquery = {
    gcp_project_id           = "my-gcp-project-id"
    deployment_env_auth_type = "external-oauth-wif"

    // OAuth client credentials used by external-oauth-wif
    application_id     = "oauth_application_id"
    application_secret = "oauth_secret_id"

    // TODO: the API currently still demands the service account fields below;
    // they should become unnecessary for external-oauth-wif in the future
    private_key_id              = "my-private-key-id"
    private_key                 = "ABCDEFGHIJKL"
    client_email                = "my_client_email"
    client_id                   = "my_client_id"
    auth_uri                    = "my_auth_uri"
    token_uri                   = "my_token_uri"
    auth_provider_x509_cert_url = "my_auth_provider_x509_cert_url"
    client_x509_cert_url        = "my_client_x509_cert_url"
  }
}

resource "dbtcloud_global_connection" "databricks" {
  name = "My Databricks connection"

  databricks = {
    // required: where the cluster / SQL warehouse lives
    host      = "my-databricks-host.cloud.databricks.com"
    http_path = "/sql/my/http/path"

    // optional: Unity Catalog name
    catalog = "dbt_catalog"

    // optional: OAuth client credentials
    client_id     = "yourclientid"
    client_secret = "yourclientsecret"
  }
}

resource "dbtcloud_global_connection" "fabric" {
  name = "My Fabric connection"

  fabric = {
    // required settings
    database = "mydb"
    server   = "my-fabric-server.com"

    // optional settings (timeouts are expressed in seconds)
    login_timeout = 60
    port          = 1234
    query_timeout = 3600
    retries       = 3
  }
}

resource "dbtcloud_global_connection" "postgres" {
  name = "My PostgreSQL connection"
  postgres = {
    // required fields
    hostname = "my-postgresql-server.com"
    dbname   = "my_database"
    // optional fields
    port = 5432
    // it is possible to set settings to connect via SSH Tunnel as well,
    // using the optional `ssh_tunnel` attribute
  }
}

resource "dbtcloud_global_connection" "redshift" {
  name = "My Redshift connection"
  redshift = {
    // required fields
    hostname = "my-redshift-connection.com"
    dbname   = "my_database"
    // optional fields
    port = 5432
    // it is possible to set settings to connect via SSH Tunnel as well,
    // using the optional `ssh_tunnel` attribute
  }
}

resource "dbtcloud_global_connection" "snowflake" {
  name = "My Snowflake connection"

  // optional: attach a Privatelink endpoint to the connection
  private_link_endpoint_id = data.dbtcloud_privatelink_endpoint.my_private_link.id

  snowflake = {
    // required settings
    account   = "my-snowflake-account"
    database  = "MY_DATABASE"
    warehouse = "MY_WAREHOUSE"

    // optional settings
    client_session_keep_alive = false

    // Snowflake OAuth: the client ID and secret must be set when allow_sso is true
    allow_sso           = true
    oauth_client_id     = "yourclientid"
    oauth_client_secret = "yourclientsecret"
  }
}

resource "dbtcloud_global_connection" "starburst" {
  name = "My Starburst connection"
  starburst = {
    // required fields
    host = "my-starburst-host.com"
    // optional fields
    port = 443
  }
}

resource "dbtcloud_global_connection" "synapse" {
  name = "My Synapse connection"

  synapse = {
    // required settings
    database = "mydb"
    host     = "my-synapse-server.com"

    // optional settings (timeouts are expressed in seconds)
    login_timeout = 60
    port          = 1234
    query_timeout = 3600
    retries       = 3
  }
}

resource "dbtcloud_global_connection" "teradata" {
  name = "My Teradata connection"

  teradata = {
    // required settings
    host  = "my-teradata-server.com"
    tmode = "ANSI"

    // optional settings (note that the port is given as a string)
    port            = "1234"
    request_timeout = 600
    retries         = 3
  }
}

Schema

Required

  • name (String) Connection name

Optional

  • apache_spark (Attributes) Apache Spark connection configuration. (see below for nested schema)
  • athena (Attributes) Athena connection configuration. (see below for nested schema)
  • bigquery (Attributes) BigQuery connection configuration. (see below for nested schema)
  • databricks (Attributes) Databricks connection configuration (see below for nested schema)
  • fabric (Attributes) Microsoft Fabric connection configuration. (see below for nested schema)
  • oauth_configuration_id (Number) External OAuth configuration ID (only Snowflake for now)
  • postgres (Attributes) PostgreSQL connection configuration. (see below for nested schema)
  • private_link_endpoint_id (String) Private Link Endpoint ID. This ID can be found using the privatelink_endpoint data source
  • redshift (Attributes) Redshift connection configuration (see below for nested schema)
  • resource_metadata (Dynamic) Metadata for tracking resource identity during account migrations. Stored in Terraform state only and not sent to the API.
  • salesforce (Attributes) Salesforce connection configuration. (see below for nested schema)
  • snowflake (Attributes) Snowflake connection configuration (see below for nested schema)
  • starburst (Attributes) Starburst/Trino connection configuration. (see below for nested schema)
  • synapse (Attributes) Azure Synapse Analytics connection configuration. (see below for nested schema)
  • teradata (Attributes) Teradata connection configuration. (see below for nested schema)

Read-Only

  • adapter_version (String) Version of the adapter
  • id (Number) Connection Identifier
  • is_ssh_tunnel_enabled (Boolean) Whether the connection can use an SSH tunnel

Nested Schema for apache_spark

Required:

  • cluster (String) Spark cluster for the connection
  • host (String) Hostname of the connection
  • method (String) Authentication method for the connection (http or thrift).

Optional:

  • auth (String) Auth
  • connect_retries (Number) Connection retries. Default=0
  • connect_timeout (Number) Connection time out in seconds. Default=10
  • organization (String) Organization ID
  • port (Number) Port for the connection. Default=443
  • user (String) User

Nested Schema for athena

Required:

  • database (String) Specify the database (data catalog) to build models into (lowercase only).
  • region_name (String) AWS region of your Athena instance.
  • s3_staging_dir (String) S3 location to store Athena query results and metadata. Must be in the format 's3://bucket-name/path/'.

Optional:

  • num_boto3_retries (Number) Number of times to retry boto3 requests (e.g. deleting S3 files for materialized tables).
  • num_iceberg_retries (Number) Number of times to retry iceberg commit queries to fix ICEBERG_COMMIT_ERROR.
  • num_retries (Number) Number of times to retry a failing query.
  • poll_interval (Number) Interval in seconds to use for polling the status of query results in Athena.
  • s3_data_dir (String) Prefix for storing tables, if different from the connection's S3 staging directory. Must be in the format 's3://bucket-name/path/'.
  • s3_data_naming (String) How to generate table paths in the S3 data directory.
  • s3_tmp_table_dir (String) Prefix for storing temporary tables, if different from the connection's S3 data directory. Must be in the format 's3://bucket-name/path/'.
  • spark_work_group (String) Identifier of Athena Spark workgroup for running Python models.
  • work_group (String) Identifier of Athena workgroup.

Nested Schema for bigquery

Required:

  • gcp_project_id (String) The GCP project ID to use for the connection

Optional:

  • application_id (String, Sensitive) OAuth Client ID. Required when using 'external-oauth-wif' authentication.
  • application_secret (String, Sensitive) OAuth Client Secret. Required when using 'external-oauth-wif' authentication.
  • auth_provider_x509_cert_url (String) Auth Provider X509 Cert URL for the Service Account. Required when using 'service-account-json' authentication.
  • auth_uri (String) Auth URI for the Service Account. Required when using 'service-account-json' authentication.
  • client_email (String) Service Account email. Required when using 'service-account-json' authentication.
  • client_id (String) Client ID of the Service Account. Required when using 'service-account-json' authentication.
  • client_x509_cert_url (String) Client X509 Cert URL for the Service Account. Required when using 'service-account-json' authentication.
  • dataproc_cluster_name (String) Dataproc cluster name for PySpark workloads
  • dataproc_region (String) Google Cloud region for PySpark workloads on Dataproc
  • deployment_env_auth_type (String) Authentication type for deployment environments. Can be 'service-account-json' or 'external-oauth-wif'. Defaults to 'service-account-json'.
  • execution_project (String) Project to bill for query execution
  • gcs_bucket (String) URI for a Google Cloud Storage bucket to host Python code executed via Dataproc
  • impersonate_service_account (String) Service Account to impersonate when running queries
  • job_creation_timeout_seconds (Number) Maximum timeout for the job creation step
  • job_execution_timeout_seconds (Number) Timeout in seconds for job execution, to be used for the bigquery_v1 adapter
  • job_retry_deadline_seconds (Number) Total number of seconds to wait while retrying the same query
  • location (String) Location to create new Datasets in
  • maximum_bytes_billed (Number) Max number of bytes that can be billed for a given BigQuery query
  • priority (String) The priority with which to execute BigQuery queries (batch or interactive)
  • private_key (String, Sensitive) Private Key for the Service Account. Required when using 'service-account-json' authentication.
  • private_key_id (String) Private Key ID for the Service Account. Required when using 'service-account-json' authentication.
  • retries (Number) Number of retries for queries
  • scopes (Set of String) OAuth scopes for the BigQuery connection
  • timeout_seconds (Number) Timeout in seconds for queries, to be used ONLY for the bigquery_v0 adapter
  • token_uri (String) Token URI for the Service Account. Required when using 'service-account-json' authentication.
  • use_latest_adapter (Boolean) Whether to use the latest bigquery_v1 adapter (use this for BQ WIF). If true, the job_execution_timeout_seconds field will be used. Warning! changing the adapter version (from legacy to latest or vice versa) is not supported.

Nested Schema for databricks

Required:

  • host (String) The hostname of the Databricks cluster or SQL warehouse.
  • http_path (String) The HTTP path of the Databricks cluster or SQL warehouse.

Optional:

  • catalog (String) Catalog name if Unity Catalog is enabled in your Databricks workspace.
  • client_id (String) Required to enable Databricks OAuth authentication for IDE developers.
  • client_secret (String) Required to enable Databricks OAuth authentication for IDE developers.

Nested Schema for fabric

Required:

  • database (String) The database to connect to for this connection.
  • server (String) The server hostname.

Optional:

  • login_timeout (Number) The number of seconds used to establish a connection before failing. Defaults to 0, which means that the timeout is disabled or uses the default system settings.
  • port (Number) The port to connect to for this connection. Default=1433
  • query_timeout (Number) The number of seconds used to wait for a query before failing. Defaults to 0, which means that the timeout is disabled or uses the default system settings.
  • retries (Number) The number of automatic times to retry a query before failing. Defaults to 1. Queries with syntax errors will not be retried. This setting can be used to overcome intermittent network issues.

Nested Schema for postgres

Required:

  • dbname (String) The database name for this connection.
  • hostname (String) The hostname of the database.

Optional:

  • port (Number) The port to connect to for this connection. Default=5432
  • ssh_tunnel (Attributes) PostgreSQL SSH Tunnel configuration (see below for nested schema)

Nested Schema for postgres.ssh_tunnel

Required:

  • hostname (String) The hostname for the SSH tunnel.
  • port (Number) The port for the SSH tunnel.
  • username (String) The username to use for the SSH tunnel.

Read-Only:

  • id (Number) The ID of the SSH tunnel connection.
  • public_key (String) The SSH public key generated to allow connecting via SSH tunnel.

Nested Schema for redshift

Required:

  • dbname (String) The database name for this connection.
  • hostname (String) The hostname of the data warehouse.

Optional:

  • port (Number) The port to connect to for this connection. Default=5432
  • ssh_tunnel (Attributes) Redshift SSH Tunnel configuration (see below for nested schema)

Nested Schema for redshift.ssh_tunnel

Required:

  • hostname (String) The hostname for the SSH tunnel.
  • port (Number) The port for the SSH tunnel.
  • username (String) The username to use for the SSH tunnel.

Read-Only:

  • id (Number) The ID of the SSH tunnel connection.
  • public_key (String) The SSH public key generated to allow connecting via SSH tunnel.

Nested Schema for salesforce

Required:

Optional:

  • data_transform_run_timeout (Number) Timeout in seconds for data transformation runs. Default=300
  • database (String) The target database name. Default=default

Nested Schema for snowflake

Required:

  • account (String) The Snowflake account name
  • database (String) The default database for the connection
  • warehouse (String) The default Snowflake Warehouse to use for the connection

Optional:

  • allow_sso (Boolean) Whether to allow Snowflake OAuth for the connection. If true, the oauth_client_id and oauth_client_secret fields must be set
  • client_session_keep_alive (Boolean) If true, the snowflake client will keep connections for longer than the default 4 hours. This is helpful when particularly long-running queries are executing (> 4 hours)
  • oauth_client_id (String, Sensitive) OAuth Client ID. Required to allow OAuth between dbt Cloud and Snowflake
  • oauth_client_secret (String, Sensitive) OAuth Client Secret. Required to allow OAuth between dbt Cloud and Snowflake
  • role (String) The Snowflake role to use when running queries on the connection

Nested Schema for starburst

Required:

  • host (String) The hostname of the account to connect to.

Optional:

  • method (String) The authentication method. Only LDAP for now.
  • port (Number) The port to connect to for this connection. Default=443

Nested Schema for synapse

Required:

  • database (String) The database to connect to for this connection.
  • host (String) The server hostname.

Optional:

  • login_timeout (Number) The number of seconds used to establish a connection before failing. Defaults to 0, which means that the timeout is disabled or uses the default system settings.
  • port (Number) The port to connect to for this connection. Default=1433
  • query_timeout (Number) The number of seconds used to wait for a query before failing. Defaults to 0, which means that the timeout is disabled or uses the default system settings.
  • retries (Number) The number of automatic times to retry a query before failing. Defaults to 1. Queries with syntax errors will not be retried. This setting can be used to overcome intermittent network issues.

Nested Schema for teradata

Required:

  • host (String) The hostname of the database.
  • tmode (String) The transaction mode to use for the connection.

Optional:

  • port (String) The port to connect to for this connection. Default=1025
  • request_timeout (Number) The number of seconds used to establish a connection before failing. Defaults to 0, which means that the timeout is disabled or uses the default system settings.
  • retries (Number) The number of automatic times to retry a query before failing. Defaults to 1. Queries with syntax errors will not be retried. This setting can be used to overcome intermittent network issues.

Import

Import is supported using the following syntax:

# A project-scoped connection can be imported as a global connection by specifying the connection ID
# Migrating from project-scoped connections to global connections could be done by:
# 1. Adding the config for the global connection and importing it (see below)
# 2. Removing the project-scoped connection from the config AND from the state
#    - CAREFUL: If the connection is removed from the config but not the state, it will be destroyed on the next apply


# using import blocks (requires Terraform >= 1.5)
# generic form: "connection_id" stands for the ID of the connection to import
import {
  id = "connection_id"
  to = dbtcloud_global_connection.my_connection
}

# the same import with an example connection ID
import {
  id = "1234"
  to = dbtcloud_global_connection.my_connection
}

# using the older import command
terraform import dbtcloud_global_connection.my_connection "connection_id"
terraform import dbtcloud_global_connection.my_connection 1234