Skip to content

Commit 47d1877

Browse files
Update jdbc string
Update default option; update log statement; reorder parameters; minimise diff
1 parent 0538eac commit 47d1877

File tree

4 files changed

+76
-22
lines changed

4 files changed

+76
-22
lines changed

connectors/athena-databricks-connector/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,7 @@ From the repository root, initialize the submodule and build:
4040

4141
```bash
4242
git submodule update --init
43-
cd connectors
44-
mvn clean package -DskipTests
43+
mvn clean package -DskipTests -f connectors/pom.xml
4544
```
4645

4746
The parent POM builds the `athena-jdbc` dependency from the submodule first, then the Databricks connector.
@@ -58,10 +57,11 @@ sam deploy --guided -t connectors/athena-databricks-connector/athena-databricks-
5857
| Parameter | Description | Default |
5958
|-----------|-------------|---------|
6059
| AthenaCatalogName | Lambda function name (must match pattern: `^[a-z0-9-_]{1,64}$`) | databricks |
61-
| SpillBucket | S3 bucket for spilling data | Required |
60+
| SpillBucket | S3 bucket name for spilling data (bucket name only, not URI or ARN) | Required |
6261
| SpillPrefix | Prefix within SpillBucket | athena-spill |
6362
| LambdaTimeout | Maximum Lambda invocation runtime (1-900 seconds) | 900 |
6463
| LambdaMemory | Lambda memory in MB (128-3008) | 1024 |
64+
| DatabricksHost | Databricks workspace hostname (e.g. `dbc-59ed3753-5cf0.cloud.databricks.com`) | Required |
6565
| SecretName | Name of the Secrets Manager secret containing the Databricks personal access token | Required |
6666
| DatabricksDefaultDatabase | Default Databricks Unity Catalog database (catalog.schema) | default |
6767
| DisableSpillEncryption | Disable encryption for spilled data | false |

connectors/athena-databricks-connector/athena-databricks-connector.yaml

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,38 @@ Parameters:
1919
Type: String
2020
AllowedPattern: ^[a-z0-9-_]{1,64}$
2121
Default: databricks
22+
DatabricksHost:
23+
Description: "The Databricks workspace hostname (e.g. dbc-59ed3753-5cf0.cloud.databricks.com). Do not include https:// or port."
24+
Type: String
25+
AllowedPattern: ^[a-zA-Z0-9][a-zA-Z0-9.\-]+[a-zA-Z0-9]$
26+
ConstraintDescription: "Enter only the hostname (e.g. dbc-59ed3753-5cf0.cloud.databricks.com), not a full URL."
27+
DatabricksHttpPath:
28+
Description: "The HTTP path for the Databricks SQL warehouse API (e.g., /sql/1.0/warehouses/abc123)."
29+
Type: String
30+
DatabricksConnCatalog:
31+
Description: "The Databricks Unity Catalog name to connect to."
32+
Type: String
33+
Default: workspace
34+
SecretName:
35+
Description: "The name of the secret in AWS Secrets Manager that contains the Databricks personal access token."
36+
Type: String
2237
SpillBucket:
23-
Description: 'The name of the bucket where this function can spill data.'
38+
Description: 'The name of the bucket where this function can spill data. Use only the bucket name (e.g. my-athena-spill), not the full S3 URI or ARN.'
2439
Type: String
40+
AllowedPattern: ^[a-z0-9][a-z0-9.\-]{1,61}[a-z0-9]$
41+
ConstraintDescription: 'Enter only the bucket name (e.g. my-athena-spill), not an S3 URI (s3://...) or ARN.'
2542
SpillPrefix:
2643
Description: 'The prefix within SpillBucket where this function can spill data.'
2744
Type: String
2845
Default: athena-spill
46+
DatabricksDefaultDatabase:
47+
Description: "The default Databricks Unity Catalog database (catalog.schema) to use when not specified in the query."
48+
Default: default
49+
Type: String
50+
DatabricksFetchSize:
51+
Description: "Number of rows fetched per JDBC round trip. Higher values improve throughput but use more memory."
52+
Default: 10000
53+
Type: Number
2954
LambdaTimeout:
3055
Description: "Maximum Lambda invocation runtime in seconds. (min 1 - 900 max)"
3156
Default: 900
@@ -34,23 +59,6 @@ Parameters:
3459
Description: "Lambda memory in MB (min 128 - 3008 max)."
3560
Default: 1024
3661
Type: Number
37-
SecretName:
38-
Description: "The name of the secret in AWS Secrets Manager that contains the Databricks personal access token."
39-
Type: String
40-
DatabricksHttpPath:
41-
Description: "The HTTP path for the Databricks SQL warehouse API (e.g., /sql/1.0/warehouses/abc123)."
42-
Type: String
43-
DatabricksConnCatalog:
44-
Description: "The Databricks Unity Catalog name to connect to."
45-
Type: String
46-
DatabricksFetchSize:
47-
Description: "Number of rows fetched per JDBC round trip. Higher values improve throughput but use more memory."
48-
Default: 10000
49-
Type: Number
50-
DatabricksDefaultDatabase:
51-
Description: "The default Databricks Unity Catalog database (catalog.schema) to use when not specified in the query."
52-
Default: default
53-
Type: String
5462
DisableSpillEncryption:
5563
Description: "WARNING: If set to 'true' encryption for spilled data is disabled."
5664
Default: "false"
@@ -65,6 +73,7 @@ Resources:
6573
ImageUri: connectorconfig:v1
6674
Environment:
6775
Variables:
76+
databricks_host: !Ref DatabricksHost
6877
disable_spill_encryption: !Ref DisableSpillEncryption
6978
spill_bucket: !Ref SpillBucket
7079
spill_prefix: !Ref SpillPrefix

connectors/athena-databricks-connector/src/main/java/com/amazonaws/athena/connectors/databricks/DatabricksConstants.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ private DatabricksConstants() {}
3838
/** Databricks SQL quote character for identifiers. */
3939
public static final String QUOTE_CHARACTER = "`";
4040

41+
/** Environment variable key for the Databricks workspace hostname. */
42+
public static final String HOST_CONFIG_KEY = "databricks_host";
43+
/** Environment variable key for the Secrets Manager secret name containing the Databricks PAT. */
44+
public static final String SECRET_NAME_CONFIG_KEY = "secret_manager_databricks_token_name";
45+
/** Environment variable key for the default Databricks database. */
46+
public static final String DEFAULT_DATABASE_CONFIG_KEY = "databricks_default_database";
4147
/** Environment variable key for the Databricks SQL warehouse HTTP path. */
4248
public static final String HTTP_PATH_CONFIG_KEY = "databricks_http_path";
4349
/** Environment variable key for the Databricks Unity Catalog name. */

connectors/athena-databricks-connector/src/main/java/com/amazonaws/athena/connectors/databricks/DatabricksEnvironmentProperties.java

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,54 @@
2020
package com.amazonaws.athena.connectors.databricks;
2121

2222
import com.amazonaws.athena.connectors.jdbc.JdbcEnvironmentProperties;
23+
import org.slf4j.Logger;
24+
import org.slf4j.LoggerFactory;
2325

2426
import java.util.Map;
2527

28+
import static com.amazonaws.athena.connectors.databricks.DatabricksConstants.DATABRICKS_DEFAULT_PORT;
29+
import static com.amazonaws.athena.connectors.databricks.DatabricksConstants.DEFAULT_DATABASE_CONFIG_KEY;
30+
import static com.amazonaws.athena.connectors.databricks.DatabricksConstants.HOST_CONFIG_KEY;
31+
import static com.amazonaws.athena.connectors.databricks.DatabricksConstants.SECRET_NAME_CONFIG_KEY;
32+
2633
/**
2734
* Provides Databricks-specific JDBC environment properties.
28-
* Configures the JDBC connection string prefix for Databricks.
35+
* Builds the connection string from individual environment variables (host, database, secret)
36+
* so users don't need to provide the full JDBC URL.
2937
*/
3038
public class DatabricksEnvironmentProperties extends JdbcEnvironmentProperties
3139
{
40+
private static final Logger LOGGER = LoggerFactory.getLogger(DatabricksEnvironmentProperties.class);
41+
/**
42+
* Builds the {@code default} connection string required by the Athena Federation SDK
43+
* from individual Lambda environment variables.
44+
*
45+
* <p>If a {@code default} env var is already present (e.g. via a Glue connection),
46+
* it is left unchanged. Otherwise, the connection string is assembled from:
47+
* <ul>
48+
* <li>{@code databricks_host} — workspace hostname</li>
49+
* <li>{@code databricks_default_database} — target database (defaults to {@code "default"})</li>
50+
* <li>{@code secret_manager_databricks_token_name} — Secrets Manager secret for PAT injection</li>
51+
* </ul>
52+
*
53+
* @return environment map containing the {@code default} connection string and all Lambda env vars
54+
*/
55+
@Override
56+
public Map<String, String> createEnvironment()
57+
{
58+
Map<String, String> env = super.createEnvironment();
59+
if (!env.containsKey("default") && env.containsKey(HOST_CONFIG_KEY)) {
60+
String host = env.get(HOST_CONFIG_KEY);
61+
String database = env.getOrDefault(DEFAULT_DATABASE_CONFIG_KEY, "default");
62+
String secret = env.getOrDefault(SECRET_NAME_CONFIG_KEY, "");
63+
String connectionString = String.format("databricks://jdbc:databricks://%s:%d/%s${%s}",
64+
host, DATABRICKS_DEFAULT_PORT, database, secret);
65+
env.put("default", connectionString);
66+
LOGGER.trace("Built JDBC connection string: {}", connectionString);
67+
}
68+
return env;
69+
}
70+
3271
/**
3372
* {@inheritDoc}
3473
*/

0 commit comments

Comments
 (0)