Skip to content

feat(databricks-jdbc-driver): Support M2M OAuth Authentication #9651

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jun 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/drivers-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,8 @@ jobs:
DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_TOKEN: ${{ secrets.DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_TOKEN }}
DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AWS_KEY: ${{ secrets.DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AWS_KEY }}
DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET: ${{ secrets.DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET }}
DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_ID: ${{ secrets.DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_ID }}
DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_SECRET: ${{ secrets.DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_SECRET }}

# Redshift
DRIVERS_TESTS_CUBEJS_DB_REDSHIFT_HOST: ${{ secrets.DRIVERS_TESTS_CUBEJS_DB_REDSHIFT_HOST }}
Expand Down
45 changes: 28 additions & 17 deletions packages/cubejs-backend-shared/src/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -951,25 +951,10 @@ const variables: Record<string, (...args: any) => any> = {
* Databricks Driver *
***************************************************************** */

/**
* Accept Databricks policy flag. This environment variable doesn't
* need to be split by the data source.
* TODO: Tech-debt: Remove totally someday
*/
databrickAcceptPolicy: () => {
const val = get('CUBEJS_DB_DATABRICKS_ACCEPT_POLICY').asBoolStrict();

if (val !== undefined) {
console.warn(
'The CUBEJS_DB_DATABRICKS_ACCEPT_POLICY is not needed anymore. Please, remove it'
);
}
},

/**
* Databricks jdbc-connection url.
*/
databrickUrl: ({
databricksUrl: ({
dataSource,
}: {
dataSource: string,
Expand All @@ -990,7 +975,7 @@ const variables: Record<string, (...args: any) => any> = {
/**
* Databricks jdbc-connection token.
*/
databrickToken: ({
databricksToken: ({
dataSource,
}: {
dataSource: string,
Expand All @@ -1012,6 +997,32 @@ const variables: Record<string, (...args: any) => any> = {
keyByDataSource('CUBEJS_DB_DATABRICKS_CATALOG', dataSource)
],

/**
 * Databricks OAuth client ID (same as the service principal UUID).
 * Resolved per data source from the CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_ID
 * environment variable; undefined when the variable is not set.
 */
databricksOAuthClientId: ({
  dataSource,
}: {
  dataSource: string,
}) => {
  const envKey = keyByDataSource('CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_ID', dataSource);
  return process.env[envKey];
},

/**
 * Databricks OAuth client secret.
 * Resolved per data source from the CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_SECRET
 * environment variable; undefined when the variable is not set.
 */
databricksOAuthClientSecret: ({
  dataSource,
}: {
  dataSource: string,
}) => {
  const envKey = keyByDataSource('CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_SECRET', dataSource);
  return process.env[envKey];
},

/** ****************************************************************
* Athena Driver *
***************************************************************** */
Expand Down
36 changes: 18 additions & 18 deletions packages/cubejs-backend-shared/test/db_env_multi.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1105,31 +1105,31 @@ describe('Multiple datasources', () => {
process.env.CUBEJS_DB_DATABRICKS_URL = 'default1';
process.env.CUBEJS_DS_POSTGRES_DB_DATABRICKS_URL = 'postgres1';
process.env.CUBEJS_DS_WRONG_DB_DATABRICKS_URL = 'wrong1';
expect(getEnv('databrickUrl', { dataSource: 'default' })).toEqual('default1');
expect(getEnv('databrickUrl', { dataSource: 'postgres' })).toEqual('postgres1');
expect(() => getEnv('databrickUrl', { dataSource: 'wrong' })).toThrow(
expect(getEnv('databricksUrl', { dataSource: 'default' })).toEqual('default1');
expect(getEnv('databricksUrl', { dataSource: 'postgres' })).toEqual('postgres1');
expect(() => getEnv('databricksUrl', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);

process.env.CUBEJS_DB_DATABRICKS_URL = 'default2';
process.env.CUBEJS_DS_POSTGRES_DB_DATABRICKS_URL = 'postgres2';
process.env.CUBEJS_DS_WRONG_DB_DATABRICKS_URL = 'wrong2';
expect(getEnv('databrickUrl', { dataSource: 'default' })).toEqual('default2');
expect(getEnv('databrickUrl', { dataSource: 'postgres' })).toEqual('postgres2');
expect(() => getEnv('databrickUrl', { dataSource: 'wrong' })).toThrow(
expect(getEnv('databricksUrl', { dataSource: 'default' })).toEqual('default2');
expect(getEnv('databricksUrl', { dataSource: 'postgres' })).toEqual('postgres2');
expect(() => getEnv('databricksUrl', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);

delete process.env.CUBEJS_DB_DATABRICKS_URL;
delete process.env.CUBEJS_DS_POSTGRES_DB_DATABRICKS_URL;
delete process.env.CUBEJS_DS_WRONG_DB_DATABRICKS_URL;
expect(() => getEnv('databrickUrl', { dataSource: 'default' })).toThrow(
expect(() => getEnv('databricksUrl', { dataSource: 'default' })).toThrow(
'The CUBEJS_DB_DATABRICKS_URL is required and missing.'
);
expect(() => getEnv('databrickUrl', { dataSource: 'postgres' })).toThrow(
expect(() => getEnv('databricksUrl', { dataSource: 'postgres' })).toThrow(
'The CUBEJS_DS_POSTGRES_DB_DATABRICKS_URL is required and missing.'
);
expect(() => getEnv('databrickUrl', { dataSource: 'wrong' })).toThrow(
expect(() => getEnv('databricksUrl', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);
});
Expand All @@ -1138,27 +1138,27 @@ describe('Multiple datasources', () => {
process.env.CUBEJS_DB_DATABRICKS_TOKEN = 'default1';
process.env.CUBEJS_DS_POSTGRES_DB_DATABRICKS_TOKEN = 'postgres1';
process.env.CUBEJS_DS_WRONG_DB_DATABRICKS_TOKEN = 'wrong1';
expect(getEnv('databrickToken', { dataSource: 'default' })).toEqual('default1');
expect(getEnv('databrickToken', { dataSource: 'postgres' })).toEqual('postgres1');
expect(() => getEnv('databrickToken', { dataSource: 'wrong' })).toThrow(
expect(getEnv('databricksToken', { dataSource: 'default' })).toEqual('default1');
expect(getEnv('databricksToken', { dataSource: 'postgres' })).toEqual('postgres1');
expect(() => getEnv('databricksToken', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);

process.env.CUBEJS_DB_DATABRICKS_TOKEN = 'default2';
process.env.CUBEJS_DS_POSTGRES_DB_DATABRICKS_TOKEN = 'postgres2';
process.env.CUBEJS_DS_WRONG_DB_DATABRICKS_TOKEN = 'wrong2';
expect(getEnv('databrickToken', { dataSource: 'default' })).toEqual('default2');
expect(getEnv('databrickToken', { dataSource: 'postgres' })).toEqual('postgres2');
expect(() => getEnv('databrickToken', { dataSource: 'wrong' })).toThrow(
expect(getEnv('databricksToken', { dataSource: 'default' })).toEqual('default2');
expect(getEnv('databricksToken', { dataSource: 'postgres' })).toEqual('postgres2');
expect(() => getEnv('databricksToken', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);

delete process.env.CUBEJS_DB_DATABRICKS_TOKEN;
delete process.env.CUBEJS_DS_POSTGRES_DB_DATABRICKS_TOKEN;
delete process.env.CUBEJS_DS_WRONG_DB_DATABRICKS_TOKEN;
expect(getEnv('databrickToken', { dataSource: 'default' })).toBeUndefined();
expect(getEnv('databrickToken', { dataSource: 'postgres' })).toBeUndefined();
expect(() => getEnv('databrickToken', { dataSource: 'wrong' })).toThrow(
expect(getEnv('databricksToken', { dataSource: 'default' })).toBeUndefined();
expect(getEnv('databricksToken', { dataSource: 'postgres' })).toBeUndefined();
expect(() => getEnv('databricksToken', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);
});
Expand Down
36 changes: 18 additions & 18 deletions packages/cubejs-backend-shared/test/db_env_single.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -705,42 +705,42 @@ describe('Single datasources', () => {

test('getEnv("databrickUrl")', () => {
process.env.CUBEJS_DB_DATABRICKS_URL = 'default1';
expect(getEnv('databrickUrl', { dataSource: 'default' })).toEqual('default1');
expect(getEnv('databrickUrl', { dataSource: 'postgres' })).toEqual('default1');
expect(getEnv('databrickUrl', { dataSource: 'wrong' })).toEqual('default1');
expect(getEnv('databricksUrl', { dataSource: 'default' })).toEqual('default1');
expect(getEnv('databricksUrl', { dataSource: 'postgres' })).toEqual('default1');
expect(getEnv('databricksUrl', { dataSource: 'wrong' })).toEqual('default1');

process.env.CUBEJS_DB_DATABRICKS_URL = 'default2';
expect(getEnv('databrickUrl', { dataSource: 'default' })).toEqual('default2');
expect(getEnv('databrickUrl', { dataSource: 'postgres' })).toEqual('default2');
expect(getEnv('databrickUrl', { dataSource: 'wrong' })).toEqual('default2');
expect(getEnv('databricksUrl', { dataSource: 'default' })).toEqual('default2');
expect(getEnv('databricksUrl', { dataSource: 'postgres' })).toEqual('default2');
expect(getEnv('databricksUrl', { dataSource: 'wrong' })).toEqual('default2');

delete process.env.CUBEJS_DB_DATABRICKS_URL;
expect(() => getEnv('databrickUrl', { dataSource: 'default' })).toThrow(
expect(() => getEnv('databricksUrl', { dataSource: 'default' })).toThrow(
'The CUBEJS_DB_DATABRICKS_URL is required and missing.'
);
expect(() => getEnv('databrickUrl', { dataSource: 'postgres' })).toThrow(
expect(() => getEnv('databricksUrl', { dataSource: 'postgres' })).toThrow(
'The CUBEJS_DB_DATABRICKS_URL is required and missing.'
);
expect(() => getEnv('databrickUrl', { dataSource: 'wrong' })).toThrow(
expect(() => getEnv('databricksUrl', { dataSource: 'wrong' })).toThrow(
'The CUBEJS_DB_DATABRICKS_URL is required and missing.'
);
});

// Title matches the renamed getter key ('databricksToken', not the legacy
// 'databrickToken') so test output stays consistent with env.ts.
test('getEnv("databricksToken")', () => {
  // Single-datasource mode: every dataSource name resolves to the default var.
  process.env.CUBEJS_DB_DATABRICKS_TOKEN = 'default1';
  expect(getEnv('databricksToken', { dataSource: 'default' })).toEqual('default1');
  expect(getEnv('databricksToken', { dataSource: 'postgres' })).toEqual('default1');
  expect(getEnv('databricksToken', { dataSource: 'wrong' })).toEqual('default1');

  // Re-reads the env var on every call (no caching).
  process.env.CUBEJS_DB_DATABRICKS_TOKEN = 'default2';
  expect(getEnv('databricksToken', { dataSource: 'default' })).toEqual('default2');
  expect(getEnv('databricksToken', { dataSource: 'postgres' })).toEqual('default2');
  expect(getEnv('databricksToken', { dataSource: 'wrong' })).toEqual('default2');

  // Token is optional (OAuth may be used instead): unset yields undefined,
  // never a throw.
  delete process.env.CUBEJS_DB_DATABRICKS_TOKEN;
  expect(getEnv('databricksToken', { dataSource: 'default' })).toBeUndefined();
  expect(getEnv('databricksToken', { dataSource: 'postgres' })).toBeUndefined();
  expect(getEnv('databricksToken', { dataSource: 'wrong' })).toBeUndefined();
});

test('getEnv("databricksCatalog")', () => {
Expand Down
104 changes: 97 additions & 7 deletions packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,16 @@ export type DatabricksDriverConfiguration = JDBCDriverConfiguration &
*/
token?: string,

/**
* Databricks OAuth Client ID.
*/
oauthClientId?: string,

/**
* Databricks OAuth Client Secret.
*/
oauthClientSecret?: string,

/**
* Azure tenant Id
*/
Expand Down Expand Up @@ -152,6 +162,10 @@ export class DatabricksDriver extends JDBCDriver {

private readonly parsedConnectionProperties: ParsedConnectionProperties;

private accessToken: string | undefined;

private accessTokenExpires: number = 0;

public static dialectClass() {
return DatabricksQuery;
}
Expand Down Expand Up @@ -192,14 +206,47 @@ export class DatabricksDriver extends JDBCDriver {
let showSparkProtocolWarn = false;
let url: string =
conf?.url ||
getEnv('databrickUrl', { dataSource }) ||
getEnv('databricksUrl', { dataSource }) ||
getEnv('jdbcUrl', { dataSource });
if (url.indexOf('jdbc:spark://') !== -1) {
showSparkProtocolWarn = true;
url = url.replace('jdbc:spark://', 'jdbc:databricks://');
}

const [uid, pwd, cleanedUrl] = extractAndRemoveUidPwdFromJdbcUrl(url);
const passwd = conf?.token ||
getEnv('databricksToken', { dataSource }) ||
pwd;
const oauthClientId = conf?.oauthClientId || getEnv('databricksOAuthClientId', { dataSource });
const oauthClientSecret = conf?.oauthClientSecret || getEnv('databricksOAuthClientSecret', { dataSource });

if (oauthClientId && !oauthClientSecret) {
throw new Error('Invalid credentials: No OAuth Client Secret provided');
} else if (!oauthClientId && oauthClientSecret) {
throw new Error('Invalid credentials: No OAuth Client ID provided');
} else if (!oauthClientId && !oauthClientSecret && !passwd) {
throw new Error('No credentials provided');
}

let authProps: Record<string, any> = {};

// OAuth credentials take precedence over UID+PWD when both are provided
// For magic numbers below - see Databricks docs:
// https://docs.databricks.com/aws/en/integrations/jdbc-oss/configure#authenticate-the-driver
if (oauthClientId) {
authProps = {
OAuth2ClientID: oauthClientId,
OAuth2Secret: oauthClientSecret,
AuthMech: 11,
Auth_Flow: 1,
};
} else {
authProps = {
UID: uid,
PWD: passwd,
AuthMech: 3,
};
}

const config: DatabricksDriverConfiguration = {
...conf,
Expand All @@ -208,11 +255,7 @@ export class DatabricksDriver extends JDBCDriver {
drivername: 'com.databricks.client.jdbc.Driver',
customClassPath: undefined,
properties: {
UID: uid,
PWD:
conf?.token ||
getEnv('databrickToken', { dataSource }) ||
pwd,
...authProps,
UserAgentEntry: 'CubeDev_Cube',
},
catalog:
Expand Down Expand Up @@ -291,8 +334,55 @@ export class DatabricksDriver extends JDBCDriver {
this.showDeprecations();
}

/**
 * Exchanges the configured OAuth client ID + secret for a fresh M2M access
 * token via the Databricks OIDC token endpoint (client_credentials grant)
 * and caches it on the instance together with its expiry timestamp.
 *
 * The cached expiry is shifted 60s earlier than the reported `expires_in`
 * so callers refresh before the token actually lapses.
 *
 * @throws Error when the token endpoint responds with a non-2xx status or
 *         returns a payload without an access token.
 */
private async fetchAccessToken(): Promise<void> {
  // Client credentials are sent via HTTP Basic auth, per the Databricks
  // M2M OAuth flow.
  const basicAuth = Buffer.from(`${this.config.properties.OAuth2ClientID}:${this.config.properties.OAuth2Secret}`).toString('base64');

  const res = await fetch(`https://${this.parsedConnectionProperties.host}/oidc/v1/token`, {
    method: 'POST',
    headers: {
      Authorization: `Basic ${basicAuth}`,
      'Content-Type': 'application/x-www-form-urlencoded',
    },
    body: new URLSearchParams({
      grant_type: 'client_credentials',
      scope: 'all-apis',
    }),
  });

  if (!res.ok) {
    // statusText may be empty (e.g. over HTTP/2), so include the numeric
    // status code to keep the error actionable.
    throw new Error(`Failed to get access token: ${res.status} ${res.statusText}`);
  }

  const resp = await res.json();

  // Guard against a 2xx response with an unexpected payload so we never
  // cache `undefined` as a "valid" token.
  if (!resp || typeof resp.access_token !== 'string') {
    throw new Error('Failed to get access token: malformed token response');
  }

  this.accessToken = resp.access_token;
  // Renew one minute early to avoid using a token that expires mid-request.
  // A missing/non-numeric expires_in degrades to an already-expired entry,
  // forcing a refetch on the next use instead of pinning a stale token.
  this.accessTokenExpires = Date.now() + (Number(resp.expires_in) || 0) * 1000 - 60_000;
}

private async getValidAccessToken(): Promise<string> {
if (
!this.accessToken ||
!this.accessTokenExpires ||
Date.now() >= this.accessTokenExpires
) {
await this.fetchAccessToken();
}
return this.accessToken!;
}

public override async testConnection() {
const token = `Bearer ${this.config.properties.PWD}`;
let token: string;

// Databricks docs on accessing REST API
// https://docs.databricks.com/aws/en/dev-tools/auth/oauth-m2m
if (this.config.properties.OAuth2Secret) {
const at = await this.getValidAccessToken();
token = `Bearer ${at}`;
} else {
token = `Bearer ${this.config.properties.PWD}`;
}

const res = await fetch(`https://${this.parsedConnectionProperties.host}/api/2.0/sql/warehouses/${this.parsedConnectionProperties.warehouseId}`, {
headers: { Authorization: token },
Expand Down
5 changes: 3 additions & 2 deletions packages/cubejs-databricks-jdbc-driver/src/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ export async function resolveJDBCDriver(): Promise<string> {

/**
 * Extracts UID and PWD from the JDBC URL, if present, and returns the UID, PWD and the URL without these params.
* New Databricks OSS driver throws an error if UID and PWD are provided in the URL and as a separate params
* New Databricks OSS driver throws an error if any parameter is provided in the URL and as a separate param
* passed to the driver instance. That's why we strip them out from the URL if they exist there.
* @param jdbcUrl
*/
Expand All @@ -48,7 +48,8 @@ export function extractAndRemoveUidPwdFromJdbcUrl(jdbcUrl: string): [uid: string

const cleanedUrl = jdbcUrl
.replace(/;?UID=[^;]*/i, '')
.replace(/;?PWD=[^;]*/i, '');
.replace(/;?PWD=[^;]*/i, '')
.replace(/;?AuthMech=[^;]*/i, '');

return [uid, pwd, cleanedUrl];
}
Expand Down
Loading
Loading