Skip to content

Commit bb3b56b

Browse files
jimdowling and claude committed
[FSTORE-2036] PR 4a — Python SDK support for Unity Catalog OAuth M2M
https://hopsworks.atlassian.net/browse/FSTORE-2036

PR 4 of 4 for FSTORE-2036, hopsworks-api half. Extend UnityCatalogConnector so the Python SDK round-trips the new OAuth fields that the backend (PR 1 / PR 2) and frontend (PR 3) added. Legacy PAT-only construction keeps working unchanged.

The constructor gains auth_method, client_id, client_secret, oauth_endpoint, account_id, account_host, has_access_token, and has_client_secret.

auth_method defaults to "PAT" when absent, so existing code paths and fixtures that construct connectors with just access_token keep producing PAT connectors. When the caller asks for OAUTH_M2M without specifying oauth_endpoint, oauth_endpoint defaults to "WORKSPACE", matching the frontend default.

has_access_token and has_client_secret are booleans that accompany the write-only secrets: the server emits them on read so a caller can tell whether a secret is on file without ever seeing it. When a connector is constructed locally and has_* is not supplied, it falls back to "is the secret non-None", so client code that builds a connector in-process with a secret in hand still reports the correct state.

from_response_json keeps using humps.decamelize + **kwargs splat; the new fields are picked up by name.

The existing get_unity_catalog fixture is updated to match the post-PR-1 backend wire format (hasAccessToken: true on read; no decrypted access_token in the response). Two new fixtures (get_unity_catalog_oauth_workspace, get_unity_catalog_oauth_account) cover the OAuth modes.

Tests extended from 4 to 8 in TestUnityCatalogConnector. New cases:
- from_response_json for OAuth workspace mode;
- from_response_json for OAuth account mode;
- legacy construction defaulting to PAT (no auth_method supplied);
- OAUTH_M2M construction defaulting oauth_endpoint to WORKSPACE.

Verification:
- uv run pytest TestUnityCatalogConnector — 8/8 passing.
- uv run ruff check / docsig — clean.

Signed-off-by: Jim Dowling <jim@hopsworks.ai>
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 95e9507 commit bb3b56b

3 files changed

Lines changed: 236 additions & 2 deletions

File tree

python/hsfs/storage_connector.py

Lines changed: 104 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3365,13 +3365,49 @@ def __init__(
33653365
default_catalog: str | None = None,
33663366
aws_region: str | None = None,
33673367
arguments: list[dict[str, Any]] | dict[str, Any] | None = None,
3368+
auth_method: str | None = None,
3369+
client_id: str | None = None,
3370+
client_secret: str | None = None,
3371+
oauth_endpoint: str | None = None,
3372+
account_id: str | None = None,
3373+
account_host: str | None = None,
3374+
has_access_token: bool | None = None,
3375+
has_client_secret: bool | None = None,
33683376
**kwargs: Any,
33693377
) -> None:
33703378
super().__init__(id, name, description, featurestore_id)
33713379
self._workspace_url = workspace_url
33723380
self._access_token = access_token
33733381
self._default_catalog = default_catalog
33743382
self._aws_region = aws_region
3383+
# auth_method defaults to 'PAT' for back-compat with connectors created
3384+
# before OAuth support landed; oauth_endpoint defaults to 'WORKSPACE'
3385+
# when caller asks for OAUTH_M2M without specifying one.
3386+
if auth_method is None:
3387+
self._auth_method = "PAT"
3388+
else:
3389+
self._auth_method = auth_method
3390+
self._client_id = client_id
3391+
self._client_secret = client_secret
3392+
if self._auth_method == "OAUTH_M2M" and oauth_endpoint is None:
3393+
self._oauth_endpoint = "WORKSPACE"
3394+
else:
3395+
self._oauth_endpoint = oauth_endpoint
3396+
self._account_id = account_id
3397+
self._account_host = account_host
3398+
# has_access_token / has_client_secret are server-emitted booleans that
3399+
# let callers tell whether a secret is on file without exposing it.
3400+
# They are never sent back on write (the backend ignores them).
3401+
self._has_access_token = (
3402+
bool(has_access_token)
3403+
if has_access_token is not None
3404+
else (access_token is not None)
3405+
)
3406+
self._has_client_secret = (
3407+
bool(has_client_secret)
3408+
if has_client_secret is not None
3409+
else (client_secret is not None)
3410+
)
33753411
if isinstance(arguments, list):
33763412
# Match the other connectors in this file: tolerate name-only entries
33773413
# and skip entries without a name. Backend serialises these as a list
@@ -3418,6 +3454,74 @@ def arguments(self) -> dict[str, Any]:
34183454
"""Additional Unity Catalog connection arguments passed through to the Arrow Flight server."""
34193455
return self._arguments
34203456

3457+
@public
3458+
@property
3459+
def auth_method(self) -> str:
3460+
"""Authentication method for the Databricks workspace, either "PAT" or "OAUTH_M2M".
3461+
3462+
Defaults to "PAT" for connectors created before OAuth support landed.
3463+
"""
3464+
return self._auth_method
3465+
3466+
@public
3467+
@property
3468+
def client_id(self) -> str | None:
3469+
"""Databricks service principal client ID, only set when [`auth_method`][hsfs.storage_connector.UnityCatalogConnector.auth_method] is "OAUTH_M2M"."""
3470+
return self._client_id
3471+
3472+
@public
3473+
@property
3474+
def client_secret(self) -> str | None:
3475+
"""Databricks service principal client secret.
3476+
3477+
Write-only on the backend: this property is only populated when the
3478+
caller has just constructed the connector locally with a secret in hand.
3479+
Server responses never carry it; use [`has_client_secret`][hsfs.storage_connector.UnityCatalogConnector.has_client_secret] to test
3480+
whether a secret is on file.
3481+
"""
3482+
return self._client_secret
3483+
3484+
@public
3485+
@property
3486+
def oauth_endpoint(self) -> str | None:
3487+
"""OAuth token endpoint flavour, either "WORKSPACE" or "ACCOUNT".
3488+
3489+
Only set when [`auth_method`][hsfs.storage_connector.UnityCatalogConnector.auth_method] is "OAUTH_M2M".
3490+
"""
3491+
return self._oauth_endpoint
3492+
3493+
@public
3494+
@property
3495+
def account_id(self) -> str | None:
3496+
"""Databricks account ID, only set when [`oauth_endpoint`][hsfs.storage_connector.UnityCatalogConnector.oauth_endpoint] is "ACCOUNT"."""
3497+
return self._account_id
3498+
3499+
@public
3500+
@property
3501+
def account_host(self) -> str | None:
3502+
"""Databricks account-console host, only set when [`oauth_endpoint`][hsfs.storage_connector.UnityCatalogConnector.oauth_endpoint] is "ACCOUNT"."""
3503+
return self._account_host
3504+
3505+
@public
3506+
@property
3507+
def has_access_token(self) -> bool:
3508+
"""True iff a personal access token is on file for this connector.
3509+
3510+
The server never returns the access token itself on read; this boolean
3511+
lets callers tell whether one exists without exposing the secret.
3512+
"""
3513+
return self._has_access_token
3514+
3515+
@public
3516+
@property
3517+
def has_client_secret(self) -> bool:
3518+
"""True iff a client secret is on file for this connector.
3519+
3520+
The server never returns the client secret itself on read; this boolean
3521+
lets callers tell whether one exists without exposing the secret.
3522+
"""
3523+
return self._has_client_secret
3524+
34213525
@public
34223526
def connector_options(self) -> dict[str, Any]:
34233527
"""Return UC connector options shaped for external library use."""

python/tests/fixtures/storage_connector_fixtures.json

Lines changed: 59 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -144,7 +144,8 @@
144144
"name": "test_unity_catalog",
145145
"storageConnectorType": "UNITY_CATALOG",
146146
"workspace_url": "https://test.cloud.databricks.com",
147-
"access_token": "dapi-test-token",
147+
"authMethod": "PAT",
148+
"hasAccessToken": true,
148149
"default_catalog": "test_catalog",
149150
"aws_region": "us-west-2",
150151
"arguments": [{"name": "arg1", "value": "val1"}]
@@ -185,6 +186,63 @@
185186
},
186187
"headers": null
187188
},
189+
"get_unity_catalog_oauth_workspace": {
190+
"response": {
191+
"type": "featurestoreUnityCatalogConnectorDTO",
192+
"featurestoreId": 67,
193+
"id": 1,
194+
"name": "test_unity_catalog_oauth",
195+
"storageConnectorType": "UNITY_CATALOG",
196+
"workspace_url": "https://test.cloud.databricks.com",
197+
"authMethod": "OAUTH_M2M",
198+
"oauthEndpoint": "WORKSPACE",
199+
"clientId": "test-sp-client-id",
200+
"hasClientSecret": true,
201+
"default_catalog": "test_catalog"
202+
},
203+
"method": "GET",
204+
"path_params": [
205+
"project",
206+
"119",
207+
"featurestores",
208+
67,
209+
"storageconnectors",
210+
"test_unity_catalog_oauth"
211+
],
212+
"query_params": {
213+
"temporaryCredentials": true
214+
},
215+
"headers": null
216+
},
217+
"get_unity_catalog_oauth_account": {
218+
"response": {
219+
"type": "featurestoreUnityCatalogConnectorDTO",
220+
"featurestoreId": 67,
221+
"id": 2,
222+
"name": "test_unity_catalog_account",
223+
"storageConnectorType": "UNITY_CATALOG",
224+
"workspace_url": "https://test.cloud.databricks.com",
225+
"authMethod": "OAUTH_M2M",
226+
"oauthEndpoint": "ACCOUNT",
227+
"clientId": "test-sp-client-id",
228+
"hasClientSecret": true,
229+
"accountId": "12345678-1234-1234-1234-1234567890ab",
230+
"accountHost": "accounts.cloud.databricks.com"
231+
},
232+
"method": "GET",
233+
"path_params": [
234+
"project",
235+
"119",
236+
"featurestores",
237+
67,
238+
"storageconnectors",
239+
"test_unity_catalog_account"
240+
],
241+
"query_params": {
242+
"temporaryCredentials": true
243+
},
244+
"headers": null
245+
},
188246
"get_redshift_basic_info": {
189247
"response": {
190248
"type": "featurestoreRedshiftConnectorDTO",

python/tests/test_storage_connector.py

Lines changed: 73 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -160,11 +160,51 @@ def test_from_response_json(self, backend_fixtures):
160160
assert sc.description == "Unity Catalog connector description"
161161
assert sc.type == storage_connector.StorageConnector.UNITY_CATALOG
162162
assert sc.workspace_url == "https://test.cloud.databricks.com"
163-
assert sc.access_token == "dapi-test-token"
163+
# access_token itself is write-only on the backend; the server never
164+
# returns it on GET. hasAccessToken signals that one is on file.
165+
assert sc.access_token is None
166+
assert sc.has_access_token is True
167+
assert sc.auth_method == "PAT"
164168
assert sc.default_catalog == "test_catalog"
165169
assert sc.aws_region == "us-west-2"
166170
assert sc.arguments == {"arg1": "val1"}
167171

172+
def test_from_response_json_oauth_workspace(self, backend_fixtures):
173+
# Arrange
174+
json = backend_fixtures["storage_connector"][
175+
"get_unity_catalog_oauth_workspace"
176+
]["response"]
177+
178+
# Act
179+
sc = storage_connector.StorageConnector.from_response_json(json)
180+
181+
# Assert
182+
assert sc.auth_method == "OAUTH_M2M"
183+
assert sc.oauth_endpoint == "WORKSPACE"
184+
assert sc.client_id == "test-sp-client-id"
185+
assert sc.client_secret is None
186+
assert sc.has_client_secret is True
187+
assert sc.account_id is None
188+
assert sc.account_host is None
189+
190+
def test_from_response_json_oauth_account(self, backend_fixtures):
191+
# Arrange
192+
json = backend_fixtures["storage_connector"]["get_unity_catalog_oauth_account"][
193+
"response"
194+
]
195+
196+
# Act
197+
sc = storage_connector.StorageConnector.from_response_json(json)
198+
199+
# Assert
200+
assert sc.auth_method == "OAUTH_M2M"
201+
assert sc.oauth_endpoint == "ACCOUNT"
202+
assert sc.client_id == "test-sp-client-id"
203+
assert sc.client_secret is None
204+
assert sc.has_client_secret is True
205+
assert sc.account_id == "12345678-1234-1234-1234-1234567890ab"
206+
assert sc.account_host == "accounts.cloud.databricks.com"
207+
168208
def test_from_response_json_basic_info(self, backend_fixtures):
169209
# Arrange
170210
json = backend_fixtures["storage_connector"]["get_unity_catalog_basic_info"][
@@ -205,6 +245,38 @@ def test_spark_options_not_supported(self):
205245
with pytest.raises(NotImplementedError):
206246
sc.spark_options()
207247

248+
def test_legacy_construction_defaults_pat(self):
249+
# Connectors built before OAuth support landed have no auth_method
250+
# field at all. They must keep working as PAT.
251+
sc = storage_connector.UnityCatalogConnector(
252+
id=1,
253+
name="uc",
254+
featurestore_id=1,
255+
workspace_url="https://ws.cloud.databricks.com",
256+
access_token="dapi-xyz",
257+
)
258+
assert sc.auth_method == "PAT"
259+
assert sc.oauth_endpoint is None
260+
assert sc.client_id is None
261+
assert sc.has_access_token is True
262+
assert sc.has_client_secret is False
263+
264+
def test_oauth_construction_defaults_workspace_endpoint(self):
265+
# auth_method=OAUTH_M2M without oauth_endpoint defaults to WORKSPACE,
266+
# matching the frontend default.
267+
sc = storage_connector.UnityCatalogConnector(
268+
id=1,
269+
name="uc",
270+
featurestore_id=1,
271+
workspace_url="https://ws.cloud.databricks.com",
272+
auth_method="OAUTH_M2M",
273+
client_id="cid",
274+
client_secret="csec",
275+
)
276+
assert sc.oauth_endpoint == "WORKSPACE"
277+
assert sc.client_secret == "csec"
278+
assert sc.has_client_secret is True
279+
208280

209281
class TestRedshiftConnector:
210282
def test_from_response_json(self, backend_fixtures):

0 commit comments

Comments
 (0)