Skip to content

Commit a9cef4d

Browse files
committed
Fix U2M/M2M OAuth for SPOG (unified) AWS hosts
SPOG / unified hosts (a single *.databricks.com custom URL that fronts workspaces across accounts, addressed with ?o=<workspaceId>) failed OAuth in two ways:

1. InferCloudFromHost returned Unknown for bare *.databricks.com hosts, so the U2M authenticator errored ("unhandled cloud type") before any OAuth.
2. For AWS/GCP, GetEndpoint discovered the OIDC endpoint by hitting https://<host>/oidc directly. On a SPOG host that resolves to the account-agnostic account-console authorize endpoint, which mints a token for the caller's default account. The target workspace (owned by a different account) then rejects it with 400 "Invalid Token".

Fixes:

- InferCloudFromHost: classify bare *.databricks.com hosts as AWS (checked after Azure/GCP so those remain correctly classified).
- GetEndpoint (AWS/GCP): resolve the OIDC issuer via /.well-known/databricks-config, substituting the {account_id} placeholder, so unified/SPOG hosts use their account-rooted endpoint. Normal workspace hosts advertise https://<host>/oidc, so the issuer is identical to before. Any failure (absent endpoint, non-200, unparseable, timeout) falls back to the previous bare-host issuer, so existing behavior is preserved. This mirrors databricks-sdk-go's config host-metadata resolution.

Adds oauth_test.go covering cloud inference (incl. SPOG and the GCP/Azure disambiguation), {account_id} substitution, and the databricks-config failure/fallback paths.

Tested end-to-end against staging via the real driver auth paths:

- AWS non-SPOG (e2-dogfood...cloud.databricks.com): U2M and M2M pass.
- AWS SPOG (dogfood.staging.databricks.com): U2M and M2M pass.
- Azure SPOG (dogfood-spog.staging.azuredatabricks.net): U2M passes (Azure path unchanged; account resolution handled by the host redirector).
- Full unit suite passes with no regressions.

Co-authored-by: Isaac
1 parent 4782948 commit a9cef4d

2 files changed

Lines changed: 225 additions & 1 deletion

File tree

auth/oauth/oauth.go

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,21 @@ package oauth
22

33
import (
44
"context"
5+
"encoding/json"
56
"errors"
67
"fmt"
8+
"net/http"
79
"strings"
10+
"time"
811

912
"github.com/coreos/go-oidc/v3/oidc"
1013
"golang.org/x/oauth2"
1114
)
1215

16+
// hostConfigTimeout bounds the /.well-known/databricks-config lookup so it cannot
17+
// stall connection setup; on any failure we fall back to bare-host OIDC discovery.
18+
const hostConfigTimeout = 30 * time.Second
19+
1320
var azureTenants = map[string]string{
1421
".dev.azuredatabricks.net": "62a912ac-b58e-4c1d-89ea-b2dbfc7358fc",
1522
".staging.azuredatabricks.net": "4a67d088-db5c-48f1-9ff2-0aace800ae68",
@@ -35,7 +42,12 @@ func GetEndpoint(ctx context.Context, hostName string) (oauth2.Endpoint, error)
3542
return oauth2.Endpoint{AuthURL: authURL, TokenURL: tokenURL}, nil
3643
}
3744

38-
issuerURL := fmt.Sprintf("https://%s/oidc", hostName)
45+
// AWS / GCP. Resolve the OIDC issuer via /.well-known/databricks-config so that
46+
// unified / SPOG hosts (one host fronting workspaces across multiple accounts)
47+
// use their account-rooted endpoint instead of the account-agnostic console login.
48+
// For normal workspace hosts this resolves to https://<host>/oidc, identical to
49+
// the previous behavior.
50+
issuerURL := resolveOIDCIssuer(ctx, hostName)
3951
ctx = oidc.InsecureIssuerURLContext(ctx, issuerURL)
4052
provider, err := oidc.NewProvider(ctx, issuerURL)
4153
if err != nil {
@@ -47,6 +59,71 @@ func GetEndpoint(ctx context.Context, hostName string) (oauth2.Endpoint, error)
4759
return endpoint, err
4860
}
4961

62+
// hostMetadata holds the fields of the /.well-known/databricks-config JSON
// document that this package reads. Any other keys in the document are ignored
// when decoding.
type hostMetadata struct {
	// OIDCEndpoint is the advertised OIDC issuer URL ("oidc_endpoint"). It may
	// carry an {account_id} placeholder that must be filled in before use.
	OIDCEndpoint string `json:"oidc_endpoint"`

	// AccountID is the "account_id" value substituted into that placeholder.
	AccountID string `json:"account_id"`
}
67+
68+
// resolveOIDCIssuer returns the OIDC issuer URL to use for AWS/GCP OAuth discovery.
69+
//
70+
// On a unified / SPOG host, the bare-host OIDC discovery doc points at the
71+
// account-agnostic account-console login. That mints a token for the caller's
72+
// default account, which the target workspace rejects ("Invalid Token") when the
73+
// workspace belongs to a different account. Such hosts advertise the correct,
74+
// account-rooted OIDC endpoint via /.well-known/databricks-config (with an
75+
// {account_id} placeholder); we consult it and substitute the account id.
76+
//
77+
// For a normal workspace host the advertised endpoint is just https://<host>/oidc,
78+
// so the result is identical to the historical bare-host issuer. Any failure
79+
// (endpoint absent, non-200, unparseable, missing field, timeout) falls back to
80+
// the bare-host issuer, preserving existing behavior.
81+
func resolveOIDCIssuer(ctx context.Context, hostName string) string {
82+
fallback := fmt.Sprintf("https://%s/oidc", hostName)
83+
84+
url := fmt.Sprintf("https://%s/.well-known/databricks-config", hostName)
85+
client := &http.Client{Timeout: hostConfigTimeout}
86+
87+
meta, ok := fetchHostMetadata(ctx, client, url)
88+
if !ok || meta.OIDCEndpoint == "" {
89+
return fallback
90+
}
91+
92+
return substituteAccountID(meta)
93+
}
94+
95+
// substituteAccountID resolves the {account_id} placeholder in the advertised
96+
// oidc_endpoint. Workspace hosts have no placeholder and are returned unchanged.
97+
func substituteAccountID(meta hostMetadata) string {
98+
return strings.ReplaceAll(meta.OIDCEndpoint, "{account_id}", meta.AccountID)
99+
}
100+
101+
// fetchHostMetadata GETs /.well-known/databricks-config and decodes it. The bool
102+
// is false on any failure (request error, non-200, unparseable body) so callers
103+
// fall back to bare-host discovery.
104+
func fetchHostMetadata(ctx context.Context, client *http.Client, url string) (hostMetadata, bool) {
105+
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
106+
if err != nil {
107+
return hostMetadata{}, false
108+
}
109+
110+
resp, err := client.Do(req)
111+
if err != nil {
112+
return hostMetadata{}, false
113+
}
114+
defer resp.Body.Close() //nolint:errcheck
115+
116+
if resp.StatusCode != http.StatusOK {
117+
return hostMetadata{}, false
118+
}
119+
120+
var meta hostMetadata
121+
if err := json.NewDecoder(resp.Body).Decode(&meta); err != nil {
122+
return hostMetadata{}, false
123+
}
124+
return meta, true
125+
}
126+
50127
func GetScopes(hostName string, scopes []string) []string {
51128
for _, s := range []string{oidc.ScopeOfflineAccess} {
52129
if !HasScope(scopes, s) {
@@ -135,6 +212,16 @@ func InferCloudFromHost(hostname string) CloudType {
135212
return GCP
136213
}
137214
}
215+
216+
// Unified / SPOG (Single Pane of Glass) AWS hosts use bare *.databricks.com
217+
// custom URLs (e.g. <name>.databricks.com, <name>.staging.databricks.com) that
218+
// match none of the lists above. Treat them as AWS. This is checked last so the
219+
// more specific Azure (.azuredatabricks.net) and GCP (.gcp.databricks.com) hosts
220+
// are classified first.
221+
if strings.Contains(hostname, "databricks.com") {
222+
return AWS
223+
}
224+
138225
return Unknown
139226
}
140227

auth/oauth/oauth_test.go

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
package oauth
2+
3+
import (
4+
"context"
5+
"net/http"
6+
"net/http/httptest"
7+
"testing"
8+
)
9+
10+
func TestInferCloudFromHost(t *testing.T) {
11+
cases := []struct {
12+
host string
13+
want CloudType
14+
}{
15+
// Standard per-workspace hosts.
16+
{"dbc-1234.cloud.databricks.com", AWS},
17+
{"example.cloud.databricks.us", AWS},
18+
{"foo.dev.databricks.com", AWS},
19+
{"adb-123.azuredatabricks.net", Azure},
20+
{"x.databricks.azure.us", Azure},
21+
{"y.databricks.azure.cn", Azure},
22+
{"ws.gcp.databricks.com", GCP},
23+
// SPOG / unified custom-URL AWS hosts (the fix): must classify as AWS,
24+
// not Unknown, and must NOT be swallowed by the GCP/Azure checks.
25+
{"pecoaws.databricks.com", AWS},
26+
{"dogfood.staging.databricks.com", AWS},
27+
// Azure SPOG stays Azure.
28+
{"dogfood-spog.staging.azuredatabricks.net", Azure},
29+
// GCP custom host must remain GCP even though it contains "databricks.com".
30+
{"foo.gcp.databricks.com", GCP},
31+
// Truly unrelated host stays Unknown.
32+
{"example.com", Unknown},
33+
}
34+
35+
for _, tc := range cases {
36+
t.Run(tc.host, func(t *testing.T) {
37+
if got := InferCloudFromHost(tc.host); got != tc.want {
38+
t.Fatalf("InferCloudFromHost(%q) = %v, want %v", tc.host, got, tc.want)
39+
}
40+
})
41+
}
42+
}
43+
44+
func TestGetAzureDnsZone(t *testing.T) {
45+
// Documents current behavior: the generic suffix is matched first, so staging
46+
// and dev Azure hosts resolve to the prod tenant. (Separate fix tracked.)
47+
cases := []struct {
48+
host string
49+
want string
50+
}{
51+
{"adb-123.azuredatabricks.net", ".azuredatabricks.net"},
52+
{"x.databricks.azure.us", ".databricks.azure.us"},
53+
{"nope.example.com", ""},
54+
}
55+
for _, tc := range cases {
56+
t.Run(tc.host, func(t *testing.T) {
57+
if got := GetAzureDnsZone(tc.host); got != tc.want {
58+
t.Fatalf("GetAzureDnsZone(%q) = %q, want %q", tc.host, got, tc.want)
59+
}
60+
})
61+
}
62+
}
63+
64+
func TestResolveOIDCIssuer_substitutesAccountID(t *testing.T) {
65+
// Unified / SPOG host advertises an account-rooted endpoint with a placeholder.
66+
srv := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
67+
if r.URL.Path != "/.well-known/databricks-config" {
68+
w.WriteHeader(http.StatusNotFound)
69+
return
70+
}
71+
_, _ = w.Write([]byte(`{
72+
"oidc_endpoint": "https://spog.example.com/oidc/accounts/{account_id}",
73+
"account_id": "7a99b43c-b46c-432b-b0a7-814217701909",
74+
"host_type": "unified"
75+
}`))
76+
}))
77+
defer srv.Close()
78+
79+
meta, ok := fetchHostMetadata(context.Background(), srv.Client(), srv.URL+"/.well-known/databricks-config")
80+
if !ok {
81+
t.Fatal("fetchHostMetadata returned ok=false, want true")
82+
}
83+
got := substituteAccountID(meta)
84+
want := "https://spog.example.com/oidc/accounts/7a99b43c-b46c-432b-b0a7-814217701909"
85+
if got != want {
86+
t.Fatalf("issuer = %q, want %q", got, want)
87+
}
88+
}
89+
90+
func TestResolveOIDCIssuer_workspaceHostUnchanged(t *testing.T) {
91+
// Normal workspace host: endpoint has no placeholder, so it is returned as-is
92+
// (equivalent to the historical https://<host>/oidc).
93+
srv := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
94+
_, _ = w.Write([]byte(`{
95+
"oidc_endpoint": "https://ws.cloud.databricks.com/oidc",
96+
"account_id": "7a99b43c-b46c-432b-b0a7-814217701909",
97+
"host_type": "workspace"
98+
}`))
99+
}))
100+
defer srv.Close()
101+
102+
meta, ok := fetchHostMetadata(context.Background(), srv.Client(), srv.URL+"/.well-known/databricks-config")
103+
if !ok {
104+
t.Fatal("fetchHostMetadata returned ok=false, want true")
105+
}
106+
if got := substituteAccountID(meta); got != "https://ws.cloud.databricks.com/oidc" {
107+
t.Fatalf("issuer = %q, want unchanged workspace endpoint", got)
108+
}
109+
}
110+
111+
func TestFetchHostMetadata_failuresFallBack(t *testing.T) {
112+
t.Run("404", func(t *testing.T) {
113+
srv := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
114+
w.WriteHeader(http.StatusNotFound)
115+
}))
116+
defer srv.Close()
117+
if _, ok := fetchHostMetadata(context.Background(), srv.Client(), srv.URL); ok {
118+
t.Fatal("ok=true on 404, want false (fallback)")
119+
}
120+
})
121+
122+
t.Run("garbage body", func(t *testing.T) {
123+
srv := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
124+
_, _ = w.Write([]byte("not json"))
125+
}))
126+
defer srv.Close()
127+
if _, ok := fetchHostMetadata(context.Background(), srv.Client(), srv.URL); ok {
128+
t.Fatal("ok=true on garbage body, want false (fallback)")
129+
}
130+
})
131+
132+
t.Run("unreachable", func(t *testing.T) {
133+
if _, ok := fetchHostMetadata(context.Background(), &http.Client{}, "https://127.0.0.1:1/nope"); ok {
134+
t.Fatal("ok=true on unreachable host, want false (fallback)")
135+
}
136+
})
137+
}

0 commit comments

Comments
 (0)