Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/BIGQUERY_README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ export BIGQUERY_LOCATION="<your-dataset-location>" # Optional
export BIGQUERY_USE_CLIENT_OAUTH="true" # Optional: true, false, or a custom header name
export BIGQUERY_SCOPES="<comma-separated-scopes>" # Optional
export BIGQUERY_IMPERSONATE_SERVICE_ACCOUNT="<service-account-email>" # Optional: Service account to impersonate
export BIGQUERY_ENDPOINT="" # Optional: proxy/emulator URL or host:port (http supported); empty = default Google API
```

Add the following configuration to your MCP client (e.g., `settings.json` for Gemini CLI, `mcp_config.json` for Antigravity):
Expand Down
5 changes: 5 additions & 0 deletions docs/en/integrations/bigquery/prebuilt-configs/bigquery.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ description: "Details of the BigQuery prebuilt configuration."
to impersonate when making BigQuery and Dataplex API calls. The
authenticated principal must have `roles/iam.serviceAccountTokenCreator`
on the target service account.
* `BIGQUERY_ENDPOINT`: (Optional) Override the BigQuery API host (URL or
`host:port`) for proxies, alternate front-ends, or local emulators.
`http` endpoints are supported (e.g. `http://localhost:9050`). Unset or
empty uses the default Google endpoint. (For the official local emulator,
client libraries use the `BIGQUERY_EMULATOR_HOST` convention.)
* **Permissions:**
* **BigQuery User** (`roles/bigquery.user`) to execute queries and view
metadata.
Expand Down
3 changes: 3 additions & 0 deletions docs/en/integrations/bigquery/source.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ project: "my-project-id"
# - "https://www.googleapis.com/auth/drive.readonly"
# maxQueryResultRows: 50 # Optional: Limits the number of rows returned by queries. Defaults to 50.
# maximumBytesBilled: 10737418240 # Optional: Per-query bytes scanned cap (in bytes).
# apiEndpoint: "https://my-proxy.example.com" # Optional: Override the BigQuery API host (URL or host:port) for proxies/emulators; http is supported. Unset or empty uses the default Google endpoint.
```

Initialize a BigQuery source that uses the client's access token:
Expand All @@ -135,6 +136,7 @@ useClientOAuth: true
# - "https://www.googleapis.com/auth/drive.readonly"
# maxQueryResultRows: 50 # Optional: Limits the number of rows returned by queries. Defaults to 50.
# maximumBytesBilled: 10737418240 # Optional: Per-query bytes scanned cap (in bytes).
# apiEndpoint: "https://my-proxy.example.com" # Optional: Override the BigQuery API host (URL or host:port) for proxies/emulators; http is supported. Unset or empty uses the default Google endpoint.
```

## Reference
Expand All @@ -151,3 +153,4 @@ useClientOAuth: true
| impersonateServiceAccount | string | false | Service account email to impersonate when making BigQuery and Dataplex API calls. The authenticated principal must have the `roles/iam.serviceAccountTokenCreator` role on the target service account. [Learn More](https://cloud.google.com/iam/docs/service-account-impersonation) |
| maxQueryResultRows | int | false | The maximum number of rows to return from a query. Defaults to 50. |
| maximumBytesBilled | int64 | false | The maximum bytes billed per query. When set, queries that exceed this limit fail before executing. |
| apiEndpoint | string | false | Overrides the BigQuery API endpoint (URL or `host:port`) for proxies, alternate front-ends, or local emulators. `http` endpoints are supported (e.g. `http://localhost:9050`). Unset or empty uses the default Google endpoint. The scheme is preserved and a default port is added when missing (`80` for http, otherwise `443`). Dataplex and `ask_data_insights` use different API surfaces and are not affected. |
1 change: 1 addition & 0 deletions internal/prebuiltconfigs/tools/bigquery.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ useClientOAuth: ${BIGQUERY_USE_CLIENT_OAUTH:false}
scopes: ${BIGQUERY_SCOPES:}
maxQueryResultRows: ${BIGQUERY_MAX_QUERY_RESULT_ROWS:50}
impersonateServiceAccount: ${BIGQUERY_IMPERSONATE_SERVICE_ACCOUNT:}
apiEndpoint: ${BIGQUERY_ENDPOINT:}
---
kind: tool
name: analyze_contribution
Expand Down
57 changes: 57 additions & 0 deletions internal/sources/bigquery/api_endpoint.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package bigquery

import (
"strings"

"google.golang.org/api/option"
)

// normalizeAPIEndpoint returns a value accepted by option.WithEndpoint for both
// the bigquery client and the bigquery/v2 REST service.
//
// Normalization rules:
//   - Empty or whitespace-only input yields "" (default Google endpoint).
//   - A leading http:// or https:// (matched case-insensitively) is preserved in
//     lowercase so http-only proxies and emulators (e.g. http://localhost:9050)
//     keep working; a missing scheme defaults to https.
//   - A single trailing slash is removed.
//   - A default port is appended to the host when absent (:80 for http, else
//     :443). The port is detected in the authority only, so an endpoint that
//     carries a path (https://proxy.example.com/bigquery/v2) or a bracketed
//     IPv6 literal (http://[::1]) still receives its default port in the right
//     place.
func normalizeAPIEndpoint(raw string) string {
	s := strings.TrimSpace(raw)
	if s == "" {
		return ""
	}
	scheme := "https://"
	for _, p := range []string{"https://", "http://"} {
		if len(s) >= len(p) && strings.EqualFold(s[:len(p)], p) {
			scheme, s = p, s[len(p):]
			break
		}
	}
	s = strings.TrimSuffix(s, "/")

	// Separate host[:port] from any path so the default port is never glued
	// onto the end of the path.
	authority, path := s, ""
	if i := strings.IndexByte(s, '/'); i >= 0 {
		authority, path = s[:i], s[i:]
	}

	// A port is present only when a ':' appears after the last ']'; the colons
	// inside a bracketed IPv6 literal such as [::1] are part of the address.
	if strings.LastIndexByte(authority, ':') <= strings.LastIndexByte(authority, ']') {
		if scheme == "http://" {
			authority += ":80"
		} else {
			authority += ":443"
		}
	}
	return scheme + authority + path
}
Comment on lines +29 to +50

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The current implementation of normalizeAPIEndpoint strips the URL scheme (e.g., http:// or https://) and appends :443 if no port is specified. This causes issues when a user explicitly configures an HTTP endpoint (such as http://localhost:8080 or http://proxy.example.com for local emulators or HTTP proxies).

When the scheme is stripped, the Google API client library's HTTP transport automatically prepends https:// because the endpoint does not contain ://. This forces the client to use HTTPS/TLS, which will fail against HTTP-only proxies or emulators.

We should preserve the scheme if it is present, and only append the default port (80 for HTTP, 443 for HTTPS or when no scheme is specified) if no port is explicitly provided.

// normalizeAPIEndpoint trims raw and returns "" when nothing is left. A leading
// https:// or http:// (matched case-insensitively) is kept in lowercase; input
// without a scheme is returned without one. One trailing slash is dropped, and
// hosts recognised by isDirectAPIEndpoint yield "". When the remaining host has
// no ':' a default port is appended: :80 for http, :443 otherwise.
func normalizeAPIEndpoint(raw string) string {
	trimmed := strings.TrimSpace(raw)
	if trimmed == "" {
		return ""
	}

	// Peel off a recognised scheme, checking https before http.
	scheme, host := "", trimmed
	if n := len("https://"); len(trimmed) >= n && strings.EqualFold(trimmed[:n], "https://") {
		scheme, host = "https://", trimmed[n:]
	} else if n := len("http://"); len(trimmed) >= n && strings.EqualFold(trimmed[:n], "http://") {
		scheme, host = "http://", trimmed[n:]
	}

	host = strings.TrimSuffix(host, "/")
	if isDirectAPIEndpoint(host) {
		return ""
	}

	if strings.Contains(host, ":") {
		return scheme + host
	}
	if scheme == "http://" {
		return scheme + host + ":80"
	}
	return scheme + host + ":443"
}

// isDirectAPIEndpoint reports whether host, compared in lowercase, is a key of
// directAPIEndpointAliases either as given or with everything from its last
// ':' onward removed.
func isDirectAPIEndpoint(host string) bool {
	name := strings.ToLower(host)

	_, direct := directAPIEndpointAliases[name]
	if direct {
		return true
	}

	// Retry without the suffix that follows the final colon (e.g. a port).
	colon := strings.LastIndex(name, ":")
	if colon == -1 {
		return false
	}
	_, direct = directAPIEndpointAliases[name[:colon]]
	return direct
}


// appendAPIEndpointOption returns opts with option.WithEndpoint appended when
// apiEndpoint normalizes to a non-empty endpoint; otherwise opts is returned
// unchanged.
func appendAPIEndpointOption(opts []option.ClientOption, apiEndpoint string) []option.ClientOption {
	endpoint := normalizeAPIEndpoint(apiEndpoint)
	if endpoint == "" {
		return opts
	}
	return append(opts, option.WithEndpoint(endpoint))
}
55 changes: 55 additions & 0 deletions internal/sources/bigquery/api_endpoint_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package bigquery

import (
"testing"

"google.golang.org/api/option"
)

// TestNormalizeAPIEndpoint runs normalizeAPIEndpoint over a table of raw
// endpoints (empty, with and without scheme, with and without port, with a
// trailing slash) and compares each result with the expected normalized form.
func TestNormalizeAPIEndpoint(t *testing.T) {
	type endpointCase struct {
		in   string
		want string
	}
	cases := []endpointCase{
		{in: "", want: ""},
		{in: " ", want: ""},
		{in: "https://proxy.example.com", want: "https://proxy.example.com:443"},
		{in: "https://proxy.example.com/", want: "https://proxy.example.com:443"},
		{in: "http://proxy.example.com", want: "http://proxy.example.com:80"},
		{in: "http://localhost:9050", want: "http://localhost:9050"},
		{in: "proxy.example.com", want: "https://proxy.example.com:443"},
		{in: "proxy.example.com:8443", want: "https://proxy.example.com:8443"},
	}
	for i := range cases {
		c := cases[i]
		// The raw input doubles as the subtest name.
		t.Run(c.in, func(t *testing.T) {
			got := normalizeAPIEndpoint(c.in)
			if got == c.want {
				return
			}
			t.Fatalf("normalizeAPIEndpoint(%q) = %q, want %q", c.in, got, c.want)
		})
	}
}

// TestAppendAPIEndpointOption checks that an empty endpoint leaves the option
// slice length unchanged and that a non-empty endpoint adds exactly one option.
func TestAppendAPIEndpointOption(t *testing.T) {
	base := []option.ClientOption{option.WithUserAgent("test")}
	wantUnset, wantSet := len(base), len(base)+1

	got := appendAPIEndpointOption(base, "")
	if len(got) != wantUnset {
		t.Fatalf("empty endpoint: got %d options, want %d", len(got), wantUnset)
	}

	got = appendAPIEndpointOption(base, "https://proxy.example.com")
	if len(got) != wantSet {
		t.Fatalf("set endpoint: got %d options, want %d", len(got), wantSet)
	}
}
32 changes: 23 additions & 9 deletions internal/sources/bigquery/bigquery.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ type Config struct {
Scopes StringOrStringSlice `yaml:"scopes"`
MaxQueryResultRows int `yaml:"maxQueryResultRows"`
MaximumBytesBilled int64 `yaml:"maximumBytesBilled" validate:"gte=0"`
// ApiEndpoint overrides the BigQuery API host (URL or host:port) for proxies
// and emulators. Empty uses the default Google endpoint; http endpoints are
// supported.
ApiEndpoint string `yaml:"apiEndpoint"`
}

// StringOrStringSlice is a custom type that can unmarshal both a single string
Expand Down Expand Up @@ -166,7 +170,7 @@ func (r Config) Initialize(ctx context.Context, tracer trace.Tracer) (sources.So

if strings.ToLower(r.UseClientOAuth) == "false" || r.UseClientOAuth == "" {
// Initializes a BigQuery Google SQL source
client, restService, tokenSource, err = initBigQueryConnection(ctx, tracer, r.Name, r.Project, r.Location, r.ImpersonateServiceAccount, r.Scopes)
client, restService, tokenSource, err = initBigQueryConnection(ctx, tracer, r.Name, r.Project, r.Location, r.ImpersonateServiceAccount, r.Scopes, r.ApiEndpoint)
if err != nil {
return nil, fmt.Errorf("error creating client from ADC: %w", err)
}
Expand All @@ -183,7 +187,7 @@ func (r Config) Initialize(ctx context.Context, tracer trace.Tracer) (sources.So
s.AuthTokenHeaderName = r.UseClientOAuth
}
// use client OAuth
baseClientCreator, err := newBigQueryClientCreator(ctx, tracer, r.Project, r.Location, r.Name)
baseClientCreator, err := newBigQueryClientCreator(ctx, tracer, r.Project, r.Location, r.Name, r.ApiEndpoint)
if err != nil {
return nil, fmt.Errorf("error constructing client creator: %w", err)
}
Expand Down Expand Up @@ -689,6 +693,7 @@ func initBigQueryConnection(
location string,
impersonateServiceAccount string,
scopes []string,
apiEndpoint string,
) (*bigqueryapi.Client, *bigqueryrestapi.Service, oauth2.TokenSource, error) {
ctx, span := sources.InitConnectionSpan(ctx, tracer, SourceType, name)
defer span.End()
Expand Down Expand Up @@ -721,21 +726,21 @@ func initBigQueryConnection(
return nil, nil, nil, fmt.Errorf("failed to create impersonated credentials for %q: %w", impersonateServiceAccount, err)
}
tokenSource = cloudPlatformTokenSource
opts = []option.ClientOption{
opts = appendAPIEndpointOption([]option.ClientOption{
option.WithUserAgent(userAgent),
option.WithTokenSource(cloudPlatformTokenSource),
}
}, apiEndpoint)
} else {
// Use default credentials
cred, err := google.FindDefaultCredentials(ctx, credScopes...)
if err != nil {
return nil, nil, nil, fmt.Errorf("failed to find default Google Cloud credentials with scopes %v: %w", credScopes, err)
}
tokenSource = cred.TokenSource
opts = []option.ClientOption{
opts = appendAPIEndpointOption([]option.ClientOption{
option.WithUserAgent(userAgent),
option.WithCredentials(cred),
}
}, apiEndpoint)
}

// Initialize the high-level BigQuery client
Expand Down Expand Up @@ -765,6 +770,7 @@ func initBigQueryConnectionWithOAuthToken(
userAgent string,
tokenString string,
wantRestService bool,
apiEndpoint string,
) (*bigqueryapi.Client, *bigqueryrestapi.Service, error) {
ctx, span := sources.InitConnectionSpan(ctx, tracer, SourceType, name)
defer span.End()
Expand All @@ -774,16 +780,21 @@ func initBigQueryConnectionWithOAuthToken(
}
ts := oauth2.StaticTokenSource(token)

oauthOpts := appendAPIEndpointOption([]option.ClientOption{
option.WithUserAgent(userAgent),
option.WithTokenSource(ts),
}, apiEndpoint)

// Initialize the BigQuery client with tokenSource
client, err := bigqueryapi.NewClient(ctx, project, option.WithUserAgent(userAgent), option.WithTokenSource(ts))
client, err := bigqueryapi.NewClient(ctx, project, oauthOpts...)
if err != nil {
return nil, nil, fmt.Errorf("failed to create BigQuery client for project %q: %w", project, err)
}
client.Location = location

if wantRestService {
// Initialize the low-level BigQuery REST service using the same credentials
restService, err := bigqueryrestapi.NewService(ctx, option.WithUserAgent(userAgent), option.WithTokenSource(ts))
restService, err := bigqueryrestapi.NewService(ctx, oauthOpts...)
if err != nil {
return nil, nil, fmt.Errorf("failed to create BigQuery v2 service: %w", err)
}
Expand All @@ -802,17 +813,20 @@ func newBigQueryClientCreator(
project string,
location string,
name string,
apiEndpoint string,
) (func(string, bool) (*bigqueryapi.Client, *bigqueryrestapi.Service, error), error) {
userAgent, err := util.UserAgentFromContext(ctx)
if err != nil {
return nil, err
}

return func(tokenString string, wantRestService bool) (*bigqueryapi.Client, *bigqueryrestapi.Service, error) {
return initBigQueryConnectionWithOAuthToken(ctx, tracer, project, location, name, userAgent, tokenString, wantRestService)
return initBigQueryConnectionWithOAuthToken(ctx, tracer, project, location, name, userAgent, tokenString, wantRestService, apiEndpoint)
}, nil
}

// apiEndpoint is intentionally not applied here: Dataplex (catalog search) and
// ask_data_insights use different API surfaces and are out of scope for this override.
func initDataplexConnection(
ctx context.Context,
tracer trace.Tracer,
Expand Down
18 changes: 18 additions & 0 deletions internal/sources/bigquery/bigquery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,24 @@ func TestParseFromYamlBigQuery(t *testing.T) {
},
},
},
{
desc: "with api endpoint example",
in: `
kind: source
name: my-instance
type: bigquery
project: my-project
apiEndpoint: https://proxy.example.com
`,
want: map[string]sources.SourceConfig{
"my-instance": bigquery.Config{
Name: "my-instance",
Type: bigquery.SourceType,
Project: "my-project",
ApiEndpoint: "https://proxy.example.com",
},
},
},
}
for _, tc := range tcs {
t.Run(tc.desc, func(t *testing.T) {
Expand Down