21
21
22
22
23
23
@contextmanager
24
- def bigquery_clients (project_id , credentials ):
24
+ def bigquery_clients (project_id , credentials = None ):
25
25
"""This context manager is a temporary solution until there is an
26
26
upstream solution to handle this.
27
27
See googleapis/google-cloud-python#9457
@@ -73,17 +73,20 @@ def bigquery_read(
73
73
Name of the BigQuery project.
74
74
read_kwargs: dict
75
75
kwargs to pass to read_rows()
76
- creds: dict
77
- credentials dictionary
78
76
stream_name: str
79
77
BigQuery Storage API Stream "name"
80
78
NOTE: Please set if reading from Storage API without any `row_restriction`.
81
79
https://cloud.google.com/bigquery/docs/reference/storage/rpc/google.cloud.bigquery.storage.v1beta1#stream
80
+ cred_token: str
81
+ google_auth bearer token; when empty or None, credentials are set to None
82
82
"""
83
83
84
- credentials = google .oauth2 .credentials .Credentials (cred_token )
84
+ if cred_token :
85
+ credentials = google .oauth2 .credentials .Credentials (cred_token )
86
+ else :
87
+ credentials = None
85
88
86
- with bigquery_clients (project_id , credentials ) as (_ , bqs_client ):
89
+ with bigquery_clients (project_id , credentials = credentials ) as (_ , bqs_client ):
87
90
session = bqs_client .create_read_session (make_create_read_session_request ())
88
91
schema = pyarrow .ipc .read_schema (
89
92
pyarrow .py_buffer (session .arrow_schema .serialized_schema )
@@ -103,6 +106,7 @@ def read_gbq(
103
106
row_filter : str = "" ,
104
107
columns : list [str ] = None ,
105
108
read_kwargs : dict = None ,
109
+ fwd_creds : bool = False ,
106
110
):
107
111
"""Read table as dask dataframe using BigQuery Storage API via Arrow format.
108
112
Partitions will be approximately balanced according to BigQuery stream allocation logic.
@@ -121,26 +125,35 @@ def read_gbq(
121
125
list of columns to load from the table
122
126
read_kwargs: dict
123
127
kwargs to pass to read_rows()
128
+ fwd_creds: bool
129
+ Set to True to forward credentials to the workers. Defaults to False.
124
130
125
131
Returns
126
132
-------
127
133
Dask DataFrame
128
134
"""
129
135
read_kwargs = read_kwargs or {}
130
136
131
- creds_path = os .environ .get ("GOOGLE_APPLICATION_CREDENTIALS" )
132
- if creds_path is None :
133
- raise ValueError ("No credentials found" )
137
+ if fwd_creds :
138
+ creds_path = os .environ .get ("GOOGLE_APPLICATION_CREDENTIALS" )
139
+ if creds_path is None :
140
+ raise ValueError ("No credentials found" )
134
141
135
- credentials = service_account .Credentials .from_service_account_file (
136
- creds_path , scopes = ["https://www.googleapis.com/auth/bigquery.readonly" ]
137
- )
138
-
139
- auth_req = google .auth .transport .requests .Request ()
140
- credentials .refresh (auth_req )
141
- cred_token = credentials .token
142
+ credentials = service_account .Credentials .from_service_account_file (
143
+ creds_path , scopes = ["https://www.googleapis.com/auth/bigquery.readonly" ]
144
+ )
142
145
143
- with bigquery_clients (project_id , credentials ) as (bq_client , bqs_client ):
146
+ auth_req = google .auth .transport .requests .Request ()
147
+ credentials .refresh (auth_req )
148
+ cred_token = credentials .token
149
+ else :
150
+ credentials = None
151
+ cred_token = None
152
+
153
+ with bigquery_clients (project_id , credentials = credentials ) as (
154
+ bq_client ,
155
+ bqs_client ,
156
+ ):
144
157
table_ref = bq_client .get_table (f"{ dataset_id } .{ table_id } " )
145
158
if table_ref .table_type == "VIEW" :
146
159
raise TypeError ("Table type VIEW not supported" )
0 commit comments