from google.api_core import client_info as rest_client_info
from google.api_core.gapic_v1 import client_info as grpc_client_info
from google.cloud import bigquery, bigquery_storage
+from google.oauth2 import service_account

import dask_bigquery


@contextmanager
-def bigquery_clients(project_id):
+def bigquery_clients(project_id, cred_fpath):
    """This context manager is a temporary solution until there is an
    upstream solution to handle this.
    See googleapis/google-cloud-python#9457
@@ -30,7 +31,14 @@ def bigquery_clients(project_id):
        user_agent=f"dask-bigquery/{dask_bigquery.__version__}"
    )

-    with bigquery.Client(project_id, client_info=bq_client_info) as bq_client:
+    if cred_fpath:
+        credentials = service_account.Credentials.from_service_account_file(cred_fpath)
+    else:
+        credentials = cred_fpath  # no path given; leave as None to try default credentials
+
+    with bigquery.Client(
+        project_id, credentials=credentials, client_info=bq_client_info
+    ) as bq_client:
        bq_storage_client = bigquery_storage.BigQueryReadClient(
            credentials=bq_client._credentials,
            client_info=bqstorage_client_info,
@@ -53,6 +61,7 @@ def _stream_to_dfs(bqs_client, stream_name, schema, read_kwargs):
def bigquery_read(
    make_create_read_session_request: callable,
    project_id: str,
+    cred_fpath: str,
    read_kwargs: dict,
    stream_name: str,
) -> pd.DataFrame:
@@ -71,7 +80,7 @@ def bigquery_read(
        NOTE: Please set if reading from Storage API without any `row_restriction`.
        https://cloud.google.com/bigquery/docs/reference/storage/rpc/google.cloud.bigquery.storage.v1beta1#stream
    """
-    with bigquery_clients(project_id) as (_, bqs_client):
+    with bigquery_clients(project_id, cred_fpath) as (_, bqs_client):
        session = bqs_client.create_read_session(make_create_read_session_request())
        schema = pyarrow.ipc.read_schema(
            pyarrow.py_buffer(session.arrow_schema.serialized_schema)
@@ -89,6 +98,8 @@ def read_gbq(
    dataset_id: str,
    table_id: str,
    row_filter="",
+    *,
+    cred_fpath: str = None,
    read_kwargs: dict = None,
):
    """Read table as dask dataframe using BigQuery Storage API via Arrow format.
@@ -104,6 +115,8 @@ def read_gbq(
        BigQuery table within dataset
    row_filter: str
        SQL text filtering statement to pass to `row_restriction`
+    cred_fpath: str
+        Path to the service account key JSON file.
    read_kwargs: dict
        kwargs to pass to read_rows()
@@ -112,7 +125,7 @@ def read_gbq(
        Dask DataFrame
    """
    read_kwargs = read_kwargs or {}
-    with bigquery_clients(project_id) as (bq_client, bqs_client):
+    with bigquery_clients(project_id, cred_fpath) as (bq_client, bqs_client):
        table_ref = bq_client.get_table(f"{dataset_id}.{table_id}")
        if table_ref.table_type == "VIEW":
            raise TypeError("Table type VIEW not supported")
@@ -157,6 +170,7 @@ def make_create_read_session_request(row_filter=""):
                bigquery_read,
                make_create_read_session_request,
                project_id,
+                cred_fpath,
                read_kwargs,
            ),
            label=label,