From 5f49726217bc729f8aac22f650d05fda903e15d1 Mon Sep 17 00:00:00 2001
From: sharinetmc
Date: Thu, 14 Aug 2025 12:29:33 -1000
Subject: [PATCH 1/6] remove error

---
 parsons/google/google_bigquery.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/parsons/google/google_bigquery.py b/parsons/google/google_bigquery.py
index 3268bfb4b6..62c6e18c35 100644
--- a/parsons/google/google_bigquery.py
+++ b/parsons/google/google_bigquery.py
@@ -1565,8 +1565,6 @@ def _validate_copy_inputs(self, if_exists: str, data_type: str):
             f"Unexpected value for if_exists: {if_exists}, must be one of "
             '"append", "drop", "truncate", or "fail"'
         )
-        if data_type not in ["csv", "json"]:
-            raise ValueError(f"Only supports csv or json files [data_type = {data_type}]")
 
     def _load_table_from_uri(
         self, source_uris, destination, job_config, max_timeout, **load_kwargs

From b563fd7ce4b727d89814551abc47e83fd1a73aec Mon Sep 17 00:00:00 2001
From: sharinetmc
Date: Thu, 14 Aug 2025 12:30:00 -1000
Subject: [PATCH 2/6] remove param

---
 parsons/google/google_bigquery.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parsons/google/google_bigquery.py b/parsons/google/google_bigquery.py
index 62c6e18c35..649a7c4792 100644
--- a/parsons/google/google_bigquery.py
+++ b/parsons/google/google_bigquery.py
@@ -1559,7 +1559,7 @@ def _fetch_query_results(self, cursor) -> Table:
         ptable = petl.frompickle(temp_filename)
         return Table(ptable)
 
-    def _validate_copy_inputs(self, if_exists: str, data_type: str):
+    def _validate_copy_inputs(self, if_exists: str):
         if if_exists not in ["fail", "truncate", "append", "drop"]:
             raise ValueError(
                 f"Unexpected value for if_exists: {if_exists}, must be one of "

From a6fa8cd07a14cfdfc48d5468d5fa80219a8c78ab Mon Sep 17 00:00:00 2001
From: sharinetmc
Date: Thu, 14 Aug 2025 12:32:45 -1000
Subject: [PATCH 3/6] add override_data_type_check

---
 parsons/google/google_bigquery.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/parsons/google/google_bigquery.py b/parsons/google/google_bigquery.py
index 649a7c4792..74d20bcf11 100644
--- a/parsons/google/google_bigquery.py
+++ b/parsons/google/google_bigquery.py
@@ -456,7 +456,7 @@ def copy_from_gcs(
                 Other arguments to pass to the underlying load_table_from_uri call on the
                 BigQuery client.
""" - self._validate_copy_inputs(if_exists=if_exists, data_type=data_type) + self._validate_copy_inputs(if_exists=if_exists, data_type=data_type, override_data_type_check=True) job_config = self._process_job_config( job_config=job_config, @@ -1559,12 +1559,14 @@ def _fetch_query_results(self, cursor) -> Table: ptable = petl.frompickle(temp_filename) return Table(ptable) - def _validate_copy_inputs(self, if_exists: str): + def _validate_copy_inputs(self, if_exists: str, data_type: str, override_data_type_check: bool = False): if if_exists not in ["fail", "truncate", "append", "drop"]: raise ValueError( f"Unexpected value for if_exists: {if_exists}, must be one of " '"append", "drop", "truncate", or "fail"' ) + if data_type not in ["csv", "json"] and not override_data_type_check: + raise ValueError(f"Only supports csv or json files [data_type = {data_type}]") def _load_table_from_uri( self, source_uris, destination, job_config, max_timeout, **load_kwargs From b3193bed0eb69c4265e696769178fa3c9e378d49 Mon Sep 17 00:00:00 2001 From: sharinetmc Date: Thu, 14 Aug 2025 12:35:13 -1000 Subject: [PATCH 4/6] ruff --- parsons/google/google_bigquery.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/parsons/google/google_bigquery.py b/parsons/google/google_bigquery.py index 74d20bcf11..181ba7cbe6 100644 --- a/parsons/google/google_bigquery.py +++ b/parsons/google/google_bigquery.py @@ -456,7 +456,9 @@ def copy_from_gcs( Other arguments to pass to the underlying load_table_from_uri call on the BigQuery client. """ - self._validate_copy_inputs(if_exists=if_exists, data_type=data_type, override_data_type_check=True) + self._validate_copy_inputs( + if_exists=if_exists, data_type=data_type, override_data_type_check=True + ) job_config = self._process_job_config( job_config=job_config, @@ -1559,7 +1561,9 @@ def _fetch_query_results(self, cursor) -> Table: ptable = petl.frompickle(temp_filename) return Table(ptable) - def _validate_copy_inputs(self, if_exists: str, data_type: str, override_data_type_check: bool = False): + def _validate_copy_inputs( + self, if_exists: str, data_type: str, override_data_type_check: bool = False + ): if if_exists not in ["fail", "truncate", "append", "drop"]: raise ValueError( f"Unexpected value for if_exists: {if_exists}, must be one of " From b708281dfd255ec1fd6fb064894007c7bf3baedb Mon Sep 17 00:00:00 2001 From: sharinetmc Date: Fri, 15 Aug 2025 08:49:09 -1000 Subject: [PATCH 5/6] add accepted data types param --- parsons/google/google_bigquery.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/parsons/google/google_bigquery.py b/parsons/google/google_bigquery.py index 181ba7cbe6..9d8e15fd29 100644 --- a/parsons/google/google_bigquery.py +++ b/parsons/google/google_bigquery.py @@ -456,9 +456,7 @@ def copy_from_gcs( Other arguments to pass to the underlying load_table_from_uri call on the BigQuery client. """ - self._validate_copy_inputs( - if_exists=if_exists, data_type=data_type, override_data_type_check=True - ) + self._validate_copy_inputs(if_exists=if_exists, data_type=data_type) job_config = self._process_job_config( job_config=job_config, @@ -626,7 +624,11 @@ def copy_large_compressed_file_from_gcs( client. 
""" - self._validate_copy_inputs(if_exists=if_exists, data_type=data_type) + self._validate_copy_inputs( + if_exists=if_exists, + data_type=data_type, + accepted_data_types=["csv", "newline_delimited_json"], + ) job_config = self._process_job_config( job_config=job_config, @@ -1007,7 +1009,9 @@ def _prepare_local_upload_job( ): data_type = "csv" - self._validate_copy_inputs(if_exists=if_exists, data_type=data_type) + self._validate_copy_inputs( + if_exists=if_exists, data_type=data_type, accepted_data_types=["csv"] + ) # If our source table is loaded from CSV with no transformations # The original source file will be directly loaded to GCS @@ -1562,15 +1566,26 @@ def _fetch_query_results(self, cursor) -> Table: return Table(ptable) def _validate_copy_inputs( - self, if_exists: str, data_type: str, override_data_type_check: bool = False + self, + if_exists: str, + data_type: str, + accepted_data_types: list[str] = [ + "csv", + "json", + "parquet", + "datastore_backup", + "newline_delimited_json", + "avro", + "orc", + ], ): if if_exists not in ["fail", "truncate", "append", "drop"]: raise ValueError( f"Unexpected value for if_exists: {if_exists}, must be one of " '"append", "drop", "truncate", or "fail"' ) - if data_type not in ["csv", "json"] and not override_data_type_check: - raise ValueError(f"Only supports csv or json files [data_type = {data_type}]") + if data_type not in accepted_data_types: + raise ValueError(f"Only supports {accepted_data_types} files [data_type = {data_type}]") def _load_table_from_uri( self, source_uris, destination, job_config, max_timeout, **load_kwargs From 7ca1e5b25c3c9e9d20206a52d038d1e37f7f808c Mon Sep 17 00:00:00 2001 From: sharinetmc Date: Fri, 15 Aug 2025 08:53:29 -1000 Subject: [PATCH 6/6] remove mutable defualt --- parsons/google/google_bigquery.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/parsons/google/google_bigquery.py b/parsons/google/google_bigquery.py index 9d8e15fd29..f804eebc2e 100644 --- a/parsons/google/google_bigquery.py +++ b/parsons/google/google_bigquery.py @@ -456,7 +456,19 @@ def copy_from_gcs( Other arguments to pass to the underlying load_table_from_uri call on the BigQuery client. """ - self._validate_copy_inputs(if_exists=if_exists, data_type=data_type) + self._validate_copy_inputs( + if_exists=if_exists, + data_type=data_type, + accepted_data_types=[ + "csv", + "json", + "parquet", + "datastore_backup", + "newline_delimited_json", + "avro", + "orc", + ], + ) job_config = self._process_job_config( job_config=job_config, @@ -1565,25 +1577,13 @@ def _fetch_query_results(self, cursor) -> Table: ptable = petl.frompickle(temp_filename) return Table(ptable) - def _validate_copy_inputs( - self, - if_exists: str, - data_type: str, - accepted_data_types: list[str] = [ - "csv", - "json", - "parquet", - "datastore_backup", - "newline_delimited_json", - "avro", - "orc", - ], - ): + def _validate_copy_inputs(self, if_exists: str, data_type: str, accepted_data_types: list[str]): if if_exists not in ["fail", "truncate", "append", "drop"]: raise ValueError( f"Unexpected value for if_exists: {if_exists}, must be one of " '"append", "drop", "truncate", or "fail"' ) + if data_type not in accepted_data_types: raise ValueError(f"Only supports {accepted_data_types} files [data_type = {data_type}]")