Skip to content

Commit 3042245

Browse files
authored
issue204/ Validate correct handling of Numeric Datatypes in BigQuery (#210)
* Adding BigQuery numeric testing to validate behavior of BIGNUMERIC and other cross-datatype tests
* Tagging
* Test exact error and lint
* Add changes with working BIGNUMERIC test
* Remove unused imports
* Adding a timestamp to test
1 parent 01ad3ba commit 3042245

File tree

5 files changed

+60
-3
lines changed

5 files changed

+60
-3
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
## Untagged
44

5+
## 1.1.7
6+
7+
- Added tests to validate BigQuery BIGNUMERIC type behavior
8+
59
## 1.1.6
610

711
- Minor fix for the Teradata client, which was broken by breaking Ibis changes

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
name = "google-pso-data-validator"
2020
description = "A package to enable easy data validation"
21-
version = "1.1.6"
21+
version = "1.1.7"
2222
release_status = "Development Status :: 3 - Alpha"
2323

2424
with open("README.md", "r") as fh:
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
2+
-- BigQuery table with several different numeric datatypes with the same value
3+
CREATE OR REPLACE TABLE pso_data_validator.test_data_types AS
4+
SELECT
5+
CAST('1234567890123456789012345678901234.00' AS BIGNUMERIC) bignumeric_type,
6+
CAST(2 AS INT64) int_type,
7+
CAST(2 AS DECIMAL) decimal_type,
8+
CAST(2 AS STRING) text_type,
9+
CAST('2021-01-01 00:00:00' AS TIMESTAMP) timestamp_type
10+

tests/system/data_sources/test_bigquery.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,39 @@
9696
],
9797
}
9898

99+
# TODO: The definition for this table is stored in: ./tests/resources/
100+
CONFIG_NUMERIC_AGG_VALID = {
101+
# BigQuery Specific Connection Config
102+
consts.CONFIG_SOURCE_CONN: BQ_CONN,
103+
consts.CONFIG_TARGET_CONN: BQ_CONN,
104+
# Validation Type
105+
consts.CONFIG_TYPE: "Column",
106+
# Configuration Required Depending on Validator Type
107+
consts.CONFIG_SCHEMA_NAME: "pso_data_validator",
108+
consts.CONFIG_TABLE_NAME: "test_data_types",
109+
consts.CONFIG_AGGREGATES: [
110+
{
111+
consts.CONFIG_TYPE: "count",
112+
consts.CONFIG_SOURCE_COLUMN: None,
113+
consts.CONFIG_TARGET_COLUMN: None,
114+
consts.CONFIG_FIELD_ALIAS: "count",
115+
},
116+
{
117+
consts.CONFIG_TYPE: "sum",
118+
consts.CONFIG_SOURCE_COLUMN: "int_type",
119+
consts.CONFIG_TARGET_COLUMN: "decimal_type",
120+
consts.CONFIG_FIELD_ALIAS: "compare_int_decimal",
121+
},
122+
{
123+
consts.CONFIG_TYPE: "sum",
124+
consts.CONFIG_SOURCE_COLUMN: "bignumeric_type",
125+
consts.CONFIG_TARGET_COLUMN: "bignumeric_type",
126+
consts.CONFIG_FIELD_ALIAS: "compare_bignumeric",
127+
},
128+
],
129+
consts.CONFIG_GROUPED_COLUMNS: [],
130+
}
131+
99132
BQ_CONN_NAME = "bq-integration-test"
100133
CLI_CONFIG_FILE = "example_test.yaml"
101134

@@ -185,6 +218,16 @@ def test_grouped_count_validator():
185218
assert row["source_agg_value"] == row["target_agg_value"]
186219

187220

221+
def test_numeric_types():
222+
validator = data_validation.DataValidation(CONFIG_NUMERIC_AGG_VALID, verbose=True)
223+
df = validator.execute()
224+
225+
for validation in df.to_dict(orient="records"):
226+
assert float(validation["source_agg_value"]) == float(
227+
validation["target_agg_value"]
228+
)
229+
230+
188231
def test_cli_store_yaml_then_run():
189232
# Store BQ Connection
190233
_store_bq_conn()

third_party/ibis/ibis_addon/datatypes.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121

2222

2323
# BigQuery BIGNUMERIC support needs to be pushed to Ibis
24-
bigquery._pandas_helpers.BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow.string
25-
_DTYPE_TO_IBIS_TYPE["BIGNUMERIC"] = dt.string
24+
bigquery._pandas_helpers.BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow.decimal256
25+
_DTYPE_TO_IBIS_TYPE["BIGNUMERIC"] = dt.float64
2626
_DTYPE_TO_IBIS_TYPE["NUMERIC"] = dt.float64
2727

2828

0 commit comments

Comments
 (0)