forked from move-coop/parsons
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_postgres.py
More file actions
248 lines (201 loc) · 9.01 KB
/
test_postgres.py
File metadata and controls
248 lines (201 loc) · 9.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
import os
import unittest
import pytest
from parsons import Postgres, Table
from test.utils import assert_matching_tables, mark_live_test
# The name of the schema and will be temporarily created for the tests
TEMP_SCHEMA = "parsons_test"
# These tests do not interact with the Postgres Database directly, and don't need real credentials
class TestPostgresCreateStatement(unittest.TestCase):
def setUp(self):
self.pg = Postgres(username="test", password="test", host="test", db="test", port=123)
self.tbl = Table([["ID", "Name"], [1, "Jim"], [2, "John"], [3, "Sarah"]])
self.tbl2 = Table(
[
["c1", "c2", "c3", "c4", "c5", "c6", "c7"],
["a", "", 1, "NA", 1.4, 1, 2],
["b", "", 2, "NA", 1.4, 1, 2],
["c", "", 3.4, "NA", "", "", "a"],
["d", "", 5, "NA", 1.4, 1, 2],
["e", "", 6, "NA", 1.4, 1, 2],
["f", "", 7.8, "NA", 1.4, 1, 2],
["g", "", 9, "NA", 1.4, 1, 2],
]
)
self.mapping = self.pg.generate_data_types(self.tbl)
self.mapping2 = self.pg.generate_data_types(self.tbl2)
def test_connection(self):
# Test connection with kwargs passed
Postgres(username="test", password="test", host="test", db="test")
# Test connection with env variables
os.environ["PGUSER"] = "user_env"
os.environ["PGPASSWORD"] = "pass_env"
os.environ["PGHOST"] = "host_env"
os.environ["PGDATABASE"] = "db_env"
os.environ["PGPORT"] = "5432"
pg_env = Postgres()
assert pg_env.username == "user_env"
assert pg_env.password == "pass_env"
assert pg_env.host == "host_env"
assert pg_env.db == "db_env"
assert pg_env.port == 5432
def test_data_type(self):
# Test smallint
assert self.pg.data_type(1, "") == "smallint"
assert self.pg.data_type(2, "") == "smallint"
# Test int
assert self.pg.data_type(32769, "") == "int"
# Test bigint
assert self.pg.data_type(2147483648, "") == "bigint"
# Test varchar that looks like an int
assert self.pg.data_type("00001", "") == "varchar"
# Test a float as a decimal
assert self.pg.data_type(5.001, "") == "decimal"
# Test varchar
assert self.pg.data_type("word", "") == "varchar"
# Test int with underscore as string
assert self.pg.data_type("1_2", "") == "varchar"
# Test int with leading zero as string
assert self.pg.data_type("01", "") == "varchar"
# Test int with underscore
assert self.pg.data_type(12, "") == "smallint"
# Test bool
assert self.pg.data_type(True, "") == "bool"
def test_generate_data_types(self):
# Test correct header labels
assert self.mapping["headers"] == ["ID", "Name"]
# Test correct data types
assert self.mapping["type_list"] == ["smallint", "varchar"]
assert self.mapping2["type_list"] == [
"varchar",
"varchar",
"decimal",
"varchar",
"decimal",
"smallint",
"varchar",
]
# Test correct lengths
assert self.mapping["longest"] == [1, 5]
def test_vc_padding(self):
# Test padding calculated correctly
assert self.pg.vc_padding(self.mapping, 0.2) == [1, 6]
def test_vc_max(self):
# Test max sets it to the max
assert self.pg.vc_max(self.mapping, ["Name"]) == [1, 65535]
# Test raises when can't find column
# To Do
def test_vc_validate(self):
# Test that a column with a width of 0 is set to 1
self.mapping["longest"][0] = 0
self.mapping = self.pg.vc_validate(self.mapping)
assert self.mapping == [1, 5]
def test_create_sql(self):
# Test the the statement is expected
sql = self.pg.create_sql("tmc.test", self.mapping, distkey="ID")
exp_sql = "create table tmc.test (\n id smallint,\n name varchar(5)) \ndistkey(ID) ;"
assert sql == exp_sql
def test_column_validate(self):
bad_cols = [
"a",
"a",
"",
"SELECT",
"asdfjkasjdfklasjdfklajskdfljaskldfjaklsdfjlaksdfjklasjdfklasjdkfljaskldfljkasjdkfasjlkdfjklasdfjklakjsfasjkdfljaslkdfjklasdfjklasjkldfakljsdfjalsdkfjklasjdfklasjdfklasdkljf",
]
fixed_cols = [
"a",
"a_1",
"col_2",
"col_3",
"asdfjkasjdfklasjdfklajskdfljaskldfjaklsdfjlaksdfjklasjdfklasjdkfljaskldfljkasjdkfasjlkdfjklasdfjklakjsfasjkdfljaslkdfjkl",
]
assert self.pg.column_name_validate(bad_cols) == fixed_cols
def test_create_statement(self):
# Assert that copy statement is expected
sql = self.pg.create_statement(self.tbl, "tmc.test", distkey="ID")
exp_sql = (
"""create table tmc.test (\n "id" smallint,\n "name" varchar(5)) \ndistkey(ID) ;"""
)
assert sql == exp_sql
# Assert that an error is raised by an empty table
empty_table = Table([["Col_1", "Col_2"]])
with pytest.raises(ValueError, match="Table is empty. Must have 1 or more rows"): # noqa: PT011
self.pg.create_statement(empty_table, "tmc.test")
# These tests interact directly with the Postgres database
@mark_live_test
class TestPostgresDB(unittest.TestCase):
def setUp(self):
self.temp_schema = TEMP_SCHEMA
self.pg = Postgres()
self.tbl = Table([["ID", "Name"], [1, "Jim"], [2, "John"], [3, "Sarah"]])
# Create a schema, create a table, create a view
setup_sql = f"""
drop schema if exists {self.temp_schema} cascade;
create schema {self.temp_schema};
"""
other_sql = f"""
create table {self.temp_schema}.test (id smallint,name varchar(5));
create view {self.temp_schema}.test_view as (select * from {self.temp_schema}.test);
"""
self.pg.query(setup_sql)
self.pg.query(other_sql)
def tearDown(self):
# Drop the view, the table and the schema
teardown_sql = f"""
drop schema if exists {self.temp_schema} cascade;
"""
self.pg.query(teardown_sql)
def test_query(self):
# Check that query sending back expected result
r = self.pg.query("select 1")
assert r[0]["?column?"] == 1
def test_query_with_parameters(self):
table_name = f"{self.temp_schema}.test"
self.pg.copy(self.tbl, f"{self.temp_schema}.test", if_exists="append")
sql = f"select * from {table_name} where name = %s"
name = "Sarah"
r = self.pg.query(sql, parameters=[name])
assert r[0]["name"] == name
sql = f"select * from {table_name} where name in (%s, %s)"
names = ["Sarah", "John"]
r = self.pg.query(sql, parameters=names)
assert r.num_rows == 2
def test_copy(self):
# Copy a table and ensure table exists
self.pg.copy(self.tbl, f"{self.temp_schema}.test_copy", if_exists="drop")
r = self.pg.query(f"select * from {self.temp_schema}.test_copy where name='Jim'")
assert r[0]["id"] == 1
# Copy table and ensure truncate works.
self.pg.copy(self.tbl, f"{self.temp_schema}.test_copy", if_exists="truncate")
tbl = self.pg.query(f"select count(*) from {self.temp_schema}.test_copy")
assert tbl.first == 3
# Copy table and ensure that drop works.
self.pg.copy(self.tbl, f"{self.temp_schema}.test_copy", if_exists="drop")
tbl = self.pg.query(f"select count(*) from {self.temp_schema}.test_copy")
assert tbl.first == 3
# Copy table and ensure that append works.
self.pg.copy(self.tbl, f"{self.temp_schema}.test_copy", if_exists="append")
tbl = self.pg.query(f"select count(*) from {self.temp_schema}.test_copy")
assert tbl.first == 6
# Try to copy the table and ensure that default fail works.
with pytest.raises(ValueError): # noqa: PT011
self.pg.copy(self.tbl, f"{self.temp_schema}.test_copy")
# Try to copy the table and ensure that explicit fail works.
with pytest.raises(ValueError): # noqa: PT011
self.pg.copy(
self.tbl,
f"{self.temp_schema}.test_copy",
if_exists="fail",
)
def test_to_postgres(self):
self.tbl.to_postgres(f"{self.temp_schema}.test_copy")
r = self.pg.query(f"select * from {self.temp_schema}.test_copy where name='Jim'")
assert r[0]["id"] == 1
def test_from_postgres(self):
tbl = Table([["id", "name"], [1, "Jim"], [2, "John"], [3, "Sarah"]])
self.pg.copy(self.tbl, f"{self.temp_schema}.test_copy", if_exists="drop")
out_tbl = self.tbl.from_postgres(f"SELECT * FROM {self.temp_schema}.test_copy")
assert_matching_tables(out_tbl, tbl)
if __name__ == "__main__":
unittest.main()