Skip to content

Commit a2b7d5a

Browse files
authored
Merge pull request #216 from mheilman/smoke_tests
Add script for smoke tests
2 parents ee5099b + b9d4bde commit a2b7d5a

File tree

2 files changed

+100
-0
lines changed

2 files changed

+100
-0
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ All notable changes to this project will be documented in this file.
33
This project adheres to [Semantic Versioning](http://semver.org/).
44

55
## Unreleased
6+
### Added
7+
- Added a script for integration tests (smoke tests).
68

79
### Changed
810
- Updated CivisML 2.0 notebook (#214)

tools/smoke_tests.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
This script tests end-to-end functionality using the Civis Python client.
5+
It uses the live Civis API and Redshift, so a valid CIVIS_API_KEY is needed.
6+
7+
This is based on a similar script for the R client:
8+
https://github.com/civisanalytics/civis-r/blob/master/tools/integration_tests/smoke_test.R
9+
"""
10+
11+
import io
12+
import logging
13+
import time
14+
15+
import civis
16+
import pandas as pd
17+
from sklearn.datasets import load_iris
18+
19+
20+
def main():
    """Run end-to-end smoke tests against the live Civis API.

    The checks run in order:

    1. Read ``datascience.iris`` with both ``read_civis_sql`` and
       ``read_civis`` and verify the two results match.
    2. Upload the scikit-learn iris data to a scratch table, read it back,
       compare it to what was uploaded, and drop the table.
    3. Upload a CSV file, download it again, and compare the bytes.
    4. Train a CivisML model on ``datascience.iris`` and check that the
       run succeeded.

    Raises
    ------
    AssertionError
        If any of the comparisons above fails.
    """
    logging.basicConfig(format="", level=logging.INFO)

    logger = logging.getLogger("civis")

    t0 = time.time()

    database = "redshift-general"
    client = civis.APIClient(resources="all")

    # Test read_civis and read_civis_sql produce the same results.
    # The table used here has an explicit index column to sort by in case the
    # rows come back in a different order.
    logger.info('Testing reading from redshift...')
    sql = "SELECT * FROM datascience.iris"
    df1 = civis.io.read_civis_sql(
        sql=sql, database=database, use_pandas=True, client=client
    ).sort_values(by='index')
    df2 = civis.io.read_civis(
        table="datascience.iris", database=database, use_pandas=True,
        client=client
    ).sort_values(by='index')
    assert df1.shape == (150, 6)
    pd.testing.assert_frame_equal(df1, df2)

    # Test uploading data.
    logger.info('Testing uploading to redshift...')
    # Timestamp suffix keeps the scratch table name distinct between runs.
    table = "scratch.smoke_test_{}".format(int(time.time()))
    iris = load_iris()
    df_iris1 = (
        pd.DataFrame(iris.data)
        .rename(columns={0: 'c0', 1: 'c1', 2: 'c2', 3: 'c3'})
        .join(pd.DataFrame(iris.target).rename(columns={0: 'label'}))
        .reset_index()
    )
    try:
        civis.io.dataframe_to_civis(
            df_iris1, database, table, client=client).result()
        df_iris2 = civis.io.read_civis(
            table=table, database=database, use_pandas=True, client=client)
        pd.testing.assert_frame_equal(
            df_iris1.sort_values(by='index').set_index('index'),
            df_iris2.sort_values(by='index').set_index('index')
        )
    finally:
        # Always clean up the scratch table, even if the comparison failed.
        civis.io.query_civis("DROP TABLE IF EXISTS %s" % table,
                             database=database, client=client)

    # Test uploading and downloading file.
    logger.info('Testing File uploading and downloading...')
    csv_bytes1 = df_iris1.to_csv(index=False).encode('utf-8')
    upload_buf = io.BytesIO(csv_bytes1)
    file_id = civis.io.file_to_civis(
        upload_buf, name="civis-python test file", client=client)
    # Download into a fresh buffer. Reusing the upload buffer would leave
    # the original bytes in place past the end of a short (or empty)
    # download, so a truncated file could still compare equal.
    download_buf = io.BytesIO()
    civis.io.civis_to_file(file_id, download_buf, client=client)
    csv_bytes2 = download_buf.getvalue()
    assert csv_bytes1 == csv_bytes2, "File upload/download did not match."

    # Test modeling.
    logger.info('Testing Civis-ML...')
    mp = civis.ml.ModelPipeline(
        model="sparse_logistic",
        dependent_variable="type",
        primary_key="index",
        client=client
    )
    result = mp.train(
        table_name="datascience.iris", database_name=database).result()
    assert result['state'] == 'succeeded'

    logger.info("%.1f seconds elapsed in total.", time.time() - t0)
95+
96+
97+
# Run the smoke tests only when this file is executed as a script.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)