#!/usr/bin/env python3

"""
This script tests end-to-end functionality using the Civis Python client.
It uses the live Civis API and Redshift, so a valid CIVIS_API_KEY is needed.

This is based on a similar script for the R client:
https://github.com/civisanalytics/civis-r/blob/master/tools/integration_tests/smoke_test.R
"""

import io
import logging
import time

import civis
import pandas as pd
from sklearn.datasets import load_iris


def main():
    logging.basicConfig(format="", level=logging.INFO)

    logger = logging.getLogger("civis")

    t0 = time.time()

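    # Connect to the live Civis API. resources="all" asks the client to expose
    # every endpoint this API key can access, not just the base set.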
    database = "redshift-general"
    client = civis.APIClient(resources="all")

    # Test read_civis and read_civis_sql produce the same results.
    # The table used here has an explicit index column to sort by in case the
    # rows come back in a different order.
    logger.info('Testing reading from redshift...')
    sql = "SELECT * FROM datascience.iris"
    df1 = civis.io.read_civis_sql(
        sql=sql, database=database, use_pandas=True, client=client
    ).sort_values(by='index')
    df2 = civis.io.read_civis(
        table="datascience.iris", database=database, use_pandas=True,
        client=client
    ).sort_values(by='index')
    assert df1.shape == (150, 6)
    pd.testing.assert_frame_equal(df1, df2)

    # Test uploading data.
    logger.info('Testing uploading to redshift...')
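    # Write to a uniquely named (timestamped) scratch table so concurrent runs
    # don't collide; the table is dropped again in the finally block below.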
    table = "scratch.smoke_test_{}".format(int(time.time()))
    iris = load_iris()
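    # Build a DataFrame with feature columns c0-c3, the target as 'label', and
    # an explicit 'index' column (from reset_index) to use as the primary key.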
    df_iris1 = (
        pd.DataFrame(iris.data)
        .rename(columns={0: 'c0', 1: 'c1', 2: 'c2', 3: 'c3'})
        .join(pd.DataFrame(iris.target).rename(columns={0: 'label'}))
        .reset_index()
    )
    try:
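        # dataframe_to_civis returns a CivisFuture; .result() blocks until the
        # Redshift import job finishes (and raises if it fails).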
        civis.io.dataframe_to_civis(
            df_iris1, database, table, client=client).result()
        df_iris2 = civis.io.read_civis(
            table=table, database=database, use_pandas=True, client=client)
        pd.testing.assert_frame_equal(
            df_iris1.sort_values(by='index').set_index('index'),
            df_iris2.sort_values(by='index').set_index('index')
        )
    finally:
        civis.io.query_civis("DROP TABLE IF EXISTS %s" % table,
                             database=database, client=client)

    # Test uploading and downloading a file.
    logger.info('Testing File uploading and downloading...')
    buf = io.BytesIO()
    csv_bytes1 = df_iris1.to_csv(index=False).encode('utf-8')
    buf.write(csv_bytes1)
    buf.seek(0)
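    # file_to_civis uploads the buffer to Civis Files and returns the new
    # file's ID.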
    file_id = civis.io.file_to_civis(
        buf, name="civis-python test file", client=client)
    buf.seek(0)
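    # Re-use the same buffer for the download: civis_to_file writes the file's
    # bytes back into it, which should reproduce the uploaded CSV exactly.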
    civis.io.civis_to_file(file_id, buf, client=client)
    buf.seek(0)
    csv_bytes2 = buf.read()
    assert csv_bytes1 == csv_bytes2, "File upload/download did not match."

    # Test modeling.
    logger.info('Testing Civis-ML...')
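    # CivisML: fit the built-in "sparse_logistic" model to the iris table.
    # train() returns a ModelFuture; .result() blocks until the job finishes.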
    mp = civis.ml.ModelPipeline(
        model="sparse_logistic",
        dependent_variable="type",
        primary_key="index",
        client=client
    )
    result = mp.train(
        table_name="datascience.iris", database_name=database).result()
    assert result['state'] == 'succeeded'

    logger.info("%.1f seconds elapsed in total.", time.time() - t0)


if __name__ == '__main__':
    main()