Skip to content

Commit 413e8ab

Browse files
committed
dbutils
1 parent 410c5cb commit 413e8ab

File tree

7 files changed

+62
-58
lines changed

7 files changed

+62
-58
lines changed

.DS_Store

6 KB
Binary file not shown.

build/lib/data_setup/test_data_setup.py

+30-28
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,37 @@
1-
class test_dataset:
1+
from pyspark.dbutils import DBUtils
22

3-
# spend_csv contains:
4-
# 3 "standard spends"
5-
# 1 spend with Null age
6-
# 1 spend with null ID
7-
# 1 spend with incompatible schema (ID as string)
8-
spend_csv = """id,age,annual_income,spending_core
9-
3,47,858.9,99.4
10-
1,47,861.9,48.1
11-
2,97,486.4,880.8
12-
4,,283.8,117.8
13-
,95,847.5,840.9
14-
invalid_id,1,514.5,284.5"""
3+
class test_data:
4+
def __init__(self):
5+
# spend_csv contains:
6+
# 3 "standard spends"
7+
# 1 spend with Null age
8+
# 1 spend with null ID
9+
# 1 spend with incompatible schema (ID as string)
10+
self.spend_csv = """id,age,annual_income,spending_core
11+
3,47,858.9,99.4
12+
1,47,861.9,48.1
13+
2,97,486.4,880.8
14+
4,,283.8,117.8
15+
,95,847.5,840.9
16+
invalid_id,1,514.5,284.5"""
1517

16-
# users_json contains:
17-
# 4 "standard users"
18-
# 1 user with Null ID
19-
# 1 user with an ID as a string
20-
users_json = """{"id":1,"email":"[email protected]","creation_date":"11-28-2021 12:08:46","last_activity_date":"08-20-2021 08:24:44","firstname":"Randall","lastname":"Espinoza","address":"71571 Jennifer Creek - East John, CO 81653","city":"Port Nicholas","last_ip":"22.207.225.77","postcode":"62389"}
21-
{"id":4,"email":"[email protected]","creation_date":"06-30-2022 22:51:30","last_activity_date":"08-22-2021 17:25:06","firstname":"Jose","lastname":"Bell","address":"865 Young Crest - Lake Adriennebury, VA 67749","city":"Brownstad","last_ip":"159.111.101.250","postcode":"52432"}
22-
{"id":0,"email":"[email protected]","creation_date":"10-21-2021 02:37:38","last_activity_date":"07-22-2021 15:06:48","firstname":"Dylan","lastname":"Barber","address":"7995 Ronald Flat Suite 597 - Williefurt, AL 37894","city":"Port Steven","last_ip":"173.88.213.168","postcode":"58368"}
23-
{"id":3,"email":"[email protected]","creation_date":"07-06-2022 12:27:24","last_activity_date":"01-09-2022 15:04:45","firstname":"Phillip","lastname":"Morgan","address":"523 Garza Crossroad - New Maryview, OK 92301","city":"Julieshire","last_ip":"170.233.120.199","postcode":"34528"}
24-
{"id":2,"email":"[email protected]","creation_date":"09-10-2021 02:31:37","last_activity_date":"01-11-2022 20:39:01","firstname":"Gregory","lastname":"Crane","address":"068 Shawn Port - West Jessica, KS 84864","city":"South Tonya","last_ip":"192.220.63.96","postcode":"88033"}
25-
{"email":"[email protected]","creation_date":"05-28-2022 09:54:50","last_activity_date":"12-18-2021 21:48:48","firstname":"Jeremy","lastname":"Knight","address":"06183 Acevedo Bypass - Petermouth, ME 34177","city":"West Brianburgh","last_ip":"53.240.159.208","postcode":"73380"}
26-
{"id":"invalid ID","email":"[email protected]","creation_date":"12-20-2021 19:57:28","last_activity_date":"07-27-2021 09:39:28","firstname":"Angela","lastname":"Adams","address":"098 Daniel Ferry Suite 565 - South Andrea, ND 36326","city":"New Mariafort","last_ip":"7.176.250.65","postcode":"21300"}"""
18+
# users_json contains:
19+
# 4 "standard users"
20+
# 1 user with Null ID
21+
# 1 user with an ID as a string
22+
self.users_json = """{"id":1,"email":"[email protected]","creation_date":"11-28-2021 12:08:46","last_activity_date":"08-20-2021 08:24:44","firstname":"Randall","lastname":"Espinoza","address":"71571 Jennifer Creek - East John, CO 81653","city":"Port Nicholas","last_ip":"22.207.225.77","postcode":"62389"}
23+
{"id":4,"email":"[email protected]","creation_date":"06-30-2022 22:51:30","last_activity_date":"08-22-2021 17:25:06","firstname":"Jose","lastname":"Bell","address":"865 Young Crest - Lake Adriennebury, VA 67749","city":"Brownstad","last_ip":"159.111.101.250","postcode":"52432"}
24+
{"id":0,"email":"[email protected]","creation_date":"10-21-2021 02:37:38","last_activity_date":"07-22-2021 15:06:48","firstname":"Dylan","lastname":"Barber","address":"7995 Ronald Flat Suite 597 - Williefurt, AL 37894","city":"Port Steven","last_ip":"173.88.213.168","postcode":"58368"}
25+
{"id":3,"email":"[email protected]","creation_date":"07-06-2022 12:27:24","last_activity_date":"01-09-2022 15:04:45","firstname":"Phillip","lastname":"Morgan","address":"523 Garza Crossroad - New Maryview, OK 92301","city":"Julieshire","last_ip":"170.233.120.199","postcode":"34528"}
26+
{"id":2,"email":"[email protected]","creation_date":"09-10-2021 02:31:37","last_activity_date":"01-11-2022 20:39:01","firstname":"Gregory","lastname":"Crane","address":"068 Shawn Port - West Jessica, KS 84864","city":"South Tonya","last_ip":"192.220.63.96","postcode":"88033"}
27+
{"email":"[email protected]","creation_date":"05-28-2022 09:54:50","last_activity_date":"12-18-2021 21:48:48","firstname":"Jeremy","lastname":"Knight","address":"06183 Acevedo Bypass - Petermouth, ME 34177","city":"West Brianburgh","last_ip":"53.240.159.208","postcode":"73380"}
28+
{"id":"invalid ID","email":"[email protected]","creation_date":"12-20-2021 19:57:28","last_activity_date":"07-27-2021 09:39:28","firstname":"Angela","lastname":"Adams","address":"098 Daniel Ferry Suite 565 - South Andrea, ND 36326","city":"New Mariafort","last_ip":"7.176.250.65","postcode":"21300"}"""
2729

28-
def put_spend_csv():
29-
dbutils.fs.put('/Users/[email protected]/test_data/spend_csv/spend.csv', spend_csv, True)
30+
def put_spend_csv(self):
31+
dbutils.fs.put('/Users/[email protected]/test_data/spend_csv/spend.csv', self.spend_csv, True)
3032

31-
def put_users_json():
32-
dbutils.fs.put('/Users/[email protected]/test_data/users_json/users.json', users_json, True)
33+
def put_users_json(self):
34+
dbutils.fs.put('/Users/[email protected]/test_data/users_json/users.json', self.users_json, True)
3335

3436
# MAGIC %fs head /Users/[email protected]/test_data/users_json/users.json
3537

data_setup/test_data_setup.py

+30-28
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,37 @@
1-
class test_dataset:
1+
from pyspark.dbutils import DBUtils
22

3-
# spend_csv contains:
4-
# 3 "standard spends"
5-
# 1 spend with Null age
6-
# 1 spend with null ID
7-
# 1 spend with incompatible schema (ID as string)
8-
spend_csv = """id,age,annual_income,spending_core
9-
3,47,858.9,99.4
10-
1,47,861.9,48.1
11-
2,97,486.4,880.8
12-
4,,283.8,117.8
13-
,95,847.5,840.9
14-
invalid_id,1,514.5,284.5"""
3+
class test_data:
4+
def __init__(self):
5+
# spend_csv contains:
6+
# 3 "standard spends"
7+
# 1 spend with Null age
8+
# 1 spend with null ID
9+
# 1 spend with incompatible schema (ID as string)
10+
self.spend_csv = """id,age,annual_income,spending_core
11+
3,47,858.9,99.4
12+
1,47,861.9,48.1
13+
2,97,486.4,880.8
14+
4,,283.8,117.8
15+
,95,847.5,840.9
16+
invalid_id,1,514.5,284.5"""
1517

16-
# users_json contains:
17-
# 4 "standard users"
18-
# 1 user with Null ID
19-
# 1 user with an ID as a string
20-
users_json = """{"id":1,"email":"[email protected]","creation_date":"11-28-2021 12:08:46","last_activity_date":"08-20-2021 08:24:44","firstname":"Randall","lastname":"Espinoza","address":"71571 Jennifer Creek - East John, CO 81653","city":"Port Nicholas","last_ip":"22.207.225.77","postcode":"62389"}
21-
{"id":4,"email":"[email protected]","creation_date":"06-30-2022 22:51:30","last_activity_date":"08-22-2021 17:25:06","firstname":"Jose","lastname":"Bell","address":"865 Young Crest - Lake Adriennebury, VA 67749","city":"Brownstad","last_ip":"159.111.101.250","postcode":"52432"}
22-
{"id":0,"email":"[email protected]","creation_date":"10-21-2021 02:37:38","last_activity_date":"07-22-2021 15:06:48","firstname":"Dylan","lastname":"Barber","address":"7995 Ronald Flat Suite 597 - Williefurt, AL 37894","city":"Port Steven","last_ip":"173.88.213.168","postcode":"58368"}
23-
{"id":3,"email":"[email protected]","creation_date":"07-06-2022 12:27:24","last_activity_date":"01-09-2022 15:04:45","firstname":"Phillip","lastname":"Morgan","address":"523 Garza Crossroad - New Maryview, OK 92301","city":"Julieshire","last_ip":"170.233.120.199","postcode":"34528"}
24-
{"id":2,"email":"[email protected]","creation_date":"09-10-2021 02:31:37","last_activity_date":"01-11-2022 20:39:01","firstname":"Gregory","lastname":"Crane","address":"068 Shawn Port - West Jessica, KS 84864","city":"South Tonya","last_ip":"192.220.63.96","postcode":"88033"}
25-
{"email":"[email protected]","creation_date":"05-28-2022 09:54:50","last_activity_date":"12-18-2021 21:48:48","firstname":"Jeremy","lastname":"Knight","address":"06183 Acevedo Bypass - Petermouth, ME 34177","city":"West Brianburgh","last_ip":"53.240.159.208","postcode":"73380"}
26-
{"id":"invalid ID","email":"[email protected]","creation_date":"12-20-2021 19:57:28","last_activity_date":"07-27-2021 09:39:28","firstname":"Angela","lastname":"Adams","address":"098 Daniel Ferry Suite 565 - South Andrea, ND 36326","city":"New Mariafort","last_ip":"7.176.250.65","postcode":"21300"}"""
18+
# users_json contains:
19+
# 4 "standard users"
20+
# 1 user with Null ID
21+
# 1 user with an ID as a string
22+
self.users_json = """{"id":1,"email":"[email protected]","creation_date":"11-28-2021 12:08:46","last_activity_date":"08-20-2021 08:24:44","firstname":"Randall","lastname":"Espinoza","address":"71571 Jennifer Creek - East John, CO 81653","city":"Port Nicholas","last_ip":"22.207.225.77","postcode":"62389"}
23+
{"id":4,"email":"[email protected]","creation_date":"06-30-2022 22:51:30","last_activity_date":"08-22-2021 17:25:06","firstname":"Jose","lastname":"Bell","address":"865 Young Crest - Lake Adriennebury, VA 67749","city":"Brownstad","last_ip":"159.111.101.250","postcode":"52432"}
24+
{"id":0,"email":"[email protected]","creation_date":"10-21-2021 02:37:38","last_activity_date":"07-22-2021 15:06:48","firstname":"Dylan","lastname":"Barber","address":"7995 Ronald Flat Suite 597 - Williefurt, AL 37894","city":"Port Steven","last_ip":"173.88.213.168","postcode":"58368"}
25+
{"id":3,"email":"[email protected]","creation_date":"07-06-2022 12:27:24","last_activity_date":"01-09-2022 15:04:45","firstname":"Phillip","lastname":"Morgan","address":"523 Garza Crossroad - New Maryview, OK 92301","city":"Julieshire","last_ip":"170.233.120.199","postcode":"34528"}
26+
{"id":2,"email":"[email protected]","creation_date":"09-10-2021 02:31:37","last_activity_date":"01-11-2022 20:39:01","firstname":"Gregory","lastname":"Crane","address":"068 Shawn Port - West Jessica, KS 84864","city":"South Tonya","last_ip":"192.220.63.96","postcode":"88033"}
27+
{"email":"[email protected]","creation_date":"05-28-2022 09:54:50","last_activity_date":"12-18-2021 21:48:48","firstname":"Jeremy","lastname":"Knight","address":"06183 Acevedo Bypass - Petermouth, ME 34177","city":"West Brianburgh","last_ip":"53.240.159.208","postcode":"73380"}
28+
{"id":"invalid ID","email":"[email protected]","creation_date":"12-20-2021 19:57:28","last_activity_date":"07-27-2021 09:39:28","firstname":"Angela","lastname":"Adams","address":"098 Daniel Ferry Suite 565 - South Andrea, ND 36326","city":"New Mariafort","last_ip":"7.176.250.65","postcode":"21300"}"""
2729

28-
def put_spend_csv():
29-
dbutils.fs.put('/Users/[email protected]/test_data/spend_csv/spend.csv', spend_csv, True)
30+
def put_spend_csv(self):
31+
dbutils.fs.put('/Users/[email protected]/test_data/spend_csv/spend.csv', self.spend_csv, True)
3032

31-
def put_users_json():
32-
dbutils.fs.put('/Users/[email protected]/test_data/users_json/users.json', users_json, True)
33+
def put_users_json(self):
34+
dbutils.fs.put('/Users/[email protected]/test_data/users_json/users.json', self.users_json, True)
3335

3436
# MAGIC %fs head /Users/[email protected]/test_data/users_json/users.json
3537

21 Bytes
Binary file not shown.
2.58 KB
Binary file not shown.

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22

33
setuptools.setup(
44
name="test_data_setup",
5-
version="0.0.1",
5+
version="0.0.3",
66
packages=setuptools.find_packages(),
77
)

test_data_setup.egg-info/PKG-INFO

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
Metadata-Version: 2.1
22
Name: test-data-setup
3-
Version: 0.0.1
3+
Version: 0.0.3

0 commit comments

Comments
 (0)