-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerator.py
More file actions
118 lines (93 loc) · 4.51 KB
/
Copy pathgenerator.py
File metadata and controls
118 lines (93 loc) · 4.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Warning: this Python script is not meant to be used by end users. Its purpose is to simplify the
# process of creating data sets for use in this prototype. If you want to use this prototype to learn about
# privacy-preserving data publishing, you do not need to run this script.
# Use at your own risk: the author of this script shall not be held liable for any damage that this script could
# cause.
import random
from random import randrange
# CSV header rows.  Each table_* string starts out as just the header line;
# data rows are appended to it further down in the script.
table_xy = "name,last name,age,gender,city,disease\n"
table_disease = "name,last name,age,gender,city,disease\n"
# The job table used to be initialised twice in a row; only the second header
# ("id,...,salary") ever took effect, so the dead first assignment was dropped.
table_job = "id,age,gender,city,job,salary\n"
table_person = "id,name,last name,age,gender\n"
table_job_rel = "id,job\n"

genders = ["Male", "Female"]

# http://nameberry.com/popular_names/US
# (spelling fixed: "Abigal" -> "Abigail", "Laim" -> "Liam", "Willia" -> "William")
girls = ["Emma", "Olivia", "Sophia", "Ava", "Isabella", "Abigail", "Emily", "Charlotte", "Harper", "Madison",
         "Amelia", "Elizabeth"]
boys = ["Noah", "Liam", "Mason", "Jacob", "William", "Ethan", "Daniel", "Alexander", "Michael", "Benjamin",
        "Elijah", "Aiden"]

# http://www.ranker.com/list/list-of-common-diseases-most-common-illnesses/diseases-and-medications-info
# http://www.medicinenet.com/diseases_and_conditions/a
diseases = ["Acne", "Cancer", "Dandruff", "Epilepsy", "Asthma", "Hair Loss"]

# http://www.studentartguide.com/articles/art-careers-list
# Full list of job titles, kept for reference.
jobs2 = ["Systems analyst", "Programmer", "System admin", "IT manager", "Designer", "Printmaker", "Sculptor",
         "Artist", "Physician", "Nurse", "Therapist", "Pharmacist", "Dentist", "Technician", "Pathologist",
         "Psychiatric", "Surgeon", "Novelist", "Painter", "Engineer", "Supervisor", "Support", "Architect",
         "Technical Lead"]

# Weighted job pool: every title is repeated `count` times, in this exact
# order (500 entries in total).  The order matters to code that consumes the
# pool positionally (e.g. with jobs.pop()).
_JOB_COUNTS = (
    ("Programmer", 50),
    ("System admin", 50),
    ("Designer", 50),
    ("Printmaker", 50),
    ("Artist", 50),
    ("Physician", 50),
    ("Therapist", 50),
    ("Painter", 50),
    ("Engineer", 50),
    ("Supervisor", 35),
    ("Support", 15),
)
jobs = [title for title, count in _JOB_COUNTS for _ in range(count)]

# NOTE(review): "Viljandi" is listed twice, so random.choice(cities) picks it
# twice as often as any other city -- confirm whether that is intentional.
cities = ["Tallinn", "Rapla", "Vändra", "Türi", "Paide", "Põltsamaa", "Viljandi", "Elva", "Tartu", "Jõgeva",
          "Viljandi"]

# http://www.infoplease.com/ipa/A0778413.html
last_names = ["Smith", "Johnson", "Williams", "Jones", "Brown", "Davis", "Miller", "Wilson", "Moore", "Taylor",
              "Anderson", "Thomas", "Jackson", "White", "Harris", "Martin", "Thompson", "Garcia", "Martinez",
              "Robinson"]

# Concatenated "name + last name" keys that generateName() has already returned.
used_names = []
def generateName(gender):
    """Return a random ``(name, last_name)`` pair that has not been returned before.

    The first name is taken from ``boys`` when ``gender == "Male"`` and from
    ``girls`` for any other value; the last name is taken from ``last_names``.
    Each returned pair is recorded in the module-level ``used_names`` list as
    the concatenation ``name + last_name`` so that it is never handed out twice.

    Raises:
        ValueError: if every name/last-name combination for the requested
            gender is already in ``used_names``.  (Retrying random picks in
            that situation, as this function used to do, never terminates.)
    """
    first_names = boys if gender == "Male" else girls
    # Snapshot as a set so each membership test below is O(1).
    taken = set(used_names)
    available = [
        (name, last_name)
        for name in first_names
        for last_name in last_names
        if name + last_name not in taken
    ]
    if not available:
        raise ValueError("no unused name combinations left for gender %r" % (gender,))
    # Choosing uniformly among the unused pairs gives the same distribution as
    # redrawing until an unused pair comes up, without the unbounded retries.
    name, last_name = random.choice(available)
    used_names.append(name + last_name)
    return name, last_name
# Generate the synthetic "job" table and write it out as CSV.
#
# Only the job table (id, age, gender, city, job, salary) is produced here.
# The disease / person / job_rel tables and per-person name generation were
# disabled (commented out) in this script and are therefore not generated.
num_rows = 400
job_rows = []
for i in range(num_rows):
    gender = random.choice(genders)
    # pop() takes from the END of the pre-weighted `jobs` pool, so jobs are
    # handed out in reverse pool order and each pool entry is used at most once.
    job = jobs.pop()
    age = randrange(20, 70)
    # Floor division rounds the salary down to a multiple of 100.  Plain "/"
    # is true division on Python 3 and produced floats such as
    # 1000.9999999999999 instead of 1000.
    salary = (randrange(1000, 3000) // 100) * 100
    city = random.choice(cities)
    job_rows.append(",".join([str(i), str(age), gender, city, job, str(salary)]) + "\n")

# Join once instead of growing the string on every iteration.
table_job += "".join(job_rows)

# NOTE(review): no explicit encoding is passed; on Python 3 the platform
# default is used, which matters for the non-ASCII city names -- confirm the
# consumer of this CSV expects that encoding.
with open("src/main/resources/schema/Job_Large_Salary.csv", "w") as f:
    f.write(table_job)