-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathmain.py
More file actions
109 lines (89 loc) · 3.43 KB
/
main.py
File metadata and controls
109 lines (89 loc) · 3.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import os
import yaml
import logging
import shutil
from datetime import datetime
from sqlalchemy.engine import URL
#from sqlalchemy.engine.url import URL
from sqlalchemy.pool import NullPool
from triage.util.db import create_engine
from triage.component.timechop import Timechop
from triage.component.timechop.plotting import visualize_chops
from triage.component.architect.feature_generators import FeatureGenerator
from triage.experiments import MultiCoreExperiment, SingleThreadedExperiment
# import os
# os.chdir('donors-choose')
# Timestamp taken once at start-up; it names this run's log file and is also
# used by generate_experiment_report() for the report file name.
now = datetime.now()

# Per-run log file, truncated on open.
fh = logging.FileHandler(f'triage_{now}.log', mode='w')
formatter = logging.Formatter(
    fmt='%(name)-30s %(asctime)s %(levelname)10s %(process)6d %(filename)-24s %(lineno)4d: %(message)s',
    datefmt='%d/%m/%Y %I:%M:%S %p',
)
fh.setFormatter(formatter)

# Configure the root logger: INFO and above goes to the file handler.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.addHandler(fh)
# Creating the database engine.
# Credentials are taken from the PG* environment variables when PGUSER is set;
# otherwise they are read from a local database.yaml file.
if os.getenv('PGUSER') is not None:
    db_url = URL.create(
        'postgresql+psycopg2',
        host=os.getenv('PGHOST'),
        username=os.getenv('PGUSER'),
        database=os.getenv('PGDATABASE'),
        password=os.getenv('PGPASSWORD'),
        # Honour PGPORT when it is set (and non-empty); otherwise keep the
        # previous fixed value of 5432.
        port=int(os.getenv('PGPORT') or 5432),
    )
else:
    # DB credentials are not set up as environment variables.
    # Kept under its own name so it is not confused with the experiment
    # configuration loaded later into `config`.
    with open("database.yaml", "r") as f:
        db_config = yaml.safe_load(f)
    db_url = URL.create(
        'postgresql+psycopg2',
        host=db_config['host'],
        username=db_config['user'],
        password=db_config['pass'],
        database=db_config['db'],
        port=db_config['port'],
    )

db_engine = create_engine(db_url)
# Loading the experiment configuration file.
config_file = 'donors-choose-config.yaml'
with open(config_file, 'r') as fin:
    config = yaml.safe_load(fin)

# Generating the temporal config plot.
chopper = Timechop(**config['temporal_config'])

# We aren't interested in seeing the entire feature_start_time represented
# in our timechop plot. That would hide the interesting information. So we
# set it to equal label_start_time for the plot.
chopper.feature_start_time = chopper.label_start_time

# Create the output directory up front so that saving the plot cannot fail
# merely because the directory is missing (e.g. on a fresh checkout).
timechop_plot_path = 'triage_output/timechop.png'
os.makedirs(os.path.dirname(timechop_plot_path), exist_ok=True)
visualize_chops(chopper, save_target=timechop_plot_path)
# Creating the experiment object.
# Settings common to both experiment flavours are collected once.
experiment_settings = dict(
    config=config,
    db_engine=db_engine,
    project_path='s3://dsapp-education-migrated/donors-choose',
    replace=True,
    save_predictions=False,
)

## Multi core
experiment = MultiCoreExperiment(
    n_processes=2,
    n_db_processes=2,
    **experiment_settings,
)

# If single threaded, use the following line instead of the call above:
# experiment = SingleThreadedExperiment(**experiment_settings)
# Creating the Triage experiment report.
def generate_experiment_report():
    """Execute the experiment summary notebook and export it to HTML.

    The notebook template is copied to a dated file under ``./notebooks``,
    the copy is executed in place with ``jupyter nbconvert --execute``, and
    the executed notebook is then converted to HTML. The template itself is
    never overwritten.

    The ``jupyter`` commands are run without a shell. If a command cannot be
    started or exits with a non-zero status, the failure is logged as an
    error and the remaining steps are skipped, so an unexecuted copy of the
    template is not exported as if it were a finished report.

    Returns:
        None.

    Raises:
        OSError: if the template cannot be copied to the output path.
    """
    import subprocess

    # Path to where the notebook template is saved.
    template_path = './notebooks/experiment_summary_report_template.ipynb'
    # Where the executed notebook is saved (never the template itself).
    output_path = f'./notebooks/experiment_summary_report_{now.strftime("%Y%m%d")}.ipynb'
    shutil.copyfile(template_path, output_path)

    commands = (
        # Execute the copied notebook, writing the results back into it.
        ['jupyter', 'nbconvert', '--execute', '--inplace', '--to', 'notebook', output_path],
        # Render the executed notebook as HTML.
        ['jupyter', 'nbconvert', output_path, '--to', 'html'],
    )
    for command in commands:
        try:
            completed = subprocess.run(command, check=False)
        except OSError as err:
            # Typically the jupyter executable is missing from PATH.
            logger.error('Could not start %s: %s', ' '.join(command), err)
            return
        if completed.returncode != 0:
            logger.error(
                'Command failed with exit code %d: %s',
                completed.returncode,
                ' '.join(command),
            )
            return
# Validate the experiment first, then run it.
experiment.validate()
experiment.run()
# Once the run has returned, build the notebook/HTML summary report.
generate_experiment_report()