Controller.py
from Rss_crawler import Crawler
import header
import ConfigParser
import os.path
import logging
import sys
from AnalysisEngine import AnalysisEngine
import time
# Default data directory and module-level configuration (controller() reloads these locally).
home = "D://SRA//Data//"
settings = ConfigParser.ConfigParser()
#settings._interpolation = ConfigParser.ExtendedInterpolation()
settings.read('Settings.config')
def controller():
    LEVELS = {'debug': logging.DEBUG,
              'info': logging.INFO,
              'warning': logging.WARNING,
              'error': logging.ERROR,
              'critical': logging.CRITICAL}

    def setLoggingLevel():
        try:
            level = LEVELS.get(header.levelName, logging.NOTSET)
            logging.basicConfig(filename=header.logFileName, level=level)
            if level == logging.NOTSET:
                logging.warning("Logging level not set properly. Allowed levels are: critical, error, warning, info, debug")
        except Exception as e:
            print e.message
        return
    # Home dir, used as the default/fallback directory.
    home = "D://SRA//Data//"
    settings = ConfigParser.ConfigParser()
    #settings._interpolation = ConfigParser.ExtendedInterpolation()
    settings.read('D://SRA//Data//Settings.config')

    # Initialize the logging level.
    header.levelName = settings.get('LOGGING_INPUTS', 'LEVEL_NAME')
    # Initialize the file to which logging will be dumped.
    log_dir_path = settings.get('LOGGING_INPUTS', 'LOG_DIR_PATH')
    if not os.path.isdir(log_dir_path):
        logging.warning("LOG_DIR_PATH not set properly. Logs are redirected to log.txt placed at: " + home)
        log_dir_path = home
    header.logFileName = os.path.join(log_dir_path, "log.txt")
    setLoggingLevel()

    # File containing site information.
    header.siteInfoFilePath = settings.get('CRAWLER_INPUTS', 'SITE_INFO_FILE_PATH')
    if not os.path.isfile(header.siteInfoFilePath):
        logging.warning("SITE_INFO_FILE_PATH not set properly")
        header.siteInfoFilePath = os.path.join(home, "SiteTimestampInfo.p")
        # sys.exit()
    # File storing the list of important keywords.
    header.keywordFilePath = settings.get('CRAWLER_INPUTS', 'KEYWORD_FILE_PATH')
    if not os.path.isfile(header.keywordFilePath):
        logging.critical("KEYWORD_FILE_PATH not set properly")
        sys.exit()

    # Directory in which the crawled data (Output.csv) will be stored.
    out_dir_path = settings.get('CRAWLER_INPUTS', 'OUTPUT_DIR_PATH')
    if not os.path.isdir(out_dir_path):
        logging.warning("OUTPUT_DIR_PATH not set properly. Output.csv will be stored in " + home)
        out_dir_path = home
    header.outputFilePath = os.path.join(out_dir_path, "Output.csv")

    # TODO: this should be a directory rather than a file.
    header.siteTimestampPickleFile = settings.get('CRAWLER_INPUTS', 'SITE_PICKLE_DUMP_FILE')
    # if not os.path.isfile(header.siteTimestampPickleFile):
    #     logging.critical("SITE_PICKLE_DUMP_FILE not set properly. Make sure the path is correct")
    #     sys.exit()
    # Analysis engine inputs: noun filter, training data, label/text columns and
    # model file paths for the risk, topic and sub-topic classifiers.
    header.nounfilter = settings.get('ANALYSIS_ENGINE_INPUTS', 'NOUNFILTER')
    header.trainingFilePath = settings.get('ANALYSIS_ENGINE_INPUTS', 'TRAINING_FILE_PATH')
    header.labelColNameRiskClassifier = settings.get('ANALYSIS_ENGINE_INPUTS', 'LABEL_COL_NAME_RISK_CLASSIFIER')
    header.textFieldRiskClassifier = settings.get('ANALYSIS_ENGINE_INPUTS', 'TEXT_FIELDS_RISK_CLASSIFIER').split(',')
    header.riskClassifierModelFilePath = settings.get('ANALYSIS_ENGINE_INPUTS', 'RISK_CLASSIFIER_MODEL_FILE_PATH')
    header.textFieldTopicClassifier = settings.get('ANALYSIS_ENGINE_INPUTS', 'TEXT_FIELDS_TOPIC_CLASSIFIER').split(',')
    header.labelColNameTopicClassifier = settings.get('ANALYSIS_ENGINE_INPUTS', 'LABEL_COL_NAME_TOPIC_CLASSIFIER')
    header.topicClassifierModelFilePath = settings.get('ANALYSIS_ENGINE_INPUTS', 'TOPIC_CLASSIFIER_MODEL_FILE_PATH')
    header.labelColNameSubTopicClassifier = settings.get('ANALYSIS_ENGINE_INPUTS', 'LABEL_COL_NAME_SUB_TOPIC_CLASSIFIER')
    header.subTopicOneClassifierModelFilePath = settings.get('ANALYSIS_ENGINE_INPUTS', 'SUB_TOPIC_ONE_CLASSIFIER_MODEL_FILE_PATH')
    header.subTopicTwoClassifierModelFilePath = settings.get('ANALYSIS_ENGINE_INPUTS', 'SUB_TOPIC_TWO_CLASSIFIER_MODEL_FILE_PATH')
    header.subTopicThreeClassifierModelFilePath = settings.get('ANALYSIS_ENGINE_INPUTS', 'SUB_TOPIC_THREE_CLASSIFIER_MODEL_FILE_PATH')
    header.subTopicFourClassifierModelFilePath = settings.get('ANALYSIS_ENGINE_INPUTS', 'SUB_TOPIC_FOUR_CLASSIFIER_MODEL_FILE_PATH')

    # Parameter tuning and skewness-removal switches for the classifiers.
    if "true" in settings.get('ANALYSIS_ENGINE_INPUTS', 'IS_PARAM_TUNE').lower():
        header.isParamTune = True
    header.riskClassifierRatioRemoveSkewness = float(settings.get('ANALYSIS_ENGINE_INPUTS', 'RISK_CLASSIFIER_RATIO_REMOVE_SKEWNESS'))
    if "true" in settings.get('ANALYSIS_ENGINE_INPUTS', 'IS_REMOVE_SKEWNESS').lower():
        header.isRemoveSkewness = True
    # Invoke the Crawler to crawl data from the configured sites. Commented out
    # here so that only the analysis engine runs; the loop re-crawls every 6 hours.
    # while True:
    #     crawler = Crawler()
    #     del crawler
    #     time.sleep(21600)
    # Single crawl:
    # crawler = Crawler()
    # del crawler

    # Invoke the Analysis Engine to process the content of outputFilePath.
    analyzer = AnalysisEngine()
    analyzer.performTrainingForAllTasks()
    analyzer.analyze()
    sys.exit()


#########################################################################
if __name__ == '__main__':
    controller()
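
For reference, controller() expects a Settings.config with LOGGING_INPUTS, CRAWLER_INPUTS and ANALYSIS_ENGINE_INPUTS sections containing the keys read above. The sketch below (a separate helper script, not part of this repository) writes a skeleton Settings.config with those keys; every value is an illustrative placeholder assumption to be replaced with real paths and column names before running python Controller.py.

# make_settings_skeleton.py -- illustrative sketch only; key names are taken from
# Controller.py, all values below are placeholder assumptions.
import ConfigParser

config = ConfigParser.ConfigParser()

config.add_section('LOGGING_INPUTS')
config.set('LOGGING_INPUTS', 'LEVEL_NAME', 'info')
config.set('LOGGING_INPUTS', 'LOG_DIR_PATH', 'D://SRA//Data//')

config.add_section('CRAWLER_INPUTS')
config.set('CRAWLER_INPUTS', 'SITE_INFO_FILE_PATH', 'D://SRA//Data//SiteTimestampInfo.p')
config.set('CRAWLER_INPUTS', 'KEYWORD_FILE_PATH', 'D://SRA//Data//keywords.txt')
config.set('CRAWLER_INPUTS', 'OUTPUT_DIR_PATH', 'D://SRA//Data//')
config.set('CRAWLER_INPUTS', 'SITE_PICKLE_DUMP_FILE', 'D://SRA//Data//SitePickleDump.p')

config.add_section('ANALYSIS_ENGINE_INPUTS')
config.set('ANALYSIS_ENGINE_INPUTS', 'NOUNFILTER', 'true')
config.set('ANALYSIS_ENGINE_INPUTS', 'TRAINING_FILE_PATH', 'D://SRA//Data//Training.csv')
config.set('ANALYSIS_ENGINE_INPUTS', 'LABEL_COL_NAME_RISK_CLASSIFIER', 'risk_label')
config.set('ANALYSIS_ENGINE_INPUTS', 'TEXT_FIELDS_RISK_CLASSIFIER', 'title,description')
config.set('ANALYSIS_ENGINE_INPUTS', 'RISK_CLASSIFIER_MODEL_FILE_PATH', 'D://SRA//Data//risk_model.p')
config.set('ANALYSIS_ENGINE_INPUTS', 'TEXT_FIELDS_TOPIC_CLASSIFIER', 'title,description')
config.set('ANALYSIS_ENGINE_INPUTS', 'LABEL_COL_NAME_TOPIC_CLASSIFIER', 'topic_label')
config.set('ANALYSIS_ENGINE_INPUTS', 'TOPIC_CLASSIFIER_MODEL_FILE_PATH', 'D://SRA//Data//topic_model.p')
config.set('ANALYSIS_ENGINE_INPUTS', 'LABEL_COL_NAME_SUB_TOPIC_CLASSIFIER', 'sub_topic_label')
config.set('ANALYSIS_ENGINE_INPUTS', 'SUB_TOPIC_ONE_CLASSIFIER_MODEL_FILE_PATH', 'D://SRA//Data//sub_topic_one_model.p')
config.set('ANALYSIS_ENGINE_INPUTS', 'SUB_TOPIC_TWO_CLASSIFIER_MODEL_FILE_PATH', 'D://SRA//Data//sub_topic_two_model.p')
config.set('ANALYSIS_ENGINE_INPUTS', 'SUB_TOPIC_THREE_CLASSIFIER_MODEL_FILE_PATH', 'D://SRA//Data//sub_topic_three_model.p')
config.set('ANALYSIS_ENGINE_INPUTS', 'SUB_TOPIC_FOUR_CLASSIFIER_MODEL_FILE_PATH', 'D://SRA//Data//sub_topic_four_model.p')
config.set('ANALYSIS_ENGINE_INPUTS', 'IS_PARAM_TUNE', 'false')
config.set('ANALYSIS_ENGINE_INPUTS', 'RISK_CLASSIFIER_RATIO_REMOVE_SKEWNESS', '1.0')
config.set('ANALYSIS_ENGINE_INPUTS', 'IS_REMOVE_SKEWNESS', 'false')

# Write the skeleton to the path Controller.py reads from (assumed location).
with open('D://SRA//Data//Settings.config', 'w') as f:
    config.write(f)

With the skeleton filled in, the pipeline is started with python Controller.py; as written, only the analysis engine runs, and the crawler block inside controller() has to be uncommented to crawl first.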