forked from openeventdata/phoenix_pipeline
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpipeline.py
More file actions
109 lines (90 loc) · 4.02 KB
/
Copy pathpipeline.py
File metadata and controls
109 lines (90 loc) · 4.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from __future__ import print_function
from __future__ import unicode_literals
import sys
import logging
import datetime
import uploader
import utilities
import formatter
import oneaday_formatter
import scraper_connection
from petrarch import petrarch
def main(file_details, server_details, logger_file=None, run_filter=None):
    """
    Run the whole pipeline: scrape, format, code with PETRARCH, upload.

    Parameters
    ----------

    file_details: Named tuple.
                    All the other config information not in
                    ``server_details``.

    server_details: Named tuple.
                    Config information specifically related to the remote
                    server for FTP uploading.

    logger_file: String.
                    Path to a log file. Defaults to ``None`` and opens a
                    ``PHOX_pipeline.log`` file in the current working
                    directory.

    run_filter: String.
                Whether to run the ``oneaday_formatter``. Takes True or False
                (strings) as values. Any other value (including the default
                ``None``) aborts the run.

    Raises
    ------

    SystemExit
                With a non-zero status when ``run_filter`` is neither
                ``'True'`` nor ``'False'``.
    """
    utilities.init_logger(logger_file if logger_file else 'PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    # The date handed to the scraper and formatter is always "yesterday" (UTC).
    process_date = datetime.datetime.utcnow() - datetime.timedelta(days=1)
    if len(sys.argv) > 1:
        # NOTE(review): a date string given on the command line is used as-is
        # for ``date_string`` but does not influence ``process_date`` --
        # confirm that this is intended.
        date_string = sys.argv[1]
    else:
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
    logger.info('Date string: {}'.format(date_string))
    print('Date string:', date_string)

    results, scraperfilename = scraper_connection.main(process_date,
                                                       file_details)

    if scraperfilename:
        logger.info("Scraper file name: " + scraperfilename)
        print("Scraper file name:", scraperfilename)

    logger.info("Running Mongo.formatter.py")
    print("Running Mongo.formatter.py")
    formatted = formatter.main(results, file_details,
                               process_date, date_string)

    logger.info("Running PETRARCH")
    if run_filter == 'False':
        # No one-a-day filtering: PETRARCH is given the output path directly.
        print('Running PETRARCH and writing to a file. No one-a-day.')
        logger.info('Running PETRARCH and writing to a file. No one-a-day.')
        petrarch.run_pipeline(formatted,
                              '{}{}.txt'.format(file_details.fullfile_stem,
                                                date_string), parsed=True)
    elif run_filter == 'True':
        # Keep the PETRARCH output in memory and hand it to the one-a-day
        # formatter.
        print('Running PETRARCH and returning output.')
        logger.info('Running PETRARCH and returning output.')
        petr_results = petrarch.run_pipeline(formatted, write_output=False,
                                             parsed=True)

        logger.info("Running oneaday_formatter.py")
        print("Running oneaday_formatter.py")
        oneaday_formatter.main(petr_results, date_string, server_details,
                               file_details)
    else:
        print("Can't run with the options you've specified. "
              "You need to fix something.")
        logger.warning("Can't run with the options you've specified. Exiting.")
        # Exit with a non-zero status so calling scripts/cron can detect the
        # misconfiguration instead of seeing a "successful" run.
        sys.exit(1)

    logger.info("Running phox_uploader.py")
    print("Running phox_uploader.py")
    uploader.main(date_string, server_details, file_details)

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
if __name__ == '__main__':
    # Script entry point: read the server and file settings from the INI
    # file in the working directory, then run the pipeline with the log
    # path and one-a-day switch taken from the file settings.
    server_details, file_details = utilities.parse_config('PHOX_config.ini')
    main(file_details,
         server_details,
         logger_file=file_details.log_file,
         run_filter=file_details.oneaday_filter)