realtime log:
- python realtime_data.py -t "2017-01-01 05:00:00" -p "/var/lib/docker/volumes/hadoop/_data/weblog/log"
- python realtime_data.py -t "2017-01-01 05:00:00"
history log:
- python realtime_data_history.py -t "2017-01-01 05:00:00" -p "/var/lib/docker/volumes/hadoop/_data/weblog/history-log"
files:
- user_agent.csv: user agent
- uuid.csv: user id
- host_url.csv: url and category
- hivename.csv: uuid and user name
- inteval.csv: time interval
folders:
- es-log: elasticsearch log (hot-dataflow)
- hadoop-log: hadoop hdfs (cold-dataflow)
- history-log: history log
- log: realtime log
- SH: ETL shell
- SQL: ETL SQL