@@ -24,24 +24,71 @@ touch analysis/main.py
# Download the raw data archive (following redirects) and save it as rawdata.zip.
curl -Lo rawdata.zip https://github.com/UofT-DSI/shell/raw/refs/heads/main/02_activities/assignments/rawdata.zip
# Extract the archive quietly into the current directory.
unzip -q rawdata.zip
2626
# ##########################################
# Complete assignment here
# 1. Create a directory named data
mkdir data

# 2. Move the ./rawdata directory to ./data/raw
mv rawdata data/raw

# 3. List the contents of the ./data/raw directory
ls data/raw

# 4. In ./data/processed, create the following directories: server_logs, user_logs, and event_logs
mkdir -p data/processed/{server_logs,user_logs,event_logs}

# 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs
# The pattern must be one word with no spaces: "data/raw/* server* .log" would
# expand to every file in data/raw plus two unrelated arguments.
cp data/raw/*server*.log data/processed/server_logs/ 2> /dev/null || true

# 6. Repeat the above step for user logs and event logs
cp data/raw/*user*.log data/processed/user_logs/ 2> /dev/null || true
cp data/raw/*event*.log data/processed/event_logs/ 2> /dev/null || true

# 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs
# Keep the glob as a single word so only *ipaddr* files are removed;
# "data/raw/* ipaddr*" would delete every file in the directory.
rm -f data/raw/*ipaddr* 2> /dev/null || true
rm -f data/processed/user_logs/*ipaddr* 2> /dev/null || true

# 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed
find data/processed -type f > data/inventory.txt

# ##########################################

echo " Project setup is complete!"
58+
59+
60+
61+
# ##########################################
# Complete assignment here

# 1. Create a directory named data
# -p: do not fail if ./data already exists (these steps may already have run once).
mkdir -p data

# 2. Move the ./rawdata directory to ./data/raw
# Move only when ./rawdata is still present and ./data/raw does not exist yet;
# otherwise mv would fail or nest the directory as ./data/raw/rawdata.
if [ -d rawdata ] && [ ! -e data/raw ]; then
    mv rawdata data/raw
fi

# 3. List the contents of the ./data/raw directory
ls data/raw

# 4. In ./data/processed, create the following directories: server_logs, user_logs, and event_logs
mkdir -p data/processed/server_logs
mkdir -p data/processed/user_logs
mkdir -p data/processed/event_logs

# 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs
# The pattern must be one word with no spaces: "data/raw/* server* .log" would
# expand to every file in data/raw plus two unrelated arguments.
cp data/raw/*server*.log data/processed/server_logs/

# 6. Repeat the above step for user logs and event logs
cp data/raw/*user*.log data/processed/user_logs/
cp data/raw/*event*.log data/processed/event_logs/

# 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs
# Single-word glob so only *ipaddr* files are removed; -f so the step does not
# fail when no such files are left.
rm -f data/raw/*ipaddr*
rm -f data/processed/user_logs/*ipaddr*

# 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed
find data/processed -type f > data/inventory.txt


# ##########################################
0 commit comments