Skip to content

Commit 09e0d12

Browse files
authored
Merge pull request #451 from jspeed-meyers/add_more_comments
added more comments
2 parents 839b890 + 43d6a12 commit 09e0d12

3 files changed

Lines changed: 59 additions & 30 deletions

File tree

Dockerfile

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM python:slim
1+
FROM python:3.7-slim
22
LABEL maintainer="Charlie Lewis <clewis@iqt.org>"
33

44
COPY requirements.txt requirements.txt
@@ -10,12 +10,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
1010
python3 \
1111
python3-dev \
1212
tcpdump \
13-
&& pip3 install --no-cache-dir --upgrade pip==19.1.1 \
13+
&& pip3 install --no-cache-dir --upgrade pip==19.3.1 \
1414
&& pip3 install wheel \
1515
&& pip3 install --no-cache-dir -r requirements.txt\
16-
&& curl -O https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.14.0-cp37-cp37m-linux_x86_64.whl \
17-
&& pip3 install tensorflow-1.14.0-cp37-cp37m-linux_x86_64.whl \
18-
&& rm -rf tensorflow-1.14.0-cp37-cp37m-linux_x86_64.whl \
1916
&& apt-get remove --purge --auto-remove -y curl gcc git python3-dev \
2017
&& apt-get clean \
2118
&& apt-get autoclean \

networkml/algorithms/base.py

Lines changed: 56 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -17,25 +17,38 @@
1717

1818
class BaseAlgorithm:
1919
"""
20-
Base algorithm that rreads a pcap and updates the stored representation of
21-
the source, to be used by more specific algorithms.
20+
Base algorithm class that reads a PCAP (packet capture file) and updates the
21+
stored representation of the source. The class can then be used by more
22+
specific algorithms.
2223
"""
2324

24-
def __init__(self, files=None, config=None, model=None, model_hash=None, model_path=None):
25+
def __init__(self, files=None, config=None, model=None, model_hash=None,
26+
model_path=None):
27+
28+
## Initiate logging information on this instance
2529
self.logger = logging.getLogger(__name__)
2630
logging.basicConfig(level=logging.INFO)
2731
logging.getLogger('pika').setLevel(logging.WARNING)
28-
2932
self.logger = Common().setup_logger(self.logger)
3033
self.common = Common(config=config)
34+
35+
## RabbitMQ acts as a message broker
3136
if self.common.use_rabbit:
32-
self.common.connect_rabbit(host=self.common.rabbit_host, port=self.common.rabbit_port, exchange=self.common.rabbit_exchange,
33-
routing_key=self.common.rabbit_routing_key, queue=self.common.rabbit_queue, queue_name=self.common.rabbit_queue_name)
37+
self.common.connect_rabbit(host=self.common.rabbit_host,
38+
port=self.common.rabbit_port,
39+
exchange=self.common.rabbit_exchange,
40+
routing_key=self.common.rabbit_routing_key,
41+
queue=self.common.rabbit_queue,
42+
queue_name=self.common.rabbit_queue_name)
43+
44+
## Redis provides a storage capability
3445
if self.common.use_redis:
3546
self.common.connect_redis(host=self.common.redis_host)
3647

3748
if config:
3849
try:
50+
## For description of these configuration values, see the
51+
## README.md file in the networkml/configs folder
3952
self.time_const = config['time constant']
4053
self.state_size = config['state size']
4154
self.look_time = config['look time']
@@ -47,12 +60,31 @@ def __init__(self, files=None, config=None, model=None, model_hash=None, model_p
4760
self.logger.error(
4861
'Unable to read config properly because: %s', str(e))
4962

50-
self.files = files if files else []
63+
self.files = files if files else [] ## Store network capture files
5164
self.model = model
5265
self.model_hash = model_hash
5366
self.model_path = model_path
5467

5568
def eval(self, algorithm):
69+
"""
70+
This operation uses a specified algorithm to predict--for particular
71+
network traffic--what devices types are present and whether the device
72+
is acting normally or abnormally. This is the function that should be
73+
used in production when a user wants to actually employ networkML to
74+
classify and assess traffic.
75+
76+
Args:
77+
algorithm: type of algorithm (random forest, neural network, or
78+
stochastic outlier selection (SOS).
79+
"""
80+
81+
## The parsing operation below assumes a specific file naming
82+
## convention trace_DeviceName-deviceID-time-duration-flags.pcap
83+
## Explanation: All files coming from Poseidon have trace_ at their
84+
## beginning. The device name and deviceID colums are self explanatory.
85+
## Time refers to the day of the week and time of day. Duration refers
86+
## to the length of the network traffic capture. The flags aspect
87+
## refers to an unknown characteristic.
5688
for fi in self.files:
5789
self.logger.info('Processing {0}...'.format(fi))
5890
source_mac = None
@@ -66,22 +98,25 @@ def eval(self, algorithm):
6698
except Exception as e: # pragma: no cover
6799
self.logger.debug('Could not get key because %s', str(e))
68100

69-
# ignore misc files
101+
## Ignore misc files
70102
if (split_path[-1] == 'miscellaneous'):
71103
continue
72104

73-
# Get representations from the model
105+
## Get representations from the model
74106
reps, source_mac, timestamps, preds, others, capture_ip_source = self.model.get_representation(
75107
str(fi),
76108
source_ip=source_mac,
77109
mean=False
78110
)
111+
112+
## If no predictions are made, send a message with explanation
79113
if preds is None:
80114
message = {}
81115
message[key] = {'valid': False, 'pcap': os.path.split(fi)[-1]}
82116
uid = os.getenv('id', 'None')
83117
file_path = os.getenv('file_path', 'None')
84-
message = {'id': uid, 'type': 'metadata', 'file_path': file_path, 'data': message,
118+
message = {'id': uid, 'type': 'metadata', 'file_path': file_path,
119+
'data': message,
85120
'results': {'tool': 'networkml', 'version': networkml.__version__}}
86121
message = json.dumps(message)
87122
self.logger.info(
@@ -94,21 +129,12 @@ def eval(self, algorithm):
94129
delivery_mode=2,))
95130
continue
96131

97-
else:
132+
else: ## If a prediction is made, send message with prediction
98133
self.logger.debug('Generating predictions')
99134
last_update, prev_rep = self.common.get_previous_state(
100135
source_mac, timestamps[0])
101136

102-
# TODO are these calls actually needed???
103-
_, mean_rep = self.common.average_representation(
104-
reps,
105-
timestamps,
106-
prev_representation=prev_rep,
107-
last_update=last_update
108-
)
109-
mean_preds = self.model.classify_representation(mean_rep)
110-
111-
# Update the stored representation
137+
## Update the stored representation
112138
if reps is not None:
113139
self.logger.debug('Updating stored data')
114140
r_key = self.common.update_data(
@@ -120,9 +146,9 @@ def eval(self, algorithm):
120146
self.model_hash
121147
)
122148

123-
# Get the sessions that the model looked at
149+
## Get the sessions that the model looked at
124150
sessions = self.model.sessions
125-
# Clean the sessions
151+
## Clean the sessions
126152
clean_sessions = []
127153
inferred_mac = None
128154
for session_dict in sessions:
@@ -136,11 +162,15 @@ def eval(self, algorithm):
136162
if source_mac is None:
137163
source_mac = inferred_mac
138164

139-
# Make simple decisions based on vector differences and update times
165+
## Make simple decisions based on vector differences and update
166+
## times
140167
timestamp = timestamps[0].timestamp()
141168
labels, confs = zip(*preds)
142169
abnormality = 0.0
143170
has_avx = False
171+
172+
## Check if CPU supports AVX (advanced vector extension),
173+
## which speeds up certain calculations
144174
if 'flags' in get_cpu_info() and ('avx' in get_cpu_info()['flags'] or 'avx2' in get_cpu_info()['flags']):
145175
has_avx = True
146176
if has_avx:
@@ -151,6 +181,8 @@ def eval(self, algorithm):
151181
else:
152182
self.logger.warning(
153183
"Can't run abnormality detection because this CPU doesn't support AVX")
184+
185+
##
154186
prev_s = self.common.get_address_info(
155187
source_mac,
156188
timestamp

networkml/utils/common.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import ast
1+
import ast ## Abstract syntax tree - this module helps in parsing code
22
import logging
33
import os
44

0 commit comments

Comments
 (0)