1717
1818class BaseAlgorithm :
1919 """
20- Base algorithm that rreads a pcap and updates the stored representation of
21- the source, to be used by more specific algorithms.
20+ Base algorithm class that reads a PCAP (packet capture file) and updates the
21+ stored representation of the source. The class can then be used by more
22+ specific algorithms.
2223 """
2324
24- def __init__ (self , files = None , config = None , model = None , model_hash = None , model_path = None ):
25+ def __init__ (self , files = None , config = None , model = None , model_hash = None ,
26+ model_path = None ):
27+
28+ ## Initiate logging information on this instance
2529 self .logger = logging .getLogger (__name__ )
2630 logging .basicConfig (level = logging .INFO )
2731 logging .getLogger ('pika' ).setLevel (logging .WARNING )
28-
2932 self .logger = Common ().setup_logger (self .logger )
3033 self .common = Common (config = config )
34+
35+ ## RabbitMQ acts as a message broker
3136 if self .common .use_rabbit :
32- self .common .connect_rabbit (host = self .common .rabbit_host , port = self .common .rabbit_port , exchange = self .common .rabbit_exchange ,
33- routing_key = self .common .rabbit_routing_key , queue = self .common .rabbit_queue , queue_name = self .common .rabbit_queue_name )
37+ self .common .connect_rabbit (host = self .common .rabbit_host ,
38+ port = self .common .rabbit_port ,
39+ exchange = self .common .rabbit_exchange ,
40+ routing_key = self .common .rabbit_routing_key ,
41+ queue = self .common .rabbit_queue ,
42+ queue_name = self .common .rabbit_queue_name )
43+
44+ ## Redis provides a storage capability
3445 if self .common .use_redis :
3546 self .common .connect_redis (host = self .common .redis_host )
3647
3748 if config :
3849 try :
50+ ## For description of these configuration values, see the
51+ ## README.md file in the networkml/configs folder
3952 self .time_const = config ['time constant' ]
4053 self .state_size = config ['state size' ]
4154 self .look_time = config ['look time' ]
@@ -47,12 +60,31 @@ def __init__(self, files=None, config=None, model=None, model_hash=None, model_p
4760 self .logger .error (
4861 'Unable to read config properly because: %s' , str (e ))
4962
50- self .files = files if files else []
63+ self .files = files if files else [] ## Store network capture files
5164 self .model = model
5265 self .model_hash = model_hash
5366 self .model_path = model_path
5467
5568 def eval (self , algorithm ):
69+ """
70+ This operation uses a specified algorithm to predict--for particular
71+ network traffic--what devices types are present and whether the device
72+ is acting normally or abnormally. This is the function that should be
73+ used in production when a user wants to actually employ networkML to
74+ classify and assess traffic.
75+
76+ Args:
77+ algorithm: type of algorithm (random forest, neural network, or
78+ stochastic outlier selection (SOS).
79+ """
80+
81+ ## The parsing operation below assumes a specific file naming
82+ ## convention trace_DeviceName-deviceID-time-duration-flags.pcap
83+ ## Explanation: All files coming from Poseidon have trace_ at their
84+ ## beginning. The device name and deviceID colums are self explanatory.
85+ ## Time refers to the day of the week and time of day. Duration refers
86+ ## to the length of the network traffic capture. The flags aspect
87+ ## refers to an unknown characteristic.
5688 for fi in self .files :
5789 self .logger .info ('Processing {0}...' .format (fi ))
5890 source_mac = None
@@ -66,22 +98,25 @@ def eval(self, algorithm):
6698 except Exception as e : # pragma: no cover
6799 self .logger .debug ('Could not get key because %s' , str (e ))
68100
69- # ignore misc files
101+ ## Ignore misc files
70102 if (split_path [- 1 ] == 'miscellaneous' ):
71103 continue
72104
73- # Get representations from the model
105+ ## Get representations from the model
74106 reps , source_mac , timestamps , preds , others , capture_ip_source = self .model .get_representation (
75107 str (fi ),
76108 source_ip = source_mac ,
77109 mean = False
78110 )
111+
112+ ## If no predictions are made, send a message with explanation
79113 if preds is None :
80114 message = {}
81115 message [key ] = {'valid' : False , 'pcap' : os .path .split (fi )[- 1 ]}
82116 uid = os .getenv ('id' , 'None' )
83117 file_path = os .getenv ('file_path' , 'None' )
84- message = {'id' : uid , 'type' : 'metadata' , 'file_path' : file_path , 'data' : message ,
118+ message = {'id' : uid , 'type' : 'metadata' , 'file_path' : file_path ,
119+ 'data' : message ,
85120 'results' : {'tool' : 'networkml' , 'version' : networkml .__version__ }}
86121 message = json .dumps (message )
87122 self .logger .info (
@@ -94,21 +129,12 @@ def eval(self, algorithm):
94129 delivery_mode = 2 ,))
95130 continue
96131
97- else :
132+ else : ## If a prediction is made, send message with prediction
98133 self .logger .debug ('Generating predictions' )
99134 last_update , prev_rep = self .common .get_previous_state (
100135 source_mac , timestamps [0 ])
101136
102- # TODO are these calls actually needed???
103- _ , mean_rep = self .common .average_representation (
104- reps ,
105- timestamps ,
106- prev_representation = prev_rep ,
107- last_update = last_update
108- )
109- mean_preds = self .model .classify_representation (mean_rep )
110-
111- # Update the stored representation
137+ ## Update the stored representation
112138 if reps is not None :
113139 self .logger .debug ('Updating stored data' )
114140 r_key = self .common .update_data (
@@ -120,9 +146,9 @@ def eval(self, algorithm):
120146 self .model_hash
121147 )
122148
123- # Get the sessions that the model looked at
149+ ## Get the sessions that the model looked at
124150 sessions = self .model .sessions
125- # Clean the sessions
151+ ## Clean the sessions
126152 clean_sessions = []
127153 inferred_mac = None
128154 for session_dict in sessions :
@@ -136,11 +162,15 @@ def eval(self, algorithm):
136162 if source_mac is None :
137163 source_mac = inferred_mac
138164
139- # Make simple decisions based on vector differences and update times
165+ ## Make simple decisions based on vector differences and update
166+ ## times
140167 timestamp = timestamps [0 ].timestamp ()
141168 labels , confs = zip (* preds )
142169 abnormality = 0.0
143170 has_avx = False
171+
172+ ## Check if CPU supports AVX (advanced vector extension),
173+ ## which speeds up certain calculations
144174 if 'flags' in get_cpu_info () and ('avx' in get_cpu_info ()['flags' ] or 'avx2' in get_cpu_info ()['flags' ]):
145175 has_avx = True
146176 if has_avx :
@@ -151,6 +181,8 @@ def eval(self, algorithm):
151181 else :
152182 self .logger .warning (
153183 "Can't run abnormality detection because this CPU doesn't support AVX" )
184+
185+ ##
154186 prev_s = self .common .get_address_info (
155187 source_mac ,
156188 timestamp
0 commit comments