1
+ import json
1
2
import os
2
- from ipaddress import IPv4Address , IPv6Address
3
3
import pandas as pd
4
4
5
5
from navv .zeek import perform_zeekcut
6
- from navv .utilities import get_mac_vendor
6
+ from navv .utilities import get_mac_vendor , timeit
7
7
from navv .validators import is_ipv4_address , is_ipv6_address
8
8
9
9
10
# Absolute path of the bundled MAC-vendor lookup table: the "data" directory
# that sits next to this module. Built from the module's own directory rather
# than by appending "/../" to the module file path.
MAC_VENDORS_JSON_FILE = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "data", "mac-vendors.json"
)
11
+
12
+
10
13
def get_zeek_data (zeek_logs ):
11
14
"""Return a list of Zeek conn.log data."""
12
15
return (
@@ -27,16 +30,31 @@ def get_zeek_data(zeek_logs):
27
30
)
28
31
29
32
30
def get_zeek_df(zeek_data: list, dns_data: dict = None):
    """Return a pandas dataframe of the conn.log data with its DNS data.

    Args:
        zeek_data: Tab-separated conn.log rows. The fields of each row map,
            in order, onto the ``src_ip``, ``dst_ip``, ``port``, ``proto``,
            ``conn``, ``src_mac`` and ``dst_mac`` columns.
        dns_data: Mapping used to look up a hostname for each row's source
            and destination IP. An IP that is not in the mapping gets an
            empty hostname. May be omitted (or ``None``), in which case every
            hostname is empty.

    Returns:
        A DataFrame with the columns ``src_ip``, ``src_hostname``,
        ``dst_ip``, ``dst_hostname``, ``port``, ``proto``, ``conn``,
        ``src_mac`` and ``dst_mac``.

    Raises:
        IndexError: If a non-blank row has fewer than two fields.
    """
    hostnames = dns_data or {}

    records = []
    for line in zeek_data:
        if not line:
            # A blank row (e.g. a trailing empty line) has no IP fields to
            # look up; skip it instead of failing on the missing dst_ip.
            continue
        fields = line.split("\t")
        src_ip, dst_ip = fields[0], fields[1]
        # Place each hostname directly after the IP it belongs to.
        records.append(
            [
                src_ip,
                hostnames.get(src_ip, ""),
                dst_ip,
                hostnames.get(dst_ip, ""),
                *fields[2:],
            ]
        )

    return pd.DataFrame(
        records,
        columns=[
            "src_ip",
            "src_hostname",
            "dst_ip",
            "dst_hostname",
            "port",
            "proto",
            "conn",
            "src_mac",
            "dst_mac",
        ],
    )
38
55
39
56
57
+ @timeit
40
58
def get_inventory_report_df (zeek_df : pd .DataFrame ):
41
59
"""Return a pandas dataframe of the inventory report data."""
42
60
zeek_df ["port_and_proto" ] = zeek_df ["port" ] + "/" + zeek_df ["proto" ]
@@ -56,12 +74,30 @@ def get_inventory_report_df(zeek_df: pd.DataFrame):
56
74
)
57
75
58
76
src_df = zeek_df [
59
- ["src_mac" , "src_ipv4" , "src_ipv6" , "dst_ipv4" , "dst_ipv6" , "port_and_proto" ]
77
+ [
78
+ "src_mac" ,
79
+ "src_ipv4" ,
80
+ "src_hostname" ,
81
+ "src_ipv6" ,
82
+ "dst_ipv4" ,
83
+ "dst_hostname" ,
84
+ "dst_ipv6" ,
85
+ "port_and_proto" ,
86
+ ]
60
87
].reset_index (drop = True )
61
88
src_df ["mac" ] = src_df ["src_mac" ]
62
89
63
90
dst_df = zeek_df [
64
- ["dst_mac" , "src_ipv4" , "src_ipv6" , "dst_ipv4" , "dst_ipv6" , "port_and_proto" ]
91
+ [
92
+ "dst_mac" ,
93
+ "src_ipv4" ,
94
+ "src_hostname" ,
95
+ "src_ipv6" ,
96
+ "dst_ipv4" ,
97
+ "dst_hostname" ,
98
+ "dst_ipv6" ,
99
+ "port_and_proto" ,
100
+ ]
65
101
].reset_index (drop = True )
66
102
dst_df ["mac" ] = dst_df ["dst_mac" ]
67
103
@@ -70,33 +106,60 @@ def get_inventory_report_df(zeek_df: pd.DataFrame):
70
106
.reset_index (drop = True )
71
107
.drop (columns = ["src_mac" , "dst_mac" ])
72
108
.drop_duplicates (
73
- subset = ["src_ipv4" , "src_ipv6" , "dst_ipv4" , "dst_ipv6" , "port_and_proto" ]
109
+ subset = [
110
+ "src_ipv4" ,
111
+ "src_hostname" ,
112
+ "src_ipv6" ,
113
+ "dst_ipv4" ,
114
+ "dst_hostname" ,
115
+ "dst_ipv6" ,
116
+ "port_and_proto" ,
117
+ ]
74
118
)
75
119
)
76
- df ["vendor" ] = df ["mac" ].apply (lambda mac : get_mac_vendor (mac ))
77
120
78
121
grouped_df = (
79
122
df .groupby ("mac" , as_index = False )
80
123
.agg (
81
124
{
82
125
"src_ipv4" : list ,
126
+ "src_hostname" : list ,
83
127
"src_ipv6" : list ,
84
128
"dst_ipv4" : list ,
129
+ "dst_hostname" : list ,
85
130
"dst_ipv6" : list ,
86
131
"port_and_proto" : list ,
87
132
}
88
133
)
89
134
.reset_index ()
90
135
)
91
- grouped_df ["vendor" ] = grouped_df ["mac" ].apply (lambda mac : get_mac_vendor (mac ))
136
+
137
+ mac_vendors = {}
138
+ with open (MAC_VENDORS_JSON_FILE ) as f :
139
+ mac_vendors = json .load (f )
140
+ grouped_df ["vendor" ] = grouped_df ["mac" ].apply (
141
+ lambda mac : get_mac_vendor (mac_vendors , mac )
142
+ )
92
143
grouped_df ["ipv4" ] = (grouped_df ["src_ipv4" ] + grouped_df ["dst_ipv4" ]).apply (
93
144
lambda ip : list (set (ip ))
94
145
)
95
146
grouped_df ["ipv6" ] = (grouped_df ["src_ipv6" ] + grouped_df ["dst_ipv6" ]).apply (
96
147
lambda ip : list (set (ip ))
97
148
)
149
+ grouped_df ["hostname" ] = (
150
+ grouped_df ["src_hostname" ] + grouped_df ["dst_hostname" ]
151
+ ).apply (lambda hostname : list (set (hostname )))
152
+
98
153
grouped_df .drop (
99
- columns = ["src_ipv4" , "src_ipv6" , "dst_ipv4" , "dst_ipv6" ], inplace = True
154
+ columns = [
155
+ "src_ipv4" ,
156
+ "src_hostname" ,
157
+ "src_ipv6" ,
158
+ "dst_ipv4" ,
159
+ "dst_hostname" ,
160
+ "dst_ipv6" ,
161
+ ],
162
+ inplace = True ,
100
163
)
101
164
102
165
return grouped_df
0 commit comments