Skip to content

Commit bc7013c

Browse files
authored
Merge pull request #46 from cisagov/feature/excel-updates
feature: excel updates
2 parents 427b3a5 + 773018d commit bc7013c

File tree

6 files changed

+163
-133
lines changed

6 files changed

+163
-133
lines changed

src/navv/bll.py

Lines changed: 74 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,15 @@
1+
import json
12
import os
2-
from ipaddress import IPv4Address, IPv6Address
33
import pandas as pd
44

55
from navv.zeek import perform_zeekcut
6-
from navv.utilities import get_mac_vendor
6+
from navv.utilities import get_mac_vendor, timeit
77
from navv.validators import is_ipv4_address, is_ipv6_address
88

99

10+
MAC_VENDORS_JSON_FILE = os.path.abspath(__file__ + "/../" + "data/mac-vendors.json")
11+
12+
1013
def get_zeek_data(zeek_logs):
1114
"""Return a list of Zeek conn.log data."""
1215
return (
@@ -27,16 +30,31 @@ def get_zeek_data(zeek_logs):
2730
)
2831

2932

30-
def get_zeek_df(zeek_data: list):
31-
"""Return a pandas dataframe of the conn.log data."""
33+
def get_zeek_df(zeek_data: list, dns_data: dict):
34+
"""Return a pandas dataframe of the conn.log data with its dns data."""
3235
zeek_data = [row.split("\t") for row in zeek_data]
36+
# Insert dns data to zeek data
37+
for row in zeek_data:
38+
row.insert(1, dns_data.get(row[0], ""))
39+
row.insert(3, dns_data.get(row[2], ""))
3340

3441
return pd.DataFrame(
3542
zeek_data,
36-
columns=["src_ip", "dst_ip", "port", "proto", "conn", "src_mac", "dst_mac"],
43+
columns=[
44+
"src_ip",
45+
"src_hostname",
46+
"dst_ip",
47+
"dst_hostname",
48+
"port",
49+
"proto",
50+
"conn",
51+
"src_mac",
52+
"dst_mac",
53+
],
3754
)
3855

3956

57+
@timeit
4058
def get_inventory_report_df(zeek_df: pd.DataFrame):
4159
"""Return a pandas dataframe of the inventory report data."""
4260
zeek_df["port_and_proto"] = zeek_df["port"] + "/" + zeek_df["proto"]
@@ -56,12 +74,30 @@ def get_inventory_report_df(zeek_df: pd.DataFrame):
5674
)
5775

5876
src_df = zeek_df[
59-
["src_mac", "src_ipv4", "src_ipv6", "dst_ipv4", "dst_ipv6", "port_and_proto"]
77+
[
78+
"src_mac",
79+
"src_ipv4",
80+
"src_hostname",
81+
"src_ipv6",
82+
"dst_ipv4",
83+
"dst_hostname",
84+
"dst_ipv6",
85+
"port_and_proto",
86+
]
6087
].reset_index(drop=True)
6188
src_df["mac"] = src_df["src_mac"]
6289

6390
dst_df = zeek_df[
64-
["dst_mac", "src_ipv4", "src_ipv6", "dst_ipv4", "dst_ipv6", "port_and_proto"]
91+
[
92+
"dst_mac",
93+
"src_ipv4",
94+
"src_hostname",
95+
"src_ipv6",
96+
"dst_ipv4",
97+
"dst_hostname",
98+
"dst_ipv6",
99+
"port_and_proto",
100+
]
65101
].reset_index(drop=True)
66102
dst_df["mac"] = dst_df["dst_mac"]
67103

@@ -70,33 +106,60 @@ def get_inventory_report_df(zeek_df: pd.DataFrame):
70106
.reset_index(drop=True)
71107
.drop(columns=["src_mac", "dst_mac"])
72108
.drop_duplicates(
73-
subset=["src_ipv4", "src_ipv6", "dst_ipv4", "dst_ipv6", "port_and_proto"]
109+
subset=[
110+
"src_ipv4",
111+
"src_hostname",
112+
"src_ipv6",
113+
"dst_ipv4",
114+
"dst_hostname",
115+
"dst_ipv6",
116+
"port_and_proto",
117+
]
74118
)
75119
)
76-
df["vendor"] = df["mac"].apply(lambda mac: get_mac_vendor(mac))
77120

78121
grouped_df = (
79122
df.groupby("mac", as_index=False)
80123
.agg(
81124
{
82125
"src_ipv4": list,
126+
"src_hostname": list,
83127
"src_ipv6": list,
84128
"dst_ipv4": list,
129+
"dst_hostname": list,
85130
"dst_ipv6": list,
86131
"port_and_proto": list,
87132
}
88133
)
89134
.reset_index()
90135
)
91-
grouped_df["vendor"] = grouped_df["mac"].apply(lambda mac: get_mac_vendor(mac))
136+
137+
mac_vendors = {}
138+
with open(MAC_VENDORS_JSON_FILE) as f:
139+
mac_vendors = json.load(f)
140+
grouped_df["vendor"] = grouped_df["mac"].apply(
141+
lambda mac: get_mac_vendor(mac_vendors, mac)
142+
)
92143
grouped_df["ipv4"] = (grouped_df["src_ipv4"] + grouped_df["dst_ipv4"]).apply(
93144
lambda ip: list(set(ip))
94145
)
95146
grouped_df["ipv6"] = (grouped_df["src_ipv6"] + grouped_df["dst_ipv6"]).apply(
96147
lambda ip: list(set(ip))
97148
)
149+
grouped_df["hostname"] = (
150+
grouped_df["src_hostname"] + grouped_df["dst_hostname"]
151+
).apply(lambda hostname: list(set(hostname)))
152+
98153
grouped_df.drop(
99-
columns=["src_ipv4", "src_ipv6", "dst_ipv4", "dst_ipv6"], inplace=True
154+
columns=[
155+
"src_ipv4",
156+
"src_hostname",
157+
"src_ipv6",
158+
"dst_ipv4",
159+
"dst_hostname",
160+
"dst_ipv6",
161+
],
162+
inplace=True,
100163
)
101164

102165
return grouped_df

src/navv/commands.py

Lines changed: 12 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
"""CLI Commands."""
2-
import json
32
import os
43
import webbrowser
54

@@ -22,12 +21,11 @@
2221
write_conn_states_sheet,
2322
write_externals_sheet,
2423
write_inventory_report_sheet,
25-
write_macs_sheet,
2624
write_stats_sheet,
2725
write_unknown_internals_sheet,
2826
)
29-
from navv.zeek import run_zeek, perform_zeekcut
30-
from navv.utilities import pushd, trim_dns_data
27+
from navv.zeek import get_dns_data, run_zeek, perform_zeekcut
28+
from navv.utilities import pushd
3129

3230

3331
@click.command("generate")
@@ -73,26 +71,21 @@ def generate(customer_name, output_dir, pcap, zeek_logs):
7371

7472
# Get zeek data
7573
zeek_data = get_zeek_data(zeek_logs)
76-
zeek_df = get_zeek_df(zeek_data)
74+
75+
# Get dns data for resolution
76+
json_path = os.path.join(output_dir, f"{customer_name}_dns_data.json")
77+
78+
# Get dns data from zeek logs
79+
dns_filtered = get_dns_data(customer_name, output_dir, zeek_logs)
80+
81+
# Get zeek dataframe
82+
zeek_df = get_zeek_df(zeek_data, dns_filtered)
7783

7884
# Get inventory report dataframe
7985
inventory_df = get_inventory_report_df(zeek_df)
8086

8187
# Turn zeekcut data into rows for spreadsheet
82-
rows, mac_dict = create_analysis_array(zeek_data, timer=timer_data)
83-
84-
# Get dns data for resolution
85-
json_path = os.path.join(output_dir, f"{customer_name}_dns_data.json")
86-
87-
if os.path.exists(json_path):
88-
with open(json_path, "rb") as json_file:
89-
dns_filtered = json.load(json_file)
90-
else:
91-
dns_data = perform_zeekcut(
92-
fields=["query", "answers", "qtype", "rcode_name"],
93-
log_file=os.path.join(zeek_logs, "dns.log"),
94-
)
95-
dns_filtered = trim_dns_data(dns_data)
88+
rows = create_analysis_array(zeek_data, timer=timer_data)
9689

9790
ext_IPs = set()
9891
unk_int_IPs = set()
@@ -112,8 +105,6 @@ def generate(customer_name, output_dir, pcap, zeek_logs):
112105

113106
write_inventory_report_sheet(inventory_df, wb)
114107

115-
write_macs_sheet(mac_dict, wb)
116-
117108
write_externals_sheet(ext_IPs, wb)
118109

119110
write_unknown_internals_sheet(unk_int_IPs, wb)

src/navv/network_analysis.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@
1111
# package imports
1212
from navv.commands import generate, launch
1313
from navv.message_handler import info_msg
14-
from navv import utilities
1514
from navv._version import __version__
1615

1716

@@ -31,7 +30,6 @@ def cli(ctx):
3130
pass
3231

3332

34-
@utilities.timeit
3533
def main():
3634
"""Main function for performing zeek-cut commands and sorting the output"""
3735

0 commit comments

Comments
 (0)