Skip to content

Commit d4e768c

Browse files
Test apply multiprocessing code
1 parent 8e38964 commit d4e768c

File tree

1 file changed

+35
-19
lines changed

1 file changed

+35
-19
lines changed

scripts/process.py

Lines changed: 35 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import ipaddress
44
import pycountry
55
import time
6+
from multiprocessing import Pool
67

78
# Some global stats
89
totalIPs = 0
@@ -52,6 +53,30 @@ def deduplicate(ip_data_list):
5253

5354
return result
5455

56+
def should_keep(args_tuple):
    """Compare one candidate entry against one already-kept entry.

    The two entries arrive packed in a single tuple so that this function can
    be handed directly to ``Pool.imap_unordered``, which passes exactly one
    argument per work item.

    Args:
        args_tuple: ``(kept_entry, current_entry)``. Both are dicts with an
            ``'ip_range'`` key whose value is accepted by
            ``ipaddress.ip_network(..., strict=False)``. Every other key is
            treated as descriptive data and compared for equality.

    Returns:
        A two-item list ``[keep_network, was_in_subnet]``:

        * ``[False, False]`` - the candidate is a subnet of the kept range
          carrying identical data, or it overlaps the kept range without
          being a subnet of it (a message is printed in the latter case).
        * ``[True, True]`` - the candidate is a subnet of the kept range but
          carries different data.
        * ``[True, False]`` - the two ranges are unrelated.

    Neither input dict is modified.
    """
    kept_entry, current_entry = args_tuple
    existing_range = ipaddress.ip_network(kept_entry['ip_range'], strict=False)
    current_network = ipaddress.ip_network(current_entry['ip_range'], strict=False)

    # subnet_of() raises TypeError when the operands are of different IP
    # versions. An IPv4 range can never contain or overlap an IPv6 range, so
    # such a pair is simply unrelated.
    if current_network.version != existing_range.version:
        return [True, False]

    if current_network.subnet_of(existing_range):
        # Compare everything except the range itself.
        current_info = {k: v for k, v in current_entry.items() if k != 'ip_range'}
        kept_info = {k: v for k, v in kept_entry.items() if k != 'ip_range'}
        if current_info == kept_info:
            # If a subnet has the same info as the supernet, remove it entirely.
            return [False, False]
        # A subnet can have separate info from its larger network and as such
        # should be handled as correct.
        return [True, True]

    if current_network.overlaps(existing_range):
        # Print a warning and discard the overlapping network.
        print(f'{current_network} was discarded for overlapping with {existing_range}')
        return [False, False]

    return [True, False]
79+
5580
def process(json_file):
5681
global totalIPs, duplicatedCIDRs, overlappedCIDRs, ignoredPrivateCIDRs
5782

@@ -92,27 +117,18 @@ def process(json_file):
92117
if len(entry['country_code']) == 3:
93118
entry['country_code'] = convert_to_2_letter_code(entry['country_code'])
94119

95-
# Handle overlaps / subnets
96120
keep_network = True
97121
was_in_subnet = False
98-
99-
for kept_entry in result:
100-
existing_range = ipaddress.ip_network(kept_entry['ip_range'], strict=False)
101-
if ip_network.subnet_of(existing_range):
102-
test_data_2 = kept_entry.copy()
103-
del(test_data_2['ip_range'])
104-
# If a subnet has the same info as the supernet, remove it entirely.
105-
if entry_copy == test_data_2:
106-
keep_network = False
107-
else:
108-
# A subnet can have separate info from its larger network and as such should be handled as correct
109-
keep_network = True
110-
was_in_subnet = True
111-
elif ip_network.overlaps(existing_range):
112-
# Print a warning and discard the overlapping network
113-
keep_network = False
114-
overlappedCIDRs += 1
115-
print(f'{ip_network} was discarded for overlapping with {existing_range}')
122+
with Pool() as pool:
123+
args = zip(result, [entry] * len(result))
124+
results = pool.imap_unordered(should_keep, args, chunksize=50)
125+
for keep, was_subnet in results:
126+
if not keep:
127+
keep_network= False
128+
pool.terminate()
129+
break
130+
if was_subnet:
131+
was_in_subnet= True
116132

117133
if keep_network:
118134
unique_ranges.add(ip_network)

0 commit comments

Comments
 (0)