|
3 | 3 | import ipaddress |
4 | 4 | import pycountry |
5 | 5 | import time |
| 6 | +from multiprocessing import Pool |
6 | 7 |
|
7 | 8 | # Some global stats |
8 | 9 | totalIPs = 0 |
@@ -52,6 +53,30 @@ def deduplicate(ip_data_list): |
52 | 53 |
|
53 | 54 | return result |
54 | 55 |
|
def should_keep(args_tuple):
    """Decide whether a candidate entry survives comparison with one kept entry.

    Args:
        args_tuple: A ``(kept_entry, current_entry)`` pair of dicts. Each dict
            must have an ``'ip_range'`` key that ``ipaddress.ip_network`` can
            parse (host bits are tolerated, ``strict=False``); every other key
            is treated as the entry's metadata. The two entries are packed
            into a single tuple so the function can be mapped over one
            iterable of argument pairs.

    Returns:
        A two-item list ``[keep_network, was_in_subnet]``:

        * ``[False, False]`` - the candidate is a subnet of the kept range
          with identical metadata, or it overlaps the kept range without
          being a subnet of it (a message is printed in the latter case).
        * ``[True, True]`` - the candidate is a subnet of the kept range but
          carries different metadata.
        * ``[True, False]`` - the two ranges are unrelated, including when
          one is IPv4 and the other IPv6.
    """
    kept_entry, current_entry = args_tuple
    existing_range = ipaddress.ip_network(kept_entry['ip_range'], strict=False)
    current_network = ipaddress.ip_network(current_entry['ip_range'], strict=False)

    # An IPv4 and an IPv6 network can never contain or overlap each other, and
    # subnet_of() raises TypeError on mixed versions, so settle this case first.
    if current_network.version != existing_range.version:
        return [True, False]

    if current_network.subnet_of(existing_range):
        # Compare everything except the range itself.
        current_info = {k: v for k, v in current_entry.items() if k != 'ip_range'}
        kept_info = {k: v for k, v in kept_entry.items() if k != 'ip_range'}
        if current_info == kept_info:
            # If a subnet has the same info as the supernet, remove it entirely.
            return [False, False]
        # A subnet can have separate info from its larger network and as such
        # should be handled as correct.
        return [True, True]

    if current_network.overlaps(existing_range):
        # Print a warning and discard the overlapping network.
        print(f'{current_network} was discarded for overlapping with {existing_range}')
        return [False, False]

    return [True, False]
| 79 | + |
55 | 80 | def process(json_file): |
56 | 81 | global totalIPs, duplicatedCIDRs, overlappedCIDRs, ignoredPrivateCIDRs |
57 | 82 |
|
@@ -92,27 +117,18 @@ def process(json_file): |
92 | 117 | if len(entry['country_code']) == 3: |
93 | 118 | entry['country_code'] = convert_to_2_letter_code(entry['country_code']) |
94 | 119 |
|
95 | | - # Handle overlaps / subnets |
96 | 120 | keep_network = True |
97 | 121 | was_in_subnet = False |
98 | | - |
99 | | - for kept_entry in result: |
100 | | - existing_range = ipaddress.ip_network(kept_entry['ip_range'], strict=False) |
101 | | - if ip_network.subnet_of(existing_range): |
102 | | - test_data_2 = kept_entry.copy() |
103 | | - del(test_data_2['ip_range']) |
104 | | - # If a subnet has the same info as the supernet, remove it entirely. |
105 | | - if entry_copy == test_data_2: |
106 | | - keep_network = False |
107 | | - else: |
108 | | - # A subnet can have separate info from its larger network and as such should be handled as correct |
109 | | - keep_network = True |
110 | | - was_in_subnet = True |
111 | | - elif ip_network.overlaps(existing_range): |
112 | | - # Print a warning and discard the overlapping network |
113 | | - keep_network = False |
114 | | - overlappedCIDRs += 1 |
115 | | - print(f'{ip_network} was discarded for overlapping with {existing_range}') |
| 122 | + with Pool() as pool: |
| 123 | + args = zip(result, [entry] * len(result)) |
| 124 | + results = pool.imap_unordered(should_keep, args, chunksize=50) |
| 125 | + for keep, was_subnet in results: |
| 126 | + if not keep: |
| 127 | + keep_network= False |
| 128 | + pool.terminate() |
| 129 | + break |
| 130 | + if was_subnet: |
| 131 | + was_in_subnet= True |
116 | 132 |
|
117 | 133 | if keep_network: |
118 | 134 | unique_ranges.add(ip_network) |
|
0 commit comments