|
| 1 | +import math |
| 2 | + |
| 3 | +import numpy as np |
| 4 | +from sklearn.decomposition import PCA |
| 5 | + |
| 6 | + |
| 7 | +def solution(array: np.ndarray, n=1): |
| 8 | + """Solve the closest pairs problem. |
| 9 | + Note: Code borrowed from Andriy Lazorenko's Medium post. |
| 10 | + """ |
| 11 | + if array.shape[1] > 2: |
| 12 | + array = reduce_dims(array) |
| 13 | + |
| 14 | + pairs = [] |
| 15 | + mis = [] |
| 16 | + |
| 17 | + x, y = np.split(array, 2, axis=1) |
| 18 | + for i in range(n): |
| 19 | + a = list(zip(x, y)) # This produces list of tuples |
| 20 | + ax = sorted(a, key=lambda x: x[0]) # Presorting x-wise |
| 21 | + ay = sorted(a, key=lambda x: x[1]) # Presorting y-wise |
| 22 | + p1, p2, mi = closest_pair(ax, ay) # Recursive D&C function |
| 23 | + |
| 24 | + p1_arg = np.where(array[:, 0] == p1[0])[0] |
| 25 | + p2_arg = np.where(array[:, 0] == p2[0])[0] |
| 26 | + |
| 27 | + pairs.append((p1_arg, p2_arg)) |
| 28 | + mis.append(mi) |
| 29 | + |
| 30 | + # Remove point from array |
| 31 | + x = np.delete(x, p1_arg) |
| 32 | + y = np.delete(y, p1_arg) |
| 33 | + |
| 34 | + return np.array(pairs), np.array(mis) |
| 35 | + |
| 36 | + |
| 37 | +def reduce_dims(array: np.ndarray): |
| 38 | + pca = PCA(n_components=2) |
| 39 | + array_2d = pca.fit(array).transform(array) |
| 40 | + |
| 41 | + return array_2d |
| 42 | + |
| 43 | + |
| 44 | +def closest_pair(ax: np.ndarray, ay: np.ndarray): |
| 45 | + """Find the closest pair. |
| 46 | + Note: Code borrowed from Andriy Lazorenko. |
| 47 | + """ |
| 48 | + ln_ax = len(ax) # It's quicker to assign variable |
| 49 | + if ln_ax <= 3: |
| 50 | + return brute(ax) # A call to bruteforce comparison |
| 51 | + mid = ln_ax // 2 # Division without remainder, need int |
| 52 | + Qx = ax[:mid] # Two-part split |
| 53 | + Rx = ax[mid:] |
| 54 | + # Determine midpoint on x-axis |
| 55 | + midpoint = ax[mid][0] |
| 56 | + Qy = list() |
| 57 | + Ry = list() |
| 58 | + for x in ay: # split ay into 2 arrays using midpoint |
| 59 | + if x[0] <= midpoint: |
| 60 | + Qy.append(x) |
| 61 | + else: |
| 62 | + Ry.append(x) |
| 63 | + # Call recursively both arrays after split |
| 64 | + (p1, q1, mi1) = closest_pair(Qx, Qy) |
| 65 | + (p2, q2, mi2) = closest_pair(Rx, Ry) |
| 66 | + # Determine smaller distance between points of 2 arrays |
| 67 | + if mi1 <= mi2: |
| 68 | + d = mi1 |
| 69 | + mn = (p1, q1) |
| 70 | + else: |
| 71 | + d = mi2 |
| 72 | + mn = (p2, q2) |
| 73 | + # Call function to account for points on the boundary |
| 74 | + (p3, q3, mi3) = closest_split_pair(ax, ay, d, mn) |
| 75 | + # Determine smallest distance for the array |
| 76 | + if d <= mi3: |
| 77 | + return mn[0], mn[1], d |
| 78 | + else: |
| 79 | + return p3, q3, mi3 |
| 80 | + |
| 81 | + |
| 82 | +def brute(ax: np.ndarray): |
| 83 | + mi = dist(ax[0], ax[1]) |
| 84 | + p1 = ax[0] |
| 85 | + p2 = ax[1] |
| 86 | + ln_ax = len(ax) |
| 87 | + if ln_ax == 2: |
| 88 | + return p1, p2, mi |
| 89 | + for i in range(ln_ax - 1): |
| 90 | + for j in range(i + 1, ln_ax): |
| 91 | + if i != 0 and j != 1: |
| 92 | + d = dist(ax[i], ax[j]) |
| 93 | + if d < mi: # Update min_dist and points |
| 94 | + mi = d |
| 95 | + p1, p2 = ax[i], ax[j] |
| 96 | + return p1, p2, mi |
| 97 | + |
| 98 | + |
| 99 | +def dist(p1: np.ndarray, p2: np.ndarray): |
| 100 | + return math.sqrt((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2) |
| 101 | + |
| 102 | + |
| 103 | +def closest_split_pair(p_x: list, p_y: np.ndarray, delta: float, best_pair: tuple): |
| 104 | + """Find the closest_split_pair. |
| 105 | + Note: Code modified from Andriy Lazorenko. |
| 106 | + """ |
| 107 | + ln_x = len(p_x) # store length - quicker |
| 108 | + mx_x = p_x[ln_x // 2][0] # select midpoint on x-sorted array |
| 109 | + # Create a subarray of points not further than delta from |
| 110 | + # midpoint on x-sorted array |
| 111 | + s_y = [x for x in p_y if mx_x - delta <= x[0] <= mx_x + delta] |
| 112 | + best = delta # assign best value to delta |
| 113 | + ln_y = len(s_y) # store length of subarray for quickness |
| 114 | + for i in range(ln_y - 1): |
| 115 | + for j in range(i + 1, min(i + 7, ln_y)): |
| 116 | + p, q = s_y[i], s_y[j] |
| 117 | + dst = dist(p, q) |
| 118 | + if dst < best: |
| 119 | + best_pair = p, q |
| 120 | + best = dst |
| 121 | + return best_pair[0], best_pair[1], best |
0 commit comments