|
| 1 | +import { UMAP, PaCMAP, LocalMAP, type ParametersUMAP, type ParametersPaCMAP, type ParametersLocalMAP, type ParametersAnnoy, type ParametersHNSW } from "@saehrimnir/druidjs"; |
| 2 | + |
/** Identifiers of the dimensionality-reduction algorithms handled by this module. */
export type ReducerAlgorithm =
  | "umap"
  | "pacmap"
  | "localmap";

/** Label string for each {@link ReducerAlgorithm}, keyed by algorithm id. */
export const REDUCER_LABELS: Record<ReducerAlgorithm, string> = {
  umap: "UMAP",
  pacmap: "PaCMAP",
  localmap: "LocalMAP",
};
| 10 | + |
/**
 * Metadata for one numeric parameter: a label, a [min, max] range, a step and
 * a default value.
 *
 * NOTE(review): presumably consumed by UI controls (sliders/number inputs);
 * the consumer is not visible in this file.
 */
export type ParamDef = {
  /** Label text for the parameter. */
  label: string;
  /** Lower end of the parameter's range. */
  min: number;
  /** Upper end of the parameter's range. */
  max: number;
  /** Increment between adjacent values. */
  step: number;
  /** Value returned by the `default*ParamsFor` helpers for this parameter. */
  default: number;
};
| 18 | + |
/** Builds a ParamDef from positional values: label, min, max, step, default. */
function basicParam(
  label: string,
  min: number,
  max: number,
  step: number,
  defaultValue: number,
): ParamDef {
  return { label, min, max, step, default: defaultValue };
}

/**
 * Primary parameters per reducer algorithm, keyed by the option name that is
 * later spread into the DruidJS constructor options by `runReducer`.
 */
export const REDUCER_PARAM_DEFS: Record<ReducerAlgorithm, Record<string, ParamDef>> = {
  umap: {
    n_neighbors: basicParam("Neighbors", 2, 200, 1, 15),
    min_dist: basicParam("Min dist", 0, 1, 0.01, 0.1),
    _spread: basicParam("Spread", 0.1, 10, 0.1, 1.0),
  },
  pacmap: {
    n_neighbors: basicParam("Neighbors", 2, 200, 1, 10),
    MN_ratio: basicParam("MN ratio", 0.1, 5, 0.1, 0.5),
    FP_ratio: basicParam("FP ratio", 0.5, 10, 0.5, 2.0),
  },
  localmap: {
    n_neighbors: basicParam("Neighbors", 2, 200, 1, 10),
    MN_ratio: basicParam("MN ratio", 0.1, 5, 0.1, 0.5),
    FP_ratio: basicParam("FP ratio", 0.5, 10, 0.5, 2.0),
    low_dist_thres: basicParam("Low dist thresh", 1, 50, 1, 10),
  },
};
| 37 | + |
/** Builds a ParamDef from positional values: label, min, max, step, default. */
function advancedParam(
  label: string,
  min: number,
  max: number,
  step: number,
  defaultValue: number,
): ParamDef {
  return { label, min, max, step, default: defaultValue };
}

/**
 * Advanced parameters per reducer algorithm, keyed by option name.
 * The UMAP `_n_epochs` entry is also the value `runReducer` reads as its
 * progress total.
 */
export const REDUCER_ADVANCED_PARAM_DEFS: Record<ReducerAlgorithm, Record<string, ParamDef>> = {
  umap: {
    _n_epochs: advancedParam("Epochs", 50, 2000, 10, 350),
    seed: advancedParam("Seed", 0, 99999, 1, 1212),
    local_connectivity: advancedParam("Local connectivity", 1, 20, 1, 1),
    _initial_alpha: advancedParam("Initial LR", 0.01, 5, 0.01, 1),
    _repulsion_strength: advancedParam("Repulsion strength", 0, 5, 0.1, 1),
    _negative_sample_rate: advancedParam("Neg. sample rate", 1, 20, 1, 5),
    _set_op_mix_ratio: advancedParam("Set-op mix ratio", 0, 1, 0.01, 1),
  },
  pacmap: {
    seed: advancedParam("Seed", 0, 99999, 1, 1212),
    lr: advancedParam("Learning rate", 0.001, 10, 0.001, 1.0),
  },
  localmap: {
    seed: advancedParam("Seed", 0, 99999, 1, 1212),
    lr: advancedParam("Learning rate", 0.001, 10, 0.001, 1.0),
  },
};
| 57 | + |
/**
 * Reducer algorithms that take a k-NN backend option. These are the two
 * branches of `runReducer` that pass `knn_backend` / `knn_params`.
 */
export const KNN_BACKEND_ALGORITHMS: Array<ReducerAlgorithm> = [
  "pacmap",
  "localmap",
];
/** Identifiers of the supported nearest-neighbour search backends. */
export type KnnBackend =
  | "annoy"
  | "hnsw";

/** Backend choices as value/label pairs, in listing order. */
export const KNN_BACKENDS: Array<{ value: KnnBackend; label: string }> = [
  {
    value: "annoy",
    label: "Annoy",
  },
  {
    value: "hnsw",
    label: "HNSW (broken?)",
  },
];
| 64 | + |
/**
 * Collects the default value of every primary parameter of an algorithm.
 *
 * @param algorithm - Algorithm whose entry in REDUCER_PARAM_DEFS is read.
 * @returns A fresh object mapping each parameter name to its `default`.
 */
export function defaultParamsFor(algorithm: ReducerAlgorithm): Record<string, number> {
  const defaults: Record<string, number> = {};
  for (const [name, def] of Object.entries(REDUCER_PARAM_DEFS[algorithm])) {
    defaults[name] = def.default;
  }
  return defaults;
}
| 70 | + |
/**
 * Collects the default value of every advanced parameter of an algorithm.
 *
 * @param algorithm - Algorithm whose entry in REDUCER_ADVANCED_PARAM_DEFS is read.
 * @returns A fresh object mapping each parameter name to its `default`.
 */
export function defaultAdvancedParamsFor(algorithm: ReducerAlgorithm): Record<string, number> {
  const defaults: Record<string, number> = {};
  for (const [name, def] of Object.entries(REDUCER_ADVANCED_PARAM_DEFS[algorithm])) {
    defaults[name] = def.default;
  }
  return defaults;
}
| 76 | + |
/** Builds a ParamDef from positional values: label, min, max, step, default. */
function knnParam(
  label: string,
  min: number,
  max: number,
  step: number,
  defaultValue: number,
): ParamDef {
  return { label, min, max, step, default: defaultValue };
}

/** Tunable parameters of each k-NN backend, keyed by backend id and option name. */
export const KNN_PARAM_DEFS: Record<KnnBackend, Record<string, ParamDef>> = {
  annoy: {
    numTrees: knnParam("Num trees", 1, 200, 1, 10),
    maxPointsPerLeaf: knnParam("Max pts/leaf", 1, 200, 1, 10),
    seed: knnParam("Seed", 0, 99999, 1, 1212),
  },
  // Defaults match the voyager (Spotify) HNSW library used by pacmap-python:
  // https://github.com/spotify/voyager/blob/main/cpp/src/TypedIndex.h#L127
  // https://spotify.github.io/voyager/python/reference.html#voyager.Index
  hnsw: {
    ef: knnParam("ef (search)", 10, 1000, 10, 10),
    ef_construction: knnParam("ef_construct", 10, 2000, 10, 200),
    m: knnParam("m", 2, 100, 1, 12),
    seed: knnParam("Seed", 0, 99999, 1, 1212),
  },
};
| 93 | + |
/**
 * Collects the default value of every parameter of a k-NN backend.
 *
 * @param backend - Backend whose entry in KNN_PARAM_DEFS is read.
 * @returns A fresh object mapping each parameter name to its `default`.
 */
export function defaultKnnParamsFor(backend: KnnBackend): Record<string, number> {
  const defaults: Record<string, number> = {};
  for (const [name, def] of Object.entries(KNN_PARAM_DEFS[backend])) {
    defaults[name] = def.default;
  }
  return defaults;
}
| 99 | + |
/**
 * Alias of the HNSW entry of KNN_PARAM_DEFS (same object, not a copy).
 * @deprecated Use KNN_PARAM_DEFS["hnsw"]
 */
export const HNSW_PARAM_DEFS = KNN_PARAM_DEFS.hnsw;

/**
 * Returns the default HNSW parameter values.
 * @deprecated Use defaultKnnParamsFor("hnsw")
 */
export function defaultHnswParams(): Record<string, number> {
  return defaultKnnParamsFor("hnsw");
}
| 104 | + |
| 105 | +export type { ParametersAnnoy, ParametersHNSW }; |
| 106 | + |
/** Input message for `runReducer`. */
export type ReducerRequest = {
  /** Message discriminator; always "reduce". */
  type: "reduce";
  /** Input data as an array of rows; `runReducer` requires at least 3 rows. */
  matrix: number[][];
  /** Which reducer to run. */
  algorithm: ReducerAlgorithm;
  /**
   * Numeric options spread into the reducer's constructor options.
   * `n_neighbors` and `_n_epochs` are additionally read by `runReducer`.
   */
  params: Record<string, number>;
  /** k-NN backend for the PaCMAP/LocalMAP branches; "annoy" when omitted. */
  knnBackend?: KnnBackend;
  /** Backend options; the backend's defaults are used when omitted. */
  knnParams?: Record<string, number>;
};
| 115 | + |
/**
 * Messages produced while reducing, discriminated by `type`.
 *
 * - "done": final 2-D coordinates, one pair per projected row.
 * - "progress": iteration counter and the expected total.
 * - "error": a failure message. `runReducer` itself throws rather than
 *   yielding this variant — presumably it is produced by the calling wrapper
 *   (TODO confirm).
 */
export type ReducerResponse =
  | {
      type: "done";
      coords: [number, number][];
    }
  | {
      type: "progress";
      iteration: number;
      total: number;
    }
  | {
      type: "error";
      message: string;
    };
| 120 | + |
/**
 * Iteration count per algorithm that `runReducer` uses as the progress total
 * (and as the UMAP epoch count) when `params._n_epochs` is not supplied.
 */
export const REDUCER_DEFAULT_ITERATIONS: Record<ReducerAlgorithm, number> = {
  umap: 350,
  pacmap: 450,
  localmap: 450,
};
| 126 | + |
/** `runReducer` yields a progress event once every this many iterations. */
export const PROGRESS_INTERVAL = 10;
| 128 | + |
/**
 * Zeroes out columns in-place at indices where `mask[j]` is true.
 *
 * Only cells that already exist are written: a mask that is longer than a row
 * (or a ragged matrix) never causes a row to grow with new zero-filled
 * columns.
 *
 * @param matrix - Rows to modify in place.
 * @param mask - Per-column flags; `true` means "set this column to 0".
 */
export function zeroMaskedColumns(matrix: number[][], mask: boolean[]): void {
  for (const row of matrix) {
    // Bound by the row's own width so assignments never extend the row.
    const width = Math.min(row.length, mask.length);
    for (let j = 0; j < width; j++) {
      if (mask[j]) row[j] = 0;
    }
  }
}
| 137 | + |
/**
 * Fills NaN cells in place, column by column, with the mean of that column's
 * non-NaN values. A column with no non-NaN values is filled with 0.
 *
 * The column count is taken from the first row; an empty matrix is a no-op.
 *
 * @param matrix - Rows to modify in place.
 */
export function imputeColumnMeans(matrix: number[][]): void {
  const columnCount = matrix[0]?.length ?? 0;
  for (let col = 0; col < columnCount; col++) {
    // First pass: accumulate the observed (non-NaN) values of this column.
    let total = 0;
    let observed = 0;
    for (const row of matrix) {
      const value = row[col];
      if (isNaN(value)) continue;
      total += value;
      observed += 1;
    }
    const fill = observed === 0 ? 0 : total / observed;
    // Second pass: overwrite the gaps with the column's fill value.
    for (const row of matrix) {
      if (isNaN(row[col])) row[col] = fill;
    }
  }
}
| 153 | + |
/**
 * Pure generator — yields progress ticks then a final done event. Usable in a
 * web worker or directly in Node.js.
 *
 * Behaviour:
 * - `n_neighbors` is rounded and clamped to `[2, rows - 1]`. When
 *   `params.n_neighbors` is missing or NaN, the algorithm's default from
 *   REDUCER_PARAM_DEFS is used instead of letting NaN reach the reducer.
 * - The progress `total` is `params._n_epochs` when supplied, otherwise
 *   REDUCER_DEFAULT_ITERATIONS for the algorithm. Only UMAP is explicitly run
 *   for `total` iterations; for PaCMAP/LocalMAP the generator is started
 *   without a count, so `total` may not match the real iteration count
 *   (NOTE(review): confirm against DruidJS).
 * - A "progress" event is yielded every PROGRESS_INTERVAL iterations, then one
 *   "done" event with the first two components of each row of the last
 *   projection.
 *
 * @param req - Matrix, algorithm and options to run.
 * @throws Error when the matrix has fewer than 3 rows. Because this is a
 *   generator, the error surfaces on the first `next()` call.
 */
export function* runReducer(req: ReducerRequest): Generator<ReducerResponse> {
  const { matrix, algorithm, params, knnBackend, knnParams } = req;
  const n = matrix.length;
  if (n < 3) {
    throw new Error(`Need at least 3 rows to run dimensional reduction (got ${n}).`);
  }

  // Math.min/Math.max propagate NaN, so an absent or NaN n_neighbors must be
  // replaced before clamping.
  const requestedNeighbors: number | undefined = params.n_neighbors;
  const neighborsOrDefault =
    requestedNeighbors === undefined || Number.isNaN(requestedNeighbors)
      ? REDUCER_PARAM_DEFS[algorithm]["n_neighbors"]?.default ?? 2
      : requestedNeighbors;
  const nNeighbors = Math.max(2, Math.min(Math.round(neighborsOrDefault), n - 1));
  const total = params._n_epochs ?? REDUCER_DEFAULT_ITERATIONS[algorithm];

  let gen: Generator<unknown, unknown, unknown>;
  if (algorithm === "umap") {
    const dr = new UMAP(matrix, {
      d: 2,
      ...(params as Partial<ParametersUMAP>),
      n_neighbors: nNeighbors,
      _n_epochs: total,
    });
    gen = dr.generator(total);
  } else {
    // knn_backend/knn_params are not in DruidJS types but accepted at runtime
    const backend: KnnBackend = knnBackend ?? "annoy";
    const knnOptions = {
      knn_backend: backend,
      knn_params: (knnParams ?? defaultKnnParamsFor(backend)) as Partial<ParametersAnnoy> | Partial<ParametersHNSW>,
    };
    if (algorithm === "localmap") {
      const dr = new LocalMAP(matrix, {
        d: 2,
        ...(params as Partial<ParametersLocalMAP>),
        n_neighbors: nNeighbors,
        ...knnOptions,
      } as Partial<ParametersLocalMAP>);
      gen = dr.generator();
    } else {
      const dr = new PaCMAP(matrix, {
        d: 2,
        ...(params as Partial<ParametersPaCMAP>),
        n_neighbors: nNeighbors,
        ...knnOptions,
      } as Partial<ParametersPaCMAP>);
      gen = dr.generator();
    }
  }

  let iteration = 0;
  let lastProjection: number[][] = [];
  for (const projection of gen) {
    iteration++;
    // Keep only the most recent projection; it becomes the final result.
    lastProjection = projection as number[][];
    if (iteration % PROGRESS_INTERVAL === 0) {
      yield { type: "progress", iteration, total };
    }
  }

  const coords = lastProjection.map((row) => [row[0], row[1]] as [number, number]);
  yield { type: "done", coords };
}
0 commit comments