Skip to content

Commit 3c7a7dd

Browse files
authored
Add LMCLUS model (#950)
1 parent 9c44911 commit 3c7a7dd

File tree

6 files changed

+326
-1
lines changed

6 files changed

+326
-1
lines changed

Diff for: README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ for (let i = 0; i < n; i++) {
121121

122122
| task | model |
123123
| ---- | ----- |
124-
| clustering | (Soft / Kernel / Genetic / Weighted / Bisecting) k-means, k-means++, k-medois, k-medians, x-means, G-means, LBG, ISODATA, Fuzzy c-means, Possibilistic c-means, k-harmonic means, MacQueen, Hartigan-Wong, Elkan, Hamelry, Drake, Yinyang, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mutual kNN, Mean shift, DBSCAN, OPTICS, DTSCAN, HDBSCAN, DENCLUE, DBCLASD, BRIDGE, CLUES, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, C2P, PLSA, Latent dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, (Growing) SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, CHAMELEON, COLL, CLIQUE, PROCLUS, ORCLUS, FINDIT, DOC, FastDOC, DiSH, NMF, Autoencoder |
124+
| clustering | (Soft / Kernel / Genetic / Weighted / Bisecting) k-means, k-means++, k-medoids, k-medians, x-means, G-means, LBG, ISODATA, Fuzzy c-means, Possibilistic c-means, k-harmonic means, MacQueen, Hartigan-Wong, Elkan, Hamerly, Drake, Yinyang, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mutual kNN, Mean shift, DBSCAN, OPTICS, DTSCAN, HDBSCAN, DENCLUE, DBCLASD, BRIDGE, CLUES, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, C2P, PLSA, Latent dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, (Growing) SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, CHAMELEON, COLL, CLIQUE, PROCLUS, ORCLUS, FINDIT, DOC, FastDOC, DiSH, LMCLUS, NMF, Autoencoder |
125125
| classification | (Fisher's) Linear discriminant, Quadratic discriminant, Mixture discriminant, Least squares, (Multiclass / Kernel) Ridge, (Complement / Negation / Universal-set / Selective) Naive Bayes (gaussian), AODE, (Fuzzy / Weighted) k-nearest neighbor, Radius neighbor, Nearest centroid, ENN, ENaN, NNBCA, ADAMENN, DANN, IKNN, Decision tree, Random forest, Extra trees, GBDT, XGBoost, ALMA, (Aggressive) ROMMA, (Bounded) Online gradient descent, (Budgeted online) Passive aggressive, RLS, (Selective-sampling) Second order perceptron, AROW, NAROW, Confidence weighted, CELLIP, IELLIP, Normal herd, Stoptron, (Kernelized) Pegasos, MIRA, Forgetron, Projectron, Projectron++, Banditron, Ballseptron, (Multiclass) BSGD, ILK, SILK, (Multinomial) Logistic regression, (Multinomial) Probit, SVM, Gaussian process, HMM, CRF, Bayesian Network, LVQ, (Average / Multiclass / Voted / Kernelized / Selective-sampling / Margin / Shifting / Budget / Tighter / Tightest) Perceptron, PAUM, RBP, ADALINE, MADALINE, MLP, ELM, LMNN |
126126
| semi-supervised classification | k-nearest neighbor, Radius neighbor, Label propagation, Label spreading, k-means, GMM, S3VM, Ladder network |
127127
| regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, LOESS, spline, Naive Bayes, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MARS, MLP, ELM, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median |

Diff for: js/model_selector.js

+1
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ const AIMethods = [
153153
{ value: 'clues', title: 'CLUES' },
154154
{ value: 'chameleon', title: 'CHAMELEON' },
155155
{ value: 'coll', title: 'COLL' },
156+
{ value: 'lmclus', title: 'LMCLUS' },
156157
{ value: 'plsa', title: 'PLSA' },
157158
{ value: 'latent_dirichlet_allocation', title: 'Latent Dirichlet Allocation' },
158159
{ value: 'nmf', title: 'NMF' },

Diff for: js/view/lmclus.js

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import LMCLUS from '../../lib/model/lmclus.js'
2+
import Controller from '../controller.js'
3+
4+
/**
 * Sets up the LMCLUS view: registers the usage text and reference, creates the
 * parameter inputs, and wires the "Fit" button to a clustering run.
 *
 * @param {object} platform Platform object providing `setting.ml`, `trainInput` and `trainResult`
 */
export default function (platform) {
	platform.setting.ml.usage = 'Click and add data point. Then, click "Fit" button.'
	platform.setting.ml.reference = {
		author: 'R. Haralick, R. Harpaz',
		title: 'Linear manifold clustering in high dimensional spaces by stochastic search',
		year: 2007,
	}
	const ui = new Controller(platform)

	// Controls are created in this order: k, s, gamma, the Fit button, the cluster counter.
	const maxDimInput = ui.input.number({ label: ' k ', min: 1, max: 1000, value: 2 })
	const samplingInput = ui.input.number({ label: ' s ', min: 1, max: 1000, value: 1.5, step: 0.1 })
	const sensitivityInput = ui.input.number({ label: ' gamma ', min: 0, max: 1, value: 0.4, step: 0.01 })
	ui.input.button('Fit').on('click', runFit)
	const clusterCount = ui.text({ label: ' Clusters: ' })

	// Builds a fresh model from the current input values, fits it on the training
	// data and publishes the labels (shifted by one) and the number of clusters.
	function runFit() {
		const model = new LMCLUS(maxDimInput.value, samplingInput.value, sensitivityInput.value)

		model.fit(platform.trainInput)
		platform.trainResult = model.predict().map(label => label + 1)
		clusterCount.value = model.size
	}
}

Diff for: lib/model/lmclus.js

+228
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
import Matrix from '../util/matrix.js'
2+
3+
/**
 * Linear manifold clustering
 */
export default class LMCLUS {
	// Linear manifold clustering in high dimensional spaces by stochastic search
	// https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=76925699b88b9e3c599269f214b0b50fb02bb1f6
	/**
	 * @param {number} k Max LM dim
	 * @param {number} s Sampling level
	 * @param {number} gamma Sensitivity threshold
	 */
	constructor(k, s, gamma) {
		this._k = k
		this._s = s
		this._gamma = gamma
		// Start with no clusters so that `size` and `predict` work before `fit` is called.
		this._c = []
		this._dims = []
	}

	/**
	 * Number of clusters.
	 * @type {number}
	 */
	get size() {
		return this._c.length
	}

	/**
	 * Fit model
	 *
	 * Repeatedly takes all still-unlabeled points, narrows them down by the best
	 * separation found for manifold dimensions 1..k, and stores the surviving
	 * indices as one cluster. Stops when every point is labeled.
	 * @param {Array<Array<number>>} datas Training data
	 */
	fit(datas) {
		this._c = []
		this._dims = []
		const labeled = Array(datas.length).fill(false)
		while (labeled.some(v => !v)) {
			let ddIndexes = []
			for (let i = 0; i < labeled.length; i++) {
				if (!labeled[i]) {
					ddIndexes.push(i)
				}
			}
			let dd = ddIndexes.map(i => datas[i])
			let lmDim = 1
			for (let k = 1; k <= this._k; k++) {
				const [g, tau, phi, beta] = this._findSeparation(dd, k, this._s)
				// Negated comparison so that a NaN goodness also stops the search.
				if (!(g > this._gamma)) {
					break
				}
				const newdd = []
				const newddidx = []
				for (let j = 0; j < dd.length; j++) {
					if (this._manifoldDistance(dd[j], phi, beta) < tau) {
						newdd.push(dd[j])
						newddidx.push(ddIndexes[j])
					}
				}
				// A separation that selects nothing cannot form a cluster. Keep the
				// previous (non-empty) selection so every pass of the outer loop labels
				// at least one point and the loop is guaranteed to terminate.
				if (newdd.length === 0) {
					break
				}
				dd = newdd
				lmDim = k
				ddIndexes = newddidx
			}

			this._c.push(ddIndexes)
			this._dims.push(lmDim)
			for (const idx of ddIndexes) {
				labeled[idx] = true
			}
		}
	}

	/**
	 * Draws distinct indices from 0..n-1 without replacement.
	 *
	 * Each draw i is first taken from the shrinking range [0, n - i); the second
	 * pass then shifts later draws upwards past the earlier ones so that all
	 * returned indices are distinct and lie in [0, n).
	 * @param {number} n Population size
	 * @param {number} k Requested number of indices (clamped to n)
	 * @returns {number[]} Sampled indices
	 */
	_sampleidx(n, k) {
		// More than n distinct indices do not exist; without the clamp the range
		// n - i becomes non-positive and out-of-range indices would be produced.
		const count = Math.max(0, Math.min(k, n))
		const idx = []
		for (let i = 0; i < count; i++) {
			idx.push(Math.floor(Math.random() * (n - i)))
		}
		for (let i = idx.length - 1; i >= 0; i--) {
			for (let j = idx.length - 1; j > i; j--) {
				if (idx[i] <= idx[j]) {
					idx[j]++
				}
			}
		}
		return idx
	}

	/**
	 * Distance-like score of a point relative to a candidate manifold, computed as
	 * |x - origin|^2 - |basis (x - origin)|^2.
	 *
	 * Shared by `fit` and `_findSeparation` so that the threshold learned from the
	 * histogram is applied to exactly the same quantity.
	 * NOTE(review): presumably `basis.dot` projects onto the manifold directions
	 * and `toScaler` unwraps a 1x1 matrix - confirm against the Matrix utility.
	 * @param {number[]} x Data point
	 * @param {number[]} origin Origin of the manifold
	 * @param {Matrix} basis Basis returned by `qrGramSchmidt`
	 * @returns {number} Score compared against the threshold
	 */
	_manifoldDistance(x, origin, basis) {
		const xd = new Matrix(
			x.length,
			1,
			x.map((v, i) => v - origin[i])
		)
		const bxd = basis.dot(xd)
		return xd.tDot(xd).toScaler() - bxd.tDot(bxd).toScaler()
	}

	/**
	 * Searches random candidate manifolds for the best separation of the data.
	 * @param {Array<Array<number>>} d Data points
	 * @param {number} k Manifold dimension; k + 1 points are sampled per candidate
	 * @param {number} s Sampling level
	 * @returns {[number, number, number[] | null, Matrix | null]} Best goodness, its threshold, origin and basis.
	 * The goodness is -Infinity (and origin/basis are null) when no candidate was evaluated.
	 */
	_findSeparation(d, k, s) {
		let gamma = -Infinity
		let tau = -Infinity
		let phi = null
		let beta = null
		// k + 1 points define a candidate and at least one further point is needed
		// to measure a distance. With fewer points no separation can be found.
		if (d.length <= k + 1) {
			return [gamma, tau, phi, beta]
		}
		const eps = 1.0e-8
		const c = 1
		const N = Math.min(Math.log(eps) / Math.log(1 - (1 / s) ** k), c * d.length)

		for (let i = 0; i < N; i++) {
			const idxes = this._sampleidx(d.length, k + 1)
			const m = idxes.map(idx => d[idx])
			const [b] = Matrix.fromArray(m).qrGramSchmidt()
			const sampled = new Set(idxes)
			const distances = []
			for (let j = 0; j < d.length; j++) {
				if (sampled.has(j)) {
					continue
				}
				// Measured relative to the sampled origin, exactly as `fit` does when
				// it applies the resulting threshold.
				distances.push(this._manifoldDistance(d[j], m[0], b))
			}

			const [hist, ranges] = this._makeHistogram(distances)
			const [t, g] = this._findMinimumErrorThreshold(hist, ranges)
			if (g > gamma) {
				gamma = g
				tau = t
				phi = m[0]
				beta = b
			}
		}
		return [gamma, tau, phi, beta]
	}

	/**
	 * Builds an equal-width histogram of the given values.
	 *
	 * The bin width is std * cbrt(24 * sqrt(pi) / n).
	 * @param {number[]} d Values
	 * @returns {[number[], number[]]} Bin counts and bin edges (one more edge than bins).
	 * Empty input yields two empty arrays; values without spread yield a single bin.
	 */
	_makeHistogram(d) {
		if (d.length === 0) {
			return [[], []]
		}
		let max = -Infinity
		let min = Infinity
		let sum = 0
		for (const v of d) {
			max = Math.max(max, v)
			min = Math.min(min, v)
			sum += v
		}
		const mean = sum / d.length
		let vari = 0
		for (const v of d) {
			vari += (mean - v) ** 2
		}
		vari /= d.length
		const std = Math.sqrt(vari)
		const step = std * Math.cbrt((24 * Math.sqrt(Math.PI)) / d.length)
		if (!(step > 0) || !Number.isFinite(step)) {
			// No usable bin width (all values equal, or non-finite values): put
			// everything into one bin instead of producing an empty histogram.
			return [[d.length], [min, max]]
		}

		// Edges are computed by multiplication rather than repeated addition, which
		// avoids accumulated rounding drift and cannot stall when step is tiny.
		const count = Math.max(1, Math.ceil((max - min) / step))
		const ranges = []
		for (let i = 0; i <= count; i++) {
			ranges.push(min + i * step)
		}

		const hist = Array(count).fill(0)
		for (const v of d) {
			// The maximum belongs to the last bin; the clamp guards against rounding.
			const bin = v === max ? count - 1 : Math.min(Math.floor((v - min) / step), count - 1)
			hist[bin]++
		}
		return [hist, ranges]
	}

	/**
	 * Finds the histogram split that minimises the minimum-error criterion
	 * J(t) = 1 + 2 (p1 log(sd1) + p2 log(sd2)) - 2 (p1 log(p1) + p2 log(p2)),
	 * where p and sd are the count and standard deviation (in bin units) of the
	 * bins up to t and of the bins after t.
	 *
	 * Splits with an empty or zero-variance side are skipped: log(0) makes J equal
	 * to -Infinity there, which would otherwise always win (the split at t = 0 has
	 * a single-bin left side by construction) and turn the goodness into
	 * Infinity or NaN.
	 * @param {number[]} h Bin counts
	 * @param {number[]} r Bin edges
	 * @returns {[number, number]} Threshold (upper edge of the last bin on the low side) and goodness.
	 * `[Infinity, 0]` when no valid split exists.
	 */
	_findMinimumErrorThreshold(h, r) {
		let tau = -1
		let minj = Infinity
		let maxj = -Infinity
		let discriminability = 0
		for (let t = 0; t < h.length - 1; t++) {
			let p1 = 0
			let p2 = 0
			let m1 = 0
			let m2 = 0
			for (let i = 0; i < h.length; i++) {
				if (i <= t) {
					p1 += h[i]
					m1 += i * h[i]
				} else {
					p2 += h[i]
					m2 += i * h[i]
				}
			}
			m1 /= p1
			m2 /= p2
			let s1 = 0
			let s2 = 0
			for (let i = 0; i < h.length; i++) {
				if (i <= t) {
					s1 += (i - m1) ** 2 * h[i]
				} else {
					s2 += (i - m2) ** 2 * h[i]
				}
			}
			s1 /= p1
			s2 /= p2

			const j =
				1 +
				2 * (p1 * Math.log(Math.sqrt(s1)) + p2 * Math.log(Math.sqrt(s2))) -
				2 * (p1 * Math.log(p1) + p2 * Math.log(p2))
			// NaN (empty side) or -Infinity (zero variance): the criterion is undefined.
			if (!Number.isFinite(j)) {
				continue
			}
			if (j < minj) {
				minj = j
				tau = t
				discriminability = (m1 - m2) ** 2 / (s1 + s2)
			}
			maxj = Math.max(maxj, j)
		}
		if (tau < 0) {
			// Nothing separates the values: every value is on the low side, goodness 0.
			return [Infinity, 0]
		}
		const g = discriminability * (maxj - minj)
		return [r[tau + 1], g]
	}

	/**
	 * Returns predicted categories.
	 * @returns {number[]} Predicted values
	 */
	predict() {
		const pred = []
		for (let k = 0; k < this._c.length; k++) {
			for (const idx of this._c[k]) {
				pred[idx] = k
			}
		}
		return pred
	}
}

Diff for: tests/gui/view/lmclus.test.js

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import { getPage } from '../helper/browser'
2+
3+
// GUI checks for the LMCLUS view: default parameter values and a basic fit run.
describe('clustering', () => {
	/** @type {Awaited<ReturnType<getPage>>} */
	let page

	// Opens a fresh page and selects the clustering task with the LMCLUS model.
	beforeEach(async () => {
		page = await getPage()
		const taskSelect = await page.waitForSelector('#ml_selector dl:first-child dd:nth-child(5) select')
		await taskSelect.selectOption('CT')
		const modelSelect = await page.waitForSelector('#ml_selector .model_selection #mlDisp')
		await modelSelect.selectOption('lmclus')
	})

	afterEach(async () => {
		await page?.close()
	})

	test('initialize', async () => {
		const menu = await page.waitForSelector('#ml_selector #method_menu')
		const controls = await menu.waitForSelector('.buttons')

		// Inputs in order: k, s, gamma - each paired with its expected default value.
		const expectedDefaults = [
			['input:nth-of-type(1)', '2'],
			['input:nth-of-type(2)', '1.5'],
			['input:nth-of-type(3)', '0.4'],
		]
		for (const [selector, expected] of expectedDefaults) {
			const input = await controls.waitForSelector(selector)
			await expect(input.getAttribute('value')).resolves.toBe(expected)
		}
	})

	test('learn', async () => {
		const menu = await page.waitForSelector('#ml_selector #method_menu')
		const controls = await menu.waitForSelector('.buttons')

		// The cluster counter is empty until a fit has been run.
		const clusterCount = await controls.waitForSelector('span:last-child', { state: 'attached' })
		await expect(clusterCount.textContent()).resolves.toBe('')

		const fit = await controls.waitForSelector('input[value=Fit]')
		await fit.evaluate(el => el.click())

		await expect(clusterCount.textContent()).resolves.toMatch(/^[0-9]+$/)
	})
})

Diff for: tests/lib/model/lmclus.test.js

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import { jest } from '@jest/globals'
2+
jest.retryTimes(5)
3+
4+
import Matrix from '../../../lib/util/matrix.js'
5+
import LMCLUS from '../../../lib/model/lmclus.js'
6+
7+
import { randIndex } from '../../../lib/evaluate/clustering.js'
8+
9+
// Fits LMCLUS on three groups of 5-dimensional random points and checks that
// the labeling agrees with the group membership (Rand index above 0.8).
test('clustering', () => {
	const model = new LMCLUS(5, 1.5, 0.4)
	const n = 50
	const firstTwoGroups = Matrix.concat(Matrix.randn(n, 5, 0, 0.1), Matrix.randn(n, 5, 5, 0.1))
	const thirdGroup = Matrix.randn(n, 5, [0, 5, 0, 5, 0], 0.1)
	const x = Matrix.concat(firstTwoGroups, thirdGroup).toArray()

	model.fit(x)
	const predicted = model.predict()
	expect(predicted).toHaveLength(x.length)

	// Ground truth: consecutive runs of n samples share a label.
	const truth = Array.from({ length: x.length }, (_, i) => Math.floor(i / n))
	expect(randIndex(predicted, truth)).toBeGreaterThan(0.8)
})

0 commit comments

Comments
 (0)