Commit e096a57

Add MARS (#897)
1 parent 2bea301 commit e096a57

6 files changed: 219 additions, 1 deletion

README.md (+1, -1)

@@ -124,7 +124,7 @@ for (let i = 0; i < n; i++) {
| clustering | (Soft / Kernel / Genetic / Weighted / Bisecting) k-means, k-means++, k-medois, k-medians, x-means, G-means, LBG, ISODATA, Fuzzy c-means, Possibilistic c-means, k-harmonic means, MacQueen, Hartigan-Wong, Elkan, Hamelry, Drake, Yinyang, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mutual kNN, Mean shift, DBSCAN, OPTICS, DTSCAN, HDBSCAN, DENCLUE, DBCLASD, BRIDGE, CLUES, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, C2P, PLSA, Latent dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, (Growing) SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, CHAMELEON, COLL, CLIQUE, PROCLUS, ORCLUS, FINDIT, DOC, FastDOC, DiSH, NMF, Autoencoder |
| classification | (Fisher's) Linear discriminant, Quadratic discriminant, Mixture discriminant, Least squares, (Multiclass / Kernel) Ridge, (Complement / Negation / Universal-set / Selective) Naive Bayes (gaussian), AODE, (Fuzzy / Weighted) k-nearest neighbor, Radius neighbor, Nearest centroid, ENN, ENaN, NNBCA, ADAMENN, DANN, IKNN, Decision tree, Random forest, Extra trees, GBDT, XGBoost, ALMA, (Aggressive) ROMMA, (Bounded) Online gradient descent, (Budgeted online) Passive aggressive, RLS, (Selective-sampling) Second order perceptron, AROW, NAROW, Confidence weighted, CELLIP, IELLIP, Normal herd, Stoptron, (Kernelized) Pegasos, MIRA, Forgetron, Projectron, Projectron++, Banditron, Ballseptron, (Multiclass) BSGD, ILK, SILK, (Multinomial) Logistic regression, (Multinomial) Probit, SVM, Gaussian process, HMM, CRF, Bayesian Network, LVQ, (Average / Multiclass / Voted / Kernelized / Selective-sampling / Margin / Shifting / Budget / Tighter / Tightest) Perceptron, PAUM, RBP, ADALINE, MADALINE, MLP, ELM, LMNN |
| semi-supervised classification | k-nearest neighbor, Radius neighbor, Label propagation, Label spreading, k-means, GMM, S3VM, Ladder network |
-| regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, LOESS, spline, Naive Bayes, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MLP, ELM, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median |
+| regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, LOESS, spline, Naive Bayes, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MARS, MLP, ELM, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median |
| interpolation | Nearest neighbor, IDW, (Spherical) Linear, Brahmagupta, Logarithmic, Cosine, (Inverse) Smoothstep, Cubic, (Centripetal) Catmull-Rom, Hermit, Polynomial, Lagrange, Trigonometric, Spline, RBF Network, Akima, Natural neighbor, Delaunay |
| learning to rank | Ordered logistic, Ordered probit, PRank, OAP-BPM, RankNet |
| anomaly detection | Percentile, MAD, Tukey's fences, Grubbs's test, Thompson test, Tietjen Moore test, Generalized ESD, Hotelling, MT, MCD, k-nearest neighbor, LOF, COF, ODIN, LDOF, INFLO, LOCI, LoOP, RDF, LDF, KDEOS, RDOS, NOF, RKOF, ABOD, PCA, OCSVM, KDE, GMM, Isolation forest, Autoencoder, GAN |

js/model_selector.js (+1)

@@ -319,6 +319,7 @@ const AIMethods = [
	{ value: 'rbf', title: 'RBF Network' },
	{ value: 'rvm', title: 'RVM' },
	{ value: 'svr', title: 'Support vector regression' },
+	{ value: 'mars', title: 'MARS' },
	{ value: 'mlp', title: 'Multi-layer perceptron' },
	{ value: 'elm', title: 'Extreme learning machine' },
	{ value: 'neuralnetwork', title: 'Neuralnetwork' },

js/view/mars.js (+23)

@@ -0,0 +1,23 @@
import MARS from '../../lib/model/mars.js'
import Controller from '../controller.js'

export default function (platform) {
	platform.setting.ml.usage = 'Click and add data point. Next, click "Fit" button.'
	platform.setting.ml.reference = {
		author: 'J. H. Friedman',
		title: 'MULTIVARIATE ADAPTIVE REGRESSION SPLINES',
		year: 1990,
	}
	const controller = new Controller(platform)
	const fitModel = () => {
		const model = new MARS(mmax.value)
		model.fit(platform.trainInput, platform.trainOutput)

		const pred = model.predict(platform.testInput(2))
		platform.testResult(pred)
	}

	const mmax = controller.input.number({ label: 'M max', max: 100, min: 1, value: 5 })

	controller.input.button('Fit').on('click', fitModel)
}

lib/model/mars.js (+139)

@@ -0,0 +1,139 @@
import Matrix from '../util/matrix.js'

class Term {
	constructor(s = [], t = [], v = []) {
		this._s = s
		this._t = t
		this._v = v
	}

	prod(s, t, v) {
		return new Term(this._s.concat(s), this._t.concat(t), this._v.concat(v))
	}

	calc(x) {
		let val = 1
		for (let i = 0; i < this._s.length; i++) {
			val *= Math.max(0, this._s[i] * (x[this._v[i]] - this._t[i]))
		}
		return val
	}
}

/**
 * Multivariate Adaptive Regression Splines
 */
export default class MultivariateAdaptiveRegressionSplines {
	// Multivariate Adaptive Regression Splines
	// https://www.slac.stanford.edu/pubs/slacpubs/4750/slac-pub-4960.pdf
	// https://en.wikipedia.org/wiki/Multivariate_adaptive_regression_spline
	/**
	 * @param {number} mmax Maximum number of terms
	 */
	constructor(mmax) {
		this._mmax = mmax
		this._b = [new Term()]
		this._a = null
	}

	/**
	 * Fit model.
	 * @param {Array<Array<number>>} x Training data
	 * @param {Array<Array<number>>} y Target values
	 */
	fit(x, y) {
		const n = x.length
		const d = x[0].length
		y = Matrix.fromArray(y)

		let z = Matrix.ones(n, 1)
		let best_lof = Infinity
		let best_w = null
		while (this._b.length <= this._mmax) {
			let best_term = null
			let best_z = null
			for (let m = 0; m < this._b.length; m++) {
				for (let v = 0; v < d; v++) {
					for (let i = 0; i < n; i++) {
						if (this._b[m].calc(x[i]) === 0) continue
						const t = x[i][v]
						const termp = this._b[m].prod(1, t, v)
						const termm = this._b[m].prod(-1, t, v)
						const z1 = Matrix.resize(z, n, z.cols + 2)

						for (let j = 0; j < n; j++) {
							z1.set(j, z1.cols - 2, termp.calc(x[j]))
							z1.set(j, z1.cols - 1, termm.calc(x[j]))
						}

						const w = z1.tDot(z1).solve(z1.tDot(y))
						const yt = z1.dot(w)
						yt.sub(y)
						const e = yt.norm()
						if (e < best_lof) {
							best_term = { m, v, t }
							best_z = z1
							best_w = w
							best_lof = e
						}
					}
				}
			}

			this._b.push(
				this._b[best_term.m].prod(1, best_term.t, best_term.v),
				this._b[best_term.m].prod(-1, best_term.t, best_term.v)
			)
			z = best_z
			this._a = best_w
		}

		let best_w_b = this._b
		let best_k = z
		let best_k_b = this._b
		for (let i = this._b.length - 1; i >= 1; i--) {
			let b = Infinity
			const l = best_k
			const l_b = best_k_b
			for (let m = 1; m <= i; m++) {
				const z1 = l.copy()
				z1.remove(m, 1)
				const w = z1.tDot(z1).solve(z1.tDot(y))
				const yt = z1.dot(w)
				yt.sub(y)
				const e = yt.norm()

				if (e < b) {
					b = e
					best_k = z1
					best_k_b = l_b.concat()
					best_k_b.splice(m, 1)
				}
				if (e < best_lof) {
					best_lof = e
					best_w = w
					best_w_b = l_b.concat()
					best_w_b.splice(m, 1)
				}
			}
		}
		this._a = best_w
		this._b = best_w_b
	}

	/**
	 * Returns predicted values.
	 * @param {Array<Array<number>>} x Sample data
	 * @returns {Array<Array<number>>} Predicted values
	 */
	predict(x) {
		const n = x.length
		const z = Matrix.ones(n, this._b.length)
		for (let i = 0; i < n; i++) {
			for (let m = 0; m < this._b.length; m++) {
				z.set(i, m, this._b[m].calc(x[i]))
			}
		}
		return z.dot(this._a).toArray()
	}
}
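
For orientation, a sketch that is not part of the committed file: each Term above is a product of hinge functions of the form max(0, s * (x_v - t)), and predict() returns a least-squares-weighted sum of such terms (z.dot(this._a)). The arithmetic, with made-up signs and knots:

// Illustrative only: mirrors Term.calc for a two-factor term with
// s = [1, -1], t = [2, 3], v = [0, 1], i.e. max(0, x0 - 2) * max(0, 3 - x1).
const hinge = (s, x, t) => Math.max(0, s * (x - t))
const term = x => hinge(1, x[0], 2) * hinge(-1, x[1], 3)

console.log(term([4, 1])) // max(0, 2) * max(0, 2) = 4
console.log(term([1, 1])) // 0: the first hinge is zero below its knot at x0 = 2

// A fitted model then predicts y(x) ≈ a[0] * 1 + a[1] * term1(x) + a[2] * term2(x) + ...,
// with the weights a obtained by least squares in fit() above.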

tests/gui/view/mars.test.js (+38)

@@ -0,0 +1,38 @@
import { getPage } from '../helper/browser'

describe('regression', () => {
	/** @type {Awaited<ReturnType<getPage>>} */
	let page
	beforeEach(async () => {
		page = await getPage()
		const taskSelectBox = await page.waitForSelector('#ml_selector dl:first-child dd:nth-child(5) select')
		await taskSelectBox.selectOption('RG')
		const modelSelectBox = await page.waitForSelector('#ml_selector .model_selection #mlDisp')
		await modelSelectBox.selectOption('mars')
	})

	afterEach(async () => {
		await page?.close()
	})

	test('initialize', async () => {
		const methodMenu = await page.waitForSelector('#ml_selector #method_menu')
		const buttons = await methodMenu.waitForSelector('.buttons')

		const mmax = await buttons.waitForSelector('input:nth-of-type(1)')
		await expect(mmax.getAttribute('value')).resolves.toBe('5')
	})

	test('learn', async () => {
		const methodMenu = await page.waitForSelector('#ml_selector #method_menu')
		const buttons = await methodMenu.waitForSelector('.buttons')

		const methodFooter = await page.waitForSelector('#method_footer', { state: 'attached' })
		await expect(methodFooter.textContent()).resolves.toBe('')

		const fitButton = await buttons.waitForSelector('input[value=Fit]')
		await fitButton.evaluate(el => el.click())

		await expect(methodFooter.textContent()).resolves.toMatch(/^RMSE:[0-9.]+$/)
	})
})

tests/lib/model/mars.test.js (+17)

@@ -0,0 +1,17 @@
import Matrix from '../../../lib/util/matrix.js'
import MARS from '../../../lib/model/mars.js'

import { rmse } from '../../../lib/evaluate/regression.js'

test('fit', () => {
	const model = new MARS(20)
	const x = Matrix.randn(50, 2, 0, 5).toArray()
	const t = []
	for (let i = 0; i < x.length; i++) {
		t[i] = [x[i][0] + x[i][1] + (Math.random() - 0.5) / 2 + 5]
	}
	model.fit(x, t)
	const y = model.predict(x)
	const err = rmse(y, t)[0]
	expect(err).toBeLessThan(0.5)
})
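
A minimal standalone usage sketch of the new model, not part of the commit, assuming it sits alongside the test above so the relative import paths resolve the same way; the kinked toy target and the mmax value are illustrative:

import Matrix from '../../../lib/util/matrix.js'
import MARS from '../../../lib/model/mars.js'

// Toy 1-D data with a kink at x = 0; a single pair of hinge terms can represent it.
const x = Matrix.randn(100, 1, 0, 1).toArray()
const y = x.map(v => [2 * Math.max(0, v[0]) + 1])

const model = new MARS(10) // mmax: maximum number of terms (see the constructor JSDoc in lib/model/mars.js)
model.fit(x, y)
const pred = model.predict(x) // Array<Array<number>>, same shape as y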
