Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/_sidebar.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
+ [Linear](apis/linear/index.md)
+ [Linear Regression](apis/linear/linearRegression.md)

+ [Decomposition](apis/decomposition.md)
+ [PCA](apis/decomposition/pca.md)

+ [SVM](apis/svm/index.md)
+ [SVC](apis/svm/SVC.md)
+ [NuSVC](apis/svm/NuSVC.md)
Expand Down
3 changes: 3 additions & 0 deletions docs/apis/decomposition.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Decomposition

- [PCA](decomposition/pca.md)
23 changes: 23 additions & 0 deletions docs/apis/decomposition/pca.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
## PCA

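Linear dimensionality reduction that projects the data onto its leading principal components, i.e. the eigenvectors of the sample covariance matrix with the largest eigenvalues. This implementation finds them by power iteration with deflation rather than by an explicit Singular Value Decomposition of the data. The input data is centered, but not scaled, for each feature before the components are computed.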

```ts
constructor(nComponents: number | null = null)
```

### Methods
- `fit(X: number[][]): void`
- `transform(X: number[][]): number[][]`
- `fitTransform(X: number[][]): number[][]`
- `inverseTransform(X: number[][]): number[][]`
- `getComponents(): number[][]`
- `getMean(): number[]`
- `getExplainedVariance(): number[]`

### Example
```ts
const pca = new PCA(2);
pca.fit(X);
const T = pca.transform(X_test);
```
3 changes: 2 additions & 1 deletion scripts/gen_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ def run(script):
'gen_mean_shift.py',
'gen_dbscan.py',
'gen_optics.py',
'gen_svc.py'
'gen_svc.py',
'gen_pca.py'
]

for s in scripts:
Expand Down
21 changes: 21 additions & 0 deletions scripts/gen_pca.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import numpy as np
from sklearn.decomposition import PCA
import json, os

# Generate the reference fixture used to compare the TypeScript PCA against
# scikit-learn's PCA. The global NumPy RNG is seeded so that re-running the
# script reproduces the same data.
np.random.seed(0)

# 50 training samples with 3 features, reduced to 2 principal components.
train = np.random.randn(50, 3)
model = PCA(n_components=2)
model.fit(train)

# The held-out samples are drawn after fitting; keep this order so the random
# stream (and therefore the fixture) stays the same.
held_out = np.random.randn(10, 3)
projected = model.transform(held_out)

# Plain lists only, so the payload is JSON-serialisable.
fixture = {
    'X': train.tolist(),
    'X_test': held_out.tolist(),
    'expected': projected.tolist(),
    'components': model.components_.tolist(),
    'mean': model.mean_.tolist(),
    'explained_variance': model.explained_variance_.tolist()
}

# The output directory is relative to the current working directory.
os.makedirs('test_data', exist_ok=True)
with open('test_data/pca.json', 'w') as out:
    json.dump(fixture, out)
16 changes: 16 additions & 0 deletions src/decomposition/__test__/pca.compare.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import { PCA } from '../pca';
import fs from 'fs';
import path from 'path';

// Compares the projection of held-out samples against the values that
// scikit-learn produced for the same data (see scripts/gen_pca.py).
test('compare with sklearn', () => {
  const p = path.join(__dirname, '../../../test_data/pca.json');
  const data = JSON.parse(fs.readFileSync(p, 'utf8')) as {
    X: number[][];
    X_test: number[][];
    expected: number[][];
    components: number[][];
  };
  const pca = new PCA(2);
  pca.fit(data.X);
  const pred = pca.transform(data.X_test);

  // A principal axis is only defined up to its sign: v and -v describe the
  // same direction. Align each fitted component with the reference component
  // before comparing, so the test checks the subspace and not an arbitrary
  // sign choice made by either implementation.
  const signs = pca.getComponents().map((component, c) => {
    const reference = data.components[c];
    const alignment = component.reduce((sum, v, j) => sum + v * reference[j], 0);
    return alignment < 0 ? -1 : 1;
  });

  expect(pred.length).toBe(data.expected.length);
  for (let i = 0; i < pred.length; i++) {
    expect(pred[i].length).toBe(data.expected[i].length);
    for (let j = 0; j < pred[i].length; j++) {
      expect(signs[j] * pred[i][j]).toBeCloseTo(data.expected[i][j], 1);
    }
  }
});
21 changes: 21 additions & 0 deletions src/decomposition/__test__/pca.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import { PCA } from '../pca';

// Round-trip check: project five 3-feature samples onto two components and
// map them back, expecting the reconstruction to stay close to the input.
test('basic pca', () => {
  const samples: number[][] = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 1],
    [2, 4, 5],
    [3, 6, 0]
  ];
  const model = new PCA(2);

  const projected = model.fitTransform(samples);
  expect(projected.length).toBe(5);

  const restored = model.inverseTransform(projected);
  expect(restored.length).toBe(5);

  // Precision 0 only requires each value to round-trip to within 0.5.
  samples.forEach((row, r) => {
    row.forEach((value, c) => {
      expect(restored[r][c]).toBeCloseTo(value, 0);
    });
  });
});
1 change: 1 addition & 0 deletions src/decomposition/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export { PCA } from './pca';
142 changes: 142 additions & 0 deletions src/decomposition/pca.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/**
 * Principal component analysis (PCA).
 *
 * The model centers the data per feature (no scaling), builds the sample
 * covariance matrix and extracts its leading eigenvectors one at a time with
 * power iteration followed by deflation. Each eigenvector is a principal
 * component; the matching eigenvalue is the variance explained by it.
 *
 * Results are deterministic: the start vector of every power iteration comes
 * from a fixed-seed generator, and the sign of each component is chosen so
 * that its largest-magnitude entry is non-negative.
 */
export class PCA {
  /** Relative size below which a vector is treated as carrying no usable direction. */
  private static readonly REL_TOL = 1e-8;

  private nComponents: number | null;
  private components: number[][];
  private mean: number[];
  private explainedVariance: number[];

  /**
   * @param nComponents Number of components to keep. `null` keeps one
   *   component per feature; larger values are capped at the feature count.
   */
  constructor(nComponents: number | null = null) {
    this.nComponents = nComponents;
    this.components = [];
    this.mean = [];
    this.explainedVariance = [];
  }

  /** Dot product of two vectors; iterates over the length of `a`. */
  private static dot(a: number[], b: number[]): number {
    let s = 0;
    for (let i = 0; i < a.length; i++) {
      s += a[i] * b[i];
    }
    return s;
  }

  /** Matrix-vector product `A * v`. */
  private static matVecMul(A: number[][], v: number[]): number[] {
    return A.map(row => PCA.dot(row, v));
  }

  /** Outer product `v1 * v2^T`. */
  private static outer(v1: number[], v2: number[]): number[][] {
    return v1.map(a => v2.map(b => a * b));
  }

  /**
   * Scales `v` to unit Euclidean length. A vector whose norm is zero or not
   * finite is returned unchanged (as a copy) instead of being turned into NaNs.
   */
  private static normalize(v: number[]): number[] {
    const norm = Math.sqrt(PCA.dot(v, v));
    if (!(norm > 0) || !Number.isFinite(norm)) {
      return v.slice();
    }
    return v.map(x => x / norm);
  }

  /** Removes from `v` its projections onto the (orthonormal) vectors in `basis`. */
  private static orthogonalize(v: number[], basis: number[][]): number[] {
    const out = v.slice();
    basis.forEach(b => {
      const proj = PCA.dot(out, b);
      for (let i = 0; i < out.length; i++) {
        out[i] -= proj * b[i];
      }
    });
    return out;
  }

  /** Flips `v` if needed so that its largest-magnitude entry is non-negative. */
  private static canonicalSign(v: number[]): number[] {
    let pivot = 0;
    for (let i = 1; i < v.length; i++) {
      if (Math.abs(v[i]) > Math.abs(v[pivot])) {
        pivot = i;
      }
    }
    return v.length > 0 && v[pivot] < 0 ? v.map(x => -x) : v;
  }

  /**
   * Builds a reproducible unit start vector of length `n` that is orthogonal
   * to `basis`. Values come from a small linear congruential generator whose
   * seed depends on `basis.length`, so successive components start from
   * different directions.
   */
  private static startVector(n: number, basis: number[][]): number[] {
    // All intermediate products stay below 2^53, so the arithmetic is exact.
    let state = (2654435769 + basis.length * 974711) % 4294967296;
    const candidate: number[] = [];
    for (let i = 0; i < n; i++) {
      state = (state * 1664525 + 1013904223) % 4294967296;
      candidate.push(state / 4294967296 - 0.5);
    }
    const candidateNorm = Math.sqrt(PCA.dot(candidate, candidate));
    const residual = PCA.orthogonalize(candidate, basis);
    const residualNorm = Math.sqrt(PCA.dot(residual, residual));
    if (residualNorm > candidateNorm * PCA.REL_TOL) {
      return residual.map(x => x / residualNorm);
    }
    // The candidate lies (numerically) inside span(basis): fall back to the
    // coordinate axis that sticks out of that span the most.
    let best = residual;
    let bestNorm = 0;
    for (let axis = 0; axis < n; axis++) {
      const unit = new Array<number>(n).fill(0);
      unit[axis] = 1;
      const r = PCA.orthogonalize(unit, basis);
      const rNorm = Math.sqrt(PCA.dot(r, r));
      if (rNorm > bestNorm) {
        best = r;
        bestNorm = rNorm;
      }
    }
    return PCA.normalize(best);
  }

  /**
   * Estimates the dominant eigenpair of the symmetric matrix `A` by power
   * iteration, restricted to the orthogonal complement of `basis`.
   *
   * @param A Square symmetric matrix.
   * @param iter Maximum number of iterations.
   * @param basis Orthonormal vectors (previously found components) that the
   *   result must stay orthogonal to.
   * @returns The Rayleigh quotient `value` and the unit `vector`.
   */
  private static powerIteration(
    A: number[][],
    iter: number = 100,
    basis: number[][] = []
  ): { value: number; vector: number[] } {
    let v = PCA.startVector(A.length, basis);
    for (let i = 0; i < iter; i++) {
      const raw = PCA.matVecMul(A, v);
      const rawNorm = Math.sqrt(PCA.dot(raw, raw));
      const next = PCA.orthogonalize(raw, basis);
      const nextNorm = Math.sqrt(PCA.dot(next, next));
      // Stop when A has nothing left in the remaining subspace: either A*v is
      // zero, or what survives orthogonalization is only rounding noise.
      // Normalizing such a vector would produce NaNs or a duplicate component.
      if (!(nextNorm > rawNorm * PCA.REL_TOL) || !Number.isFinite(nextNorm)) {
        break;
      }
      v = next.map(x => x / nextNorm);
    }
    // Second orthogonalization pass removes drift accumulated by rounding.
    v = PCA.canonicalSign(PCA.normalize(PCA.orthogonalize(v, basis)));
    const value = PCA.dot(v, PCA.matVecMul(A, v));
    return { value, vector: v };
  }

  /** Returns a row-wise copy of `A`. */
  private static cloneMatrix(A: number[][]): number[][] {
    return A.map(r => r.slice());
  }

  /** Throws unless every row of `X` is an array with exactly `width` entries. */
  private static validateRows(X: number[][], width: number, caller: string): void {
    if (!Array.isArray(X)) {
      throw new Error(`PCA.${caller}: expected a 2-D array of numbers`);
    }
    for (let r = 0; r < X.length; r++) {
      const row = X[r];
      if (!Array.isArray(row) || row.length !== width) {
        throw new Error(`PCA.${caller}: row ${r} must be an array of ${width} numbers`);
      }
    }
  }

  /**
   * Learns the feature means, principal components and explained variances.
   *
   * @param X Samples as rows, features as columns; must be non-empty and
   *   rectangular, and contain only finite numbers.
   * @throws Error if `X` is empty, ragged or contains non-finite values.
   */
  public fit(X: number[][]): void {
    if (!Array.isArray(X) || X.length === 0 || !Array.isArray(X[0]) || X[0].length === 0) {
      throw new Error('PCA.fit: X must be a non-empty 2-D array with at least one feature');
    }
    const nSamples = X.length;
    const nFeatures = X[0].length;
    PCA.validateRows(X, nFeatures, 'fit');

    const mean = new Array<number>(nFeatures).fill(0);
    for (let i = 0; i < nSamples; i++) {
      for (let j = 0; j < nFeatures; j++) {
        const value = X[i][j];
        if (!Number.isFinite(value)) {
          throw new Error(`PCA.fit: non-finite value at row ${i}, column ${j}`);
        }
        mean[j] += value;
      }
    }
    for (let j = 0; j < nFeatures; j++) {
      mean[j] /= nSamples;
    }

    const Xc = X.map(row => row.map((v, j) => v - mean[j]));
    const cov: number[][] = Array.from({ length: nFeatures }, () =>
      new Array<number>(nFeatures).fill(0)
    );
    for (let i = 0; i < nSamples; i++) {
      for (let j = 0; j < nFeatures; j++) {
        for (let k = 0; k < nFeatures; k++) {
          cov[j][k] += Xc[i][j] * Xc[i][k];
        }
      }
    }
    // Unbiased estimator (n - 1). A single sample has no spread, so it is
    // treated as zero variance instead of dividing by zero.
    const denom = Math.max(nSamples - 1, 1);
    for (let j = 0; j < nFeatures; j++) {
      for (let k = 0; k < nFeatures; k++) {
        cov[j][k] /= denom;
      }
    }

    const k = this.nComponents === null ? nFeatures : Math.min(this.nComponents, nFeatures);
    const A = PCA.cloneMatrix(cov);
    const components: number[][] = [];
    const explainedVariance: number[] = [];
    for (let c = 0; c < k; c++) {
      const { value, vector } = PCA.powerIteration(A, 200, components);
      components.push(vector.slice());
      // A covariance matrix has no negative eigenvalues; clamp rounding noise.
      explainedVariance.push(Math.max(value, 0));
      // Deflate: remove the found eigenpair so the next run finds the next one.
      const outer = PCA.outer(vector, vector);
      for (let i = 0; i < nFeatures; i++) {
        for (let j = 0; j < nFeatures; j++) {
          A[i][j] -= value * outer[i][j];
        }
      }
    }

    // Commit only after everything succeeded, so a failed fit keeps the old model.
    this.mean = mean;
    this.components = components;
    this.explainedVariance = explainedVariance;
  }

  /**
   * Projects samples onto the fitted components.
   *
   * @param X Samples as rows with the same number of features used in `fit`.
   * @returns One row per sample with one coordinate per component.
   * @throws Error if the model is not fitted or a row has the wrong width.
   */
  public transform(X: number[][]): number[][] {
    if (this.mean.length === 0) {
      throw new Error('PCA.transform: model has not been fitted; call fit() first');
    }
    PCA.validateRows(X, this.mean.length, 'transform');
    const Xc = X.map(row => row.map((v, j) => v - this.mean[j]));
    // Components are stored one per row, so each coordinate is a dot product.
    return Xc.map(row => this.components.map(comp => PCA.dot(row, comp)));
  }

  /** Fits the model on `X` and returns the projection of `X`. */
  public fitTransform(X: number[][]): number[][] {
    this.fit(X);
    return this.transform(X);
  }

  /**
   * Maps projected samples back to the original feature space.
   *
   * @param X Rows with one coordinate per fitted component.
   * @returns One row per sample with one value per original feature.
   * @throws Error if the model is not fitted or a row has the wrong width.
   */
  public inverseTransform(X: number[][]): number[][] {
    if (this.mean.length === 0) {
      throw new Error('PCA.inverseTransform: model has not been fitted; call fit() first');
    }
    PCA.validateRows(X, this.components.length, 'inverseTransform');
    return X.map(row => {
      const orig = this.mean.slice();
      for (let i = 0; i < this.components.length; i++) {
        for (let j = 0; j < orig.length; j++) {
          orig[j] += this.components[i][j] * row[i];
        }
      }
      return orig;
    });
  }

  /** Returns a copy of the components, one unit vector per row. */
  public getComponents(): number[][] {
    return PCA.cloneMatrix(this.components);
  }

  /** Returns a copy of the per-feature means learned by `fit`. */
  public getMean(): number[] {
    return this.mean.slice();
  }

  /** Returns a copy of the variance explained by each component. */
  public getExplainedVariance(): number[] {
    return this.explainedVariance.slice();
  }
}
4 changes: 3 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import * as KMath from './math';
import * as Tree from './tree/index';
import * as Linear from './linear';
import * as SVM from './svm';
import * as Decomposition from './decomposition';

export {
Tree,
Expand All @@ -15,5 +16,6 @@ export {
Algebra,
KMath,
Linear,
SVM
SVM,
Decomposition
}