Skip to content

Commit eac3203

Browse files
feat: add bagging and random forest ensembles
1 parent 1d59388 commit eac3203

21 files changed

Lines changed: 624 additions & 8 deletions

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,12 @@ test_data/*
116116
!test_data/complement_nb.json
117117
!test_data/elastic_net.json
118118
!test_data/gaussian_nb.json
119+
!test_data/bagging_classifier.json
119120
!test_data/kneighbors_regressor.json
120121
!test_data/multinomial_nb.json
121122
!test_data/nearest_centroid.json
123+
!test_data/random_forest_classifier.json
124+
!test_data/random_forest_regressor.json
122125
!test_data/radius_neighbors_classifier.json
123126
!test_data/radius_neighbors_regressor.json
124127
!test_data/ridge_classifier.json

scripts/gen_all.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,13 @@ def run(script):
3131
'gen_multinomial_nb.py',
3232
'gen_complement_nb.py',
3333
'gen_svc.py',
34+
'gen_bagging_classifier.py',
3435
'gen_kneighbors_regressor.py',
3536
'gen_radius_neighbors_classifier.py',
3637
'gen_radius_neighbors_regressor.py',
3738
'gen_nearest_centroid.py',
39+
'gen_random_forest_classifier.py',
40+
'gen_random_forest_regressor.py',
3841
'gen_bernoulli_rbm.py',
3942
'gen_pca.py',
4043
'gen_truncated_svd.py',

scripts/gen_bagging_classifier.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from sklearn.datasets import make_classification
2+
from sklearn.ensemble import BaggingClassifier
3+
from sklearn.tree import DecisionTreeClassifier
4+
import json, os
5+
6+
X, y = make_classification(
7+
n_samples=120,
8+
n_features=5,
9+
n_informative=4,
10+
n_redundant=0,
11+
n_clusters_per_class=1,
12+
class_sep=1.5,
13+
random_state=0,
14+
)
15+
trainX = X[:90]
16+
trainY = y[:90]
17+
testX = X[90:]
18+
19+
base = DecisionTreeClassifier(criterion='gini', random_state=0)
20+
try:
21+
clf = BaggingClassifier(estimator=base, n_estimators=15, random_state=0)
22+
except TypeError:
23+
clf = BaggingClassifier(base_estimator=base, n_estimators=15, random_state=0)
24+
clf.fit(trainX, trainY)
25+
pred = clf.predict(testX)
26+
27+
os.makedirs('test_data', exist_ok=True)
28+
with open('test_data/bagging_classifier.json', 'w') as f:
29+
json.dump({
30+
'trainX': trainX.tolist(),
31+
'trainY': trainY.tolist(),
32+
'testX': testX.tolist(),
33+
'expected': pred.tolist()
34+
}, f)
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
from sklearn.datasets import make_classification
2+
from sklearn.ensemble import RandomForestClassifier
3+
import json, os
4+
5+
X, y = make_classification(
6+
n_samples=120,
7+
n_features=5,
8+
n_informative=4,
9+
n_redundant=0,
10+
n_clusters_per_class=1,
11+
class_sep=1.5,
12+
random_state=0,
13+
)
14+
trainX = X[:90]
15+
trainY = y[:90]
16+
testX = X[90:]
17+
18+
clf = RandomForestClassifier(n_estimators=25, random_state=0, max_features='sqrt')
19+
clf.fit(trainX, trainY)
20+
pred = clf.predict(testX)
21+
22+
os.makedirs('test_data', exist_ok=True)
23+
with open('test_data/random_forest_classifier.json', 'w') as f:
24+
json.dump({
25+
'trainX': trainX.tolist(),
26+
'trainY': trainY.tolist(),
27+
'testX': testX.tolist(),
28+
'expected': pred.tolist()
29+
}, f)
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
from sklearn.ensemble import RandomForestRegressor
2+
import numpy as np
3+
import json, os
4+
5+
rng = np.random.RandomState(0)
6+
X = rng.uniform(-3, 3, size=(120, 1))
7+
y = 4 * X[:, 0] ** 2 - 2 * X[:, 0] + 1
8+
trainX = X[:90]
9+
trainY = y[:90]
10+
testX = X[90:]
11+
12+
reg = RandomForestRegressor(n_estimators=25, random_state=0, max_features=1.0)
13+
reg.fit(trainX, trainY)
14+
pred = reg.predict(testX)
15+
16+
os.makedirs('test_data', exist_ok=True)
17+
with open('test_data/random_forest_regressor.json', 'w') as f:
18+
json.dump({
19+
'trainX': trainX.tolist(),
20+
'trainY': trainY.tolist(),
21+
'testX': testX.tolist(),
22+
'expected': pred.tolist()
23+
}, f)
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import fs from 'fs';
2+
import path from 'path';
3+
import { BaggingClassifier } from '../baggingClassifier';
4+
5+
test('BaggingClassifier compare with sklearn', () => {
6+
const p = path.join(__dirname, '../../../test_data/bagging_classifier.json');
7+
const data = JSON.parse(fs.readFileSync(p, 'utf8'));
8+
const clf = new BaggingClassifier({ nEstimators: 15, randomState: 0, criterion: 'gini' });
9+
clf.fit(data.trainX, data.trainY);
10+
const pred = clf.predict(data.testX);
11+
let correct = 0;
12+
for (let i = 0; i < pred.length; i++) {
13+
if (pred[i] === data.expected[i]) {
14+
correct++;
15+
}
16+
}
17+
expect(correct / pred.length).toBeGreaterThanOrEqual(0.8);
18+
});
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import { BaggingClassifier } from '../baggingClassifier';
2+
3+
test('BaggingClassifier initializes', () => {
4+
expect(new BaggingClassifier()).toBeDefined();
5+
});
6+
7+
test('BaggingClassifier fits and predicts a separable dataset', () => {
8+
const X = [[0], [1], [2], [10], [11], [12]];
9+
const y = [0, 0, 0, 1, 1, 1];
10+
11+
const clf = new BaggingClassifier({ nEstimators: 7, randomState: 42 });
12+
clf.fit(X, y);
13+
14+
expect(clf.predict([[0.2], [11.5]])).toEqual([0, 1]);
15+
});
16+
17+
test('BaggingClassifier validates fit lifecycle', () => {
18+
const clf = new BaggingClassifier();
19+
expect(() => clf.predict([[0]])).toThrow('model is not fitted');
20+
});
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import fs from 'fs';
2+
import path from 'path';
3+
import { RandomForestClassifier } from '../randomForestClassifier';
4+
5+
test('RandomForestClassifier compare with sklearn', () => {
6+
const p = path.join(__dirname, '../../../test_data/random_forest_classifier.json');
7+
const data = JSON.parse(fs.readFileSync(p, 'utf8'));
8+
const clf = new RandomForestClassifier({ nEstimators: 25, randomState: 0, maxFeatures: 'sqrt' });
9+
clf.fit(data.trainX, data.trainY);
10+
const pred = clf.predict(data.testX);
11+
let correct = 0;
12+
for (let i = 0; i < pred.length; i++) {
13+
if (pred[i] === data.expected[i]) {
14+
correct++;
15+
}
16+
}
17+
expect(correct / pred.length).toBeGreaterThanOrEqual(0.8);
18+
});
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import { RandomForestClassifier } from '../randomForestClassifier';
2+
3+
test('RandomForestClassifier initializes', () => {
4+
expect(new RandomForestClassifier()).toBeDefined();
5+
});
6+
7+
test('RandomForestClassifier fits and predicts a separable dataset', () => {
8+
const X = [[0], [1], [2], [10], [11], [12]];
9+
const y = [0, 0, 0, 1, 1, 1];
10+
11+
const clf = new RandomForestClassifier({ nEstimators: 15, randomState: 42 });
12+
clf.fit(X, y);
13+
14+
expect(clf.predict([[0.2], [11.5]])).toEqual([0, 1]);
15+
});
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import fs from 'fs';
2+
import path from 'path';
3+
import { RandomForestRegressor } from '../randomForestRegressor';
4+
5+
test('RandomForestRegressor compare with sklearn', () => {
6+
const p = path.join(__dirname, '../../../test_data/random_forest_regressor.json');
7+
const data = JSON.parse(fs.readFileSync(p, 'utf8'));
8+
const reg = new RandomForestRegressor({ nEstimators: 25, randomState: 0, maxFeatures: 1 });
9+
reg.fit(data.trainX, data.trainY);
10+
const pred = reg.predict(data.testX);
11+
let mse = 0;
12+
for (let i = 0; i < pred.length; i++) {
13+
mse += (pred[i] - data.expected[i]) ** 2;
14+
}
15+
mse /= pred.length;
16+
expect(mse).toBeLessThan(5000);
17+
});

0 commit comments

Comments
 (0)