
Commit ee217d0

retraigo and load1n9 authored
feat: Revamp utilities + sentiment analysis example (#62)
* chore: bump deps (#60) (#61)
* update with tfidf
* fix classifier output size
* remove dropout
* updated model
* update tester and analyzer
* add commands
* remove log
* completely revamp utilities
* delete split

Co-authored-by: Dean Srebnik <[email protected]>
1 parent e905984 · commit ee217d0

29 files changed: +396 -444 lines

deno.jsonc

+5 -1

@@ -22,6 +22,8 @@
     "./utilities/image": "./packages/utilities/src/image/mod.ts",
     "./utilities/metrics": "./packages/utilities/src/metrics/mod.ts",
     "./utilities/encoding": "./packages/utilities/src/encoding/mod.ts",
+    "./utilities/mapper": "./packages/utilities/src/mapper/mod.ts",
+    "./utilities/transformer": "./packages/utilities/src/transformer/mod.ts",
     "./utilities/misc": "./packages/utilities/src/utils/mod.ts",
     // Tokenizers
     "./tokenizers": "./packages/tokenizers/mod.ts",
@@ -40,7 +42,9 @@
     "example:multiple-linear": "deno -A ./examples/multiple-linear/student.ts",
     "example:binary": "deno -A ./examples/classification/binary_iris.ts",
     "example:multiclass": "deno -A ./examples/classification/iris.ts",
-    "example:text-sentiment": "deno -A ./examples/sentiment-analysis/classifier.ts",
+    "example:sentiment-train": "deno -A ./examples/sentiment-analysis/classifier.ts",
+    "example:sentiment-test": "deno -A ./examples/sentiment-analysis/tester.ts",
+    "example:sentiment-try": "deno -A ./examples/sentiment-analysis/analyzer.ts",
     "example:text-spam": "deno -A ./examples/classification/spam.ts",
     "example:filters": "deno -A examples/filters/conv.ts ",
     "example:train": "deno -A examples/model/train.ts ",

examples/classification/spam.ts

+1 -1

@@ -94,7 +94,7 @@ net.train(
   // Train for 20 epochs
   20,
   2,
-  0.01
+  0.001
 );

 console.log(`training time: ${performance.now() - time}ms`);
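Note: the only change in spam.ts is the learning rate, 0.01 -> 0.001. As a toy, dependency-free illustration of the trade-off (this is not netsaur code, just plain gradient descent on f(w) = (w - 3)^2):

// Hypothetical standalone demo: why a 10x smaller learning rate
// trains more slowly but is less prone to overshooting.
function descend(rate: number, steps: number): number {
  let w = 0; // start away from the minimum at w = 3
  for (let i = 0; i < steps; i++) {
    w -= rate * 2 * (w - 3); // df/dw = 2(w - 3)
  }
  return w;
}

console.log(descend(0.01, 100)); // ~2.60: larger steps, faster progress
console.log(descend(0.001, 100)); // ~0.54: slower, but steadier on noisy real gradients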
examples/sentiment-analysis/analyzer.ts

+17 -33

@@ -1,70 +1,54 @@
 import { CPU, setupBackend, tensor } from "jsr:@denosaurs/[email protected]";
 import { Sequential } from "jsr:@denosaurs/[email protected]/core";

-import {
-    useSplit,
-    ClassificationReport,
-    MatrixLike,
-} from "jsr:@denosaurs/[email protected]/utilities";
+import type { MatrixLike } from "jsr:@denosaurs/[email protected]/utilities";

 import { CategoricalEncoder } from "jsr:@denosaurs/[email protected]/utilities/encoding";
 import {
-    CountVectorizer,
-    SplitTokenizer,
+  CountVectorizer,
+  SplitTokenizer,
+  TfIdfTransformer,
 } from "jsr:@denosaurs/[email protected]/utilities/text";

-import Mappings from "./mappings.json" with {type: "json"}
-import Vocab from "./vocab.json" with {type: "json"}
-
-
-console.time("Time Elapsed");
-
-console.log("\nImports loaded.");
-
+import Mappings from "./mappings.json" with { type: "json" };
+import Vocab from "./vocab.json" with { type: "json" };
+import Idf from "./tfidf.json" with { type: "json" };

 const vocab = new Map();

 for (const entry of Vocab) {
-    vocab.set(entry[0], entry[1])
+  vocab.set(entry[0], entry[1]);
 }

 const tokenizer = new SplitTokenizer({
-    skipWords: "english",
-    vocabulary: vocab,
-    standardize: { lowercase: true, stripNewlines: true },
+  skipWords: "english",
+  vocabulary: vocab,
+  standardize: { lowercase: true, stripNewlines: true },
 });

 const vectorizer = new CountVectorizer(tokenizer.vocabulary.size);
-
-console.log("\nX vectorized");
-console.timeLog("Time Elapsed");
+const transformer = new TfIdfTransformer({ idf: Float64Array.from(Idf) });

 const encoder = new CategoricalEncoder<string>();
 const mappings = new Map();

 for (const entry of Mappings) {
-    mappings.set(entry[0], entry[1])
+  mappings.set(entry[0], entry[1]);
 }

 encoder.mapping = mappings;

-console.log("\nCPU Backend Loading");
-console.timeLog("Time Elapsed");
-
 await setupBackend(CPU);

-console.log("\nCPU Backend Loaded");
-console.timeLog("Time Elapsed");
-
-const net = Sequential.loadFile("examples/sentiment-analysis/sentiment.st")
+const net = Sequential.loadFile("examples/sentiment-analysis/sentiment.st");

-const text = prompt("Text to analyze?") || "hello world"
+const text = prompt("Text to analyze?") || "hello world";

 const predYSoftmax = await net.predict(
-    tensor(vectorizer.transform(tokenizer.transform([text]), "f32"))
+  tensor(transformer.transform<"f32">(vectorizer.transform(tokenizer.transform([text]), "f32"))),
 );

 CategoricalEncoder.fromSoftmax<"f32">(predYSoftmax as MatrixLike<"f32">);
 const predY = encoder.untransform(predYSoftmax as MatrixLike<"f32">);

-console.log(`The sentiment predicted is ${predY[0]}`)
+console.log(`The sentiment predicted is ${predY[0]}`);
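Condensed, the analyzer's new inference path is the chain below. This sketch reuses only calls that appear in this diff; the vocab.json / mappings.json / tfidf.json artifacts are the ones classifier.ts now writes, and new Map(entries) stands in for the file's entry-by-entry loops:

import { CPU, setupBackend, tensor } from "jsr:@denosaurs/[email protected]";
import { Sequential } from "jsr:@denosaurs/[email protected]/core";
import type { MatrixLike } from "jsr:@denosaurs/[email protected]/utilities";
import { CategoricalEncoder } from "jsr:@denosaurs/[email protected]/utilities/encoding";
import {
  CountVectorizer,
  SplitTokenizer,
  TfIdfTransformer,
} from "jsr:@denosaurs/[email protected]/utilities/text";

import Mappings from "./mappings.json" with { type: "json" };
import Vocab from "./vocab.json" with { type: "json" };
import Idf from "./tfidf.json" with { type: "json" };

// Rebuild the fitted preprocessing state saved by classifier.ts.
const tokenizer = new SplitTokenizer({
  skipWords: "english",
  vocabulary: new Map(Vocab as unknown as [string, number][]),
  standardize: { lowercase: true, stripNewlines: true },
});
const vectorizer = new CountVectorizer(tokenizer.vocabulary.size);
const transformer = new TfIdfTransformer({ idf: Float64Array.from(Idf) });
const encoder = new CategoricalEncoder<string>();
encoder.mapping = new Map(Mappings as unknown as [string, number][]);

await setupBackend(CPU);
const net = Sequential.loadFile("examples/sentiment-analysis/sentiment.st");

// text -> tokens -> counts -> tf-idf -> tensor -> softmax -> label
const input = transformer.transform<"f32">(
  vectorizer.transform(tokenizer.transform(["what a great day"]), "f32"),
);
const out = await net.predict(tensor(input));
CategoricalEncoder.fromSoftmax<"f32">(out as MatrixLike<"f32">);
console.log(encoder.untransform(out as MatrixLike<"f32">)[0]);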

examples/sentiment-analysis/classifier.ts

+3 -6

@@ -2,13 +2,11 @@ import {
   AdamOptimizer,
   Cost,
   CPU,
-  Dropout1DLayer,
   Init,
   setupBackend,
   tensor,
 } from "jsr:@denosaurs/[email protected]";
 import { Sequential } from "jsr:@denosaurs/[email protected]/core";
-import { NadamOptimizer } from "jsr:@denosaurs/[email protected]/core/optimizers";
 import {
   DenseLayer,
   ReluLayer,
@@ -18,7 +16,7 @@ import {
 import {
   useSplit,
   ClassificationReport,
-  MatrixLike,
+  type MatrixLike,
 } from "jsr:@denosaurs/[email protected]/utilities";

 import { CategoricalEncoder } from "jsr:@denosaurs/[email protected]/utilities/encoding";
@@ -103,7 +101,7 @@ Deno.writeTextFileSync(
 );
 Deno.writeTextFileSync(
   "examples/sentiment-analysis/tfidf.json",
-  JSON.stringify(transformer.idf)
+  JSON.stringify(Array.from(transformer.idf as Float64Array))
 );

 console.log("\nCPU Backend Loading");
@@ -115,7 +113,7 @@ console.log("\nCPU Backend Loaded");
 console.timeLog("Time Elapsed");

 const net = new Sequential({
-  size: [4, vecX.nCols],
+  size: [4, tfidfX.nCols],
   layers: [
     DenseLayer({ size: [256], init: Init.Kaiming }),
     ReluLayer(),
@@ -127,7 +125,6 @@ const net = new Sequential({
     ReluLayer(),
     DenseLayer({ size: [16], init: Init.Kaiming }),
     ReluLayer(),
-    Dropout1DLayer({ probability: 0.5 }),
     DenseLayer({ size: [encoder.mapping.size], init: Init.Kaiming }),
     SoftmaxLayer(),
   ],
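The tfidf.json write above changes because JSON.stringify does not treat typed arrays as arrays; they serialize as index-keyed objects. A minimal, dependency-free check of the round-trip:

const idf = Float64Array.from([1.5, 2.25, 0.75]);

// A typed array stringifies as an object, not a JSON array:
console.log(JSON.stringify(idf)); // {"0":1.5,"1":2.25,"2":0.75}

// Converting with Array.from first yields a plain array, which the loaders
// can feed straight back into Float64Array.from(Idf):
const json = JSON.stringify(Array.from(idf)); // [1.5,2.25,0.75]
const restored = Float64Array.from(JSON.parse(json) as number[]);
console.log(restored); // Float64Array(3) [ 1.5, 2.25, 0.75 ]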
examples/sentiment-analysis/mappings.json

+1 -1

@@ -1 +1 @@
-[["empty",0],["sadness",1],["neutral",2],["worry",3],["surprise",4],["fun",5],["hate",6],["happiness",7],["enthusiasm",8],["love",9],["relief",10],["boredom",11],["anger",12]]
+[["empty",0],["sadness",1],["enthusiasm",2],["neutral",3],["worry",4],["surprise",5],["love",6],["fun",7],["hate",8],["happiness",9],["boredom",10],["relief",11],["anger",12]]
examples/sentiment-analysis/sentiment.st

-259 KB
Binary file not shown.

examples/sentiment-analysis/tester.ts

+31 -48

@@ -2,96 +2,79 @@ import { CPU, setupBackend, tensor } from "jsr:@denosaurs/[email protected]";
 import { Sequential } from "jsr:@denosaurs/[email protected]/core";

 import {
-    useSplit,
-    ClassificationReport,
-    MatrixLike,
+  ClassificationReport,
+  type MatrixLike,
+  useSplit,
 } from "jsr:@denosaurs/[email protected]/utilities";

 import { CategoricalEncoder } from "jsr:@denosaurs/[email protected]/utilities/encoding";
 import {
-    CountVectorizer,
-    SplitTokenizer,
+  CountVectorizer,
+  SplitTokenizer,
+  TfIdfTransformer,
 } from "jsr:@denosaurs/[email protected]/utilities/text";

-import Mappings from "./mappings.json" with {type: "json"}
-import Vocab from "./vocab.json" with {type: "json"}
+import Mappings from "./mappings.json" with { type: "json" };
+import Vocab from "./vocab.json" with { type: "json" };
+import Idf from "./tfidf.json" with { type: "json" };

 import { parse as parseCsv } from "jsr:@std/[email protected]/parse";

-
-console.time("Time Elapsed");
-
-console.log("\nImports loaded.");
-
 const file = Deno.readTextFileSync(
-    "examples/sentiment-analysis/text_emotion.csv"
-);
-
-console.log("\nData file loaded.");
-console.timeLog("Time Elapsed");
-
-const data = parseCsv(file, { skipFirstRow: true }) as {
+  "examples/sentiment-analysis/text_emotion.csv",
+);
+
+const data = parseCsv(file, { skipFirstRow: true }) as {
   sentiment: string;
   content: string;
-}[];
-const text = data.map((x) => x.content);
+}[];
+const text = data.map((x) => x.content);
 const labels = data.map((x) => x.sentiment);

-console.log("\nCSV Parsed");
-console.timeLog("Time Elapsed");
-
-const [[trainX, trainY], [testX, testY]] = useSplit(
-    { shuffle: true, ratio: [7, 3] },
-    text,
-    labels
+const [[_trainX, _trainY], [testX, testY]] = useSplit(
+  { shuffle: true, ratio: [7, 3] },
+  text,
+  labels,
 );

-console.log("Data Split");
-console.timeLog("Time Elapsed");
-
 const vocab = new Map();

 for (const entry of Vocab) {
-    vocab.set(entry[0], entry[1])
+  vocab.set(entry[0], entry[1]);
 }

 const tokenizer = new SplitTokenizer({
-    skipWords: "english",
-    vocabulary: vocab,
-    standardize: { lowercase: true, stripNewlines: true },
+  skipWords: "english",
+  vocabulary: vocab,
+  standardize: { lowercase: true, stripNewlines: true },
 });

 const vectorizer = new CountVectorizer(tokenizer.vocabulary.size);

-console.log("\nX vectorized");
-console.timeLog("Time Elapsed");
+const transformer = new TfIdfTransformer({ idf: Float64Array.from(Idf) });

 const encoder = new CategoricalEncoder<string>();
 const mappings = new Map();

 for (const entry of Mappings) {
-    mappings.set(entry[0], entry[1])
+  mappings.set(entry[0], entry[1]);
 }

 encoder.mapping = mappings;

-console.log("\nCPU Backend Loading");
-console.timeLog("Time Elapsed");
-
 await setupBackend(CPU);

-console.log("\nCPU Backend Loaded");
-console.timeLog("Time Elapsed");
-
-const net = Sequential.loadFile("examples/sentiment-analysis/sentiment.st")
+const net = Sequential.loadFile("examples/sentiment-analysis/sentiment.st");

 const predYSoftmax = await net.predict(
-    tensor(vectorizer.transform(tokenizer.transform(testX), "f32"))
+  tensor(
+    transformer.transform<"f32">(
+      vectorizer.transform(tokenizer.transform(testX), "f32"),
+    ),
+  ),
 );

 CategoricalEncoder.fromSoftmax<"f32">(predYSoftmax as MatrixLike<"f32">);
 const predY = encoder.untransform(predYSoftmax as MatrixLike<"f32">);

 console.log(new ClassificationReport(testY, predY));
-
-console.log(testY, predY)
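For reference, the tester's evaluation scaffolding condensed into a standalone sketch; only useSplit and ClassificationReport from this diff are real, while the corpus and the "perfect" predictions are placeholders to show the shapes involved:

import {
  ClassificationReport,
  useSplit,
} from "jsr:@denosaurs/[email protected]/utilities";

// Placeholder corpus; tester.ts reads text_emotion.csv instead.
const text = ["great day", "awful news", "meh", "so happy", "bad vibes", "ok"];
const labels = ["happiness", "sadness", "neutral", "happiness", "sadness", "neutral"];

// Same shuffled 70/30 split as tester.ts; the train halves go unused there
// because the model was already trained and saved by classifier.ts.
const [[_trainX, _trainY], [testX, testY]] = useSplit(
  { shuffle: true, ratio: [7, 3] },
  text,
  labels,
);

// In tester.ts, predY comes from net.predict(); fake a perfect model here
// just to show that the report takes (trueLabels, predictedLabels).
const predY = [...testY];
console.log(new ClassificationReport(testY, predY));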

examples/sentiment-analysis/tfidf.json

+1
Large diffs are not rendered by default.

examples/sentiment-analysis/vocab.json

+1 -1
Large diffs are not rendered by default.

packages/utilities/src/encoding/categorical.ts

-92
This file was deleted.
