Commit e905984

Merge branch 'sentiment-analysis' into main
2 parents a2129d4 + 7cc0b82 commit e905984

File tree: 9 files changed, +40350 −9 lines

deno.jsonc (+2 −1)

@@ -40,7 +40,8 @@
     "example:multiple-linear": "deno -A ./examples/multiple-linear/student.ts",
     "example:binary": "deno -A ./examples/classification/binary_iris.ts",
     "example:multiclass": "deno -A ./examples/classification/iris.ts",
-    "example:text": "deno -A ./examples/classification/spam.ts",
+    "example:text-sentiment": "deno -A ./examples/sentiment-analysis/classifier.ts",
+    "example:text-spam": "deno -A ./examples/classification/spam.ts",
     "example:filters": "deno -A examples/filters/conv.ts ",
     "example:train": "deno -A examples/model/train.ts ",
     "example:run": "deno -A examples/model/run.ts ",
examples/sentiment-analysis/classifier.ts (+70, new file)

import { CPU, setupBackend, tensor } from "jsr:@denosaurs/[email protected]";
import { Sequential } from "jsr:@denosaurs/[email protected]/core";

import { MatrixLike } from "jsr:@denosaurs/[email protected]/utilities";

import { CategoricalEncoder } from "jsr:@denosaurs/[email protected]/utilities/encoding";
import {
  CountVectorizer,
  SplitTokenizer,
} from "jsr:@denosaurs/[email protected]/utilities/text";

import Mappings from "./mappings.json" with { type: "json" };
import Vocab from "./vocab.json" with { type: "json" };

console.time("Time Elapsed");
console.log("\nImports loaded.");

// Rebuild the tokenizer vocabulary saved by the training script.
const vocab = new Map();
for (const entry of Vocab) {
  vocab.set(entry[0], entry[1]);
}

const tokenizer = new SplitTokenizer({
  skipWords: "english",
  vocabulary: vocab,
  standardize: { lowercase: true, stripNewlines: true },
});

const vectorizer = new CountVectorizer(tokenizer.vocabulary.size);

console.log("\nX vectorized");
console.timeLog("Time Elapsed");

// Rebuild the label encoder from the saved class-to-index mapping.
const encoder = new CategoricalEncoder<string>();
const mappings = new Map();
for (const entry of Mappings) {
  mappings.set(entry[0], entry[1]);
}
encoder.mapping = mappings;

console.log("\nCPU Backend Loading");
console.timeLog("Time Elapsed");

await setupBackend(CPU);

console.log("\nCPU Backend Loaded");
console.timeLog("Time Elapsed");

const net = Sequential.loadFile("examples/sentiment-analysis/sentiment.st");

const text = prompt("Text to analyze?") || "hello world";

// Note: the idf weights saved in tfidf.json are not applied here, so the
// network receives raw token counts rather than the tf-idf features it was
// trained on.
const predYSoftmax = await net.predict(
  tensor(vectorizer.transform(tokenizer.transform([text]), "f32"))
);

CategoricalEncoder.fromSoftmax<"f32">(predYSoftmax as MatrixLike<"f32">);
const predY = encoder.untransform(predYSoftmax as MatrixLike<"f32">);

console.log(`The sentiment predicted is ${predY[0]}`);
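
Running the classifier prompts for a line of text and prints one of the labels from mappings.json. A hypothetical session (the input and predicted label are illustrative only):

    $ deno task example:text-sentiment
    Text to analyze? I finally got the job!
    The sentiment predicted is happiness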
Training script (+170, new file)

import {
  AdamOptimizer,
  Cost,
  CPU,
  Dropout1DLayer,
  Init,
  setupBackend,
  tensor,
} from "jsr:@denosaurs/[email protected]";
import { Sequential } from "jsr:@denosaurs/[email protected]/core";
import {
  DenseLayer,
  ReluLayer,
  SoftmaxLayer,
} from "jsr:@denosaurs/[email protected]/core/layers";

import {
  useSplit,
  ClassificationReport,
  MatrixLike,
} from "jsr:@denosaurs/[email protected]/utilities";

import { CategoricalEncoder } from "jsr:@denosaurs/[email protected]/utilities/encoding";
import {
  CountVectorizer,
  TfIdfTransformer,
  SplitTokenizer,
} from "jsr:@denosaurs/[email protected]/utilities/text";

import { parse as parseCsv } from "jsr:@std/[email protected]/parse";
import { format as duration } from "jsr:@std/[email protected]/duration";

console.time("Time Elapsed");
console.log("\nImports loaded.");

const file = Deno.readTextFileSync(
  "examples/sentiment-analysis/text_emotion.csv"
);

console.log("\nData file loaded.");
console.timeLog("Time Elapsed");

const data = parseCsv(file, { skipFirstRow: true }) as {
  sentiment: string;
  content: string;
}[];

const text = data.map((x) => x.content);
const labels = data.map((x) => x.sentiment);

console.log("\nCSV Parsed");
console.timeLog("Time Elapsed");

// Shuffled 70/30 train-test split.
const [[trainX, trainY], [testX, testY]] = useSplit(
  { shuffle: true, ratio: [7, 3] },
  text,
  labels
);

console.log("Data Split");
console.timeLog("Time Elapsed");

const tokenizer = new SplitTokenizer({
  skipWords: "english",
  standardize: { lowercase: true, stripNewlines: true },
});

const tokens = tokenizer.fit(trainX).transform(trainX);

console.log("\nX tokenized");
console.timeLog("Time Elapsed");

const vectorizer = new CountVectorizer(tokenizer.vocabulary.size);

const vecX = vectorizer.transform(tokens, "f32");

// Free the token arrays; only the vectorized matrix is needed from here on.
tokens.splice(0, tokens.length);

console.log("\nX vectorized");
console.timeLog("Time Elapsed");

const transformer = new TfIdfTransformer();

const tfidfX = transformer.fit(vecX).transform<"f32">(vecX);

console.log("\nX Transformed", tfidfX.shape);
console.timeLog("Time Elapsed");

// One-hot encode the string labels.
const encoder = new CategoricalEncoder<string>();

const oneHotY = encoder.fit(trainY).transform(trainY, "f32");

// Persist the fitted label mapping, vocabulary, and idf weights so the
// classifier and tester scripts can reuse them without refitting.
Deno.writeTextFileSync(
  "examples/sentiment-analysis/mappings.json",
  JSON.stringify(Array.from(encoder.mapping.entries()))
);
Deno.writeTextFileSync(
  "examples/sentiment-analysis/vocab.json",
  JSON.stringify(Array.from(tokenizer.vocabulary.entries()))
);
Deno.writeTextFileSync(
  "examples/sentiment-analysis/tfidf.json",
  JSON.stringify(transformer.idf)
);

console.log("\nCPU Backend Loading");
console.timeLog("Time Elapsed");

await setupBackend(CPU);

console.log("\nCPU Backend Loaded");
console.timeLog("Time Elapsed");

const net = new Sequential({
  // Input shape: [batch size, number of features].
  size: [4, vecX.nCols],
  layers: [
    DenseLayer({ size: [256], init: Init.Kaiming }),
    ReluLayer(),
    DenseLayer({ size: [32], init: Init.Kaiming }),
    ReluLayer(),
    DenseLayer({ size: [16], init: Init.Kaiming }),
    ReluLayer(),
    DenseLayer({ size: [16], init: Init.Kaiming }),
    ReluLayer(),
    DenseLayer({ size: [16], init: Init.Kaiming }),
    ReluLayer(),
    Dropout1DLayer({ probability: 0.5 }),
    // One output per class, then softmax for class probabilities.
    DenseLayer({ size: [encoder.mapping.size], init: Init.Kaiming }),
    SoftmaxLayer(),
  ],
  silent: false,
  optimizer: AdamOptimizer(),
  cost: Cost.CrossEntropy,
  patience: 10,
});

console.log("\nStarting");
console.timeLog("Time Elapsed");
const timeStart = performance.now();

// Arguments: datasets, epochs, batches, learning rate.
net.train(
  [{ inputs: tensor(tfidfX), outputs: tensor(oneHotY) }],
  100,
  2,
  0.002
);

console.log(
  `Training complete in ${duration(performance.now() - timeStart, {
    style: "narrow",
  })}.`
);

// Evaluate on the held-out split, applying the same count + tf-idf pipeline.
const predYSoftmax = await net.predict(
  tensor(
    transformer.transform<"f32">(
      vectorizer.transform(tokenizer.transform(testX), "f32")
    )
  )
);

CategoricalEncoder.fromSoftmax<"f32">(predYSoftmax as MatrixLike<"f32">);
const predY = encoder.untransform(predYSoftmax as MatrixLike<"f32">);

console.log(new ClassificationReport(testY, predY));

net.saveFile("examples/sentiment-analysis/sentiment.st");
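
For reference, the transformer.idf array persisted to tfidf.json holds one inverse-document-frequency weight per vocabulary term. A minimal sketch of the idea, assuming a common smoothed formulation (the exact math inside netsaur's TfIdfTransformer may differ):

    // Hypothetical idf computation over a document-term count matrix.
    function idfWeights(counts: number[][]): number[] {
      const nDocs = counts.length;
      const nTerms = counts[0]?.length ?? 0;
      return Array.from({ length: nTerms }, (_, t) => {
        // df = number of documents containing term t
        let df = 0;
        for (const row of counts) if (row[t] > 0) df++;
        return Math.log(nDocs / (1 + df)) + 1; // smoothed idf
      });
    }
    // A document's tf-idf feature for term t is then count[t] * idf[t].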
examples/sentiment-analysis/mappings.json (+1, new file)

[["empty",0],["sadness",1],["neutral",2],["worry",3],["surprise",4],["fun",5],["hate",6],["happiness",7],["enthusiasm",8],["love",9],["relief",10],["boredom",11],["anger",12]]
examples/sentiment-analysis/sentiment.st (62.2 MB): Binary file not shown.

examples/sentiment-analysis/tester.ts (+97, new file)

import { CPU, setupBackend, tensor } from "jsr:@denosaurs/[email protected]";
import { Sequential } from "jsr:@denosaurs/[email protected]/core";

import {
  useSplit,
  ClassificationReport,
  MatrixLike,
} from "jsr:@denosaurs/[email protected]/utilities";

import { CategoricalEncoder } from "jsr:@denosaurs/[email protected]/utilities/encoding";
import {
  CountVectorizer,
  SplitTokenizer,
} from "jsr:@denosaurs/[email protected]/utilities/text";

import Mappings from "./mappings.json" with { type: "json" };
import Vocab from "./vocab.json" with { type: "json" };

import { parse as parseCsv } from "jsr:@std/[email protected]/parse";

console.time("Time Elapsed");
console.log("\nImports loaded.");

const file = Deno.readTextFileSync(
  "examples/sentiment-analysis/text_emotion.csv"
);

console.log("\nData file loaded.");
console.timeLog("Time Elapsed");

const data = parseCsv(file, { skipFirstRow: true }) as {
  sentiment: string;
  content: string;
}[];
const text = data.map((x) => x.content);
const labels = data.map((x) => x.sentiment);

console.log("\nCSV Parsed");
console.timeLog("Time Elapsed");

// Fresh shuffled 70/30 split; this is not the same split the model was
// trained on, so some training rows will appear in this test set.
const [[trainX, trainY], [testX, testY]] = useSplit(
  { shuffle: true, ratio: [7, 3] },
  text,
  labels
);

console.log("Data Split");
console.timeLog("Time Elapsed");

// Rebuild the tokenizer vocabulary saved by the training script.
const vocab = new Map();
for (const entry of Vocab) {
  vocab.set(entry[0], entry[1]);
}

const tokenizer = new SplitTokenizer({
  skipWords: "english",
  vocabulary: vocab,
  standardize: { lowercase: true, stripNewlines: true },
});

const vectorizer = new CountVectorizer(tokenizer.vocabulary.size);

console.log("\nX vectorized");
console.timeLog("Time Elapsed");

// Rebuild the label encoder from the saved class-to-index mapping.
const encoder = new CategoricalEncoder<string>();
const mappings = new Map();
for (const entry of Mappings) {
  mappings.set(entry[0], entry[1]);
}
encoder.mapping = mappings;

console.log("\nCPU Backend Loading");
console.timeLog("Time Elapsed");

await setupBackend(CPU);

console.log("\nCPU Backend Loaded");
console.timeLog("Time Elapsed");

const net = Sequential.loadFile("examples/sentiment-analysis/sentiment.st");

// Note: the idf weights saved in tfidf.json are not applied here, so the
// network receives raw token counts rather than the tf-idf features it was
// trained on.
const predYSoftmax = await net.predict(
  tensor(vectorizer.transform(tokenizer.transform(testX), "f32"))
);

CategoricalEncoder.fromSoftmax<"f32">(predYSoftmax as MatrixLike<"f32">);
const predY = encoder.untransform(predYSoftmax as MatrixLike<"f32">);

console.log(new ClassificationReport(testY, predY));

console.log(testY, predY);
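
The tester has no deno.jsonc task of its own; assuming it is run from the repository root (all paths in it are relative to that), it can be invoked directly:

    deno -A examples/sentiment-analysis/tester.ts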
