Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit b0bc71c

Browse files
committed May 16, 2017
Fixed bugs around recognizing choices:
- Emojis weren't being recognized. - The allowPartialMatches option wasn't being passed all the way down.
1 parent 4f8f376 commit b0bc71c

File tree

2 files changed

+61
-15
lines changed

2 files changed

+61
-15
lines changed
 

‎Node/core/lib/dialogs/PromptRecognizers.js

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Object.defineProperty(exports, "__esModule", { value: true });
33
var EntityRecognizer_1 = require("./EntityRecognizer");
44
var consts = require("../consts");
5-
var simpleTokenizer = /\w+/ig;
5+
var breakingChars = " \n\r~`!@#$%^&*()-+={}|[]\\:\";'<>?,./";
66
var PromptRecognizers = (function () {
77
function PromptRecognizers() {
88
}
@@ -164,7 +164,7 @@ var PromptRecognizers = (function () {
164164
values.push(action.value);
165165
}
166166
}
167-
var match = PromptRecognizers.findTopEntity(PromptRecognizers.recognizeValues(utterance, values));
167+
var match = PromptRecognizers.findTopEntity(PromptRecognizers.recognizeValues(utterance, values, options));
168168
if (match) {
169169
entities.push({
170170
type: consts.Entities.Match,
@@ -214,12 +214,12 @@ var PromptRecognizers = (function () {
214214
options = options || {};
215215
var entities = [];
216216
var text = utterance.trim().toLowerCase();
217-
var tokens = matchAll(simpleTokenizer, text);
217+
var tokens = tokenize(text);
218218
var maxDistance = options.hasOwnProperty('maxTokenDistance') ? options.maxTokenDistance : 2;
219219
values.forEach(function (value, index) {
220220
if (typeof value === 'string') {
221221
var topScore = 0.0;
222-
var vTokens = matchAll(simpleTokenizer, value.trim().toLowerCase());
222+
var vTokens = tokenize(value.trim().toLowerCase());
223223
for (var i = 0; i < tokens.length; i++) {
224224
var score = matchValue(vTokens, i);
225225
if (score > topScore) {
@@ -278,3 +278,25 @@ function matchAll(exp, text) {
278278
}
279279
return matches;
280280
}
281+
/**
 * Breaks a string of text into an array of tokens.
 *
 * Walks the string character by character instead of using the old
 * /\w+/ig tokenizer so that characters outside the \w class — notably
 * emoji surrogate pairs — are kept inside tokens rather than dropped.
 *
 * @param text Text to tokenize. Falsy or empty input yields an empty array.
 * @returns Array of non-empty tokens with all separator characters removed.
 */
function tokenize(text) {
    // Characters that terminate a token. Includes '\t': the regex-based
    // tokenizer this replaced treated tab as a separator, but the original
    // breakingChars list omitted it, merging tab-separated words.
    var separators = " \t\n\r~`!@#$%^&*()-+={}|[]\\:\";'<>?,./";
    var tokens = [];
    if (text && text.length > 0) {
        var token = '';
        for (var i = 0; i < text.length; i++) {
            var chr = text[i];
            if (separators.indexOf(chr) >= 0) {
                // Separator reached: emit the accumulated token, if any.
                if (token.length > 0) {
                    tokens.push(token);
                }
                token = '';
            }
            else {
                token += chr;
            }
        }
        // Flush the trailing token (text that didn't end on a separator).
        if (token.length > 0) {
            tokens.push(token);
        }
    }
    return tokens;
}

‎Node/core/src/dialogs/PromptRecognizers.ts

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ import { EntityRecognizer, IFindMatchResult } from './EntityRecognizer';
3636
import * as consts from '../consts';
3737
import * as chrono from 'chrono-node';
3838

39-
const simpleTokenizer = /\w+/ig;
39+
const breakingChars = " \n\r~`!@#$%^&*()-+={}|[]\\:\";'<>?,./";
4040

4141
export type StringOrRegExp = string|RegExp;
4242

@@ -114,8 +114,8 @@ export class PromptRecognizers {
114114
// Ensure cached
115115
let key = namespace + ':' + expId;
116116
let entities: IEntity<string>[] = [];
117-
let locale = context.preferredLocale();
118-
let utterance = context.message.text ? context.message.text.trim() : '';
117+
const locale = context.preferredLocale();
118+
const utterance = context.message.text ? context.message.text.trim() : '';
119119
let cache = this.expCache[key];
120120
if (!cache) {
121121
this.expCache[key] = cache = {};
@@ -142,8 +142,8 @@ export class PromptRecognizers {
142142
// Ensure cached
143143
let key = namespace + ':' + listId;
144144
let entities: IEntity<string>[] = [];
145-
let locale = context.preferredLocale();
146-
let utterance = context.message.text ? context.message.text.trim() : '';
145+
const locale = context.preferredLocale();
146+
const utterance = context.message.text ? context.message.text.trim() : '';
147147
let cache = this.choiceCache[key];
148148
if (!cache) {
149149
this.expCache[key] = cache = {};
@@ -272,7 +272,7 @@ export class PromptRecognizers {
272272
options = options || {};
273273
let refData = options.refDate ? new Date(options.refDate) : null;
274274
let entities: IEntity<string>[] = [];
275-
let utterance = context.message.text ? context.message.text.trim() : '';
275+
const utterance = context.message.text ? context.message.text.trim() : '';
276276
let entity = EntityRecognizer.recognizeTime(utterance, refData);
277277
if (entity) {
278278
entity.score = PromptRecognizers.calculateScore(utterance, entity.entity);
@@ -302,7 +302,7 @@ export class PromptRecognizers {
302302
}
303303

304304
// Recognize matched values.
305-
let match = PromptRecognizers.findTopEntity(PromptRecognizers.recognizeValues(utterance, values));
305+
let match = PromptRecognizers.findTopEntity(PromptRecognizers.recognizeValues(utterance, values, options));
306306
if (match) {
307307
// Push the choice onto the list of matches.
308308
entities.push({
@@ -352,7 +352,7 @@ export class PromptRecognizers {
352352
let score = 0.0;
353353
if (matched > 0 && (matched == vTokens.length || options.allowPartialMatches)) {
354354
// Percentage of tokens matched. If matching "second last" in
355-
// "the second from the last one" the completness would be 1.0 since
355+
// "the second from the last one" the completeness would be 1.0 since
356356
// all tokens were found.
357357
let completeness = matched / vTokens.length;
358358

@@ -376,14 +376,14 @@ export class PromptRecognizers {
376376
options = options || {};
377377
let entities: IEntity<number>[] = [];
378378
let text = utterance.trim().toLowerCase();
379-
let tokens = matchAll(simpleTokenizer, text);
379+
let tokens = tokenize(text);
380380
let maxDistance = options.hasOwnProperty('maxTokenDistance') ? options.maxTokenDistance : 2;
381381
values.forEach((value, index) => {
382382
if (typeof value === 'string') {
383383
// To match "last one" in "the last time I chose the last one" we need
384384
// to recursively search the utterance starting from each token position.
385385
let topScore = 0.0;
386-
let vTokens = matchAll(simpleTokenizer, (<string>value).trim().toLowerCase());
386+
let vTokens = tokenize((<string>value).trim().toLowerCase());
387387
for (let i = 0; i < tokens.length; i++) {
388388
let score = matchValue(vTokens, i);
389389
if (score > topScore) {
@@ -439,4 +439,28 @@ function matchAll(exp: RegExp, text: string): string[] {
439439
matches.push(match[0]);
440440
}
441441
return matches;
442-
}
442+
}
443+
444+
/** Breaks a string of text into an array of tokens. */
445+
function tokenize(text: string): string[] {
446+
let tokens: string[] = [];
447+
if (text && text.length > 0) {
448+
let token = '';
449+
for (let i = 0; i < text.length; i++) {
450+
const chr = text[i];
451+
if (breakingChars.indexOf(chr) >= 0) {
452+
if (token.length > 0) {
453+
tokens.push(token);
454+
}
455+
token = '';
456+
} else {
457+
token += chr;
458+
}
459+
}
460+
if (token.length > 0) {
461+
tokens.push(token);
462+
}
463+
}
464+
return tokens;
465+
}
466+

0 commit comments

Comments
 (0)
Please sign in to comment.