Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit b0bc71c

Browse files
committed May 16, 2017
Fixed bugs around recognizing choices:
- Emojis weren't being recognized. - The allowPartialMatches option wasn't being passed all the way down.
1 parent 4f8f376 commit b0bc71c

File tree

2 files changed

+61
-15
lines changed

2 files changed

+61
-15
lines changed
 

‎Node/core/lib/dialogs/PromptRecognizers.js

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Object.defineProperty(exports, "__esModule", { value: true });
33
var EntityRecognizer_1 = require("./EntityRecognizer");
44
var consts = require("../consts");
5-
var simpleTokenizer = /\w+/ig;
5+
var breakingChars = " \n\r~`!@#$%^&*()-+={}|[]\\:\";'<>?,./";
66
var PromptRecognizers = (function () {
77
function PromptRecognizers() {
88
}
@@ -164,7 +164,7 @@ var PromptRecognizers = (function () {
164164
values.push(action.value);
165165
}
166166
}
167-
var match = PromptRecognizers.findTopEntity(PromptRecognizers.recognizeValues(utterance, values));
167+
var match = PromptRecognizers.findTopEntity(PromptRecognizers.recognizeValues(utterance, values, options));
168168
if (match) {
169169
entities.push({
170170
type: consts.Entities.Match,
@@ -214,12 +214,12 @@ var PromptRecognizers = (function () {
214214
options = options || {};
215215
var entities = [];
216216
var text = utterance.trim().toLowerCase();
217-
var tokens = matchAll(simpleTokenizer, text);
217+
var tokens = tokenize(text);
218218
var maxDistance = options.hasOwnProperty('maxTokenDistance') ? options.maxTokenDistance : 2;
219219
values.forEach(function (value, index) {
220220
if (typeof value === 'string') {
221221
var topScore = 0.0;
222-
var vTokens = matchAll(simpleTokenizer, value.trim().toLowerCase());
222+
var vTokens = tokenize(value.trim().toLowerCase());
223223
for (var i = 0; i < tokens.length; i++) {
224224
var score = matchValue(vTokens, i);
225225
if (score > topScore) {
@@ -278,3 +278,25 @@ function matchAll(exp, text) {
278278
}
279279
return matches;
280280
}
281+
/**
 * Breaks a string of text into an array of tokens.
 *
 * Walks the string character by character instead of using the old
 * /\w+/ig tokenizer so that characters outside the \w class — notably
 * emoji surrogate pairs — are kept inside tokens rather than dropped.
 *
 * @param text Text to tokenize. Falsy or empty input yields an empty array.
 * @returns Array of non-empty tokens with all separator characters removed.
 */
function tokenize(text) {
    // Characters that terminate a token. Includes '\t': the regex-based
    // tokenizer this replaced treated tab as a separator, but the original
    // breakingChars list omitted it, merging tab-separated words.
    var separators = " \t\n\r~`!@#$%^&*()-+={}|[]\\:\";'<>?,./";
    var tokens = [];
    if (text && text.length > 0) {
        var token = '';
        for (var i = 0; i < text.length; i++) {
            var chr = text[i];
            if (separators.indexOf(chr) >= 0) {
                // Separator reached: emit the accumulated token, if any.
                if (token.length > 0) {
                    tokens.push(token);
                }
                token = '';
            }
            else {
                token += chr;
            }
        }
        // Flush the trailing token (text that didn't end on a separator).
        if (token.length > 0) {
            tokens.push(token);
        }
    }
    return tokens;
}

‎Node/core/src/dialogs/PromptRecognizers.ts

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ import { EntityRecognizer, IFindMatchResult } from './EntityRecognizer';
3636
import * as consts from '../consts';
3737
import * as chrono from 'chrono-node';
3838

39-
const simpleTokenizer = /\w+/ig;
39+
const breakingChars = " \n\r~`!@#$%^&*()-+={}|[]\\:\";'<>?,./";
4040

4141
export type StringOrRegExp = string|RegExp;
4242

@@ -114,8 +114,8 @@ export class PromptRecognizers {
114114
// Ensure cached
115115
let key = namespace + ':' + expId;
116116
let entities: IEntity<string>[] = [];
117-
let locale = context.preferredLocale();
118-
let utterance = context.message.text ? context.message.text.trim() : '';
117+
const locale = context.preferredLocale();
118+
const utterance = context.message.text ? context.message.text.trim() : '';
119119
let cache = this.expCache[key];
120120
if (!cache) {
121121
this.expCache[key] = cache = {};
@@ -142,8 +142,8 @@ export class PromptRecognizers {
142142
// Ensure cached
143143
let key = namespace + ':' + listId;
144144
let entities: IEntity<string>[] = [];
145-
let locale = context.preferredLocale();
146-
let utterance = context.message.text ? context.message.text.trim() : '';
145+
const locale = context.preferredLocale();
146+
const utterance = context.message.text ? context.message.text.trim() : '';
147147
let cache = this.choiceCache[key];
148148
if (!cache) {
149149
this.expCache[key] = cache = {};
@@ -272,7 +272,7 @@ export class PromptRecognizers {
272272
options = options || {};
273273
let refData = options.refDate ? new Date(options.refDate) : null;
274274
let entities: IEntity<string>[] = [];
275-
let utterance = context.message.text ? context.message.text.trim() : '';
275+
const utterance = context.message.text ? context.message.text.trim() : '';
276276
let entity = EntityRecognizer.recognizeTime(utterance, refData);
277277
if (entity) {
278278
entity.score = PromptRecognizers.calculateScore(utterance, entity.entity);
@@ -302,7 +302,7 @@ export class PromptRecognizers {
302302
}
303303

304304
// Recognize matched values.
305-
let match = PromptRecognizers.findTopEntity(PromptRecognizers.recognizeValues(utterance, values));
305+
let match = PromptRecognizers.findTopEntity(PromptRecognizers.recognizeValues(utterance, values, options));
306306
if (match) {
307307
// Push the choice onto the list of matches.
308308
entities.push({
@@ -352,7 +352,7 @@ export class PromptRecognizers {
352352
let score = 0.0;
353353
if (matched > 0 && (matched == vTokens.length || options.allowPartialMatches)) {
354354
// Percentage of tokens matched. If matching "second last" in
355-
// "the second from the last one" the completness would be 1.0 since
355+
// "the second from the last one" the completeness would be 1.0 since
356356
// all tokens were found.
357357
let completeness = matched / vTokens.length;
358358

@@ -376,14 +376,14 @@ export class PromptRecognizers {
376376
options = options || {};
377377
let entities: IEntity<number>[] = [];
378378
let text = utterance.trim().toLowerCase();
379-
let tokens = matchAll(simpleTokenizer, text);
379+
let tokens = tokenize(text);
380380
let maxDistance = options.hasOwnProperty('maxTokenDistance') ? options.maxTokenDistance : 2;
381381
values.forEach((value, index) => {
382382
if (typeof value === 'string') {
383383
// To match "last one" in "the last time I chose the last one" we need
384384
// to recursively search the utterance starting from each token position.
385385
let topScore = 0.0;
386-
let vTokens = matchAll(simpleTokenizer, (<string>value).trim().toLowerCase());
386+
let vTokens = tokenize((<string>value).trim().toLowerCase());
387387
for (let i = 0; i < tokens.length; i++) {
388388
let score = matchValue(vTokens, i);
389389
if (score > topScore) {
@@ -439,4 +439,28 @@ function matchAll(exp: RegExp, text: string): string[] {
439439
matches.push(match[0]);
440440
}
441441
return matches;
442-
}
442+
}
443+
444+
/** Breaks a string of text into an array of tokens. */
445+
function tokenize(text: string): string[] {
446+
let tokens: string[] = [];
447+
if (text && text.length > 0) {
448+
let token = '';
449+
for (let i = 0; i < text.length; i++) {
450+
const chr = text[i];
451+
if (breakingChars.indexOf(chr) >= 0) {
452+
if (token.length > 0) {
453+
tokens.push(token);
454+
}
455+
token = '';
456+
} else {
457+
token += chr;
458+
}
459+
}
460+
if (token.length > 0) {
461+
tokens.push(token);
462+
}
463+
}
464+
return tokens;
465+
}
466+

0 commit comments

Comments
 (0)
Please sign in to comment.