@@ -36,7 +36,7 @@ import { EntityRecognizer, IFindMatchResult } from './EntityRecognizer';
36
36
import * as consts from '../consts' ;
37
37
import * as chrono from 'chrono-node' ;
38
38
39
- const simpleTokenizer = / \w + / ig ;
39
// Characters that tokenize() treats as token separators. Any character not listed here (letters, digits, '_', tabs, non-ASCII text) is kept as part of a token.
+ const breakingChars = " \n\r~`!@#$%^&*()-+={}|[]\\:\";'<>?,./" ;
40
40
41
41
export type StringOrRegExp = string | RegExp ;
42
42
@@ -114,8 +114,8 @@ export class PromptRecognizers {
114
114
// Ensure cached
115
115
let key = namespace + ':' + expId ;
116
116
let entities : IEntity < string > [ ] = [ ] ;
117
- let locale = context . preferredLocale ( ) ;
118
- let utterance = context . message . text ? context . message . text . trim ( ) : '' ;
117
+ const locale = context . preferredLocale ( ) ;
118
+ const utterance = context . message . text ? context . message . text . trim ( ) : '' ;
119
119
let cache = this . expCache [ key ] ;
120
120
if ( ! cache ) {
121
121
this . expCache [ key ] = cache = { } ;
@@ -142,8 +142,8 @@ export class PromptRecognizers {
142
142
// Ensure cached
143
143
let key = namespace + ':' + listId ;
144
144
let entities : IEntity < string > [ ] = [ ] ;
145
- let locale = context . preferredLocale ( ) ;
146
- let utterance = context . message . text ? context . message . text . trim ( ) : '' ;
145
+ const locale = context . preferredLocale ( ) ;
146
+ const utterance = context . message . text ? context . message . text . trim ( ) : '' ;
147
147
let cache = this . choiceCache [ key ] ;
148
148
if ( ! cache ) {
149
149
this . expCache [ key ] = cache = { } ;
@@ -272,7 +272,7 @@ export class PromptRecognizers {
272
272
options = options || { } ;
273
273
let refData = options . refDate ? new Date ( options . refDate ) : null ;
274
274
let entities : IEntity < string > [ ] = [ ] ;
275
- let utterance = context . message . text ? context . message . text . trim ( ) : '' ;
275
+ const utterance = context . message . text ? context . message . text . trim ( ) : '' ;
276
276
let entity = EntityRecognizer . recognizeTime ( utterance , refData ) ;
277
277
if ( entity ) {
278
278
entity . score = PromptRecognizers . calculateScore ( utterance , entity . entity ) ;
@@ -302,7 +302,7 @@ export class PromptRecognizers {
302
302
}
303
303
304
304
// Recognize matched values.
305
- let match = PromptRecognizers . findTopEntity ( PromptRecognizers . recognizeValues ( utterance , values ) ) ;
305
+ let match = PromptRecognizers . findTopEntity ( PromptRecognizers . recognizeValues ( utterance , values , options ) ) ;
306
306
if ( match ) {
307
307
// Push the choice onto the list of matches.
308
308
entities . push ( {
@@ -352,7 +352,7 @@ export class PromptRecognizers {
352
352
let score = 0.0 ;
353
353
if ( matched > 0 && ( matched == vTokens . length || options . allowPartialMatches ) ) {
354
354
// Percentage of tokens matched. If matching "second last" in
355
- // "the second from the last one" the completness would be 1.0 since
355
+ // "the second from the last one" the completeness would be 1.0 since
356
356
// all tokens were found.
357
357
let completeness = matched / vTokens . length ;
358
358
@@ -376,14 +376,14 @@ export class PromptRecognizers {
376
376
options = options || { } ;
377
377
let entities : IEntity < number > [ ] = [ ] ;
378
378
let text = utterance . trim ( ) . toLowerCase ( ) ;
379
- let tokens = matchAll ( simpleTokenizer , text ) ;
379
+ let tokens = tokenize ( text ) ;
380
380
let maxDistance = options . hasOwnProperty ( 'maxTokenDistance' ) ? options . maxTokenDistance : 2 ;
381
381
values . forEach ( ( value , index ) => {
382
382
if ( typeof value === 'string' ) {
383
383
// To match "last one" in "the last time I chose the last one" we need
384
384
// to recursively search the utterance starting from each token position.
385
385
let topScore = 0.0 ;
386
- let vTokens = matchAll ( simpleTokenizer , ( < string > value ) . trim ( ) . toLowerCase ( ) ) ;
386
+ let vTokens = tokenize ( ( < string > value ) . trim ( ) . toLowerCase ( ) ) ;
387
387
for ( let i = 0 ; i < tokens . length ; i ++ ) {
388
388
let score = matchValue ( vTokens , i ) ;
389
389
if ( score > topScore ) {
@@ -439,4 +439,28 @@ function matchAll(exp: RegExp, text: string): string[] {
439
439
matches . push ( match [ 0 ] ) ;
440
440
}
441
441
return matches ;
442
- }
442
+ }
443
+
444
/**
 * Splits a string into tokens, using every character contained in
 * `breakingChars` as a separator.
 *
 * Separator characters are discarded and consecutive separators never
 * produce empty tokens.
 *
 * @param text The text to split.
 * @returns The tokens in the order they appear; an empty array when `text`
 * is empty or falsy.
 */
function tokenize(text: string): string[] {
    const result: string[] = [];
    if (!text) {
        return result;
    }

    // Index at which the token currently being scanned began, or -1 while
    // the scanner is sitting between tokens.
    let start = -1;
    for (let pos = 0; pos < text.length; pos++) {
        const isSeparator = breakingChars.indexOf(text[pos]) >= 0;
        if (isSeparator) {
            if (start >= 0) {
                // A separator closes the token that was in progress.
                result.push(text.substring(start, pos));
                start = -1;
            }
        } else if (start < 0) {
            // First non-separator character opens a new token.
            start = pos;
        }
    }

    // Flush a token that runs to the end of the text.
    if (start >= 0) {
        result.push(text.substring(start));
    }
    return result;
}
466
+
0 commit comments