Skip to content

Commit b654318

Browse files
authored
Merge pull request #3738 from rbayet/feat_analysis_tweaks
[Core] Analysis: introducing constant and token filters for special characters handling
2 parents 597705e + cf42f42 commit b654318

File tree

2 files changed

+17 -0 lines changed

2 files changed

+17 -0 lines changed

src/module-elasticsuite-core/Api/Index/Mapping/FieldInterface.php

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,17 @@ interface FieldInterface
5252
const ANALYZER_REFERENCE = 'reference';
5353
const ANALYZER_EDGE_NGRAM = 'standard_edge_ngram';
5454

55+
/**
56+
* Token filter types declarations.
57+
*/
58+
const TOKEN_FILTER_TYPE_WORD_DELIMITER = 'word_delimiter_graph';
59+
const TOKEN_FILTER_TYPE_REMOVE_DUPLICATES = 'remove_duplicates';
60+
61+
/**
62+
* Tokenizer declarations.
63+
*/
64+
const TOKENIZER_WHITESPACE = 'whitespace';
65+
5566
/**
5667
* Field filter logical operators.
5768
*/

src/module-elasticsuite-core/etc/elasticsuite_analysis.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
<length>8192</length>
3030
</filter>
3131
<filter name="lowercase" type="lowercase" language="default"/>
32+
<filter name="word_delimiter_before" type="trim" language="default" />
3233
<filter name="word_delimiter" type="word_delimiter_graph" language="default">
3334
<generate_word_parts>true</generate_word_parts>
3435
<generate_number_parts>true</generate_number_parts>
@@ -39,6 +40,7 @@
3940
<split_on_numerics>true</split_on_numerics>
4041
<preserve_original>true</preserve_original>
4142
</filter>
43+
<filter name="remove_duplicates" type="remove_duplicates" language="default" />
4244
<filter name="shingle" type="shingle" language="default">
4345
<min_shingle_size>2</min_shingle_size>
4446
<max_shingle_size>4</max_shingle_size>
@@ -216,6 +218,7 @@
216218
<filter ref="ascii_folding" />
217219
<filter ref="trim" />
218220
<filter ref="elision" />
221+
<filter ref="word_delimiter_before" />
219222
<filter ref="word_delimiter" />
220223
<filter ref="lowercase" />
221224
<filter ref="stemmer_before" />
@@ -232,6 +235,7 @@
232235
<filter ref="ascii_folding" />
233236
<filter ref="trim" />
234237
<filter ref="elision" />
238+
<filter ref="word_delimiter_before" />
235239
<filter ref="word_delimiter" />
236240
<filter ref="lowercase" />
237241
</filters>
@@ -260,6 +264,7 @@
260264
<filter ref="ascii_folding" />
261265
<filter ref="trim" />
262266
<filter ref="elision" />
267+
<filter ref="word_delimiter_before" />
263268
<filter ref="word_delimiter" />
264269
<filter ref="lowercase" />
265270
<filter ref="stemmer_before" />
@@ -300,6 +305,7 @@
300305
<filter ref="ascii_folding" />
301306
<filter ref="trim" />
302307
<filter ref="elision" />
308+
<filter ref="word_delimiter_before" />
303309
<filter ref="word_delimiter" />
304310
<filter ref="lowercase" />
305311
<filter ref="stemmer_before" />

0 commit comments

Comments (0)