2525 " ^" ,
2626 " &" ,
2727 " (" ,
28- " )"
28+ " )"
2929 ]
3030 }
3131 },
32- "filter" : {
33- "english_stemmer" : {
34- "type" : " stemmer" ,
35- "language" : " german"
36- },
37- "german_stemmer" : {
38- "type" : " stemmer" ,
39- "language" : " german"
40- },
41- "french_stemmer" : {
42- "type" : " stemmer" ,
43- "language" : " german"
44- }
45- },
32+ "filter" : {
33+ "english_stemmer" : {
34+ "type" : " stemmer" ,
35+ "language" : " english"
36+ },
37+ "german_stemmer" : {
38+ "type" : " stemmer" ,
39+ "language" : " german"
40+ },
41+ "french_stemmer" : {
42+ "type" : " stemmer" ,
43+ "language" : " french"
44+ }
45+ },
4646 "char_filter" : {
47+ "whitespaces_compressor" : {
48+ "type" : " pattern_replace" ,
49+ "pattern" : " \\ s+" ,
50+ "replacement" : " "
51+ },
52+ "transcription_special_signs_filter" : {
53+ "type" : " pattern_replace" ,
54+ "pattern" : " [\\ *]" ,
55+ "replacement" : " "
56+ },
4757 "transcription_brackets_filter" : {
4858 "type" : " pattern_replace" ,
4959 "pattern" : " [\\ [\\ ]\\ (\\ )?\\ u2e2e\\ u2e22\\ u2e23\\ u2329\\ u232a]|\\ {\\ S*\\ }" ,
5262 "transcription_suffix_filter" : {
5363 "type" : " mapping" ,
5464 "mappings" : [
65+ " .t.pl => .wt" ,
66+ " .t:pl => .wt" ,
67+ " .tpl => .wt" ,
68+ " t.du => .tj" ,
69+ " t:du => .tj" ,
70+ " .tdu => .tj" ,
5571 " ,t,pl => ,wt" ,
5672 " ,tpl => ,wt" ,
5773 " t,du => ,tj" ,
6076 " pl => w" ,
6177 " , => ."
6278 ]
79+ },
80+ "transcription_unicode_normalizer" : {
81+ "type" : " mapping" ,
82+ "mappings" : [
83+ " h\\ u0331 => \\ u1e96" ,
84+ " H\\ u0331 => \\ u1e96"
85+ ]
86+ },
87+ "transcription_unicode_workaround" : {
88+ "type" : " mapping" ,
89+ "mappings" : [
90+ " i\\ u032f => i" ,
91+ " u\\ u032f => u" ,
92+ " \\ u0131\\ u0357 => \\ ua7bd" ,
93+ " I\\ u0357 => \\ ua7bd" ,
94+ " h\\ u032d => \\ u0125" ,
95+ " H\\ u032d => \\ u0125"
96+ ]
6397 }
6498 },
6599 "analyzer" : {
66100 "transcription_analyzer" : {
67101 "type" : " custom" ,
68- "tokenizer" : " whitespace" ,
102+ "tokenizer" : " keyword" ,
103+ "filter" : [
104+ " lowercase"
105+ ],
106+ "char_filter" : [
107+ " whitespaces_compressor" ,
108+ " transcription_unicode_normalizer" ,
109+ " transcription_unicode_workaround" ,
110+ " transcription_special_signs_filter" ,
111+ " transcription_brackets_filter" ,
112+ " transcription_suffix_filter"
113+ ]
114+ },
115+ "mdc_analyzer" : {
116+ "type" : " custom" ,
117+ "tokenizer" : " keyword" ,
69118 "char_filter" : [
119+ " whitespaces_compressor" ,
120+ " transcription_special_signs_filter" ,
70121 " transcription_brackets_filter" ,
71122 " transcription_suffix_filter"
72123 ]
75126 "type" : " custom" ,
76127 "tokenizer" : " hieroglyph_tokenizer"
77128 },
78- "english_without_stopwords" : {
79- "type" :" custom" ,
80- "tokenizer" : " standard" ,
81- "filter" : [
82- " lowercase" ,
83- " english_stemmer"
84- ]
85- },
86- "german_without_stopwords" : {
87- "type" :" custom" ,
88- "tokenizer" : " standard" ,
89- "filter" : [
90- " lowercase" ,
91- " german_stemmer"
92- ]
93- },
94- "french_without_stopwords" : {
95- "type" :" custom" ,
96- "tokenizer" : " standard" ,
97- "filter" : [
98- " lowercase" ,
99- " french_stemmer"
100- ]
101- }
129+ "english_without_stopwords" : {
130+ "type" :" custom" ,
131+ "tokenizer" : " standard" ,
132+ "filter" : [
133+ " lowercase" ,
134+ " english_stemmer"
135+ ]
136+ },
137+ "german_without_stopwords" : {
138+ "type" :" custom" ,
139+ "tokenizer" : " standard" ,
140+ "filter" : [
141+ " lowercase" ,
142+ " german_stemmer"
143+ ]
144+ },
145+ "french_without_stopwords" : {
146+ "type" :" custom" ,
147+ "tokenizer" : " standard" ,
148+ "filter" : [
149+ " lowercase" ,
150+ " french_stemmer"
151+ ]
152+ }
102153 }
103154 }
104155 }
105- }
156+ }
157+
0 commit comments