File tree Expand file tree Collapse file tree
analysis/src/java/com/github/oeuvres/alix/lucene/analysis/fr
main/java/com/github/oeuvres/alix/lucene/analysis
test/java/com/github/oeuvres/alix/lucene/analysis Expand file tree Collapse file tree Original file line number Diff line number Diff line change 3636import org .apache .lucene .analysis .TokenStream ;
3737import org .apache .lucene .analysis .Tokenizer ;
3838
39- import com .github .oeuvres .alix .lucene .analysis .FilterAposHyphenFr ;
4039import com .github .oeuvres .alix .lucene .analysis .FilterCloud ;
4140import com .github .oeuvres .alix .lucene .analysis .FilterLemmatize ;
4241import com .github .oeuvres .alix .lucene .analysis .FilterLocution ;
@@ -67,7 +66,7 @@ public TokenStreamComponents createComponents(String field)
6766 // interpret html tags as token events like para or section
6867 ts = new MLFilter (ts );
6968 // fr split on ’ and -
70- ts = new FilterAposHyphenFr (ts );
69+ ts = new FrenchCliticSplitFilter (ts );
7170 // pos tagging before lemmatize
7271 ts = new PosTaggingFilter (ts );
7372 // provide lemma+pos
Original file line number Diff line number Diff line change 3737import org .apache .lucene .analysis .Tokenizer ;
3838import org .apache .lucene .analysis .miscellaneous .ASCIIFoldingFilter ;
3939
40- import com .github .oeuvres .alix .lucene .analysis .FilterAposHyphenFr ;
4140import com .github .oeuvres .alix .lucene .analysis .FilterFind ;
4241import com .github .oeuvres .alix .lucene .analysis .FilterLemmatize ;
4342import com .github .oeuvres .alix .lucene .analysis .MLFilter ;
@@ -64,7 +63,7 @@ public TokenStreamComponents createComponents(String field)
6463 final Tokenizer tokenizer = new MLTokenizer (); // segment words
6564 TokenStream ts = tokenizer ;
6665 ts = new MLFilter (ts ); // interpret tags
67- ts = new FilterAposHyphenFr (ts ); // fr split on ’ and -
66+ ts = new FrenchCliticSplitFilter (ts ); // fr split on ’ and -
6867 ts = new FilterLemmatize (ts ); // provide lemma+pos
6968 ts = new FilterFind (ts ); // orthographic form and lemma as term to index
7069 ts = new ASCIIFoldingFilter (ts ); // no accents
Original file line number Diff line number Diff line change 3737import org .apache .lucene .analysis .Tokenizer ;
3838import org .apache .lucene .analysis .miscellaneous .ASCIIFoldingFilter ;
3939
40- import com .github .oeuvres .alix .lucene .analysis .FilterAposHyphenFr ;
4140import com .github .oeuvres .alix .lucene .analysis .MLFilter ;
4241import com .github .oeuvres .alix .lucene .analysis .MLTokenizer ;
4342
@@ -61,7 +60,7 @@ protected TokenStreamComponents createComponents(String fieldName)
6160 final Tokenizer tokenizer = new MLTokenizer (); // segment words
6261 TokenStream ts = tokenizer ;
6362 ts = new MLFilter (ts ); // strip tags
64- ts = new FilterAposHyphenFr (ts ); // fr split on ’ and -
63+ ts = new FrenchCliticSplitFilter (ts ); // fr split on ’ and -
6564 ts = new ASCIIFoldingFilter (ts ); // no accents
6665 return new TokenStreamComponents (tokenizer , ts );
6766 }
Original file line number Diff line number Diff line change 3636import org .apache .lucene .analysis .TokenStream ;
3737import org .apache .lucene .analysis .Tokenizer ;
3838
39- import com .github .oeuvres .alix .lucene .analysis .FilterAposHyphenFr ;
4039import com .github .oeuvres .alix .lucene .analysis .FilterLemmatize ;
4140import com .github .oeuvres .alix .lucene .analysis .FilterLocution ;
4241import com .github .oeuvres .alix .lucene .analysis .FilterOrth ;
@@ -67,7 +66,7 @@ public TokenStreamComponents createComponents(String field)
6766 // interpret html tags as token events like para or section
6867 ts = new MLFilter (ts );
6968 // fr split on ’ and -
70- ts = new FilterAposHyphenFr (ts );
69+ ts = new FrenchCliticSplitFilter (ts );
7170 // provide lemma+pos
7271 ts = new FilterLemmatize (ts );
7372 // group compounds after lemmatization for verbal compounds
Original file line number Diff line number Diff line change 3636import org .apache .lucene .analysis .TokenStream ;
3737import org .apache .lucene .analysis .Tokenizer ;
3838
39- import com .github .oeuvres .alix .lucene .analysis .FilterAposHyphenFr ;
4039import com .github .oeuvres .alix .lucene .analysis .MLFilter ;
4140import com .github .oeuvres .alix .lucene .analysis .MLTokenizer ;
4241import com .github .oeuvres .alix .lucene .analysis .PosTaggingFilter ;
@@ -64,7 +63,7 @@ public TokenStreamComponents createComponents(String field)
6463 // interpret html tags as token events like para or section
6564 ts = new MLFilter (ts );
6665 // fr split on ’ and -
67- ts = new FilterAposHyphenFr (ts );
66+ ts = new FrenchCliticSplitFilter (ts );
6867 // pos tagging before lemmatize
6968 ts = new PosTaggingFilter (ts );
7069 // provide lemma+pos
Original file line number Diff line number Diff line change 3030 * See the License for the specific language governing permissions and
3131 * limitations under the License.
3232 */
33- package com .github .oeuvres .alix .lucene .analysis ;
33+ package com .github .oeuvres .alix .lucene .analysis . fr ;
3434
3535import java .io .IOException ;
3636
5353 *
5454 * Known side effect : qu’en-dira-t-on, donne-m’en, emmène-m’y.
5555 */
56- public class FilterAposHyphenFr extends TokenFilter
56+ public class FrenchCliticSplitFilter extends TokenFilter
5757{
5858 private static final int MAX_STEPS = 16 ;
5959
@@ -128,7 +128,7 @@ public class FilterAposHyphenFr extends TokenFilter
128128 SUFFIX .put ("-y" , "y" .toCharArray ()); // allons-y.
129129 }
130130
131- public FilterAposHyphenFr (TokenStream input ) {
131+ public FrenchCliticSplitFilter (TokenStream input ) {
132132 super (input );
133133 }
134134
Original file line number Diff line number Diff line change 1818
1919import static com .github .oeuvres .alix .common .Upos .*;
2020
21+ import com .github .oeuvres .alix .lucene .analysis .fr .FrenchCliticSplitFilter ;
2122import com .github .oeuvres .alix .util .Char ;
2223import com .github .oeuvres .alix .util .Dir ;
2324
@@ -31,7 +32,7 @@ public TokenStreamComponents createComponents(String field)
3132 {
3233 final Tokenizer tokenizer = new MLTokenizer ();
3334 TokenStream ts = tokenizer ;
34- ts = new FilterAposHyphenFr (tokenizer );
35+ ts = new FrenchCliticSplitFilter (tokenizer );
3536 return new TokenStreamComponents (tokenizer , ts );
3637 }
3738
Original file line number Diff line number Diff line change 1212import org .junit .jupiter .api .Test ;
1313
1414import com .github .oeuvres .alix .common .Upos ;
15+ import com .github .oeuvres .alix .lucene .analysis .fr .FrenchCliticSplitFilter ;
1516
1617
1718public class FilterAposHyphenFrTest
@@ -52,7 +53,7 @@ public TokenStreamComponents createComponents(String field)
5253 {
5354 final Tokenizer tokenizer = new MLTokenizer ();
5455 TokenStream ts = tokenizer ;
55- ts = new FilterAposHyphenFr (tokenizer );
56+ ts = new FrenchCliticSplitFilter (tokenizer );
5657 return new TokenStreamComponents (tokenizer , ts );
5758 }
5859
You can’t perform that action at this time.
0 commit comments