Skip to content

Commit 127940c

Browse files
committed
Better name for l’amer-tue
1 parent c779646 commit 127940c

8 files changed

Lines changed: 12 additions & 15 deletions

File tree

analysis/src/java/com/github/oeuvres/alix/lucene/analysis/fr/AnalyzerCloud.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
import org.apache.lucene.analysis.TokenStream;
3737
import org.apache.lucene.analysis.Tokenizer;
3838

39-
import com.github.oeuvres.alix.lucene.analysis.FilterAposHyphenFr;
4039
import com.github.oeuvres.alix.lucene.analysis.FilterCloud;
4140
import com.github.oeuvres.alix.lucene.analysis.FilterLemmatize;
4241
import com.github.oeuvres.alix.lucene.analysis.FilterLocution;
@@ -67,7 +66,7 @@ public TokenStreamComponents createComponents(String field)
6766
// interpret html tags as token events like para or section
6867
ts = new MLFilter(ts);
6968
// fr split on ’ and -
70-
ts = new FilterAposHyphenFr(ts);
69+
ts = new FrenchCliticSplitFilter(ts);
7170
// pos tagging before lemmatize
7271
ts = new PosTaggingFilter(ts);
7372
// provide lemma+pos

analysis/src/java/com/github/oeuvres/alix/lucene/analysis/fr/AnalyzerFind.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
import org.apache.lucene.analysis.Tokenizer;
3838
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
3939

40-
import com.github.oeuvres.alix.lucene.analysis.FilterAposHyphenFr;
4140
import com.github.oeuvres.alix.lucene.analysis.FilterFind;
4241
import com.github.oeuvres.alix.lucene.analysis.FilterLemmatize;
4342
import com.github.oeuvres.alix.lucene.analysis.MLFilter;
@@ -64,7 +63,7 @@ public TokenStreamComponents createComponents(String field)
6463
final Tokenizer tokenizer = new MLTokenizer(); // segment words
6564
TokenStream ts = tokenizer;
6665
ts = new MLFilter(ts); // interpret tags
67-
ts = new FilterAposHyphenFr(ts); // fr split on ’ and -
66+
ts = new FrenchCliticSplitFilter(ts); // fr split on ’ and -
6867
ts = new FilterLemmatize(ts); // provide lemma+pos
6968
ts = new FilterFind(ts); // orthographic form and lemma as term to index
7069
ts = new ASCIIFoldingFilter(ts); // no accents

analysis/src/java/com/github/oeuvres/alix/lucene/analysis/fr/AnalyzerMeta.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
import org.apache.lucene.analysis.Tokenizer;
3838
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
3939

40-
import com.github.oeuvres.alix.lucene.analysis.FilterAposHyphenFr;
4140
import com.github.oeuvres.alix.lucene.analysis.MLFilter;
4241
import com.github.oeuvres.alix.lucene.analysis.MLTokenizer;
4342

@@ -61,7 +60,7 @@ protected TokenStreamComponents createComponents(String fieldName)
6160
final Tokenizer tokenizer = new MLTokenizer(); // segment words
6261
TokenStream ts = tokenizer;
6362
ts = new MLFilter(ts); // strip tags
64-
ts = new FilterAposHyphenFr(ts); // fr split on ’ and -
63+
ts = new FrenchCliticSplitFilter(ts); // fr split on ’ and -
6564
ts = new ASCIIFoldingFilter(ts); // no accents
6665
return new TokenStreamComponents(tokenizer, ts);
6766
}

analysis/src/java/com/github/oeuvres/alix/lucene/analysis/fr/AnalyzerOrth.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
import org.apache.lucene.analysis.TokenStream;
3737
import org.apache.lucene.analysis.Tokenizer;
3838

39-
import com.github.oeuvres.alix.lucene.analysis.FilterAposHyphenFr;
4039
import com.github.oeuvres.alix.lucene.analysis.FilterLemmatize;
4140
import com.github.oeuvres.alix.lucene.analysis.FilterLocution;
4241
import com.github.oeuvres.alix.lucene.analysis.FilterOrth;
@@ -67,7 +66,7 @@ public TokenStreamComponents createComponents(String field)
6766
// interpret html tags as token events like para or section
6867
ts = new MLFilter(ts);
6968
// fr split on ’ and -
70-
ts = new FilterAposHyphenFr(ts);
69+
ts = new FrenchCliticSplitFilter(ts);
7170
// provide lemma+pos
7271
ts = new FilterLemmatize(ts);
7372
// group compounds after lemmatization for verbal compounds

analysis/src/java/com/github/oeuvres/alix/lucene/analysis/fr/AnalyzerPos.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
import org.apache.lucene.analysis.TokenStream;
3737
import org.apache.lucene.analysis.Tokenizer;
3838

39-
import com.github.oeuvres.alix.lucene.analysis.FilterAposHyphenFr;
4039
import com.github.oeuvres.alix.lucene.analysis.MLFilter;
4140
import com.github.oeuvres.alix.lucene.analysis.MLTokenizer;
4241
import com.github.oeuvres.alix.lucene.analysis.PosTaggingFilter;
@@ -64,7 +63,7 @@ public TokenStreamComponents createComponents(String field)
6463
// interpret html tags as token events like para or section
6564
ts = new MLFilter(ts);
6665
// fr split on ’ and -
67-
ts = new FilterAposHyphenFr(ts);
66+
ts = new FrenchCliticSplitFilter(ts);
6867
// pos tagging before lemmatize
6968
ts = new PosTaggingFilter(ts);
7069
// provide lemma+pos

analysis/src/java/com/github/oeuvres/alix/lucene/analysis/FilterAposHyphenFr.java renamed to analysis/src/java/com/github/oeuvres/alix/lucene/analysis/fr/FrenchCliticSplitFilter.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
* See the License for the specific language governing permissions and
3131
* limitations under the License.
3232
*/
33-
package com.github.oeuvres.alix.lucene.analysis;
33+
package com.github.oeuvres.alix.lucene.analysis.fr;
3434

3535
import java.io.IOException;
3636

@@ -53,7 +53,7 @@
5353
*
5454
* Known side effect : qu’en-dira-t-on, donne-m’en, emmène-m’y.
5555
*/
56-
public class FilterAposHyphenFr extends TokenFilter
56+
public class FrenchCliticSplitFilter extends TokenFilter
5757
{
5858
private static final int MAX_STEPS = 16;
5959

@@ -128,7 +128,7 @@ public class FilterAposHyphenFr extends TokenFilter
128128
SUFFIX.put("-y", "y".toCharArray()); // allons-y.
129129
}
130130

131-
public FilterAposHyphenFr(TokenStream input) {
131+
public FrenchCliticSplitFilter(TokenStream input) {
132132
super(input);
133133
}
134134

test/src/main/java/com/github/oeuvres/alix/lucene/analysis/TokenizerTest.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import static com.github.oeuvres.alix.common.Upos.*;
2020

21+
import com.github.oeuvres.alix.lucene.analysis.fr.FrenchCliticSplitFilter;
2122
import com.github.oeuvres.alix.util.Char;
2223
import com.github.oeuvres.alix.util.Dir;
2324

@@ -31,7 +32,7 @@ public TokenStreamComponents createComponents(String field)
3132
{
3233
final Tokenizer tokenizer = new MLTokenizer();
3334
TokenStream ts = tokenizer;
34-
ts = new FilterAposHyphenFr(tokenizer);
35+
ts = new FrenchCliticSplitFilter(tokenizer);
3536
return new TokenStreamComponents(tokenizer, ts);
3637
}
3738

test/src/test/java/com/github/oeuvres/alix/lucene/analysis/FilterAposHyphenFrTest.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.junit.jupiter.api.Test;
1313

1414
import com.github.oeuvres.alix.common.Upos;
15+
import com.github.oeuvres.alix.lucene.analysis.fr.FrenchCliticSplitFilter;
1516

1617

1718
public class FilterAposHyphenFrTest
@@ -52,7 +53,7 @@ public TokenStreamComponents createComponents(String field)
5253
{
5354
final Tokenizer tokenizer = new MLTokenizer();
5455
TokenStream ts = tokenizer;
55-
ts = new FilterAposHyphenFr(tokenizer);
56+
ts = new FrenchCliticSplitFilter(tokenizer);
5657
return new TokenStreamComponents(tokenizer, ts);
5758
}
5859

0 commit comments

Comments
 (0)