Skip to content

Commit 40ac1ba

Browse files
committed
Some API change, go...
1 parent 0a16f93 commit 40ac1ba

14 files changed

Lines changed: 303 additions & 431 deletions

File tree

analysis/src/java/com/github/oeuvres/alix/ingest/AlixSaxHandler.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -265,10 +265,10 @@ public void endElement(String uri, String localName, String qName) throws SAXExc
265265
@Override
266266
public void characters(char[] ch, int start, int length) throws SAXException
267267
{
268-
if (fieldMode == FieldMode.NONE)
269-
return;
270-
271268
switch (fieldMode) {
269+
case NONE -> {
270+
return;
271+
}
272272
case SCALAR, DERIVED -> {
273273
if (!isAllWhitespace(ch, start, length)) {
274274
throw new SAXException(

analysis/src/java/com/github/oeuvres/alix/lucene/analysis/FilterLocution.java

Lines changed: 0 additions & 299 deletions
This file was deleted.

analysis/src/java/com/github/oeuvres/alix/lucene/analysis/MweFilter.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ public final class MweFilter extends TokenFilter
8585

8686
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
8787
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
88-
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
8988
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
9089

9190
private boolean inputExhausted = false;

analysis/src/java/com/github/oeuvres/alix/lucene/analysis/TermReplaceFilter.java

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141
import org.apache.lucene.analysis.TokenStream;
4242
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
4343

44+
import com.github.oeuvres.alix.util.CharsMap;
45+
4446
/**
4547
* Rewrite tokens by applying an exact, dictionary-based term mapping.
4648
*
@@ -75,7 +77,7 @@ public final class TermReplaceFilter extends TokenFilter {
7577
* Term rewrite table. Keys are matched against the current token term; values are copied
7678
* into the {@link CharTermAttribute} when a match is found.
7779
*/
78-
private final CharArrayMap<char[]> map;
80+
private final CharsMap map;
7981

8082
/** The current token term. */
8183
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
@@ -86,7 +88,7 @@ public final class TermReplaceFilter extends TokenFilter {
8688
* @param input the upstream {@link TokenStream} (tokenizer or previous filter)
8789
* @param map the rewrite table mapping surface forms to replacement forms
8890
*/
89-
public TermReplaceFilter(final TokenStream input, final CharArrayMap<char[]> map) {
91+
public TermReplaceFilter(final TokenStream input, final CharsMap map) {
9092
super(input);
9193
this.map = map;
9294
}
@@ -100,10 +102,13 @@ public TermReplaceFilter(final TokenStream input, final CharArrayMap<char[]> map
100102
@Override
101103
public boolean incrementToken() throws IOException {
102104
if (!input.incrementToken()) return false;
103-
104-
final char[] replacement = map.get(termAtt.buffer(), 0, termAtt.length());
105-
if (replacement != null) {
106-
termAtt.copyBuffer(replacement, 0, replacement.length);
105+
106+
int vOrd = map.valueOrd(termAtt.buffer(), 0, termAtt.length());
107+
if (vOrd >= 0) {
108+
int vLen = map.len(vOrd);
109+
char[] dst = termAtt.resizeBuffer(vLen);
110+
map.copy(vOrd, dst, 0);
111+
termAtt.setLength(vLen);
107112
}
108113
return true;
109114
}

analysis/src/java/com/github/oeuvres/alix/lucene/analysis/fr/FrenchAnalyzer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ public class FrenchAnalyzer extends DelegatingAnalyzerWrapper
7979
/** Words with ending dots */
8080
public final CharArraySet brevidots;
8181
/** Term normalizer */
82-
public final CharArrayMap<char[]> normalizer;
82+
public final CharsMap normalizer;
8383
/** Multi-Word Expressions */
8484
public final MweLexicon expressions;
8585
/** Big dictionary */

analysis/src/java/com/github/oeuvres/alix/lucene/analysis/fr/FrenchLexicons.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040

4141
import com.github.oeuvres.alix.lucene.analysis.LexiconHelper;
4242
import com.github.oeuvres.alix.lucene.analysis.LexiconHelper.PosResolver;
43+
import com.github.oeuvres.alix.util.CharsMap;
4344
import com.github.oeuvres.alix.util.LemmaLexicon;
4445
import com.github.oeuvres.alix.util.MweLexicon;
4546
import com.github.oeuvres.alix.util.WordTokenizer;
@@ -135,9 +136,9 @@ public static MweLexicon buildMweLexicon()
135136
}
136137

137138

138-
public static CharArrayMap<char[]> buildNormalizer()
139+
public static CharsMap buildNormalizer()
139140
{
140-
CharArrayMap<char[]> map = new CharArrayMap<char[]>(2000, false);
141+
CharsMap map = new CharsMap(2000);
141142
LexiconHelper.loadMap(map, LexiconHelper.class, "/com/github/oeuvres/alix/fr/norm-1990-classical.csv", LexiconHelper.OnDuplicate.REPLACE);
142143
LexiconHelper.loadMap(map, LexiconHelper.class, "/com/github/oeuvres/alix/fr/norm-aeoe.csv", LexiconHelper.OnDuplicate.REPLACE);
143144
LexiconHelper.loadMap(map, LexiconHelper.class, "/com/github/oeuvres/alix/fr/norm-maj-noacc.csv", LexiconHelper.OnDuplicate.REPLACE);

0 commit comments

Comments
 (0)