Skip to content

Commit 7042b4a

Browse files
committed
Merge remote-tracking branch 'upstream/main'
2 parents d02cbaa + aa2e60f commit 7042b4a

File tree

10 files changed

+172
-15
lines changed

10 files changed

+172
-15
lines changed

opennlp-distr/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
<parent>
2525
<groupId>org.apache.opennlp</groupId>
2626
<artifactId>opennlp</artifactId>
27-
<version>2.5.4-SNAPSHOT</version>
27+
<version>2.5.5-SNAPSHOT</version>
2828
<relativePath>../pom.xml</relativePath>
2929
</parent>
3030

opennlp-dl-gpu/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
<parent>
2525
<groupId>org.apache.opennlp</groupId>
2626
<artifactId>opennlp</artifactId>
27-
<version>2.5.4-SNAPSHOT</version>
27+
<version>2.5.5-SNAPSHOT</version>
2828
<relativePath>../pom.xml</relativePath>
2929
</parent>
3030
<groupId>org.apache.opennlp</groupId>

opennlp-dl/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
<parent>
2525
<groupId>org.apache.opennlp</groupId>
2626
<artifactId>opennlp</artifactId>
27-
<version>2.5.4-SNAPSHOT</version>
27+
<version>2.5.5-SNAPSHOT</version>
2828
<relativePath>../pom.xml</relativePath>
2929
</parent>
3030
<groupId>org.apache.opennlp</groupId>

opennlp-docs/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
<parent>
2525
<groupId>org.apache.opennlp</groupId>
2626
<artifactId>opennlp</artifactId>
27-
<version>2.5.4-SNAPSHOT</version>
27+
<version>2.5.5-SNAPSHOT</version>
2828
<relativePath>../pom.xml</relativePath>
2929
</parent>
3030

opennlp-morfologik-addon/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
<parent>
2525
<groupId>org.apache.opennlp</groupId>
2626
<artifactId>opennlp</artifactId>
27-
<version>2.5.4-SNAPSHOT</version>
27+
<version>2.5.5-SNAPSHOT</version>
2828
<relativePath>../pom.xml</relativePath>
2929
</parent>
3030

opennlp-tools-models/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
<parent>
2525
<groupId>org.apache.opennlp</groupId>
2626
<artifactId>opennlp</artifactId>
27-
<version>2.5.4-SNAPSHOT</version>
27+
<version>2.5.5-SNAPSHOT</version>
2828
</parent>
2929

3030
<artifactId>opennlp-tools-models</artifactId>

opennlp-tools/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
<parent>
2626
<groupId>org.apache.opennlp</groupId>
2727
<artifactId>opennlp</artifactId>
28-
<version>2.5.4-SNAPSHOT</version>
28+
<version>2.5.5-SNAPSHOT</version>
2929
<relativePath>../pom.xml</relativePath>
3030
</parent>
3131

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package opennlp.tools.postag;
19+
20+
import java.io.IOException;
21+
import java.util.ArrayList;
22+
import java.util.HashMap;
23+
import java.util.List;
24+
import java.util.Map;
25+
import java.util.stream.Stream;
26+
27+
import org.junit.jupiter.api.BeforeAll;
28+
import org.junit.jupiter.params.ParameterizedTest;
29+
import org.junit.jupiter.params.provider.Arguments;
30+
import org.junit.jupiter.params.provider.MethodSource;
31+
32+
import opennlp.tools.tokenize.ThreadSafeTokenizerME;
33+
import opennlp.tools.tokenize.Tokenizer;
34+
35+
import static org.junit.jupiter.api.Assertions.assertEquals;
36+
import static org.junit.jupiter.api.Assertions.assertNotNull;
37+
import static org.junit.jupiter.api.Assertions.assertTrue;
38+
39+
public class POSTaggerMEIT {
40+
41+
private static final String CATALAN = "ca";
42+
private static final String ENGLISH = "en";
43+
private static final String GERMAN = "de";
44+
private static final String POLISH = "pl";
45+
private static final String PORTUGUESE = "pt";
46+
47+
private static final Map<String, Tokenizer> TOKENIZERS = new HashMap<>();
48+
private static final Map<String, POSTagger> TAGGERS = new HashMap<>();
49+
50+
private static final boolean debug = false;
51+
52+
@BeforeAll
53+
public static void initResources() throws IOException {
54+
List<String> langs = List.of(CATALAN, ENGLISH, GERMAN, POLISH, PORTUGUESE);
55+
for (String langCode: langs) {
56+
TOKENIZERS.put(langCode, new ThreadSafeTokenizerME(langCode));
57+
TAGGERS.put(langCode, new ThreadSafePOSTaggerME(langCode));
58+
}
59+
}
60+
61+
@ParameterizedTest(name = "Verify \"{0}\" sample")
62+
@MethodSource(value = "provideData")
63+
void testPOSTagger(String langCode, int allowedDelta, String input, String[] expectedTags) {
64+
65+
final String[] tokens = TOKENIZERS.get(langCode).tokenize(input);
66+
assertNotNull(tokens);
67+
assertEquals(expectedTags.length, tokens.length);
68+
final String[] tags = TAGGERS.get(langCode).tag(tokens);
69+
assertNotNull(tags);
70+
assertEquals(expectedTags.length, tags.length);
71+
StringBuilder fullyTagged = new StringBuilder();
72+
for (int i = 0; i < tags.length; i++) {
73+
fullyTagged.append(tokens[i]).append("_").append(tags[i]).append(" ");
74+
}
75+
if (debug) {
76+
System.out.println(fullyTagged);
77+
}
78+
79+
List<Integer> incorrectTagsPositions = new ArrayList<>();
80+
for (int i = 0; i < tags.length; i++) {
81+
StringBuilder sb = new StringBuilder();
82+
sb.append(tokens[i]).append("[").append(tags[i]).append("]");
83+
if (expectedTags[i].equals(tags[i])) {
84+
sb.append(" <-- " + "OK");
85+
} else {
86+
sb.append(" <-- " + "NOK" + ", pos=").append(i);
87+
incorrectTagsPositions.add(i);
88+
}
89+
if (debug) {
90+
System.out.println(sb);
91+
}
92+
}
93+
assertTrue(incorrectTagsPositions.size() <= allowedDelta);
94+
}
95+
96+
private static Stream<Arguments> provideData() {
97+
return Stream.of(
98+
// see: Dev Manual
99+
Arguments.of(ENGLISH, 0,
100+
"Mr. Vinken is chairman of Elsevier N.V. , the Dutch publishing group .",
101+
new String[]{"PROPN", "PROPN", "AUX", "NOUN", "ADP", "ADJ", "PROPN", "PUNCT", "DET", "PROPN",
102+
"VERB", "NOUN", "PUNCT"}),
103+
// see: 'de-ud-train-sample.conllu'
104+
Arguments.of(GERMAN, 0,
105+
"Fachlich kompetent, sehr gute Beratung und ein freundliches Team .",
106+
new String[]{"ADJ", "ADJ", "PUNCT", "ADV", "ADJ", "NOUN", "CCONJ", "DET", "ADJ", "NOUN", "PUNCT"}),
107+
// see: 'pt-br-ud-sample.conllu'
108+
Arguments.of(PORTUGUESE, 1,
109+
"Numa reunião entre representantes da Secretaria da Criança do DF ea juíza da Vara de Execuções de " +
110+
"Medidas Socioeducativas, Lavínia Tupi Vieira Fonseca, ficou acordado que dos 25 internos, " +
111+
"12 serão internados na Unidade de Planaltina e os outros 13 devem retornar para a Unidade do " +
112+
"Recanto das Emas, antigo Ciago .",
113+
// pos=10 -> NOK
114+
new String[]{"ADP+DET", "NOUN", "ADP", "NOUN", "ADP+DET", "PROPN", "ADP+DET", "PROPN", "ADP+DET",
115+
"PROPN", "CCONJ", "NOUN", "ADP+DET", "PROPN", "ADP", "PROPN", "ADP", "PROPN", "PROPN", "PUNCT",
116+
"PROPN", "PROPN", "PROPN", "PROPN", "PUNCT", "VERB", "ADJ", "CCONJ", "ADP+DET", "NUM", "NOUN",
117+
"PUNCT", "NUM", "AUX", "VERB", "ADP+DET", "PROPN", "ADP", "PROPN", "CCONJ", "DET", "DET", "NUM",
118+
"AUX", "VERB", "ADP", "DET", "PROPN", "ADP+DET", "PROPN", "ADP+DET", "PROPN", "PUNCT", "ADJ",
119+
"PROPN", "PUNCT"}),
120+
// via @alsmolarczyk, original by Lem, Stanisław (1961/2022):
121+
// Solaris, Wydawnictwo Literackie, Kraków, S. 81.
122+
Arguments.of(POLISH, 1,
123+
"Zerwałem się ze stosu zwiniętych spadochronów i pobiegłem prosto do radiostacji .",
124+
new String[]{"VERB+AUX", "PART", "ADP", "NOUN", "ADJ", "NOUN", "CCONJ", "VERB+AUX", "ADV", "ADP",
125+
"NOUN", "PUNCT"}),
126+
// via @alsmolarczyk, original by Tokarczuk, Olga (2009/2021):
127+
// Prowadź swój pług przez kości umarłych, Wydawnictwo Literackie, Kraków, S. 43-44.
128+
Arguments.of(POLISH, 0,
129+
"Więzienie nie tkwi na zewnątrz, ale jest w środku każdego z nas .",
130+
new String[]{"NOUN", "PART", "VERB", "ADP", "ADV", "PUNCT", "CCONJ", "VERB", "ADP", "NOUN",
131+
"DET", "ADP", "PRON", "PUNCT"}),
132+
// via @alsmolarczyk, original by Zalega, Dariusz (2019):
133+
// Śląsk zbuntowany, Wydawnictwo Czarne, Wołowiec, S. 96.
134+
Arguments.of(POLISH, 0,
135+
"Działacze stosowali też różne formy nacisku na polski konsulat , żeby zaopiekował się " +
136+
"bezrobotnymi z Polski albo dał im choćby na bezpłatny bilet do kraju .",
137+
new String[]{"NOUN", "VERB", "PART", "ADJ", "NOUN", "NOUN", "ADP", "ADJ", "NOUN", "PUNCT", "SCONJ",
138+
"VERB", "PRON", "ADJ", "ADP", "PROPN", "CCONJ", "VERB", "PRON", "PART", "ADP", "ADJ", "NOUN",
139+
"ADP", "NOUN", "PUNCT"}),
140+
// via: @kinow
141+
Arguments.of(CATALAN, 1,
142+
"Un gran embossament d'aire fred es comença a despenjar cap al centre d'Europa.",
143+
// OpenNLP, different at: idx pos 2, 3, 5, and 13(+14) -> however, only pos 5 is "wrong" (ref)
144+
new String[]{"DET", "ADJ", "NOUN", "ADP", "NOUN", "ADJ", "PRON", "VERB", "ADP", "VERB", "NOUN",
145+
"ADP+DET", "NOUN", "ADP", "PROPN", "PUNCT"})
146+
// REFERENCE ("gold"):
147+
// "DET", "ADJ", "NOUN", "ADP", "NOUN", "ADJ", "PRON", "VERB", "ADP", "VERB", "NOUN", "ADP+DET",
148+
// "NOUN", "ADP", "PROPN", "PUNCT"})
149+
150+
// Spacy, wrong tags at: idx pos 2, 3 and 14
151+
//"DET", "ADJ", "ADV", "PROPN", "NOUN", "ADJ", "PRON", "VERB", "ADP", "VERB", "NOUN", "ADP" + "DET",
152+
// "NOUN", "PROPN", "PROPN", "PUNCT"
153+
// ok! , ok! , ??? , ??? , ok! , ok! , ok! , ok! , ok! , ok! , ok! , ok! + ok! ,
154+
// ok! , ??? , ok! , ok!
155+
);
156+
}
157+
}

opennlp-uima/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
<parent>
2626
<groupId>org.apache.opennlp</groupId>
2727
<artifactId>opennlp</artifactId>
28-
<version>2.5.4-SNAPSHOT</version>
28+
<version>2.5.5-SNAPSHOT</version>
2929
<relativePath>../pom.xml</relativePath>
3030
</parent>
3131

pom.xml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,13 @@
2525
<parent>
2626
<groupId>org.apache</groupId>
2727
<artifactId>apache</artifactId>
28-
<version>33</version>
28+
<version>34</version>
2929
<relativePath />
3030
</parent>
3131

3232
<groupId>org.apache.opennlp</groupId>
3333
<artifactId>opennlp</artifactId>
34-
<version>2.5.4-SNAPSHOT</version>
34+
<version>2.5.5-SNAPSHOT</version>
3535
<packaging>pom</packaging>
3636

3737
<name>Apache OpenNLP Reactor</name>
@@ -40,7 +40,7 @@
4040
<connection>scm:git:https://github.com/apache/opennlp.git</connection>
4141
<developerConnection>scm:git:[email protected]:apache/opennlp.git</developerConnection>
4242
<url>https://github.com/apache/opennlp.git</url>
43-
<tag>opennlp-2.5.3</tag>
43+
<tag>opennlp-2.5.4</tag>
4444
</scm>
4545

4646
<repositories>
@@ -186,17 +186,17 @@
186186
<onnxruntime.version>1.21.0</onnxruntime.version>
187187
<slf4j.version>2.0.17</slf4j.version>
188188
<log4j2.version>2.24.3</log4j2.version>
189-
<logcaptor.version>2.10.1</logcaptor.version>
189+
<logcaptor.version>2.10.2</logcaptor.version>
190190
<classgraph.version>4.8.179</classgraph.version>
191191
<jmh.version>1.37</jmh.version>
192192

193193
<!-- Plugin versions -->
194194
<coveralls.maven.plugin>4.3.0</coveralls.maven.plugin>
195-
<jacoco.maven.plugin>0.8.12</jacoco.maven.plugin>
195+
<jacoco.maven.plugin>0.8.13</jacoco.maven.plugin>
196196
<maven.assembly.plugin>3.7.1</maven.assembly.plugin>
197-
<maven.failsafe.plugin>3.5.2</maven.failsafe.plugin>
197+
<maven.failsafe.plugin>3.5.3</maven.failsafe.plugin>
198198
<maven.javadoc.plugin>3.11.2</maven.javadoc.plugin>
199-
<forbiddenapis.plugin>3.8</forbiddenapis.plugin>
199+
<forbiddenapis.plugin>3.9</forbiddenapis.plugin>
200200
<license-maven-plugin.version>2.5.0</license-maven-plugin.version>
201201
</properties>
202202

0 commit comments

Comments
 (0)