Skip to content

Commit b3608fb

Browse files
committed
Merged version1.2.5 into the master branch.
2 parents a3ee71e + 07a2fe7 commit b3608fb

File tree

89 files changed

+5090
-315
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

89 files changed

+5090
-315
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,6 @@ gerbil_data
1010
*.log
1111
google*.html
1212
export
13-
datadump.nt
13+
datadump.nt
14+
indexes
15+
dependency-reduced-pom.xml

.travis.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@ language: java
33
before_install:
44
- cp src/main/properties/log4j.properties src/test/resources/log4j.properties
55
- mkdir -p "gerbil_data"
6-
- curl --retry 4 -L -o "gerbil_data/gerbil_data.zip" "https://github.com/AKSW/gerbil/releases/download/v1.2.4/gerbil_data.zip"
6+
- curl --retry 4 -L -o "gerbil_data/gerbil_data.zip" "https://github.com/AKSW/gerbil/releases/download/v1.2.5/gerbil_data.zip"
77
- unzip "gerbil_data/gerbil_data.zip"
8+
- touch src/main/properties/gerbil_keys.properties
89
install:
910
- mvn clean compile -DskipTests=true -Dmaven.javadoc.skip=true -B -V
1011
script:

index.sh

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#!/bin/sh
# Downloads the English DBpedia 2016-04 core-i18n dump, unpacks it, converts the
# Turtle files to N-Triples with rdf2rdf and then runs
# org.aksw.gerbil.tools.InitialIndexTool on the resulting directory. The
# downloaded dump is removed afterwards.
#
# The script has to be started from the directory in which mvn should be
# executed, because the directory is remembered and re-entered before the
# indexing step.

START_DIR="$(pwd)"
DUMP_DIR="dbpedia_dump"
DATA_DIR="downloads.dbpedia.org/2016-04/core-i18n/en/"
RDF2RDF_JAR="rdf2rdf-1.0.1-2.3.1.jar"

mkdir -p "$DUMP_DIR"
# Abort if the directory cannot be entered; the rm calls below must never run
# in an unexpected location.
cd "$DUMP_DIR" || exit 1

# NOTE(review): the accept/reject lists contain a space after each comma;
# confirm that wget interprets these patterns as intended.
wget -r --no-parent -R "*.txt, *.html, *.json" -A "*.nt, *.ttl, *.nt.bz2, *.ttl.bz2" http://downloads.dbpedia.org/2016-04/core-i18n/en/
cd "$DATA_DIR" || exit 1

wget http://www.l3s.de/~minack/rdf2rdf/downloads/rdf2rdf-1.0.1-2.3.1.jar

# Remove files that are not part of the dump (-f: no error if none exist).
rm -f -- *.json
rm -f -- *.txt
rm -f -- index.html

# Decompress all archives.
for i in *.bz2; do
    # Skip the literal pattern if no archive is present.
    [ -e "$i" ] || continue
    bzip2 -vd "$i"
done

# Convert every Turtle file to N-Triples.
for i in *.ttl; do
    [ -e "$i" ] || continue
    java -jar "$RDF2RDF_JAR" "$i" .nt
done

rm -f -- *.ttl
rm -f -- "$RDF2RDF_JAR"

# Return to the start directory. The previous relative path climbed six levels
# although only five had been descended, which left the script one directory
# above the starting point.
cd "$START_DIR" || exit 1

mvn exec:java -Dexec.mainClass="org.aksw.gerbil.tools.InitialIndexTool" -Dexec.args="dbpedia_dump/downloads.dbpedia.org/2016-04/core-i18n/en/"

rm -rf "$DUMP_DIR"/

pom.xml

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
1313
<modelVersion>4.0.0</modelVersion>
1414
<groupId>org.aksw</groupId>
15-
<artifactId>gerbil</artifactId>
16-
<version>1.2.4</version>
15+
<artifactId>gerbil</artifactId>
16+
<version>1.2.5</version>
1717
<name>General Entity Annotator Benchmark</name>
1818
<description>This project is a benchmark for entity annotation and disambiguation tools.</description>
1919
<inceptionYear>2014</inceptionYear>
@@ -55,8 +55,8 @@
5555
<!-- NIF transfer lib -->
5656
<dependency>
5757
<groupId>org.aksw</groupId>
58-
<artifactId>gerbil.nif.transfer</artifactId>
59-
<version>1.2.2</version>
58+
<artifactId>gerbil.nif.transfer</artifactId>
59+
<version>1.2.3</version>
6060
</dependency>
6161
<!-- Jena for using JSON-LD -->
6262
<dependency>
@@ -142,7 +142,7 @@
142142
<dependency>
143143
<groupId>org.apache.lucene</groupId>
144144
<artifactId>lucene-core</artifactId>
145-
<version>2.9.1</version>
145+
<version>6.2.0</version>
146146
</dependency>
147147
<dependency>
148148
<groupId>commons-configuration</groupId>
@@ -311,6 +311,11 @@
311311
<artifactId>json</artifactId>
312312
<version>20140107</version>
313313
</dependency>
314+
<dependency>
315+
<groupId>org.apache.lucene</groupId>
316+
<artifactId>lucene-analyzers-common</artifactId>
317+
<version>6.2.0</version>
318+
</dependency>
314319
</dependencies>
315320

316321
<build>

src/main/java/org/aksw/gerbil/annotator/OKETask1Annotator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
import org.aksw.gerbil.transfer.nif.Document;
2323
import org.aksw.gerbil.transfer.nif.data.TypedNamedEntity;
2424

25-
public interface OKETask1Annotator extends A2KBAnnotator, EntityTyper {
25+
public interface OKETask1Annotator extends A2KBAnnotator, RT2KBAnnotator {
2626

2727
public List<TypedNamedEntity> performTask1(Document document) throws GerbilException;
2828
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/**
2+
* This file is part of General Entity Annotator Benchmark.
3+
*
4+
* General Entity Annotator Benchmark is free software: you can redistribute it and/or modify
5+
* it under the terms of the GNU Lesser General Public License as published by
6+
* the Free Software Foundation, either version 3 of the License, or
7+
* (at your option) any later version.
8+
*
9+
* General Entity Annotator Benchmark is distributed in the hope that it will be useful,
10+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
* GNU Lesser General Public License for more details.
13+
*
14+
* You should have received a copy of the GNU Lesser General Public License
15+
* along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>.
16+
*/
17+
package org.aksw.gerbil.annotator;
18+
19+
import java.util.List;
20+
21+
import org.aksw.gerbil.exceptions.GerbilException;
22+
import org.aksw.gerbil.transfer.nif.Document;
23+
import org.aksw.gerbil.transfer.nif.TypedSpan;
24+
25+
public interface RT2KBAnnotator extends EntityRecognizer, EntityTyper {
26+
27+
public List<TypedSpan> performRT2KBTask(Document document) throws GerbilException;
28+
}

src/main/java/org/aksw/gerbil/annotator/decorator/ErrorCountingAnnotatorDecorator.java

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.aksw.gerbil.annotator.EntityTyper;
2828
import org.aksw.gerbil.annotator.OKETask1Annotator;
2929
import org.aksw.gerbil.annotator.OKETask2Annotator;
30+
import org.aksw.gerbil.annotator.RT2KBAnnotator;
3031
import org.aksw.gerbil.datatypes.ErrorTypes;
3132
import org.aksw.gerbil.datatypes.ExperimentType;
3233
import org.aksw.gerbil.evaluate.EvaluationResultContainer;
@@ -52,8 +53,8 @@
5253
* @author Michael R&ouml;der ([email protected])
5354
*
5455
*/
55-
public abstract class ErrorCountingAnnotatorDecorator extends AbstractAnnotatorDecorator
56-
implements Evaluator<Marking>, ErrorCounter {
56+
public abstract class ErrorCountingAnnotatorDecorator extends AbstractAnnotatorDecorator implements Evaluator<Marking>,
57+
ErrorCounter {
5758

5859
private static final Logger LOGGER = LoggerFactory.getLogger(ErrorCountingAnnotatorDecorator.class);
5960

@@ -82,6 +83,8 @@ public static ErrorCountingAnnotatorDecorator createDecorator(ExperimentType typ
8283
return new ErrorCountingOKETask1Annotator((OKETask1Annotator) annotator, maxErrors);
8384
case OKE_Task2:
8485
return new ErrorCountingOKETask2Annotator((OKETask2Annotator) annotator, maxErrors);
86+
case RT2KB:
87+
return new ErrorCountingRT2KBAnnotator((RT2KBAnnotator) annotator, maxErrors);
8588
case Rc2KB:
8689
break;
8790
case Sa2KB:
@@ -125,8 +128,8 @@ public List<MeaningSpan> performD2KBTask(Document document) throws GerbilExcepti
125128
}
126129
}
127130

128-
private static class ErrorCountingEntityRecognizer extends ErrorCountingAnnotatorDecorator
129-
implements EntityRecognizer {
131+
private static class ErrorCountingEntityRecognizer extends ErrorCountingAnnotatorDecorator implements
132+
EntityRecognizer {
130133

131134
public ErrorCountingEntityRecognizer(EntityRecognizer decoratedAnnotator, int maxErrors) {
132135
super(decoratedAnnotator, maxErrors);
@@ -173,8 +176,24 @@ public List<TypedSpan> performTyping(Document document) throws GerbilException {
173176
}
174177
}
175178

176-
private static class ErrorCountingOKETask1Annotator extends ErrorCountingA2KBAnnotator
177-
implements OKETask1Annotator {
179+
private static class ErrorCountingRT2KBAnnotator extends ErrorCountingEntityRecognizer implements RT2KBAnnotator {
180+
181+
protected ErrorCountingRT2KBAnnotator(RT2KBAnnotator decoratedAnnotator, int maxErrors) {
182+
super(decoratedAnnotator, maxErrors);
183+
}
184+
185+
@Override
186+
public List<TypedSpan> performTyping(Document document) throws GerbilException {
187+
return ErrorCountingAnnotatorDecorator.performTyping(this, document);
188+
}
189+
190+
@Override
191+
public List<TypedSpan> performRT2KBTask(Document document) throws GerbilException {
192+
return ErrorCountingAnnotatorDecorator.performRT2KBTask(this, document);
193+
}
194+
}
195+
196+
private static class ErrorCountingOKETask1Annotator extends ErrorCountingA2KBAnnotator implements OKETask1Annotator {
178197

179198
protected ErrorCountingOKETask1Annotator(OKETask1Annotator decoratedAnnotator, int maxErrors) {
180199
super(decoratedAnnotator, maxErrors);
@@ -185,14 +204,19 @@ public List<TypedSpan> performTyping(Document document) throws GerbilException {
185204
return ErrorCountingAnnotatorDecorator.performTyping(this, document);
186205
}
187206

207+
@Override
208+
public List<TypedSpan> performRT2KBTask(Document document) throws GerbilException {
209+
return ErrorCountingAnnotatorDecorator.performRT2KBTask(this, document);
210+
}
211+
188212
@Override
189213
public List<TypedNamedEntity> performTask1(Document document) throws GerbilException {
190214
return ErrorCountingAnnotatorDecorator.performOKETask1(this, document);
191215
}
192216
}
193217

194-
private static class ErrorCountingOKETask2Annotator extends ErrorCountingAnnotatorDecorator
195-
implements OKETask2Annotator {
218+
private static class ErrorCountingOKETask2Annotator extends ErrorCountingAnnotatorDecorator implements
219+
OKETask2Annotator {
196220

197221
protected ErrorCountingOKETask2Annotator(OKETask2Annotator decoratedAnnotator, int maxErrors) {
198222
super(decoratedAnnotator, maxErrors);
@@ -269,8 +293,8 @@ protected static List<MeaningSpan> performD2KBTask(ErrorCountingAnnotatorDecorat
269293
return result;
270294
}
271295

272-
protected static List<MeaningSpan> performExtraction(ErrorCountingAnnotatorDecorator errorCounter,
273-
Document document) throws GerbilException {
296+
protected static List<MeaningSpan> performExtraction(ErrorCountingAnnotatorDecorator errorCounter, Document document)
297+
throws GerbilException {
274298
List<MeaningSpan> result = null;
275299
try {
276300
result = ((A2KBAnnotator) errorCounter.getDecoratedAnnotator()).performA2KBTask(document);
@@ -384,6 +408,29 @@ protected static List<TypedNamedEntity> performOKETask2(ErrorCountingAnnotatorDe
384408
return result;
385409
}
386410

411+
public static List<TypedSpan> performRT2KBTask(ErrorCountingAnnotatorDecorator errorCounter, Document document)
412+
throws GerbilException {
413+
List<TypedSpan> result = null;
414+
try {
415+
result = ((RT2KBAnnotator) errorCounter.getDecoratedAnnotator()).performRT2KBTask(document);
416+
} catch (Exception e) {
417+
if (errorCounter.getErrorCount() == 0) {
418+
// Log only the first exception completely
419+
LOGGER.error("Got an Exception from the annotator (" + errorCounter.getName() + ")", e);
420+
} else {
421+
// Log only the Exception message without the stack trace
422+
LOGGER.error("Got an Exception from the annotator (" + errorCounter.getName() + "): "
423+
+ e.getLocalizedMessage());
424+
}
425+
errorCounter.increaseErrorCount();
426+
return new ArrayList<TypedSpan>(0);
427+
}
428+
if (printDebugMsg && LOGGER.isDebugEnabled()) {
429+
logResult(result, errorCounter.getName(), "TypedNamedEntity");
430+
}
431+
return result;
432+
}
433+
387434
protected int errorCount = 0;
388435
protected int maxErrors;
389436

0 commit comments

Comments
 (0)