Skip to content

Commit 2290711

Browse files
author
Daniel Khashabi
authored
Merge pull request #601 from CogComp/question-type-classfier
Question type classification + ....
2 parents 9071c83 + 0c4702e commit 2290711

File tree

27 files changed

+938
-55
lines changed

27 files changed

+938
-55
lines changed

core-utilities/src/main/java/edu/illinois/cs/cogcomp/core/datastructures/ViewNames.java

+2
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ public class ViewNames {
9191
public static final String DATALESS_ESA = "DATALESS_ESA";
9292
public static final String DATALESS_W2V = "DATALESS_W2V";
9393

94+
public static final String QUESTION_TYPE = "QUESTION_TYPE";
95+
9496
/**
9597
* @deprecated Replaced by {@link #CLAUSES_CHARNIAK}, {@link #CLAUSES_BERKELEY},
9698
* {@link #CLAUSES_STANFORD}

dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/ta/ADatalessAnnotator.java

+4
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ protected ADatalessAnnotator(String viewName, ResourceManager config) {
6262
this(viewName, new String[] {ViewNames.TOKENS}, config);
6363
}
6464

65+
protected ADatalessAnnotator(String viewName, ResourceManager config, boolean isLazilyInitialized) {
66+
this(viewName, new String[] {ViewNames.TOKENS}, isLazilyInitialized, config);
67+
}
68+
6569
protected ADatalessAnnotator(String viewName, String[] requiredViews, ResourceManager config) {
6670
this(viewName, requiredViews, false, config);
6771
}

dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/ta/ESADatalessAnnotator.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ public ESADatalessAnnotator() {
5151
}
5252

5353
public ESADatalessAnnotator(ResourceManager config) {
54-
super(ViewNames.DATALESS_ESA, config);
54+
super(ViewNames.DATALESS_ESA, config, true);
5555
}
5656

5757
public ESADatalessAnnotator(ResourceManager config, JSONObject jsonHierarchy)

dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/ta/W2VDatalessAnnotator.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public W2VDatalessAnnotator() {
4545
}
4646

4747
public W2VDatalessAnnotator(ResourceManager config) {
48-
super(ViewNames.DATALESS_W2V, config);
48+
super(ViewNames.DATALESS_W2V, config, true);
4949
}
5050

5151
public W2VDatalessAnnotator(ResourceManager config, JSONObject jsonHierarchy)

edison/src/main/java/edu/illinois/cs/cogcomp/edison/features/FeatureUtilities.java

+24
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
package edu.illinois.cs.cogcomp.edison.features;
99

1010
import edu.illinois.cs.cogcomp.core.datastructures.Pair;
11+
import edu.illinois.cs.cogcomp.core.datastructures.ViewNames;
1112
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent;
13+
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation;
1214
import edu.illinois.cs.cogcomp.core.stats.Counter;
1315
import edu.illinois.cs.cogcomp.edison.utilities.EdisonException;
1416
import edu.illinois.cs.cogcomp.lbjava.classify.Classifier;
@@ -147,6 +149,28 @@ public static Set<String> getFeatureSet(final FeatureExtractor fex, Constituent
147149
return set;
148150
}
149151

152+
public static List<String> getFeaturesFromTextAnnotation(final FeatureExtractor fex, TextAnnotation s) {
153+
List<Constituent> cons = s.getView(ViewNames.TOKENS).getConstituents();
154+
List<String> features = new ArrayList<>();
155+
for(Constituent c : cons) {
156+
try {
157+
features.addAll(getFeatureSet(fex, c));
158+
} catch (EdisonException e) {
159+
e.printStackTrace();
160+
}
161+
}
162+
return features;
163+
}
164+
165+
public static List<String> getFeatureSet(Set<Feature> features) {
166+
List<String> set = new ArrayList<>();
167+
for (Feature f : features) {
168+
if (f instanceof DiscreteFeature)
169+
set.add(f.getName());
170+
}
171+
return set;
172+
}
173+
150174
/**
151175
* Convert a feature set into a pair of arrays of integers and doubles by looking up the feature
152176
* name in the provided lexicon.

edison/src/main/java/edu/illinois/cs/cogcomp/edison/features/factory/WordFeatureExtractorFactory.java

+3-2
Original file line numberDiff line numberDiff line change
@@ -493,7 +493,7 @@ private static String getSafeToken(TextAnnotation ta, int wordPosition) {
493493
}
494494

495495
public static WordNetFeatureExtractor getWordNetFeatureExtractor(
496-
WordNetFeatureClass... wordNetFeatureClasses) throws EdisonException {
496+
WordNetFeatureClass... wordNetFeatureClasses) {
497497
if (wnFeatureGenerator == null) {
498498
synchronized (WordFeatureExtractorFactory.class) {
499499
if (wnFeatureGenerator == null) {
@@ -504,7 +504,8 @@ public static WordNetFeatureExtractor getWordNetFeatureExtractor(
504504
wnFeatureGenerator.addFeatureType(c);
505505

506506
} catch (Exception e) {
507-
throw new EdisonException("Error creating word feature extractor", e);
507+
System.out.println("Error creating word feature extractor");
508+
e.printStackTrace();
508509
}
509510
}
510511
}

md/src/main/java/org/cogcomp/md/BIOReader.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@
4343
* then parse the TextAnnotations into tokens.
4444
*/
4545

46-
public class BIOReader implements Parser
47-
{
46+
public class BIOReader implements Parser {
47+
4848
private List<Constituent> tokenList;
4949
private int tokenIndex;
5050
private List<TextAnnotation> taList;

pipeline-client/README.md

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# CogComp Pipeline Client
2+
3+
A client is a lightweight system for accessing the pipeline over the network. For more on the pipeline server, look [here](../pipeline/README.md).
4+
5+
#### Java Client
6+
7+
After setting up the server on a remote machine, we can create a java client to make calls to the server.
8+
The snippet below shows how it is done:
9+
10+
```java
11+
import edu.illinois.cs.cogcomp.pipeline.server.ServerClientAnnotator;
12+
import edu.illinois.cs.cogcomp.core.datastructures.ViewNames;
13+
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation;
14+
15+
ServerClientAnnotator annotator = new ServerClientAnnotator();
16+
annotator.setUrl("localhost", "8080"); // set the url and port name of your server here
17+
annotator.setViews(ViewNames.POS, ViewNames.LEMMA); // specify the views that you want
18+
TextAnnotation ta = annotator.annotate("This is the best sentence ever.");
19+
System.out.println(ta.getAvailableViews()); // here you should see that the required views are added.
20+
```
21+
22+
#### Python Client
23+
24+
[CogComp-NLPy](https://github.com/CogComp/cogcomp-nlpy) is our library for accessing our pipeline from Python.
25+

pipeline-client/pom.xml

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3+
xmlns="http://maven.apache.org/POM/4.0.0"
4+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5+
<modelVersion>4.0.0</modelVersion>
6+
<parent>
7+
<artifactId>illinois-cogcomp-nlp</artifactId>
8+
<groupId>edu.illinois.cs.cogcomp</groupId>
9+
<version>4.0.1</version>
10+
</parent>
11+
12+
<artifactId>illinois-pipeline-client</artifactId>
13+
14+
<dependencies>
15+
<dependency>
16+
<groupId>edu.illinois.cs.cogcomp</groupId>
17+
<artifactId>illinois-core-utilities</artifactId>
18+
<version>4.0.1</version>
19+
</dependency>
20+
</dependencies>
21+
22+
</project>

pipeline/README.md

+3-25
Original file line numberDiff line numberDiff line change
@@ -314,29 +314,6 @@ Here are the available APIs:
314314
315315
Note that the current web server is very basic. It does not support parallel processing within a single request, nor across multiple requests.
316316
317-
### Server clients
318-
319-
#### Java Client
320-
321-
After setting up the server on a remote machine, we can create a java client to make calls to the server.
322-
Here in the snnippet we show how it is done:
323-
324-
```java
325-
import edu.illinois.cs.cogcomp.pipeline.server.ServerClientAnnotator;
326-
import edu.illinois.cs.cogcomp.core.datastructures.ViewNames;
327-
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation;
328-
329-
ServerClientAnnotator annotator = new ServerClientAnnotator();
330-
annotator.setUrl("localhost", "8080"); // set the url and port name of your server here
331-
annotator.setViews(ViewNames.POS, ViewNames.LEMMA); // specify the views that you want
332-
TextAnnotation ta = annotator.annotate("This is the best sentence ever.");
333-
System.out.println(ta.getAvailableViews()); // here you should see that the required views are added
334-
```
335-
336-
#### Python Client
337-
338-
[CogComp-NLPy](https://github.com/CogComp/cogcomp-nlpy) is our library for accessing our pipeline from Java.
339-
340317
341318
## Frequently Asked Questions (FAQs)
342319
@@ -346,7 +323,7 @@ export MAVEN_OPTS="-Xmx10g"
346323
```
347324
348325
- Between different runs of the Pipeline, if you see the following exception, you should remove the temporary cache folders created by MapDB.
349-
```java
326+
```
350327
Caused by: org.mapdb.DBException$DataCorruption: Header checksum broken. Store was not closed correctly, or is corrupted
351328
```
352329
@@ -368,7 +345,7 @@ public class TestPipeline {
368345
}
369346
```
370347
would lead to the following exception:
371-
```java
348+
```
372349
Exception in thread "main" org.mapdb.DBException$FileLocked: File is already opened and is locked: annotation-cache
373350
at org.mapdb.volume.Volume.lockFile(Volume.java:446)
374351
at org.mapdb.volume.RandomAccessFileVol.<init>(RandomAccessFileVol.java:52)
@@ -399,6 +376,7 @@ public class TestPipeline {
399376
}
400377
```
401378
379+
402380
## LICENSE
403381
404382
To see the full license for this software, see [LICENSE](../master/LICENSE) or visit the [download page](http://cogcomp.cs.illinois.edu/page/software_view/NETagger) for this software

pipeline/pom.xml

+11
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@
5858
<artifactId>illinois-verbsense</artifactId>
5959
<version>4.0.1</version>
6060
</dependency>
61+
<dependency>
62+
<groupId>edu.illinois.cs.cogcomp</groupId>
63+
<artifactId>illinois-question-typer</artifactId>
64+
<version>4.0.1</version>
65+
</dependency>
6166
<dependency>
6267
<groupId>edu.illinois.cs.cogcomp</groupId>
6368
<artifactId>stanford_3.3.1</artifactId>
@@ -167,6 +172,12 @@
167172
</plugin>
168173
</plugins>
169174
</pluginManagement>
175+
<resources>
176+
<resource>
177+
<directory>src/main/resources</directory>
178+
<filtering>true</filtering>
179+
</resource>
180+
</resources>
170181
</build>
171182

172183
</project>

pipeline/src/main/java/edu/illinois/cs/cogcomp/pipeline/common/PipelineConfigurator.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ public class PipelineConfigurator extends AnnotatorServiceConfigurator {
5050
public static final Property USE_TIMEX3 = new Property("useTimex3", FALSE);
5151
public static final Property USE_DATALESS_ESA = new Property("useDatalessESA", FALSE);
5252
public static final Property USE_DATALESS_W2V = new Property("useDatalessW2V", FALSE);
53+
public static final Property USE_QUESTION_TYPER = new Property("useQuestionTyper", FALSE);
5354

5455
/**
5556
* if 'true', the PipelineFactory will return a sentence-level pipeline that will use all viable
@@ -82,7 +83,7 @@ public ResourceManager getDefaultConfig() {
8283
USE_QUANTIFIER, USE_VERB_SENSE, USE_JSON, USE_RELATION,
8384
USE_LAZY_INITIALIZATION, USE_SRL_INTERNAL_PREPROCESSOR, SPLIT_ON_DASH,
8485
USE_SENTENCE_PIPELINE, USE_TIMEX3, USE_MENTION, USE_TRANSLITERATION,
85-
USE_DATALESS_ESA, USE_DATALESS_W2V};
86+
USE_DATALESS_ESA, USE_DATALESS_W2V, USE_QUESTION_TYPER};
8687

8788
return (new AnnotatorServiceConfigurator().getConfig(new ResourceManager(
8889
generateProperties(properties))));

pipeline/src/main/java/edu/illinois/cs/cogcomp/pipeline/main/PipelineFactory.java

+13
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import edu.illinois.cs.cogcomp.pos.POSAnnotator;
3232
import edu.illinois.cs.cogcomp.prepsrl.PrepSRLAnnotator;
3333
import edu.illinois.cs.cogcomp.quant.driver.Quantifier;
34+
import edu.illinois.cs.cogcomp.question_typer.QuestionTypeAnnotator;
3435
import edu.illinois.cs.cogcomp.srl.SemanticRoleLabeler;
3536
import edu.illinois.cs.cogcomp.srl.config.SrlConfigurator;
3637
import edu.illinois.cs.cogcomp.srl.core.SRLType;
@@ -137,16 +138,23 @@ public static BasicAnnotatorService buildPipeline(Boolean disableCache, String..
137138
case ViewNames.MENTION:
138139
nonDefaultValues.put(PipelineConfigurator.USE_MENTION.key,
139140
Configurator.TRUE);
141+
break;
140142
case ViewNames.RELATION:
141143
nonDefaultValues.put(PipelineConfigurator.USE_RELATION.key,
142144
Configurator.TRUE);
145+
break;
143146
case ViewNames.DATALESS_ESA:
144147
nonDefaultValues.put(PipelineConfigurator.USE_DATALESS_ESA.key,
145148
Configurator.TRUE);
149+
break;
146150
case ViewNames.DATALESS_W2V:
147151
nonDefaultValues.put(PipelineConfigurator.USE_DATALESS_W2V.key,
148152
Configurator.TRUE);
149153
break;
154+
case ViewNames.QUESTION_TYPE:
155+
nonDefaultValues.put(PipelineConfigurator.USE_QUESTION_TYPER.key,
156+
Configurator.TRUE);
157+
break;
150158
default:
151159
logger.warn("View name "
152160
+ vu
@@ -407,6 +415,11 @@ private static Map<String, Annotator> buildAnnotators(ResourceManager nonDefault
407415
W2VDatalessAnnotator w2vDataless = new W2VDatalessAnnotator(rm);
408416
viewGenerators.put(ViewNames.DATALESS_W2V, w2vDataless);
409417
}
418+
if(rm.getBoolean(PipelineConfigurator.USE_QUESTION_TYPER)) {
419+
QuestionTypeAnnotator questionTyper = new QuestionTypeAnnotator();
420+
viewGenerators.put(ViewNames.QUESTION_TYPE, questionTyper);
421+
}
422+
410423
return viewGenerators;
411424
}
412425

pipeline/src/main/java/edu/illinois/cs/cogcomp/pipeline/server/MainServer.java

+10
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import java.util.HashMap;
2929
import java.util.LinkedHashMap;
3030
import java.util.Map;
31+
import java.util.Properties;
3132

3233
import static spark.Spark.*;
3334

@@ -161,6 +162,15 @@ public static void startServer(String[] args) {
161162
get("/viewNames", (req, res) -> finalViewsString);
162163

163164
post("/viewNames", (req, res) -> finalViewsString);
165+
166+
get("/version", "application/json", (request, response) -> {
167+
logger.info("GET request to retrieve version numbers . . . ");
168+
final Properties properties = new Properties();
169+
properties.load(pipeline.getClass().getClassLoader().getResourceAsStream("project.properties"));
170+
System.out.println(properties.getProperty("version"));
171+
System.out.println(properties.getProperty("artifactId"));
172+
return properties.getProperty("version");
173+
});
164174
}
165175

166176
public static double getHour() {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
version=${project.version}
2+
artifactId=${project.artifactId}

pom.xml

+9-6
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,10 @@
3131
<module>commasrl</module>
3232
<module>prepsrl</module>
3333
<module>verbsense</module>
34-
<module>pipeline</module>
34+
<module>question-type</module>
3535
<module>dataless-classifier</module>
36+
<module>pipeline-client</module>
37+
<module>pipeline</module>
3638
<!--external tools -->
3739
<module>external/external-commons</module>
3840
<module>external/clausie</module>
@@ -219,11 +221,12 @@
219221
</excludes>
220222
</configuration>
221223
<executions>
222-
<execution>
223-
<goals>
224-
<goal>check</goal>
225-
</goals>
226-
</execution>
224+
<!-- Commented out because we don't want the build to be constantly stopped by missing license headers. -->
225+
<!--<execution>-->
226+
<!--<goals>-->
227+
<!--<goal>check</goal>-->
228+
<!--</goals>-->
229+
<!--</execution>-->
227230
</executions>
228231
</plugin>
229232
</plugins>

prepsrl/src/main/java/edu/illinois/cs/cogcomp/prepsrl/features/PrepSRLFeatures.java

+13-17
Original file line numberDiff line numberDiff line change
@@ -32,23 +32,19 @@ public class PrepSRLFeatures extends LBJavaFeatureExtractor {
3232
private final FeatureExtractor fex;
3333
private static WordNetFeatureExtractor firstSense, wordNetFexes;
3434
static {
35-
try {
36-
firstSense = getWordNetFeatureExtractor(WordNetFeatureClass.synsetsFirstSense);
37-
wordNetFexes =
38-
getWordNetFeatureExtractor(WordNetFeatureClass.existsEntry,
39-
WordNetFeatureClass.synonymsFirstSense,
40-
WordNetFeatureClass.synsetsAllSenses,
41-
WordNetFeatureClass.partHolonymsFirstSense,
42-
WordNetFeatureClass.partHolonymsAllSenses,
43-
WordNetFeatureClass.memberHolonymsFirstSense,
44-
WordNetFeatureClass.memberHolonymsAllSenses,
45-
WordNetFeatureClass.substanceHolonymsFirstSense,
46-
WordNetFeatureClass.substanceHolonymsAllSenses,
47-
WordNetFeatureClass.lexicographerFileNamesFirstSense,
48-
WordNetFeatureClass.lexicographerFileNamesAllSenses);
49-
} catch (EdisonException e) {
50-
System.err.println("Cannot inialise WordNet feature extractors");
51-
}
35+
firstSense = getWordNetFeatureExtractor(WordNetFeatureClass.synsetsFirstSense);
36+
wordNetFexes =
37+
getWordNetFeatureExtractor(WordNetFeatureClass.existsEntry,
38+
WordNetFeatureClass.synonymsFirstSense,
39+
WordNetFeatureClass.synsetsAllSenses,
40+
WordNetFeatureClass.partHolonymsFirstSense,
41+
WordNetFeatureClass.partHolonymsAllSenses,
42+
WordNetFeatureClass.memberHolonymsFirstSense,
43+
WordNetFeatureClass.memberHolonymsAllSenses,
44+
WordNetFeatureClass.substanceHolonymsFirstSense,
45+
WordNetFeatureClass.substanceHolonymsAllSenses,
46+
WordNetFeatureClass.lexicographerFileNamesFirstSense,
47+
WordNetFeatureClass.lexicographerFileNamesAllSenses);
5248
}
5349

5450
private PrepSRLFeatures(FeatureExtractor fex) {

0 commit comments

Comments
 (0)