Skip to content

Commit dbb9637

Browse files
authored
SciX ID Prefix in Grammar (#245)
* Add 'scix:' as a special case in the grammar
* Add query processor to special case SciX ID queries
1 parent 52f6c74 commit dbb9637

File tree

3 files changed

+48
-1
lines changed

3 files changed

+48
-1
lines changed

montysolr/src/main/antlr/ADS.g

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,8 @@ identifier
321321
//IDENTIFIER
322322
('doi:' -> QNORMAL["doi"]
323323
|'arxiv:' -> QNORMAL["arxiv"]
324-
|'arXiv:' -> QNORMAL["arxiv"])
324+
|'arXiv:' -> QNORMAL["arxiv"]
325+
|'scix:' -> QNORMAL["scix"])
325326
(TERM_NORMAL -> $identifier TERM_NORMAL
326327
| PHRASE_ANYTHING -> $identifier ^(QPHRASETRUNC PHRASE_ANYTHING)
327328
| PHRASE -> $identifier ^(QPHRASE PHRASE)

montysolr/src/main/java/org/apache/lucene/queryparser/flexible/aqp/AqpAdsabsNodeProcessorPipeline.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ public AqpAdsabsNodeProcessorPipeline(QueryConfigHandler queryConfig) {
7777
add(new AqpAdsabsQTRUNCATEDProcessor());
7878
add(new AqpQANYTHINGProcessor());
7979
add(new AqpQIDENTIFIERProcessor());
80+
add(new AqpScixIDProcessor());
8081
add(new AqpFIELDProcessor()); // sets the field name (if user specified one, or there is a default)
8182

8283

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package org.apache.lucene.queryparser.flexible.aqp.processors;
2+
3+
import org.apache.lucene.queryparser.flexible.aqp.nodes.AqpANTLRNode;
4+
import org.apache.lucene.queryparser.flexible.aqp.nodes.AqpAdsabsIdentifierNode;
5+
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
6+
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
7+
import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl;
8+
9+
import java.util.List;
10+
11+
/**
12+
* Special case processor for SciX IDs
13+
* <br/>
14+
* We'd like "identifier:scix:XXX-XXX-XXX" queries to work, but the other elements of the
15+
* query processing pipeline remove the "scix:" prefix from the query. This adds the prefix
16+
* back in to ensure the query term can match the indexed term.
17+
*/
18+
public class AqpScixIDProcessor extends QueryNodeProcessorImpl {
19+
20+
21+
@Override
22+
protected QueryNode preProcessNode(QueryNode node) throws QueryNodeException {
23+
if (node instanceof AqpAdsabsIdentifierNode identifierNode) {
24+
if (identifierNode.getFieldAsString().equals("scix")) {
25+
return new AqpAdsabsIdentifierNode(
26+
identifierNode.getField(),
27+
identifierNode.getFieldAsString() + ":" + identifierNode.getTextAsString(),
28+
identifierNode.getBegin(),
29+
identifierNode.getEnd());
30+
}
31+
}
32+
33+
return node;
34+
}
35+
36+
@Override
37+
protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException {
38+
return node;
39+
}
40+
41+
@Override
42+
protected List<QueryNode> setChildrenOrder(List<QueryNode> children) throws QueryNodeException {
43+
return children;
44+
}
45+
}

0 commit comments

Comments
 (0)