Skip to content

Commit 2144bcf

Browse files
committed
improve parsing of names and sources; update VIAF XML output in test suite; bump version
1 parent 0a875dc commit 2144bcf

File tree

9 files changed

+43763
-20
lines changed

9 files changed

+43763
-20
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ build the .jar file using maven.
4545
Run this command:
4646

4747
```
48-
java -jar refine_viaf-1.0.jar
48+
java -jar refine_viaf-1.1.jar
4949
```
5050

5151
That's it! You should see some messages as the application starts

pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
<groupId>org.refine_viaf</groupId>
66
<artifactId>refine_viaf</artifactId>
7-
<version>1.0</version>
7+
<version>1.1</version>
88
<packaging>jar</packaging>
99

1010
<name>refine_viaf</name>
@@ -17,7 +17,7 @@
1717
<parent>
1818
<groupId>org.springframework.boot</groupId>
1919
<artifactId>spring-boot-starter-parent</artifactId>
20-
<version>1.3.0.RELEASE</version>
20+
<version>1.3.2.RELEASE</version>
2121
</parent>
2222

2323
<dependencies>

run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,4 @@
1414
# the JVM from having to dynamically allocate memory, which takes time.
1515
# -Xms128m -Xmx128m
1616

17-
java -Xms128m -Xmx128m -Dlogging.level.com.codefork.refine=DEBUG -jar target/refine_viaf-1.0.jar
17+
java -Xms128m -Xmx128m -Dlogging.level.com.codefork.refine=DEBUG -jar target/refine_viaf-1.1.jar

src/main/java/com/codefork/refine/viaf/VIAFParser.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ public class VIAFParser extends DefaultHandler {
1717
private boolean captureChars = false;
1818
private boolean insideHeadings = false;
1919
private boolean insideSources = false;
20+
private int depth = 0;
21+
private int headingsDepth = -1;
2022

2123
// viaf's weird indexed namespacing
2224
private int nsIndex = 2;
@@ -52,6 +54,7 @@ public void startElement(String uri, String localName, String qName, Attributes
5254
captureChars = true;
5355
} else if (getElementNameWithNS("mainHeadings").equals(qName)) {
5456
insideHeadings = true;
57+
headingsDepth = depth;
5558
} else if (insideHeadings && getElementNameWithNS("data").equals(qName)) {
5659
getLastResult().addNameEntry();
5760
} else if (insideHeadings && getElementNameWithNS("text").equals(qName)) {
@@ -60,13 +63,14 @@ public void startElement(String uri, String localName, String qName, Attributes
6063
insideSources = true;
6164
} else if (insideSources && getElementNameWithNS("s").equals(qName)) {
6265
captureChars = true;
63-
} else if (insideHeadings) {
66+
} else if (headingsDepth != -1 && depth == headingsDepth + 1) {
6467
// if we got here, we encountered some other child of mainHeadings
6568
// so we want to effectively end the section, otherwise we'll end up
6669
// erroneously picking up other "text" and "sources" elements nested
6770
// under other elements in mainHeadings
6871
insideHeadings = false;
6972
}
73+
depth ++;
7074
}
7175

7276
@Override
@@ -83,6 +87,7 @@ public void endElement(String uri, String localName, String qName) throws SAXExc
8387
captureChars = false;
8488
} else if (getElementNameWithNS("mainHeadings").equals(qName)) {
8589
insideHeadings = false;
90+
headingsDepth = -1;
8691
} else if (insideHeadings && getElementNameWithNS("text").equals(qName)) {
8792
getLastResult().getLastNameEntry().setName(buf.toString());
8893
buf = new StringBuilder();
@@ -94,6 +99,7 @@ public void endElement(String uri, String localName, String qName) throws SAXExc
9499
buf = new StringBuilder();
95100
captureChars = false;
96101
}
102+
depth --;
97103
}
98104

99105
@Override
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package com.codefork.refine.viaf;
2+
3+
import org.junit.Test;
4+
5+
import javax.xml.parsers.SAXParser;
6+
import javax.xml.parsers.SAXParserFactory;
7+
import java.io.InputStream;
8+
import java.util.List;
9+
10+
import static org.junit.Assert.assertEquals;
11+
12+
public class VIAFParserTest {
13+
14+
private static String joinStrings(List<String> strings, String delimiter) {
15+
StringBuilder b = new StringBuilder();
16+
for(String s : strings) {
17+
if(b.length() > 0) {
18+
b.append(delimiter);
19+
}
20+
b.append(s);
21+
}
22+
return b.toString();
23+
}
24+
25+
@Test
26+
public void testParseNames() throws Exception {
27+
SAXParserFactory spf = SAXParserFactory.newInstance();
28+
SAXParser parser = spf.newSAXParser();
29+
VIAFParser viafParser = new VIAFParser();
30+
31+
InputStream is = getClass().getResourceAsStream("/steinbeck_no_type.xml");
32+
parser.parse(is, viafParser);
33+
34+
List<VIAFResult> results = viafParser.getResults();
35+
36+
VIAFResult firstResult = results.get(0);
37+
VIAFResult secondResult = results.get(1);
38+
39+
assertEquals(10, firstResult.getNameEntries().size());
40+
41+
assertEquals("Steinbeck, John, 1902-1968",
42+
firstResult.getNameEntries().get(0).getName());
43+
assertEquals("LC,BIBSYS,BNF,KRNLK,N6I,LAC,BNE,SUDOC,BAV,BNC,NLI,B2Q,PTBNP,NLP,LNB,SELIBR,NLA,ICCU,NDL,DNB,NUKAT,NKC",
44+
joinStrings(firstResult.getNameEntries().get(0).getSources(), ","));
45+
46+
assertEquals("Steinbeck, John (John Ernst), 1902-1968",
47+
firstResult.getNameEntries().get(1).getName());
48+
assertEquals("NTA",
49+
joinStrings(firstResult.getNameEntries().get(1).getSources(), ","));
50+
51+
assertEquals("NSK,SWNL",
52+
joinStrings(firstResult.getNameEntries().get(2).getSources(), ","));
53+
assertEquals("WKP",
54+
joinStrings(firstResult.getNameEntries().get(3).getSources(), ","));
55+
assertEquals("LNL,EGAXA",
56+
joinStrings(firstResult.getNameEntries().get(4).getSources(), ","));
57+
assertEquals("NLI",
58+
joinStrings(firstResult.getNameEntries().get(5).getSources(), ","));
59+
assertEquals("NLI",
60+
joinStrings(firstResult.getNameEntries().get(6).getSources(), ","));
61+
assertEquals("NLI",
62+
joinStrings(firstResult.getNameEntries().get(7).getSources(), ","));
63+
assertEquals("NLR",
64+
joinStrings(firstResult.getNameEntries().get(8).getSources(), ","));
65+
assertEquals("JPG",
66+
joinStrings(firstResult.getNameEntries().get(9).getSources(), ","));
67+
68+
assertEquals(5, secondResult.getNameEntries().size());
69+
70+
assertEquals("Steinbeck, John 1946-1991",
71+
secondResult.getNameEntries().get(0).getName());
72+
assertEquals("NLP,ICCU,DNB,BNF",
73+
joinStrings(secondResult.getNameEntries().get(0).getSources(), ","));
74+
75+
}
76+
77+
}

src/test/java/com/codefork/refine/viaf/VIAFTest.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,15 @@ public void testSearchNoParticularType() throws Exception {
7070
assertFalse(result1.isMatch());
7171

7272
Result result2 = results.get(1);
73-
assertEquals("Steinbeck, John, 1902-1968. | Of mice and men.", result2.getName());
74-
assertEquals(NameType.Book.asVIAFNameType(), result2.getType().get(0));
75-
assertEquals("180993990", result2.getId());
73+
assertEquals("Steinbeck, John 1946-1991", result2.getName());
74+
assertEquals(NameType.Person.asVIAFNameType(), result2.getType().get(0));
75+
assertEquals("19893647", result2.getId());
7676
assertFalse(result2.isMatch());
7777

7878
Result result3 = results.get(2);
79-
assertEquals("Steinbeck, John 1946-1991", result3.getName());
80-
assertEquals(NameType.Person.asVIAFNameType(), result3.getType().get(0));
81-
assertEquals("19893647", result3.getId());
79+
assertEquals("Steinbeck, John, 1902-1968. | Of mice and men.", result3.getName());
80+
assertEquals(NameType.Book.asVIAFNameType(), result3.getType().get(0));
81+
assertEquals("180993990", result3.getId());
8282
assertFalse(result3.isMatch());
8383
}
8484

@@ -105,15 +105,15 @@ public void testSearchWithSource() throws Exception {
105105
assertFalse(result1.isMatch());
106106

107107
Result result2 = results.get(1);
108-
assertEquals("Nabokov, Vladimir Vladimirovič | Volšebnik", result2.getName());
108+
assertEquals("Nabokov, Vladimir Vladimirovič | Lolita", result2.getName());
109109
assertEquals(NameType.Book.asVIAFNameType(), result2.getType().get(0));
110-
assertEquals("316638111", result2.getId());
110+
assertEquals("176671347", result2.getId());
111111
assertFalse(result2.isMatch());
112112

113113
Result result3 = results.get(2);
114114
assertEquals("Nabokov, Vladimir Vladimirovič | Govori, sjećanje!", result3.getName());
115115
assertEquals(NameType.Book.asVIAFNameType(), result3.getType().get(0));
116-
assertEquals("140144814502844904157", result3.getId());
116+
assertEquals("183561595", result3.getId());
117117
assertFalse(result3.isMatch());
118118
}
119119

src/test/resources/nabokov_nsk.xml

Lines changed: 8674 additions & 2 deletions
Large diffs are not rendered by default.

src/test/resources/shakespeare.xml

Lines changed: 25465 additions & 2 deletions
Large diffs are not rendered by default.

src/test/resources/steinbeck_no_type.xml

Lines changed: 9527 additions & 2 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)