Skip to content

Commit be19365

Browse files
author
Kai Pöykiö
committed
remove xml/html entities from BOKR bibs
1 parent b844d44 commit be19365

File tree

1 file changed

+37
-0
lines changed

1 file changed

+37
-0
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import org.apache.commons.lang3.StringEscapeUtils
2+
3+
/*
 * Decodes XML/HTML character entities (e.g. "&amp;", "&#229;") found in the
 * labels of 'Summary' objects on the second @graph entity of every non-deleted
 * bib record whose descriptionCreator is the BOKR library, and schedules the
 * touched records for a loud save.
 *
 * A record is only modified and saved when decoding actually changes a label,
 * so text that merely looks like an entity (for instance "Q&A;") does not
 * trigger a no-op save or inflate the reported count.
 */
String qry = """
collection = 'bib'
and deleted = 'false'
and data#>>'{@graph,0,descriptionCreator,@id}' = 'https://libris.kb.se/library/BOKR'
"""

// Number of summary labels that were actually rewritten.
// NOTE(review): if the tool runs this closure on several threads, this plain
// counter can lose increments — confirm and switch to an atomic counter if so.
def count = 0

// HTML4 entities are decoded first, then XML entities, exactly as before.
def decodeEntities = { String text ->
    StringEscapeUtils.unescapeXml(StringEscapeUtils.unescapeHtml4(text))
}

selectBySqlWhere(qry) { b ->

    // Second entity of the graph; null when the graph has fewer than two entries.
    def item = b.getGraph()[1]

    // Accept both a list of summaries and a single summary object.
    def summaries = item?.summary
    if (summaries instanceof Map) {
        summaries = [summaries]
    }

    boolean docChanged = false

    summaries?.each { summary ->
        if (!(summary instanceof Map) || summary.'@type' != 'Summary') {
            return // not a Summary object, skip it
        }

        def label = summary.label
        if (label == null) {
            return
        }

        // A list-valued label is flattened to one newline-separated string,
        // but only if decoding turns out to change something (see below).
        String raw = label instanceof List ? label.join("\n") : label.toString()

        // Cheap pre-filter: skip labels that cannot contain an entity at all.
        if (!(raw =~ /&[^ ]+;/)) {
            return
        }

        String decoded = decodeEntities(raw)
        if (decoded != raw) {
            summary.label = decoded
            docChanged = true
            count++
        }
    }

    // Schedule the save once per document, and only when something changed.
    if (docChanged) {
        b.scheduleSave(loud: true)
    }
}

println('Changed:')
println(count)
37+

0 commit comments

Comments
 (0)