Skip to content

Commit b005520

Browse files
authored
Merge pull request #112 from melissalinkert/xml-parsing
Standardize XML parsing
2 parents ee2ceae + 710a919 commit b005520

1 file changed

Lines changed: 69 additions & 11 deletions

File tree

src/main/java/loci/common/xml/XMLTools.java

Lines changed: 69 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,13 @@
5050
import java.util.HashMap;
5151
import java.util.HashSet;
5252
import java.util.Hashtable;
53+
import java.util.Map;
5354
import java.util.Set;
5455
import java.util.StringTokenizer;
5556
import java.util.regex.Matcher;
5657
import java.util.regex.Pattern;
5758

59+
import javax.xml.XMLConstants;
5860
import javax.xml.parsers.DocumentBuilder;
5961
import javax.xml.parsers.DocumentBuilderFactory;
6062
import javax.xml.parsers.ParserConfigurationException;
@@ -118,6 +120,17 @@ private static TransformerFactory createTransformFactory() {
118120
return factory;
119121
};
120122

123+
private static final Map<String, Boolean> FEATURES = createXMLParserFeatures();
124+
125+
private static Map<String, Boolean> createXMLParserFeatures() {
126+
HashMap<String, Boolean> features = new HashMap<String, Boolean>();
127+
features.put(XMLConstants.FEATURE_SECURE_PROCESSING, true);
128+
features.put("http://xml.org/sax/features/external-general-entities", false);
129+
features.put("http://xml.org/sax/features/external-parameter-entities", false);
130+
features.put("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
131+
return features;
132+
};
133+
121134
// -- Interfaces --
122135

123136
/**
@@ -159,7 +172,18 @@ private XMLTools() { }
159172
*/
160173
public static DocumentBuilder createBuilder() {
161174
try {
162-
return DocumentBuilderFactory.newInstance().newDocumentBuilder();
175+
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
176+
factory.setXIncludeAware(false);
177+
factory.setExpandEntityReferences(false);
178+
for (String feature : FEATURES.keySet()) {
179+
try {
180+
factory.setFeature(feature, FEATURES.get(feature));
181+
}
182+
catch (ParserConfigurationException e) {
183+
LOGGER.debug("Parser does not support feature " + feature, e);
184+
}
185+
}
186+
return factory.newDocumentBuilder();
163187
}
164188
catch (ParserConfigurationException e) {
165189
LOGGER.error("Cannot create DocumentBuilder", e);
@@ -196,7 +220,7 @@ public static Document parseDOM(File file)
196220
}
197221

198222
/**
199-
* Parses a DOM from the given XML string.
223+
* Parses a DOM from the given XML string, using UTF-8 encoding.
200224
*
201225
* @param xml XML data
202226
* @return a {@link Document} reflecting the XML string
@@ -207,7 +231,23 @@ public static Document parseDOM(File file)
207231
public static Document parseDOM(String xml)
208232
throws ParserConfigurationException, SAXException, IOException
209233
{
210-
byte[] bytes = xml.getBytes(Constants.ENCODING);
234+
return parseDOM(xml, Constants.ENCODING);
235+
}
236+
237+
/**
238+
* Parses a DOM from the given XML string, using the given encoding.
239+
*
240+
* @param xml XML data
241+
* @param encoding charset name
242+
* @return a {@link Document} reflecting the XML string
243+
* @throws ParserConfigurationException if the XML parser cannot be created
244+
* @throws SAXException if there is an error parsing the XML
245+
* @throws IOException if the encoding is not supported or there is an error reading the XML data
246+
*/
247+
public static Document parseDOM(String xml, String encoding)
248+
throws ParserConfigurationException, SAXException, IOException
249+
{
250+
byte[] bytes = xml.getBytes(encoding);
211251
try (InputStream is = new ByteArrayInputStream(bytes)) {
212252
Document doc = parseDOM(is);
213253
return doc;
@@ -231,8 +271,7 @@ public static Document parseDOM(InputStream is)
231271
checkUTF8(in);
232272

233273
// Java XML factories are not declared to be thread safe
234-
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
235-
DocumentBuilder db = factory.newDocumentBuilder();
274+
DocumentBuilder db = createBuilder();
236275
db.setErrorHandler(new ParserErrorHandler());
237276
return db.parse(in);
238277
}
@@ -469,6 +508,29 @@ public static String indentXML(String xml, int spacing,
469508

470509
// -- Parsing --
471510

511+
/**
512+
* Create a new SAX parser.
513+
*
514+
* @throws ParserConfigurationException
515+
* @throws SAXException
516+
*/
517+
public static SAXParser createSAXParser()
518+
throws ParserConfigurationException, SAXException
519+
{
520+
// Java XML factories are not declared to be thread safe
521+
SAXParserFactory factory = SAXParserFactory.newInstance();
522+
factory.setXIncludeAware(false);
523+
for (String feature : FEATURES.keySet()) {
524+
try {
525+
factory.setFeature(feature, FEATURES.get(feature));
526+
}
527+
catch (ParserConfigurationException e) {
528+
LOGGER.debug("Parser does not support feature " + feature, e);
529+
}
530+
}
531+
return factory.newSAXParser();
532+
}
533+
472534
/**
473535
* Parses the given XML string into a list of key/value pairs.
474536
*
@@ -539,9 +601,7 @@ public static void parseXML(InputStream xml, DefaultHandler handler)
539601
throws IOException
540602
{
541603
try {
542-
// Java XML factories are not declared to be thread safe
543-
SAXParserFactory factory = SAXParserFactory.newInstance();
544-
SAXParser parser = factory.newSAXParser();
604+
SAXParser parser = createSAXParser();
545605
parser.parse(xml, handler);
546606
}
547607
catch (ParserConfigurationException exc) {
@@ -797,9 +857,7 @@ public static boolean validateXML(String xml, String label,
797857
LOGGER.info("Parsing schema path");
798858
ValidationSAXHandler saxHandler = new ValidationSAXHandler();
799859
try {
800-
// Java XML factories are not declared to be thread safe
801-
SAXParserFactory factory = SAXParserFactory.newInstance();
802-
SAXParser saxParser = factory.newSAXParser();
860+
SAXParser saxParser = createSAXParser();
803861
InputStream is =
804862
new ByteArrayInputStream(xml.getBytes(Constants.ENCODING));
805863
saxParser.parse(is, saxHandler);

0 commit comments

Comments
 (0)