Don't rely on JAXP for ignoring comments (semantics are slightly different than E4X, or JRE parser has a bug, not sure which)

This commit is contained in:
inonit%inonit.com 2007-02-05 19:08:07 +00:00
Родитель 23c795e9da
Коммит 308b2125a4
1 изменённых файлов: 21 добавлений и 3 удалений

Просмотреть файл

@ -103,12 +103,22 @@ class XmlProcessor {
}
}
/**
 * Recursively collects every comment node in the subtree rooted at {@code node}.
 *
 * The tree itself is not modified here; matching nodes are only appended to {@code v},
 * each parent being visited before its descendants.
 *
 * @param v    accumulator that receives each {@link Comment} found
 * @param node root of the subtree to scan; the node itself is checked as well
 */
private void addCommentsTo(java.util.Vector v, Node node) {
    if (node instanceof Comment) {
        v.add(node);
    }
    // Fetch the child list once instead of re-querying it on every loop iteration.
    org.w3c.dom.NodeList children = node.getChildNodes();
    if (children != null) {
        for (int i = 0; i < children.getLength(); i++) {
            // Recurse into this same method: descending via addProcessingInstructionsTo
            // would gather processing instructions and miss nested comments.
            addCommentsTo(v, children.item(i));
        }
    }
}
private void addTextNodesToRemoveAndTrim(java.util.Vector toRemove, Node node) {
if (node instanceof Text) {
Text text = (Text)node;
String value = text.getData();
text.setData(text.getData().trim());
if (value.trim().length() == 0) {
if (text.getData().length() == 0) {
toRemove.add(node);
}
}
@ -135,7 +145,7 @@ class XmlProcessor {
// See ECMA357 10.3.1
javax.xml.parsers.DocumentBuilderFactory domFactory = newDomFactory();
domFactory.setNamespaceAware(true);
domFactory.setIgnoringComments(ignoreComments);
domFactory.setIgnoringComments(false);
try {
String syntheticXml = "<parent xmlns=\"" + defaultNamespaceUri + "\">" + xml + "</parent>";
Document document = domFactory.newDocumentBuilder().parse( new org.xml.sax.InputSource(new java.io.StringReader(syntheticXml)) );
@ -147,6 +157,14 @@ class XmlProcessor {
node.getParentNode().removeChild(node);
}
}
if (ignoreComments) {
java.util.Vector v = new java.util.Vector();
addCommentsTo(v, document);
for (int i=0; i<v.size(); i++) {
Node node = (Node)v.elementAt(i);
node.getParentNode().removeChild(node);
}
}
if (ignoreWhitespace) {
// Apparently JAXP setIgnoringElementContentWhitespace() has a different meaning, it appears from the Javadoc
// Refers to element-only content models, which means we would need to have a validating parser and DTD or schema