Don't rely on JAXP for ignoring comments (its semantics differ slightly from E4X's, or the JRE parser has a bug; not sure which)

2007-02-05 19:08:07 +00:00 · 2007-02-05 19:08:07 +00:00 · 308b2125a4
--- a/js/rhino/xmlimplsrc/org/mozilla/javascript/xmlimpl/XmlProcessor.java
+++ b/js/rhino/xmlimplsrc/org/mozilla/javascript/xmlimpl/XmlProcessor.java
@ -103,12 +103,22 @@ class XmlProcessor {
 		}
 	}
 	
+	private void addCommentsTo(java.util.Vector v, Node node) {	//	Collects into v every Comment found at node or anywhere beneath it
+		if (node instanceof Comment) {
+			v.add(node);
+		}
+		if (node.getChildNodes() != null) {
+			for (int i=0; i<node.getChildNodes().getLength(); i++) {
+				addCommentsTo(v, node.getChildNodes().item(i));	//	Recurse with this same method so nested comments are collected
+			}
+		}
+	}
+	
 	private void addTextNodesToRemoveAndTrim(java.util.Vector toRemove, Node node) {
 		if (node instanceof Text) {
 			Text text = (Text)node;
-			String value = text.getData();
 			text.setData(text.getData().trim());
-			if (value.trim().length() == 0) {
+			if (text.getData().length() == 0) {
 				toRemove.add(node);
 			}
 		}
@ -135,7 +145,7 @@ class XmlProcessor {
 		//	See ECMA357 10.3.1
 		javax.xml.parsers.DocumentBuilderFactory domFactory = newDomFactory();
 		domFactory.setNamespaceAware(true);
-		domFactory.setIgnoringComments(ignoreComments);
+		domFactory.setIgnoringComments(false);
 		try {
 			String syntheticXml = "<parent xmlns=\"" + defaultNamespaceUri + "\">" + xml + "</parent>";
 			Document document = domFactory.newDocumentBuilder().parse( new org.xml.sax.InputSource(new java.io.StringReader(syntheticXml)) );
@ -147,6 +157,14 @@ class XmlProcessor {
 					node.getParentNode().removeChild(node);
 				}
 			}
+			if (ignoreComments) {
+				java.util.Vector v = new java.util.Vector();
+				addCommentsTo(v, document);
+				for (int i=0; i<v.size(); i++) {
+					Node node = (Node)v.elementAt(i);
+					node.getParentNode().removeChild(node);
+				}
+			}
 			if (ignoreWhitespace) {
 				//	Apparently JAXP setIgnoringElementContentWhitespace() has a different meaning, it appears from the Javadoc
 				//	Refers to element-only content models, which means we would need to have a validating parser and DTD or schema