[602974] ForParser now tolerates leading text runs that aren't within a tag.

2010-10-08 17:41:07 -07:00 · 2010-10-08 17:41:07 -07:00 · 8dd5e129c8
--- a/apps/wiki/parser.py
+++ b/apps/wiki/parser.py
@@ -135,6 +135,14 @@ class ForParser(object):
            """
            top_level_elements = parser.parseFragment(html)
            container = Element(self.CONTAINER_TAG)
+
+            # lxml has no text nodes: html5lib returns a leading text run as a
+            # bare string and stores every later run in the `tail` attribute of
+            # the preceding element, so only the first item needs special care.
+            if top_level_elements and isinstance(top_level_elements[0],
+                                                 basestring):
+                container.text = top_level_elements.pop(0)
+
            container.extend(top_level_elements)
            return container

--- a/apps/wiki/tests/test_parser.py
+++ b/apps/wiki/tests/test_parser.py
@@ -528,3 +528,13 @@ class ForParserTests(TestCase):
                    '<img src="smoo"><span>g</span>')
        balanced_eq('<img src="smoo"><span>g</span>',
                    '<img src="smoo"/><span>g</span>')
+
+    def test_leading_text_nodes(self):
+        """Make sure the parser handles a leading naked run of text.
+
+        Inner and trailing runs of text are round-tripped here as well.
+
+        """
+        html = 'A<i>hi</i>B<i>there</i>C'
+        p = ForParser(html)
+        eq_(html, p.to_unicode())