[602974] ForParser now tolerates leading text runs that aren't within a tag.

This commit is contained in:
Erik Rose 2010-10-08 17:41:07 -07:00
Родитель 23c860974b
Коммит 8dd5e129c8
2 изменённых файла: 18 добавлений и 0 удалений

Просмотреть файл

@ -135,6 +135,14 @@ class ForParser(object):
"""
top_level_elements = parser.parseFragment(html)
container = Element(self.CONTAINER_TAG)
# Why lxml couldn't just have text nodes, I'll never understand.
# Text nodes that come other than first are automatically stuffed
# into the tail attrs of the preceding elements by html5lib.
if top_level_elements and isinstance(top_level_elements[0],
basestring):
container.text = top_level_elements.pop(0)
container.extend(top_level_elements)
return container

Просмотреть файл

@ -528,3 +528,13 @@ class ForParserTests(TestCase):
'<img src="smoo"><span>g</span>')
balanced_eq('<img src="smoo"><span>g</span>',
'<img src="smoo"/><span>g</span>')
def test_leading_text_nodes(self):
    """Round-trip markup that starts with a bare run of text.

    The sample also places text between and after its elements, so
    inner and trailing runs are exercised by the same comparison.
    """
    markup = 'A<i>hi</i>B<i>there</i>C'
    # The serialized form is compared against the untouched input.
    eq_(markup, ForParser(markup).to_unicode())