[parsing] return unclosed matched tags

pull/5588/head
Marcel 3 years ago
parent 8451074b50
commit dbf350c122
No known key found for this signature in database
GPG Key ID: 7813C97693AD6AAE

@@ -218,8 +218,9 @@ class TestParsing(unittest.TestCase):
get_element_text_and_html_by_tag('malnested_b', html),
(f'{inner_text}</malnested_a>',
f'<malnested_b>{inner_text}</malnested_a></malnested_b>'))
self.assertEqual(
get_element_text_and_html_by_tag('orphan', f'<orphan>{html}'), ('', '<orphan>'))
self.assertIsNone(get_element_text_and_html_by_tag('orphan', f'{html}</orphan>'))
self.assertIsNone(get_element_text_and_html_by_tag('orphan', f'<orphan>{html}'))
def test_strict_html_parsing(self):
class StrictTagParser(HTMLTagParser):
@@ -244,13 +245,13 @@ class TestParsing(unittest.TestCase):
parser = HTMLTagParser()
self.assertEqual(parser.taglist('</p>', reset=True), [])
self.assertEqual(parser.taglist('<div><p>', reset=True), [])
self.assertEqual(parser.taglist('<div><p>', reset=True), [Tag('div'), Tag('p')])
tags = parser.taglist('<div><p></div></p>', reset=True)
self.assertEqual(tags, [Tag('p'), Tag('div')])
self.assertEqual(tags, [Tag('div'), Tag('p')])
tags = parser.taglist('<div><p>/p></div>', reset=True)
self.assertEqual(tags, [Tag('div')])
self.assertEqual(tags, [Tag('div'), Tag('p')])
tags = parser.taglist('<div><p>paragraph</p<ignored></div>', reset=True)
self.assertEqual(tags, [Tag('div'), Tag('p')])

@@ -190,7 +190,7 @@ class HTMLTagParser(HTMLParser):
tag_obj = self.Tag(tag, string=self.rawdata, attrs=attrs)
tag_obj.openrange(self._offset, len(tag_text))
if tag_is_open:
nesting = []
nesting = [tag_obj]
self._nestedtags[-1].append(nesting)
self._nestedtags.append(nesting)
else:
@@ -218,7 +218,7 @@ class HTMLTagParser(HTMLParser):
if isinstance(tag_obj, self.Tag):
close_idx = self.rawdata.find('>', self._offset) + 1
tag_obj.closerange(self._offset, close_idx - self._offset)
self._nestedtags.pop().insert(0, tag_obj)
self._nestedtags.pop()
self.callback(tag_obj)
except ValueError as exc:
if isinstance(exc, compat_HTMLParseError):

Loading…
Cancel
Save