[core] utils: fix some cases

pull/11736/head
Allen 1 month ago
parent ec3a0927c4
commit c49aa772cc
No known key found for this signature in database

@ -1846,7 +1846,7 @@ Line 1
random text lorem ipsum</p> random text lorem ipsum</p>
<div> <div>
this should be returned this should be returned
<span>this should also be returned</span> <SPAN>this should also be returned</SPAN>
<div> <div>
this should also be returned this should also be returned
</div> </div>
@ -1859,10 +1859,6 @@ Line 1
GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[78:119] GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[78:119]
GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT = GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML[6:-7] GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT = GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML[6:-7]
GET_ELEMENT_BY_TAG_TEST_STRING_UPPERCASE = '''
<SPAN id="foo">nice</SPAN>
'''
def test_get_element_text_and_html_by_tag(self): def test_get_element_text_and_html_by_tag(self):
html = self.GET_ELEMENT_BY_TAG_TEST_STRING html = self.GET_ELEMENT_BY_TAG_TEST_STRING
@ -1872,14 +1868,11 @@ Line 1
self.assertEqual( self.assertEqual(
get_element_text_and_html_by_tag('span', html), get_element_text_and_html_by_tag('span', html),
(self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML)) (self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML))
self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html)
html = self.GET_ELEMENT_BY_TAG_TEST_STRING_UPPERCASE
self.assertEqual( self.assertEqual(
get_element_text_and_html_by_tag('SPAN', html), get_element_text_and_html_by_tag('SPAN', html),
('nice', html.strip()), html) (self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML))
self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html)
def test_iri_to_uri(self): def test_iri_to_uri(self):
self.assertEqual( self.assertEqual(

@ -430,10 +430,14 @@ def get_element_text_and_html_by_tag(tag, html):
return its content (text) and the whole element (html) return its content (text) and the whole element (html)
""" """
def find_or_raise(haystack, needle, exc): def find_or_raise(haystack, needle, exc):
try: with contextlib.suppress(ValueError):
return haystack.index(needle) return haystack.index(needle)
except ValueError:
raise exc with contextlib.suppress(ValueError):
return haystack.index(needle.upper())
raise exc
closing_tag = f'</{tag}>' closing_tag = f'</{tag}>'
whole_start = find_or_raise( whole_start = find_or_raise(
html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found')) html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))

Loading…
Cancel
Save