|
|
|
@ -1787,6 +1787,9 @@ Line 1
|
|
|
|
|
# HTML fixture: a single <div> carrying itemprop="author" plus a valueless
# `itemscope` attribute, with the text content "foo".
# NOTE(review): the '|' lines inside the literal look like extraction residue
# rather than intended fixture content -- confirm against the upstream file.
GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING = '''
|
|
|
|
|
<div itemprop="author" itemscope>foo</div>
|
|
|
|
|
'''
|
|
|
|
|
# HTML fixture of three <img> tags, none of which has a closing tag:
#   1. alt="foo"    with a src attribute, written as a plain start tag
#   2. alt="foobar" with a src attribute, written as a plain start tag
#   3. alt="foo"    without src, written in self-closing "/>" form
# The tests in this file that load it look elements up by alt="foo" (or by the
# `img` tag) and expect an empty string as the element text and the bare tag
# itself as the element HTML.
# NOTE(review): the '|' lines inside the literal look like extraction residue
# rather than intended fixture content -- confirm against the upstream file.
VOID_ELEMENT_TEST_STRING = '''
|
|
|
|
|
<img alt="foo" src="bar.png"><img alt="foobar" src="baz.jpg"><img alt="foo"/>
|
|
|
|
|
'''
|
|
|
|
|
|
|
|
|
|
def test_get_element_by_attribute(self):
|
|
|
|
|
html = self.GET_ELEMENT_BY_CLASS_TEST_STRING
|
|
|
|
@ -1799,6 +1802,10 @@ Line 1
|
|
|
|
|
|
|
|
|
|
self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo')
|
|
|
|
|
|
|
|
|
|
html = self.VOID_ELEMENT_TEST_STRING
|
|
|
|
|
|
|
|
|
|
self.assertEqual(get_element_by_attribute('alt', 'foo', html), '')
|
|
|
|
|
|
|
|
|
|
def test_get_element_html_by_attribute(self):
|
|
|
|
|
html = self.GET_ELEMENT_BY_CLASS_TEST_STRING
|
|
|
|
|
|
|
|
|
@ -1810,6 +1817,10 @@ Line 1
|
|
|
|
|
|
|
|
|
|
self.assertEqual(get_element_html_by_attribute('itemprop', 'author', html), html.strip())
|
|
|
|
|
|
|
|
|
|
html = self.VOID_ELEMENT_TEST_STRING
|
|
|
|
|
|
|
|
|
|
self.assertEqual(get_element_html_by_attribute('alt', 'foo', html), '<img alt="foo" src="bar.png">')
|
|
|
|
|
|
|
|
|
|
# HTML fixture: two sibling <span> elements that share the class attribute
# "foo bar", with text contents "nice" and "also nice" respectively.
# NOTE(review): the '|' lines inside the literal look like extraction residue
# rather than intended fixture content -- confirm against the upstream file.
GET_ELEMENTS_BY_CLASS_TEST_STRING = '''
|
|
|
|
|
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
|
|
|
|
|
'''
|
|
|
|
@ -1834,6 +1845,10 @@ Line 1
|
|
|
|
|
self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
|
|
|
|
|
self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
|
|
|
|
|
|
|
|
|
|
html = self.VOID_ELEMENT_TEST_STRING
|
|
|
|
|
|
|
|
|
|
self.assertEqual(get_elements_by_attribute('alt', 'foo', html), ['', ''])
|
|
|
|
|
|
|
|
|
|
def test_get_elements_html_by_attribute(self):
|
|
|
|
|
html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING
|
|
|
|
|
|
|
|
|
@ -1841,6 +1856,11 @@ Line 1
|
|
|
|
|
self.assertEqual(get_elements_html_by_attribute('class', 'foo', html), [])
|
|
|
|
|
self.assertEqual(get_elements_html_by_attribute('class', 'no-such-foo', html), [])
|
|
|
|
|
|
|
|
|
|
html = self.VOID_ELEMENT_TEST_STRING
|
|
|
|
|
|
|
|
|
|
self.assertEqual(get_elements_html_by_attribute(
|
|
|
|
|
'alt', 'foo', html), ['<img alt="foo" src="bar.png">', '<img alt="foo"/>'])
|
|
|
|
|
|
|
|
|
|
def test_get_elements_text_and_html_by_attribute(self):
|
|
|
|
|
html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING
|
|
|
|
|
|
|
|
|
@ -1853,6 +1873,11 @@ Line 1
|
|
|
|
|
self.assertEqual(list(get_elements_text_and_html_by_attribute(
|
|
|
|
|
'class', 'foo', '<a class="foo">nice</a><span class="foo">nice</span>', tag='a')), [('nice', '<a class="foo">nice</a>')])
|
|
|
|
|
|
|
|
|
|
html = self.VOID_ELEMENT_TEST_STRING
|
|
|
|
|
|
|
|
|
|
self.assertEqual(list(get_elements_text_and_html_by_attribute(
|
|
|
|
|
'alt', 'foo', html, tag='img')), [('', '<img alt="foo" src="bar.png">'), ('', '<img alt="foo"/>')])
|
|
|
|
|
|
|
|
|
|
GET_ELEMENT_BY_TAG_TEST_STRING = '''
|
|
|
|
|
random text lorem ipsum</p>
|
|
|
|
|
<div>
|
|
|
|
@ -1881,6 +1906,10 @@ Line 1
|
|
|
|
|
(self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML))
|
|
|
|
|
self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html)
|
|
|
|
|
|
|
|
|
|
html = self.VOID_ELEMENT_TEST_STRING
|
|
|
|
|
|
|
|
|
|
self.assertEqual(get_element_text_and_html_by_tag('img', html), ('', '<img alt="foo" src="bar.png">'))
|
|
|
|
|
|
|
|
|
|
def test_iri_to_uri(self):
|
|
|
|
|
self.assertEqual(
|
|
|
|
|
iri_to_uri('https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b'),
|
|
|
|
|