|
|
|
@ -273,15 +273,17 @@ def get_element_by_attribute(attribute, value, html):
|
|
|
|
|
|
|
|
|
|
return unescapeHTML(res)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element.

    Feed the parser the markup of one element's start tag and then read
    ``attrs``: a dict mapping each attribute name to its value, exactly as
    reported by the underlying parser's ``handle_starttag`` callback.
    """

    def __init__(self):
        # Set the result container first so ``attrs`` exists (as an empty
        # dict) even when no start tag is ever fed to the parser.
        self.attrs = {}
        # Explicit base-class call (rather than super()) is kept from the
        # original code.
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Store the attributes of the start tag that was just parsed.

        ``attrs`` is the sequence of ``(name, value)`` pairs supplied by the
        base parser. Each call replaces the previously stored dict, so after
        feeding several tags only the last one's attributes remain.
        """
        self.attrs = dict(attrs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_attributes(html_element):
|
|
|
|
|
"""Given a string for an HTML element such as
|
|
|
|
|
<el
|
|
|
|
@ -303,6 +305,7 @@ def extract_attributes(html_element):
|
|
|
|
|
parser.close()
|
|
|
|
|
return parser.attrs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def clean_html(html):
|
|
|
|
|
"""Clean an HTML snippet into a readable string"""
|
|
|
|
|
|
|
|
|
|