[utils] Handle HTMLParseError in extract_attributes (closes #13349)

8 years ago · b4a3d461e4
parent 72b409559c
commit b4a3d461e4
2 changed files with 9 additions and 2 deletions
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -916,6 +916,8 @@ class TestUtil(unittest.TestCase):
            supports_outside_bmp = False
        if supports_outside_bmp:
            self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'})
+        # Malformed HTML should not break attributes extraction on older Python
+        self.assertEqual(extract_attributes('<mal"formed/>'), {})

    def test_clean_html(self):
        self.assertEqual(clean_html('a:\nb'), 'a: b')
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -36,6 +36,7 @@ import xml.etree.ElementTree
 import zlib

 from .compat import (
+    compat_HTMLParseError,
    compat_HTMLParser,
    compat_basestring,
    compat_chr,
@@ -409,8 +410,12 @@ def extract_attributes(html_element):
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
+    try:
        parser.feed(html_element)
        parser.close()
+    # Older Python may throw HTMLParseError in case of malformed HTML
+    except compat_HTMLParseError:
+        pass
    return parser.attrs