[empflix] Revert to XML parser

Don't rely on the XML being broken: if the site ever fixes it, the regex-based parsing would stop working. Instead, download it as XML and repair it with the transform function we already have (fix_xml_ampersands) :) This partially reverts commit c7bee2a725.
11 years ago · 8f1ea7cbb6
parent a204c85408
commit 8f1ea7cbb6
1 changed files with 8 additions and 10 deletions
--- a/youtube_dl/extractor/empflix.py
+++ b/youtube_dl/extractor/empflix.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
+from ..utils import fix_xml_ampersands
 class EmpflixIE(InfoExtractor):
@@ -35,20 +36,17 @@ class EmpflixIE(InfoExtractor):
            r'flashvars\.config = escape\("([^"]+)"',
            webpage, 'flashvars.config')
-        # XML is malformed
+        cfg_xml = self._download_xml(
-        cfg_xml = self._download_webpage(
+            cfg_url, video_id, note='Downloading metadata',
-            cfg_url, video_id, note='Downloading metadata')
+            transform_source=fix_xml_ampersands)
        formats = [
            {
-                'url': item[1],
+                'url': item.find('videoLink').text,
-                'format_id': item[0],
+                'format_id': item.find('res').text,
-            } for item in re.findall(
+            } for item in cfg_xml.findall('./quality/item')
-                r'<item>\s*<res>([^>]+)</res>\s*<videoLink>([^<]+)</videoLink>\s*</item>', cfg_xml)
        ]
-
+        thumbnail = cfg_xml.find('./startThumb').text
-        thumbnail = self._html_search_regex(
-            r'<startThumb>([^<]+)</startThumb>', cfg_xml, 'thumbnail', fatal=False)
        return {
            'id': video_id,