[facebook] Improve Facebook embedded detection

Related to #9938.

Another example comes from 9834872bf6.
pull/9204/merge
Yen Chi Hsuan 9 years ago
parent bdafd88da0
commit fd6ca38262

@ -129,6 +129,21 @@ class FacebookIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
if mobj is not None:
return mobj.group('url')
# Facebook API embed
# see https://developers.facebook.com/docs/plugins/embedded-video-player
mobj = re.search(r'''(?x)<div[^>]+
class=(?P<q1>[\'"])[^\'"]*\bfb-video\b[^\'"]*(?P=q1)[^>]+
data-href=(?P<q2>[\'"])(?P<url>[^\'"]+)(?P=q2)''', webpage)
if mobj is not None:
return mobj.group('url')
def _login(self): def _login(self):
(useremail, password) = self._get_login_info() (useremail, password) = self._get_login_info()
if useremail is None: if useremail is None:

@ -66,6 +66,7 @@ from .theplatform import ThePlatformIE
from .vessel import VesselIE from .vessel import VesselIE
from .kaltura import KalturaIE from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE from .eagleplatform import EaglePlatformIE
from .facebook import FacebookIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -1260,6 +1261,24 @@ class GenericIE(InfoExtractor):
'uploader': 'TheAtlantic', 'uploader': 'TheAtlantic',
}, },
'add_ie': ['BrightcoveLegacy'], 'add_ie': ['BrightcoveLegacy'],
},
# Facebook <iframe> embed
{
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
'info_dict': {
'id': '599637780109885',
'ext': 'mp4',
'title': 'Facebook video #599637780109885',
},
},
# Facebook API embed
{
'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
'info_dict': {
'id': '10153467542406923',
'ext': 'mp4',
'title': 'Facebook video #10153467542406923',
},
} }
] ]
@ -1759,10 +1778,9 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url')) return self.url_result(mobj.group('url'))
# Look for embedded Facebook player # Look for embedded Facebook player
mobj = re.search( facebook_url = FacebookIE._extract_url(webpage)
r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage) if facebook_url is not None:
if mobj is not None: return self.url_result(facebook_url, 'Facebook')
return self.url_result(mobj.group('url'), 'Facebook')
# Look for embedded VK player # Look for embedded VK player
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage) mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)

Loading…
Cancel
Save