|
|
@ -15,6 +15,7 @@ import email.utils
|
|
|
|
import gzip
|
|
|
|
import gzip
|
|
|
|
import htmlentitydefs
|
|
|
|
import htmlentitydefs
|
|
|
|
import httplib
|
|
|
|
import httplib
|
|
|
|
|
|
|
|
import json # TODO: json for 2.5
|
|
|
|
import locale
|
|
|
|
import locale
|
|
|
|
import math
|
|
|
|
import math
|
|
|
|
import netrc
|
|
|
|
import netrc
|
|
|
@ -2563,6 +2564,80 @@ class FacebookIE(InfoExtractor):
|
|
|
|
except UnavailableVideoError, err:
|
|
|
|
except UnavailableVideoError, err:
|
|
|
|
self._downloader.trouble(u'\nERROR: unable to download video')
|
|
|
|
self._downloader.trouble(u'\nERROR: unable to download video')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class BlipTVIE(InfoExtractor):
|
|
|
|
|
|
|
|
"""Information extractor for blip.tv"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip.tv/(.+)$'
|
|
|
|
|
|
|
|
_URL_EXT = r'^.*\.([a-z0-9]+)$'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
|
|
|
def suitable(url):
|
|
|
|
|
|
|
|
return (re.match(BlipTVIE._VALID_URL, url) is not None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def report_download_webpage(self, file_id):
|
|
|
|
|
|
|
|
"""Report webpage download."""
|
|
|
|
|
|
|
|
self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.service_name, file_id))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def report_extraction(self, file_id):
|
|
|
|
|
|
|
|
"""Report information extraction."""
|
|
|
|
|
|
|
|
self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.service_name, file_id))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
|
|
|
def service_name(self):
|
|
|
|
|
|
|
|
return u'blip.tv'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _simplify_title(self, title):
|
|
|
|
|
|
|
|
res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
|
|
|
|
|
|
|
|
res = res.strip(ur'_')
|
|
|
|
|
|
|
|
return res
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _real_extract(self, url):
|
|
|
|
|
|
|
|
mobj = re.match(self._VALID_URL, url)
|
|
|
|
|
|
|
|
if mobj is None:
|
|
|
|
|
|
|
|
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
json_url = url + ('&' if '?' in url else '?') + 'skin=json&version=2&no_wrap=1'
|
|
|
|
|
|
|
|
request = urllib2.Request(json_url)
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
|
|
json_code = urllib2.urlopen(request).read()
|
|
|
|
|
|
|
|
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
|
|
|
|
|
|
|
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
|
|
json_data = json.loads(json_code)
|
|
|
|
|
|
|
|
data = json_data['Post']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
|
|
|
|
|
|
|
video_url = data['media']['url']
|
|
|
|
|
|
|
|
umobj = re.match(self._URL_EXT, video_url)
|
|
|
|
|
|
|
|
if umobj is None:
|
|
|
|
|
|
|
|
raise ValueError('Can not determine filename extension')
|
|
|
|
|
|
|
|
ext = umobj.group(1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
info = {
|
|
|
|
|
|
|
|
'id': data['item_id'],
|
|
|
|
|
|
|
|
'url': video_url,
|
|
|
|
|
|
|
|
'uploader': data['display_name'],
|
|
|
|
|
|
|
|
'upload_date': upload_date,
|
|
|
|
|
|
|
|
'title': data['title'],
|
|
|
|
|
|
|
|
'stitle': self._simplify_title(data['title']),
|
|
|
|
|
|
|
|
'ext': ext,
|
|
|
|
|
|
|
|
'format': data['media']['mimeType'],
|
|
|
|
|
|
|
|
'thumbnail': data['thumbnailUrl'],
|
|
|
|
|
|
|
|
'description': data['description'],
|
|
|
|
|
|
|
|
'player_url': data['embedUrl']
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
except (ValueError,KeyError), err:
|
|
|
|
|
|
|
|
self._downloader.trouble(u'ERROR: unable to parse video information: %s' % str(err))
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
|
|
self._downloader.process_info(info)
|
|
|
|
|
|
|
|
except UnavailableVideoError, err:
|
|
|
|
|
|
|
|
self._downloader.trouble(u'\nERROR: unable to download video')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class PostProcessor(object):
|
|
|
|
class PostProcessor(object):
|
|
|
|
"""Post Processor class.
|
|
|
|
"""Post Processor class.
|
|
|
|
|
|
|
|
|
|
|
@ -2911,6 +2986,7 @@ if __name__ == '__main__':
|
|
|
|
yahoo_search_ie = YahooSearchIE(yahoo_ie)
|
|
|
|
yahoo_search_ie = YahooSearchIE(yahoo_ie)
|
|
|
|
deposit_files_ie = DepositFilesIE()
|
|
|
|
deposit_files_ie = DepositFilesIE()
|
|
|
|
facebook_ie = FacebookIE()
|
|
|
|
facebook_ie = FacebookIE()
|
|
|
|
|
|
|
|
bliptv_ie = BlipTVIE()
|
|
|
|
generic_ie = GenericIE()
|
|
|
|
generic_ie = GenericIE()
|
|
|
|
|
|
|
|
|
|
|
|
# File downloader
|
|
|
|
# File downloader
|
|
|
@ -2963,6 +3039,7 @@ if __name__ == '__main__':
|
|
|
|
fd.add_info_extractor(yahoo_search_ie)
|
|
|
|
fd.add_info_extractor(yahoo_search_ie)
|
|
|
|
fd.add_info_extractor(deposit_files_ie)
|
|
|
|
fd.add_info_extractor(deposit_files_ie)
|
|
|
|
fd.add_info_extractor(facebook_ie)
|
|
|
|
fd.add_info_extractor(facebook_ie)
|
|
|
|
|
|
|
|
fd.add_info_extractor(bliptv_ie)
|
|
|
|
|
|
|
|
|
|
|
|
# This must come last since it's the
|
|
|
|
# This must come last since it's the
|
|
|
|
# fallback if none of the others work
|
|
|
|
# fallback if none of the others work
|
|
|
|