|
|
@ -15,7 +15,6 @@ import email.utils
|
|
|
|
import gzip
|
|
|
|
import gzip
|
|
|
|
import htmlentitydefs
|
|
|
|
import htmlentitydefs
|
|
|
|
import httplib
|
|
|
|
import httplib
|
|
|
|
import json # TODO: json for 2.5
|
|
|
|
|
|
|
|
import locale
|
|
|
|
import locale
|
|
|
|
import math
|
|
|
|
import math
|
|
|
|
import netrc
|
|
|
|
import netrc
|
|
|
@ -24,20 +23,35 @@ import os.path
|
|
|
|
import re
|
|
|
|
import re
|
|
|
|
import socket
|
|
|
|
import socket
|
|
|
|
import string
|
|
|
|
import string
|
|
|
|
import StringIO
|
|
|
|
|
|
|
|
import subprocess
|
|
|
|
import subprocess
|
|
|
|
import sys
|
|
|
|
import sys
|
|
|
|
import time
|
|
|
|
import time
|
|
|
|
import urllib
|
|
|
|
import urllib
|
|
|
|
import urllib2
|
|
|
|
import urllib2
|
|
|
|
|
|
|
|
import warnings
|
|
|
|
import zlib
|
|
|
|
import zlib
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
|
|
|
except ImportError:
|
|
|
|
|
|
|
|
warnings.warn('No JSON support (TODO: insert trivialjson here)')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
|
|
import cStringIO as StringIO
|
|
|
|
|
|
|
|
except ImportError:
|
|
|
|
|
|
|
|
import StringIO
|
|
|
|
|
|
|
|
|
|
|
|
# parse_qs was moved from the cgi module to the urlparse module recently.
|
|
|
|
# parse_qs was moved from the cgi module to the urlparse module recently.
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
from urlparse import parse_qs
|
|
|
|
from urlparse import parse_qs
|
|
|
|
except ImportError:
|
|
|
|
except ImportError:
|
|
|
|
from cgi import parse_qs
|
|
|
|
from cgi import parse_qs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
|
|
import lxml.etree
|
|
|
|
|
|
|
|
except ImportError: # Python < 2.6
|
|
|
|
|
|
|
|
pass # Handled below
|
|
|
|
|
|
|
|
|
|
|
|
std_headers = {
|
|
|
|
std_headers = {
|
|
|
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11',
|
|
|
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11',
|
|
|
|
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
|
|
|
|
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
|
|
|
@ -1068,11 +1082,19 @@ class YoutubeIE(InfoExtractor):
|
|
|
|
pass
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
# description
|
|
|
|
# description
|
|
|
|
video_description = 'No description available.'
|
|
|
|
try:
|
|
|
|
if self._downloader.params.get('forcedescription', False):
|
|
|
|
lxml.etree
|
|
|
|
mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
|
|
|
|
except NameError:
|
|
|
|
if mobj is not None:
|
|
|
|
video_description = u'No description available.'
|
|
|
|
video_description = mobj.group(1)
|
|
|
|
if self._downloader.params.get('forcedescription', False):
|
|
|
|
|
|
|
|
warnings.warn(u'You are using an old Python version, install Python 2.6+ or lxml. Falling back to old video description extractor.')
|
|
|
|
|
|
|
|
mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
|
|
|
|
|
|
|
|
if mobj is not None:
|
|
|
|
|
|
|
|
video_description = mobj.group(1).decode('utf-8')
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
html_parser = lxml.etree.HTMLParser(encoding='utf-8')
|
|
|
|
|
|
|
|
vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
|
|
|
|
|
|
|
|
video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
|
|
|
|
|
|
|
|
|
|
|
|
# token
|
|
|
|
# token
|
|
|
|
video_token = urllib.unquote_plus(video_info['token'][0])
|
|
|
|
video_token = urllib.unquote_plus(video_info['token'][0])
|
|
|
@ -1130,7 +1152,7 @@ class YoutubeIE(InfoExtractor):
|
|
|
|
'ext': video_extension.decode('utf-8'),
|
|
|
|
'ext': video_extension.decode('utf-8'),
|
|
|
|
'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
|
|
|
|
'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
|
|
|
|
'thumbnail': video_thumbnail.decode('utf-8'),
|
|
|
|
'thumbnail': video_thumbnail.decode('utf-8'),
|
|
|
|
'description': video_description.decode('utf-8'),
|
|
|
|
'description': video_description,
|
|
|
|
'player_url': player_url,
|
|
|
|
'player_url': player_url,
|
|
|
|
})
|
|
|
|
})
|
|
|
|
except UnavailableVideoError, err:
|
|
|
|
except UnavailableVideoError, err:
|
|
|
|