Merge remote-tracking branch 'origin/master'

Conflicts:
	youtube_dl/YoutubeDL.py
pull/8/head
Philipp Hagemeister 10 years ago
commit dcca581967

@ -38,6 +38,7 @@ from youtube_dl.utils import (
parse_iso8601, parse_iso8601,
read_batch_urls, read_batch_urls,
sanitize_filename, sanitize_filename,
sanitize_path,
shell_quote, shell_quote,
smuggle_url, smuggle_url,
str_to_int, str_to_int,
@ -131,6 +132,37 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw') self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI') self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
def test_sanitize_path(self):
    # The expected values below are only checked on win32; on any other
    # platform the test returns without asserting anything.
    if sys.platform != 'win32':
        return

    # (input path, expected sanitized path) pairs, checked in order.
    cases = (
        ('abc', 'abc'),
        ('abc/def', 'abc\\def'),
        ('abc\\def', 'abc\\def'),
        ('abc|def', 'abc#def'),
        ('<>:"|?*', '#######'),
        ('C:/abc/def', 'C:\\abc\\def'),
        ('C?:/abc/def', 'C##\\abc\\def'),
        ('\\\\?\\UNC\\ComputerName\\abc', '\\\\?\\UNC\\ComputerName\\abc'),
        ('\\\\?\\UNC/ComputerName/abc', '\\\\?\\UNC\\ComputerName\\abc'),
        ('\\\\?\\C:\\abc', '\\\\?\\C:\\abc'),
        ('\\\\?\\C:/abc', '\\\\?\\C:\\abc'),
        ('\\\\?\\C:\\ab?c\\de:f', '\\\\?\\C:\\ab#c\\de#f'),
        ('\\\\?\\C:\\abc', '\\\\?\\C:\\abc'),
        (
            'youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s',
            'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s',
        ),
        (
            'youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part',
            'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part',
        ),
        ('abc/def...', 'abc\\def..#'),
        ('abc.../def', 'abc..#\\def'),
        ('abc.../def...', 'abc..#\\def..#'),
    )
    for raw_path, expected in cases:
        self.assertEqual(sanitize_path(raw_path), expected)
def test_ordered_set(self): def test_ordered_set(self):
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
self.assertEqual(orderedSet([]), []) self.assertEqual(orderedSet([]), [])

@ -61,6 +61,7 @@ from .utils import (
render_table, render_table,
SameFileError, SameFileError,
sanitize_filename, sanitize_filename,
sanitize_path,
std_headers, std_headers,
subtitles_filename, subtitles_filename,
takewhile_inclusive, takewhile_inclusive,
@ -562,7 +563,7 @@ class YoutubeDL(object):
if v is not None) if v is not None)
template_dict = collections.defaultdict(lambda: 'NA', template_dict) template_dict = collections.defaultdict(lambda: 'NA', template_dict)
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
tmpl = compat_expanduser(outtmpl) tmpl = compat_expanduser(outtmpl)
filename = tmpl % template_dict filename = tmpl % template_dict
# Temporary fix for #4787 # Temporary fix for #4787
@ -1261,7 +1262,7 @@ class YoutubeDL(object):
return return
try: try:
dn = os.path.dirname(encodeFilename(filename)) dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
if dn and not os.path.exists(dn): if dn and not os.path.exists(dn):
os.makedirs(dn) os.makedirs(dn)
except (OSError, IOError) as err: except (OSError, IOError) as err:

@ -281,7 +281,7 @@ class F4mFD(FileDownloader):
boot_info = self._get_bootstrap_from_url(bootstrap_url) boot_info = self._get_bootstrap_from_url(bootstrap_url)
else: else:
bootstrap_url = None bootstrap_url = None
bootstrap = base64.b64decode(node.text) bootstrap = base64.b64decode(node.text.encode('ascii'))
boot_info = read_bootstrap_info(bootstrap) boot_info = read_bootstrap_info(bootstrap)
return (boot_info, bootstrap_url) return (boot_info, bootstrap_url)
@ -308,7 +308,7 @@ class F4mFD(FileDownloader):
live = boot_info['live'] live = boot_info['live']
metadata_node = media.find(_add_ns('metadata')) metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None: if metadata_node is not None:
metadata = base64.b64decode(metadata_node.text) metadata = base64.b64decode(metadata_node.text.encode('ascii'))
else: else:
metadata = None metadata = None

@ -175,6 +175,7 @@ from .gameone import (
from .gamespot import GameSpotIE from .gamespot import GameSpotIE
from .gamestar import GameStarIE from .gamestar import GameStarIE
from .gametrailers import GametrailersIE from .gametrailers import GametrailersIE
from .gazeta import GazetaIE
from .gdcvault import GDCVaultIE from .gdcvault import GDCVaultIE
from .generic import GenericIE from .generic import GenericIE
from .giantbomb import GiantBombIE from .giantbomb import GiantBombIE
@ -363,6 +364,7 @@ from .pbs import PBSIE
from .phoenix import PhoenixIE from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE from .photobucket import PhotobucketIE
from .planetaplay import PlanetaPlayIE from .planetaplay import PlanetaPlayIE
from .pladform import PladformIE
from .played import PlayedIE from .played import PlayedIE
from .playfm import PlayFMIE from .playfm import PlayFMIE
from .playvid import PlayvidIE from .playvid import PlayvidIE

@ -2,13 +2,12 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
xpath_text,
float_or_none, float_or_none,
xpath_text,
) )
@ -60,6 +59,24 @@ class AdultSwimIE(InfoExtractor):
'title': 'American Dad - Putting Francine Out of Business', 'title': 'American Dad - Putting Francine Out of Business',
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].' 'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
}, },
}, {
'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
'playlist': [
{
'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
'info_dict': {
'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
'ext': 'flv',
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
},
}
],
'info_dict': {
'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
},
}] }]
@staticmethod @staticmethod
@ -80,6 +97,7 @@ class AdultSwimIE(InfoExtractor):
for video in collection.get('videos'): for video in collection.get('videos'):
if video.get('slug') == slug: if video.get('slug') == slug:
return collection, video return collection, video
return None, None
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@ -90,28 +108,30 @@ class AdultSwimIE(InfoExtractor):
webpage = self._download_webpage(url, episode_path) webpage = self._download_webpage(url, episode_path)
# Extract the value of `bootstrappedData` from the Javascript in the page. # Extract the value of `bootstrappedData` from the Javascript in the page.
bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path) bootstrapped_data = self._parse_json(self._search_regex(
r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)
try:
bootstrappedData = json.loads(bootstrappedDataJS)
except ValueError as ve:
errmsg = '%s: Failed to parse JSON ' % episode_path
raise ExtractorError(errmsg, cause=ve)
# Downloading videos from a /videos/playlist/ URL needs to be handled differently. # Downloading videos from a /videos/playlist/ URL needs to be handled differently.
# NOTE: We are only downloading one video (the current one) not the playlist # NOTE: We are only downloading one video (the current one) not the playlist
if is_playlist: if is_playlist:
collections = bootstrappedData['playlists']['collections'] collections = bootstrapped_data['playlists']['collections']
collection = self.find_collection_by_linkURL(collections, show_path) collection = self.find_collection_by_linkURL(collections, show_path)
video_info = self.find_video_info(collection, episode_path) video_info = self.find_video_info(collection, episode_path)
show_title = video_info['showTitle'] show_title = video_info['showTitle']
segment_ids = [video_info['videoPlaybackID']] segment_ids = [video_info['videoPlaybackID']]
else: else:
collections = bootstrappedData['show']['collections'] collections = bootstrapped_data['show']['collections']
collection, video_info = self.find_collection_containing_video(collections, episode_path) collection, video_info = self.find_collection_containing_video(collections, episode_path)
show = bootstrappedData['show'] # Video wasn't found in the collections, let's try `slugged_video`.
if video_info is None:
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
video_info = bootstrapped_data['slugged_video']
else:
raise ExtractorError('Unable to find video info')
show = bootstrapped_data['show']
show_title = show['title'] show_title = show['title']
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']] segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]

@ -41,7 +41,7 @@ class BreakIE(InfoExtractor):
'tbr': media['bitRate'], 'tbr': media['bitRate'],
'width': media['width'], 'width': media['width'],
'height': media['height'], 'height': media['height'],
} for media in info['media']] } for media in info['media'] if media.get('mediaPurpose') == 'play']
if not formats: if not formats:
formats.append({ formats.append({

@ -0,0 +1,38 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class GazetaIE(InfoExtractor):
    """Extractor for gazeta.ru video pages.

    The page itself is not parsed for media; the player id found on the
    embed variant of the page is handed over to the EaglePlatform extractor.
    """

    _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
    _TESTS = [{
        'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
        'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
        'info_dict': {
            'id': '205566',
            'ext': 'mp4',
            'title': '«7080 процентов гражданских в Донецке на грани голода»',
            'description': 'md5:38617526050bd17b234728e7f9620a71',
            'thumbnail': 're:^https?://.*\.jpg',
        },
    }, {
        'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a gazeta.ru video URL to an EaglePlatform URL result."""
        url_match = re.match(self._VALID_URL, url)
        display_id = url_match.group('id')

        # Fetch the embed variant of the page (same URL with ?p=embed).
        embed_page = self._download_webpage(
            '%s?p=embed' % url_match.group('url'),
            display_id, 'Downloading embed page')

        # The player <div> carries the id in its data-id attribute.
        eagle_id = self._search_regex(
            r'<div[^>]*?class="eagleplayer"[^>]*?data-id="([^"]+)"',
            embed_page, 'video id')

        eagle_url = 'eagleplatform:gazeta.media.eagleplatform.com:%s' % eagle_id
        return self.url_result(eagle_url, 'EaglePlatform')

@ -596,6 +596,19 @@ class GenericIE(InfoExtractor):
'view_count': int, 'view_count': int,
}, },
}, },
# Pladform embed
{
'url': 'http://muz-tv.ru/kinozal/view/7400/',
'info_dict': {
'id': '100183293',
'ext': 'mp4',
'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 694,
'age_limit': 0,
},
},
# RSS feed with enclosure # RSS feed with enclosure
{ {
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
@ -1193,6 +1206,12 @@ class GenericIE(InfoExtractor):
if mobj is not None: if mobj is not None:
return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform') return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
# Look for Pladform embeds
mobj = re.search(
r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Pladform')
def check_video(vurl): def check_video(vurl):
if YoutubeIE.suitable(vurl): if YoutubeIE.suitable(vurl):
return True return True

@ -0,0 +1,90 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
xpath_text,
qualities,
)
class PladformIE(InfoExtractor):
    """Extractor for Pladform videos.

    Accepts out.pladform.ru player URLs, static.pladform.ru SWF player URLs
    and video.pladform.ru catalog URLs, all of which carry a numeric video id.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:
                                out\.pladform\.ru/player|
                                static\.pladform\.ru/player\.swf
                            )
                            \?.*\bvideoid=|
                            video\.pladform\.ru/catalog/video/videoid/
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        # http://muz-tv.ru/kinozal/view/7400/
        'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
        'md5': '61f37b575dd27f1bb2e1854777fe31f4',
        'info_dict': {
            'id': '100183293',
            'ext': 'mp4',
            'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
            'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 694,
            'age_limit': 0,
        },
    }, {
        'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
        'only_matching': True,
    }, {
        'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict from the getVideo XML and the catalog page.

        Raises ExtractorError (expected) when the XML root element is
        tagged 'error'.
        """
        video_id = self._match_id(url)

        video = self._download_xml(
            'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
            video_id)

        # A root element tagged 'error' carries the failure message as text.
        if video.tag == 'error':
            message = '%s returned error: %s' % (self.IE_NAME, video.text)
            raise ExtractorError(message, expected=True)

        # Ranking function built from the quality names, listed as ld, sd, hd.
        quality = qualities(('ld', 'sd', 'hd'))

        # One format per <src> child; its 'quality' attribute doubles as
        # the format id.
        formats = []
        for src in video.findall('./src'):
            quality_name = src.get('quality')
            formats.append({
                'url': src.text,
                'format_id': quality_name,
                'quality': quality(quality_name),
            })
        self._sort_formats(formats)

        webpage = self._download_webpage(
            'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
            video_id)

        # Prefer the catalog page's Open Graph data, falling back to the XML.
        title = self._og_search_title(webpage, fatal=False)
        if not title:
            title = xpath_text(video, './/title', 'title', fatal=True)

        description = self._search_regex(
            r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False)

        thumbnail = self._og_search_thumbnail(webpage)
        if not thumbnail:
            thumbnail = xpath_text(video, './/cover', 'cover')

        duration = int_or_none(xpath_text(video, './/time', 'duration'))
        age_limit = int_or_none(xpath_text(video, './/age18', 'age limit'))

        info = {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': age_limit,
            'formats': formats,
        }
        return info

@ -53,10 +53,10 @@ class TeamcocoIE(InfoExtractor):
embed = self._download_webpage( embed = self._download_webpage(
embed_url, video_id, 'Downloading embed page') embed_url, video_id, 'Downloading embed page')
encoded_data = self._search_regex( player_data = self._parse_json(self._search_regex(
r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data') r'Y\.Ginger\.Module\.Player\((\{.*?\})\);', embed, 'player data'), video_id)
data = self._parse_json( data = self._parse_json(
base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id) base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id)
formats = [] formats = []
get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p']) get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])

@ -358,13 +358,12 @@ class TwitchStreamIE(TwitchBaseIE):
'p': random.randint(1000000, 10000000), 'p': random.randint(1000000, 10000000),
'player': 'twitchweb', 'player': 'twitchweb',
'segment_preference': '4', 'segment_preference': '4',
'sig': access_token['sig'], 'sig': access_token['sig'].encode('utf-8'),
'token': access_token['token'], 'token': access_token['token'].encode('utf-8'),
} }
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
'%s/api/channel/hls/%s.m3u8?%s' '%s/api/channel/hls/%s.m3u8?%s'
% (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')), % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query)),
channel_id, 'mp4') channel_id, 'mp4')
self._prefer_source(formats) self._prefer_source(formats)

@ -41,13 +41,10 @@ class VidmeIE(InfoExtractor):
duration = float_or_none(self._html_search_regex( duration = float_or_none(self._html_search_regex(
r'data-duration="([^"]+)"', webpage, 'duration', fatal=False)) r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
view_count = str_to_int(self._html_search_regex( view_count = str_to_int(self._html_search_regex(
r'<span class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False)) r'<(?:li|span) class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
like_count = str_to_int(self._html_search_regex( like_count = str_to_int(self._html_search_regex(
r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">', r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
webpage, 'like count', fatal=False)) webpage, 'like count', fatal=False))
comment_count = str_to_int(self._html_search_regex(
r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
webpage, 'comment count', fatal=False))
return { return {
'id': video_id, 'id': video_id,
@ -61,5 +58,4 @@ class VidmeIE(InfoExtractor):
'duration': duration, 'duration': duration,
'view_count': view_count, 'view_count': view_count,
'like_count': like_count, 'like_count': like_count,
'comment_count': comment_count,
} }

@ -252,15 +252,12 @@ def sanitize_open(filename, open_mode):
raise raise
# In case of error, try to remove win32 forbidden chars # In case of error, try to remove win32 forbidden chars
alt_filename = os.path.join( alt_filename = sanitize_path(filename)
re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
for path_part in os.path.split(filename)
)
if alt_filename == filename: if alt_filename == filename:
raise raise
else: else:
# An exception here should be caught in the caller # An exception here should be caught in the caller
stream = open(encodeFilename(filename), open_mode) stream = open(encodeFilename(alt_filename), open_mode)
return (stream, alt_filename) return (stream, alt_filename)
@ -311,6 +308,24 @@ def sanitize_filename(s, restricted=False, is_id=False):
return result return result
def sanitize_path(s):
    """Sanitize and normalize a path on Windows.

    On every platform other than win32 the path is returned unchanged.

    On win32 the drive or UNC prefix is kept as-is, the remainder is
    normalized with os.path.normpath, and in every remaining path component
    each of the characters / < > : " | ? * and backslash, as well as a
    trailing dot, is replaced by '#'.  The special components '.' and '..'
    are kept untouched so that relative paths still point to the same place.

    s -- the path to sanitize

    Returns the sanitized path as a string.
    """
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if not drive_or_unc:
        # os.path.splitunc was removed in Python 3.7 (newer splitdrive
        # recognizes UNC prefixes itself), so only fall back to it on
        # interpreters that still provide it.
        splitunc = getattr(os.path, 'splitunc', None)
        if splitunc is not None:
            drive_or_unc, _ = splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # Drop the leading component left of the first separator; the prefix
        # is re-attached below together with a separator.
        norm_path.pop(0)
    sanitized_path = [
        # '.' and '..' are navigation components, not names; replacing their
        # trailing dot would turn e.g. '..\\dir' into '.#\\dir'.
        path_part if path_part in ('.', '..')
        else re.sub(r'(?:[/<>:"\|\\?\*]|\.$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
def orderedSet(iterable): def orderedSet(iterable):
""" Remove all duplicates from the input iterable """ """ Remove all duplicates from the input iterable """
res = [] res = []

Loading…
Cancel
Save