diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index 56e5f80e1f..42813da1a4 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -100,6 +100,7 @@ class TestAllURLsMatching(unittest.TestCase):
def test_keywords(self):
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
+ self.assertMatch(':ythistory', ['youtube:history'])
self.assertMatch(':thedailyshow', ['ComedyCentral'])
self.assertMatch(':tds', ['ComedyCentral'])
self.assertMatch(':colbertreport', ['ComedyCentral'])
diff --git a/test/test_playlists.py b/test/test_playlists.py
index d83b3bf519..7c67239a43 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -102,7 +102,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('http://bambuser.com/channel/pixelversity')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'pixelversity')
- self.assertTrue(len(result['entries']) >= 66)
+ self.assertTrue(len(result['entries']) >= 60)
def test_bandcamp_album(self):
dl = FakeYDL()
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index 4b7a7847bd..938517a2de 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -27,7 +27,7 @@ class TestYoutubeLists(unittest.TestCase):
def test_youtube_playlist(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
- result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0]
+ result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'ytdl test PL')
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
@@ -44,13 +44,13 @@ class TestYoutubeLists(unittest.TestCase):
def test_issue_673(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
- result = ie.extract('PLBB231211A4F62143')[0]
+ result = ie.extract('PLBB231211A4F62143')
self.assertTrue(len(result['entries']) > 25)
def test_youtube_playlist_long(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
- result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0]
+ result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
self.assertIsPlaylist(result)
self.assertTrue(len(result['entries']) >= 799)
@@ -58,7 +58,7 @@ class TestYoutubeLists(unittest.TestCase):
#651
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
- result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0]
+ result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
self.assertFalse('pElCt5oNDuI' in ytie_results)
self.assertFalse('KdPEApIVdWM' in ytie_results)
@@ -66,7 +66,7 @@ class TestYoutubeLists(unittest.TestCase):
def test_youtube_playlist_empty(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
- result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0]
+ result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')
self.assertIsPlaylist(result)
self.assertEqual(len(result['entries']), 0)
@@ -74,7 +74,7 @@ class TestYoutubeLists(unittest.TestCase):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
# TODO find a > 100 (paginating?) videos course
- result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0]
+ result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
entries = result['entries']
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
self.assertEqual(len(entries), 25)
@@ -84,22 +84,22 @@ class TestYoutubeLists(unittest.TestCase):
dl = FakeYDL()
ie = YoutubeChannelIE(dl)
#test paginated channel
- result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0]
+ result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
self.assertTrue(len(result['entries']) > 90)
#test autogenerated channel
- result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0]
+ result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
self.assertTrue(len(result['entries']) >= 18)
def test_youtube_user(self):
dl = FakeYDL()
ie = YoutubeUserIE(dl)
- result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0]
+ result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
self.assertTrue(len(result['entries']) >= 320)
def test_youtube_safe_search(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
- result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
+ result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')
self.assertEqual(len(result['entries']), 2)
def test_youtube_show(self):
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 0a845a344b..87eb1a0b37 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -104,6 +104,7 @@ class YoutubeDL(object):
playlistend: Playlist item to end at.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
+ logger: Log messages to a logging.Logger instance.
logtostderr: Log messages to stderr instead of stdout.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
@@ -204,7 +205,9 @@ class YoutubeDL(object):
def to_screen(self, message, skip_eol=False):
"""Print message to stdout if not in quiet mode."""
- if not self.params.get('quiet', False):
+ if self.params.get('logger'):
+ self.params['logger'].debug(message)
+ elif not self.params.get('quiet', False):
terminator = [u'\n', u''][skip_eol]
output = message + terminator
write_string(output, self._screen_file)
@@ -212,10 +215,13 @@ class YoutubeDL(object):
def to_stderr(self, message):
"""Print message to stderr."""
assert type(message) == type(u'')
- output = message + u'\n'
- if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
- output = output.encode(preferredencoding())
- sys.stderr.write(output)
+ if self.params.get('logger'):
+ self.params['logger'].error(message)
+ else:
+ output = message + u'\n'
+ if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
+ output = output.encode(preferredencoding())
+ sys.stderr.write(output)
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
@@ -370,15 +376,17 @@ class YoutubeDL(object):
def _match_entry(self, info_dict):
""" Returns None iff the file should be downloaded """
- title = info_dict['title']
- matchtitle = self.params.get('matchtitle', False)
- if matchtitle:
- if not re.search(matchtitle, title, re.IGNORECASE):
- return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
- rejecttitle = self.params.get('rejecttitle', False)
- if rejecttitle:
- if re.search(rejecttitle, title, re.IGNORECASE):
- return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+ if 'title' in info_dict:
+ # This can happen when we're just evaluating the playlist
+ title = info_dict['title']
+ matchtitle = self.params.get('matchtitle', False)
+ if matchtitle:
+ if not re.search(matchtitle, title, re.IGNORECASE):
+ return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
+ rejecttitle = self.params.get('rejecttitle', False)
+ if rejecttitle:
+ if re.search(rejecttitle, title, re.IGNORECASE):
+ return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
date = info_dict.get('upload_date', None)
if date is not None:
dateRange = self.params.get('daterange', DateRange())
@@ -389,8 +397,8 @@ class YoutubeDL(object):
if age_limit < info_dict.get('age_limit', 0):
return u'Skipping "' + title + '" because it is age restricted'
if self.in_download_archive(info_dict):
- return (u'%(title)s has already been recorded in archive'
- % info_dict)
+ return (u'%s has already been recorded in archive'
+ % info_dict.get('title', info_dict.get('id', u'video')))
return None
@staticmethod
@@ -469,7 +477,7 @@ class YoutubeDL(object):
ie_key=ie_result.get('ie_key'),
extra_info=extra_info)
elif result_type == 'playlist':
- self.add_extra_info(ie_result, extra_info)
+
# We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@@ -499,6 +507,12 @@ class YoutubeDL(object):
'webpage_url': ie_result['webpage_url'],
'extractor_key': ie_result['extractor_key'],
}
+
+ reason = self._match_entry(entry)
+ if reason is not None:
+ self.to_screen(u'[download] ' + reason)
+ continue
+
entry_result = self.process_ie_result(entry,
download=download,
extra_info=extra)
@@ -654,7 +668,7 @@ class YoutubeDL(object):
# Forced printings
if self.params.get('forcetitle', False):
- compat_print(info_dict['title'])
+ compat_print(info_dict['fulltitle'])
if self.params.get('forceid', False):
compat_print(info_dict['id'])
if self.params.get('forceurl', False):
@@ -825,7 +839,16 @@ class YoutubeDL(object):
fn = self.params.get('download_archive')
if fn is None:
return False
- vid_id = info_dict['extractor'] + u' ' + info_dict['id']
+ extractor = info_dict.get('extractor_id')
+ if extractor is None:
+ if 'id' in info_dict:
+ extractor = info_dict.get('ie_key') # key in a playlist
+ if extractor is None:
+ return False # Incomplete video information
+ # Future-proof against any change in case
+ # and backwards compatibility with prior versions
+ extractor = extractor.lower()
+ vid_id = extractor + u' ' + info_dict['id']
try:
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
for line in archive_file:
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 27886593b4..1f15c7eaa0 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -35,6 +35,7 @@ __authors__ = (
'Jelle van der Waa',
'Marcin Cieślak',
'Anton Larionov',
+ 'Takuya Tsuchida',
)
__license__ = 'Public Domain'
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 02f9e25468..1fbd10bc5c 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -20,6 +20,7 @@ from .c56 import C56IE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cinemassacre import CinemassacreIE
+from .clipfish import ClipfishIE
from .cnn import CNNIE
from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE
@@ -98,6 +99,7 @@ from .nba import NBAIE
from .nbc import NBCNewsIE
from .newgrounds import NewgroundsIE
from .nhl import NHLIE, NHLVideocenterIE
+from .niconico import NiconicoIE
from .nowvideo import NowVideoIE
from .ooyala import OoyalaIE
from .orf import ORFIE
@@ -156,6 +158,7 @@ from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE
from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE
+from .viki import VikiIE
from .vk import VKIE
from .wat import WatIE
from .websurg import WeBSurgIE
@@ -183,6 +186,7 @@ from .youtube import (
YoutubeTruncatedURLIE,
YoutubeWatchLaterIE,
YoutubeFavouritesIE,
+ YoutubeHistoryIE,
)
from .zdf import ZDFIE
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index 359d4174bc..3a32c14c59 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -20,28 +20,6 @@ class BandcampIE(InfoExtractor):
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
},
u'skip': u'There is a limit of 200 free downloads / month for the test song'
- }, {
- u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
- u'playlist': [
- {
- u'file': u'1353101989.mp3',
- u'md5': u'39bc1eded3476e927c724321ddf116cf',
- u'info_dict': {
- u'title': u'Intro',
- }
- },
- {
- u'file': u'38097443.mp3',
- u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
- u'info_dict': {
- u'title': u'Kero One - Keep It Alive (Blazo remix)',
- }
- },
- ],
- u'params': {
- u'playlistend': 2
- },
- u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
}]
def _real_extract(self, url):
@@ -56,20 +34,17 @@ class BandcampIE(InfoExtractor):
json_code = m_trackinfo.group(1)
data = json.loads(json_code)
- entries = []
for d in data:
formats = [{
'format_id': 'format_id',
'url': format_url,
'ext': format_id.partition('-')[0]
} for format_id, format_url in sorted(d['file'].items())]
- entries.append({
+ return {
'id': compat_str(d['id']),
'title': d['title'],
'formats': formats,
- })
-
- return self.playlist_result(entries, title, title)
+ }
else:
raise ExtractorError(u'No free songs found')
@@ -112,6 +87,30 @@ class BandcampAlbumIE(InfoExtractor):
IE_NAME = u'Bandcamp:album'
_VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P
.*)'
+ _TEST = {
+ u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
+ u'playlist': [
+ {
+ u'file': u'1353101989.mp3',
+ u'md5': u'39bc1eded3476e927c724321ddf116cf',
+ u'info_dict': {
+ u'title': u'Intro',
+ }
+ },
+ {
+ u'file': u'38097443.mp3',
+ u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
+ u'info_dict': {
+ u'title': u'Kero One - Keep It Alive (Blazo remix)',
+ }
+ },
+ ],
+ u'params': {
+ u'playlistend': 2
+ },
+ u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
+ }
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title = mobj.group('title')
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index d8c35465a3..66fe0ac9ad 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -75,16 +75,22 @@ class BrightcoveIE(InfoExtractor):
params = {'flashID': object_doc.attrib['id'],
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
}
- playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
+ def find_param(name):
+ node = find_xpath_attr(object_doc, './param', 'name', name)
+ if node is not None:
+ return node.attrib['value']
+ return None
+ playerKey = find_param('playerKey')
# Not all pages define this value
if playerKey is not None:
- params['playerKey'] = playerKey.attrib['value']
- videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
+ params['playerKey'] = playerKey
+ # The three fields hold the id of the video
+ videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
if videoPlayer is not None:
- params['@videoPlayer'] = videoPlayer.attrib['value']
- linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL')
+ params['@videoPlayer'] = videoPlayer
+ linkBase = find_param('linkBaseURL')
if linkBase is not None:
- params['linkBaseURL'] = linkBase.attrib['value']
+ params['linkBaseURL'] = linkBase
data = compat_urllib_parse.urlencode(params)
return cls._FEDERATED_URL_TEMPLATE % data
diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py
new file mode 100644
index 0000000000..95449da3cc
--- /dev/null
+++ b/youtube_dl/extractor/clipfish.py
@@ -0,0 +1,53 @@
+import re
+import time
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+
+
+class ClipfishIE(InfoExtractor):
+ IE_NAME = u'clipfish'
+
+ _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P[0-9]+)/'
+ _TEST = {
+ u'url': u'http://www.clipfish.de/special/supertalent/video/4028320/supertalent-2013-ivana-opacak-singt-nobodys-perfect/',
+ u'file': u'4028320.f4v',
+ u'md5': u'5e38bda8c329fbfb42be0386a3f5a382',
+ u'info_dict': {
+ u'title': u'Supertalent 2013: Ivana Opacak singt Nobody\'s Perfect',
+ u'duration': 399,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group(1)
+
+ info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
+ (video_id, int(time.time())))
+ info_xml = self._download_webpage(
+ info_url, video_id, note=u'Downloading info page')
+ doc = xml.etree.ElementTree.fromstring(info_xml)
+ title = doc.find('title').text
+ video_url = doc.find('filename').text
+ thumbnail = doc.find('imageurl').text
+ duration_str = doc.find('duration').text
+ m = re.match(
+ r'^(?P[0-9]+):(?P[0-9]{2}):(?P[0-9]{2}):(?P[0-9]*)$',
+ duration_str)
+ if m:
+ duration = (
+ (int(m.group('hours')) * 60 * 60) +
+ (int(m.group('minutes')) * 60) +
+ (int(m.group('seconds')))
+ )
+ else:
+ duration = None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ }
diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py
index 0c29acfb13..b27c1dfc52 100644
--- a/youtube_dl/extractor/collegehumor.py
+++ b/youtube_dl/extractor/collegehumor.py
@@ -1,5 +1,4 @@
import re
-import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -46,11 +45,10 @@ class CollegeHumorIE(InfoExtractor):
self.report_extraction(video_id)
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
- metaXml = self._download_webpage(xmlUrl, video_id,
+ mdoc = self._download_xml(xmlUrl, video_id,
u'Downloading info XML',
u'Unable to download video info XML')
- mdoc = xml.etree.ElementTree.fromstring(metaXml)
try:
videoNode = mdoc.findall('./video')[0]
youtubeIdNode = videoNode.find('./youtubeID')
@@ -65,11 +63,10 @@ class CollegeHumorIE(InfoExtractor):
if next_url.endswith(u'manifest.f4m'):
manifest_url = next_url + '?hdcore=2.10.3'
- manifestXml = self._download_webpage(manifest_url, video_id,
+ adoc = self._download_xml(manifest_url, video_id,
u'Downloading XML manifest',
u'Unable to download video info XML')
- adoc = xml.etree.ElementTree.fromstring(manifestXml)
try:
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
except IndexError:
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 423e54ceaa..6ec835f8af 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -4,6 +4,7 @@ import re
import socket
import sys
import netrc
+import xml.etree.ElementTree
from ..utils import (
compat_http_client,
@@ -208,6 +209,11 @@ class InfoExtractor(object):
""" Returns the data of the page as a string """
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
+ def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to downloand XML'):
+ """Return the xml as an xml.etree.ElementTree.Element"""
+ xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
+ return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
+
def to_screen(self, msg):
"""Print msg to screen, prefixing it with '[ie_name]'"""
self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
@@ -229,12 +235,14 @@ class InfoExtractor(object):
self.to_screen(u'Logging in')
#Methods for following #608
- def url_result(self, url, ie=None):
+ def url_result(self, url, ie=None, video_id=None):
"""Returns a url that points to a page that should be processed"""
#TODO: ie should be the class used for getting the info
video_info = {'_type': 'url',
'url': url,
'ie_key': ie}
+ if video_id is not None:
+ video_info['id'] = video_id
return video_info
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
"""Returns a playlist"""
diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py
index 46954337f2..bafc5826f6 100644
--- a/youtube_dl/extractor/howcast.py
+++ b/youtube_dl/extractor/howcast.py
@@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
_TEST = {
u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
u'file': u'390161.mp4',
- u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138',
+ u'md5': u'8b743df908c42f60cf6496586c7f12c3',
u'info_dict': {
u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
u"title": u"How to Tie a Square Knot Properly"
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index a200dcd74a..e2baf44d7e 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -60,7 +60,7 @@ class MixcloudIE(InfoExtractor):
'title': info['name'],
'url': final_song_url,
'ext': 'mp3',
- 'description': info['description'],
+ 'description': info.get('description'),
'thumbnail': info['pictures'].get('extra_large'),
'uploader': info['user']['name'],
'uploader_id': info['user']['username'],
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
new file mode 100644
index 0000000000..729607ea3a
--- /dev/null
+++ b/youtube_dl/extractor/niconico.py
@@ -0,0 +1,131 @@
+# encoding: utf-8
+
+import re
+import socket
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_http_client,
+ compat_urllib_error,
+ compat_urllib_parse,
+ compat_urllib_request,
+ compat_urlparse,
+ compat_str,
+
+ ExtractorError,
+ unified_strdate,
+)
+
+
+class NiconicoIE(InfoExtractor):
+ IE_NAME = u'niconico'
+ IE_DESC = u'ニコニコ動画'
+
+ _TEST = {
+ u'url': u'http://www.nicovideo.jp/watch/sm22312215',
+ u'file': u'sm22312215.mp4',
+ u'md5': u'd1a75c0823e2f629128c43e1212760f9',
+ u'info_dict': {
+ u'title': u'Big Buck Bunny',
+ u'uploader': u'takuya0301',
+ u'uploader_id': u'2698420',
+ u'upload_date': u'20131123',
+ u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
+ },
+ u'params': {
+ u'username': u'ydl.niconico@gmail.com',
+ u'password': u'youtube-dl',
+ },
+ }
+
+ _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
+ _NETRC_MACHINE = 'niconico'
+ # If True it will raise an error if no login info is provided
+ _LOGIN_REQUIRED = True
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ # No authentication to be performed
+ if username is None:
+ if self._LOGIN_REQUIRED:
+ raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+ return False
+
+ # Log in
+ login_form_strs = {
+ u'mail': username,
+ u'password': password,
+ }
+ # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
+ # chokes on unicode
+ login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
+ login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
+ request = compat_urllib_request.Request(
+ u'https://secure.nicovideo.jp/secure/login', login_data)
+ login_results = self._download_webpage(
+ request, u'', note=u'Logging in', errnote=u'Unable to log in')
+ if re.search(r'(?i)Log in error
', login_results) is not None:
+ self._downloader.report_warning(u'unable to log in: bad username or password')
+ return False
+ return True
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group(1)
+
+ # Get video webpage. We are not actually interested in it, but need
+ # the cookies in order to be able to download the info webpage
+ self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
+
+ video_info_webpage = self._download_webpage(
+ 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
+ note=u'Downloading video info page')
+
+ # Get flv info
+ flv_info_webpage = self._download_webpage(
+ u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
+ video_id, u'Downloading flv info')
+ video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
+
+ # Start extracting information
+ video_info = xml.etree.ElementTree.fromstring(video_info_webpage)
+ video_title = video_info.find('.//title').text
+ video_extension = video_info.find('.//movie_type').text
+ video_format = video_extension.upper()
+ video_thumbnail = video_info.find('.//thumbnail_url').text
+ video_description = video_info.find('.//description').text
+ video_uploader_id = video_info.find('.//user_id').text
+ video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
+ video_view_count = video_info.find('.//view_counter').text
+ video_webpage_url = video_info.find('.//watch_url').text
+
+ # uploader
+ video_uploader = video_uploader_id
+ url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
+ try:
+ user_info_webpage = self._download_webpage(
+ url, video_id, note=u'Downloading user information')
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
+ else:
+ user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
+ video_uploader = user_info.find('.//nickname').text
+
+ return {
+ 'id': video_id,
+ 'url': video_real_url,
+ 'title': video_title,
+ 'ext': video_extension,
+ 'format': video_format,
+ 'thumbnail': video_thumbnail,
+ 'description': video_description,
+ 'uploader': video_uploader,
+ 'upload_date': video_upload_date,
+ 'uploader_id': video_uploader_id,
+ 'view_count': video_view_count,
+ 'webpage_url': video_webpage_url,
+ }
diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py
index d476693ec0..9faf3a5e3f 100644
--- a/youtube_dl/extractor/streamcloud.py
+++ b/youtube_dl/extractor/streamcloud.py
@@ -21,6 +21,7 @@ class StreamcloudIE(InfoExtractor):
u'title': u'youtube-dl test video \'/\\ ä ↭',
u'duration': 9,
},
+ u'skip': u'Only available from the EU'
}
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
new file mode 100644
index 0000000000..78d03c0793
--- /dev/null
+++ b/youtube_dl/extractor/viki.py
@@ -0,0 +1,91 @@
+import re
+
+from ..utils import (
+ unified_strdate,
+)
+from .subtitles import SubtitlesInfoExtractor
+
+
+class VikiIE(SubtitlesInfoExtractor):
+ IE_NAME = u'viki'
+
+ _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P[0-9]+v)'
+ _TEST = {
+ u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
+ u'file': u'1023585v.mp4',
+ u'md5': u'a21454021c2646f5433514177e2caa5f',
+ u'info_dict': {
+ u'title': u'Heirs Episode 14',
+ u'uploader': u'SBS',
+ u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
+ u'upload_date': u'20131121',
+ u'age_limit': 13,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group(1)
+
+ webpage = self._download_webpage(url, video_id)
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ uploader = self._html_search_regex(
+ r'Broadcast Network: \s*([^<]*)<', webpage,
+ u'uploader')
+ if uploader is not None:
+ uploader = uploader.strip()
+
+ rating_str = self._html_search_regex(
+ r'Rating: \s*([^<]*)<', webpage,
+ u'rating information', default='').strip()
+ RATINGS = {
+ 'G': 0,
+ 'PG': 10,
+ 'PG-13': 13,
+ 'R': 16,
+ 'NC': 18,
+ }
+ age_limit = RATINGS.get(rating_str)
+
+ info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
+ info_webpage = self._download_webpage(info_url, video_id)
+ video_url = self._html_search_regex(
+ r'