[khanacademy] Add support (Fixes #2066)

pull/2113/head
Philipp Hagemeister 11 years ago
parent 0cdad20c75
commit 3d3538e422

@ -1,7 +1,6 @@
#!/usr/bin/env python
# encoding: utf-8
# Allow direct execution
import os
import sys
@ -30,6 +29,7 @@ from youtube_dl.extractor import (
SmotriUserIE,
IviCompilationIE,
ImdbListIE,
KhanAcademyIE,
)
@ -198,6 +198,16 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], u'Animated and Family Films')
self.assertTrue(len(result['entries']) >= 48)
def test_khanacademy_topic(self):
dl = FakeYDL()
ie = KhanAcademyIE(dl)
result = ie.extract('https://www.khanacademy.org/math/applied-math/cryptography')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], u'cryptography')
self.assertEqual(result['title'], u'Journey into cryptography')
self.assertEqual(result['description'], u'How have humans protected their secret messages through history? What has changed today?')
self.assertTrue(len(result['entries']) >= 3)
if __name__ == '__main__':
unittest.main()

@ -98,6 +98,7 @@ from .justintv import JustinTVIE
from .jpopsukitv import JpopsukiIE
from .kankan import KankanIE
from .keezmovies import KeezMoviesIE
from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE
from .keek import KeekIE
from .liveleak import LiveLeakIE

@ -1,4 +1,5 @@
import base64
import json
import os
import re
import socket
@ -260,6 +261,15 @@ class InfoExtractor(object):
xml_string = transform_source(xml_string)
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
def _download_json(self, url_or_request, video_id,
note=u'Downloading JSON metadata',
errnote=u'Unable to download JSON metadata'):
json_string = self._download_webpage(url_or_request, video_id, note, errnote)
try:
return json.loads(json_string)
except ValueError as ve:
raise ExtractorError('Failed to download JSON', cause=ve)
def report_warning(self, msg, video_id=None):
idstr = u'' if video_id is None else u'%s: ' % video_id
self._downloader.report_warning(

@ -0,0 +1,71 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
unified_strdate,
)
class KhanAcademyIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
IE_NAME = 'KhanAcademy'
_TEST = {
'url': 'http://www.khanacademy.org/video/one-time-pad',
'file': 'one-time-pad.mp4',
'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
'info_dict': {
'title': 'The one-time pad',
'description': 'The perfect cipher',
'duration': 176,
'uploader': 'Brit Cruise',
'upload_date': '20120411',
}
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
if m.group('key') == 'video':
data = self._download_json(
'http://api.khanacademy.org/api/v1/videos/' + video_id,
video_id, 'Downloading video info')
upload_date = unified_strdate(data['date_added'])
uploader = ', '.join(data['author_names'])
return {
'_type': 'url_transparent',
'url': data['url'],
'id': video_id,
'title': data['title'],
'thumbnail': data['image_url'],
'duration': data['duration'],
'description': data['description'],
'uploader': uploader,
'upload_date': upload_date,
}
else:
# topic
data = self._download_json(
'http://api.khanacademy.org/api/v1/topic/' + video_id,
video_id, 'Downloading topic info')
entries = [
{
'_type': 'url',
'url': c['url'],
'id': c['id'],
'title': c['title'],
}
for c in data['children'] if c['kind'] in ('Video', 'Topic')]
return {
'_type': 'playlist',
'id': video_id,
'title': data['title'],
'description': data['description'],
'entries': entries,
}
Loading…
Cancel
Save