|
|
@ -9,7 +9,7 @@ import xml.etree.ElementTree
|
|
|
|
|
|
|
|
|
|
|
|
from hashlib import sha1
|
|
|
|
from hashlib import sha1
|
|
|
|
from math import pow, sqrt, floor
|
|
|
|
from math import pow, sqrt, floor
|
|
|
|
from .subtitles import SubtitlesInfoExtractor
|
|
|
|
from .common import InfoExtractor
|
|
|
|
from ..compat import (
|
|
|
|
from ..compat import (
|
|
|
|
compat_urllib_parse,
|
|
|
|
compat_urllib_parse,
|
|
|
|
compat_urllib_request,
|
|
|
|
compat_urllib_request,
|
|
|
@ -25,10 +25,9 @@ from ..aes import (
|
|
|
|
aes_cbc_decrypt,
|
|
|
|
aes_cbc_decrypt,
|
|
|
|
inc,
|
|
|
|
inc,
|
|
|
|
)
|
|
|
|
)
|
|
|
|
from .common import InfoExtractor
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class CrunchyrollIE(SubtitlesInfoExtractor):
|
|
|
|
class CrunchyrollIE(InfoExtractor):
|
|
|
|
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
|
|
|
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
|
|
|
_TESTS = [{
|
|
|
|
_TESTS = [{
|
|
|
|
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
|
|
|
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
|
|
@ -187,6 +186,38 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|
|
|
|
|
|
|
|
|
|
|
return output
|
|
|
|
return output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_subtitles(self, video_id, webpage):
|
|
|
|
|
|
|
|
subtitles = {}
|
|
|
|
|
|
|
|
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
|
|
|
|
|
|
|
sub_page = self._download_webpage(
|
|
|
|
|
|
|
|
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
|
|
|
|
|
|
|
|
video_id, note='Downloading subtitles for ' + sub_name)
|
|
|
|
|
|
|
|
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
|
|
|
|
|
|
|
|
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
|
|
|
|
|
|
|
|
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
|
|
|
|
|
|
|
|
if not id or not iv or not data:
|
|
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
id = int(id)
|
|
|
|
|
|
|
|
iv = base64.b64decode(iv)
|
|
|
|
|
|
|
|
data = base64.b64decode(data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
|
|
|
|
|
|
|
|
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
|
|
|
|
|
|
|
if not lang_code:
|
|
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
sub_root = xml.etree.ElementTree.fromstring(subtitle)
|
|
|
|
|
|
|
|
subtitles[lang_code] = [
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
'ext': 'srt',
|
|
|
|
|
|
|
|
'data': self._convert_subtitles_to_srt(sub_root),
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
'ext': 'ass',
|
|
|
|
|
|
|
|
'data': self._convert_subtitles_to_ass(sub_root),
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
return subtitles
|
|
|
|
|
|
|
|
|
|
|
|
def _real_extract(self, url):
|
|
|
|
def _real_extract(self, url):
|
|
|
|
mobj = re.match(self._VALID_URL, url)
|
|
|
|
mobj = re.match(self._VALID_URL, url)
|
|
|
|
video_id = mobj.group('video_id')
|
|
|
|
video_id = mobj.group('video_id')
|
|
|
@ -249,34 +280,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|
|
|
'format_id': video_format,
|
|
|
|
'format_id': video_format,
|
|
|
|
})
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
subtitles = {}
|
|
|
|
subtitles = self.extract_subtitles(video_id, webpage)
|
|
|
|
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
|
|
|
|
|
|
|
|
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
|
|
|
|
|
|
|
sub_page = self._download_webpage(
|
|
|
|
|
|
|
|
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
|
|
|
|
|
|
|
|
video_id, note='Downloading subtitles for ' + sub_name)
|
|
|
|
|
|
|
|
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
|
|
|
|
|
|
|
|
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
|
|
|
|
|
|
|
|
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
|
|
|
|
|
|
|
|
if not id or not iv or not data:
|
|
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
id = int(id)
|
|
|
|
|
|
|
|
iv = base64.b64decode(iv)
|
|
|
|
|
|
|
|
data = base64.b64decode(data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
|
|
|
|
|
|
|
|
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
|
|
|
|
|
|
|
if not lang_code:
|
|
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
sub_root = xml.etree.ElementTree.fromstring(subtitle)
|
|
|
|
|
|
|
|
if sub_format == 'ass':
|
|
|
|
|
|
|
|
subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root)
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if self._downloader.params.get('listsubtitles', False):
|
|
|
|
|
|
|
|
self._list_available_subtitles(video_id, subtitles)
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
return {
|
|
|
|
'id': video_id,
|
|
|
|
'id': video_id,
|
|
|
|