Merge branch 'master' of https://github.com/blackjack4494/yt-dlc into fix-tmz

pull/106/head
Diego Fernando Rodríguez Varón 4 years ago
commit a2044d57ca

@ -82,7 +82,7 @@ jobs:
- name: Set up Python - name: Set up Python
uses: actions/setup-python@v2 uses: actions/setup-python@v2
with: with:
python-version: '3.x' python-version: '3.8'
- name: Install Requirements - name: Install Requirements
run: pip install pyinstaller run: pip install pyinstaller
- name: Bump version - name: Bump version
@ -109,14 +109,14 @@ jobs:
runs-on: windows-latest runs-on: windows-latest
needs: build_unix needs: [build_unix, build_windows]
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
- name: Set up Python 3.5.4 32-Bit - name: Set up Python 3.4.4 32-Bit
uses: actions/setup-python@v2 uses: actions/setup-python@v2
with: with:
python-version: '3.5.4' python-version: '3.4.4'
architecture: 'x86' architecture: 'x86'
- name: Install Requirements for 32 Bit - name: Install Requirements for 32 Bit
run: pip install pyinstaller==3.5 run: pip install pyinstaller==3.5
@ -146,10 +146,10 @@ jobs:
SHA2_UNIX: ${{ needs.build_unix.outputs.sha2_unix }} SHA2_UNIX: ${{ needs.build_unix.outputs.sha2_unix }}
YTDLC_VERSION: ${{ needs.build_unix.outputs.ytdlc_version }} YTDLC_VERSION: ${{ needs.build_unix.outputs.ytdlc_version }}
run: | run: |
echo "version:$YTDLC_VERSION" >> SHA2-256SUMS echo "version:${env:YTDLC_VERSION}" >> SHA2-256SUMS
echo "youtube-dlc.exe:$SHA2_WINDOWS" >> SHA2-256SUMS echo "youtube-dlc.exe:${env:SHA2_WINDOWS}" >> SHA2-256SUMS
echo "youtube-dlc32.exe:$SHA2_WINDOWS32" >> SHA2-256SUMS echo "youtube-dlc_x86.exe:${env:SHA2_WINDOWS32}" >> SHA2-256SUMS
echo "youtube-dlc:$SHA2_UNIX" >> SHA2-256SUMS echo "youtube-dlc:${env:SHA2_UNIX}" >> SHA2-256SUMS
- name: Upload 256SUMS file - name: Upload 256SUMS file
id: upload-sums id: upload-sums

@ -1,15 +1,15 @@
[![Build Status](https://travis-ci.com/blackjack4494/yt-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/yt-dlc) [![Build Status](https://travis-ci.com/blackjack4494/yt-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/yt-dlc)
[![PyPi](https://img.shields.io/pypi/v/youtube-dlc.svg)](https://pypi.org/project/youtube-dlc) [![PyPi](https://img.shields.io/pypi/v/youtube-dlc.svg)](https://pypi.org/project/youtube-dlc)
[![Downloads](https://pepy.tech/badge/youtube-dlc)](https://pepy.tech/project/youtube-dlc)
[![Gitter chat](https://img.shields.io/gitter/room/youtube-dlc/community)](https://gitter.im/youtube-dlc) [![Gitter chat](https://img.shields.io/gitter/room/youtube-dlc/community)](https://gitter.im/youtube-dlc)
[![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/blackjack4494/youtube-dlc/blob/master/LICENSE) [![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/blackjack4494/yt-dlc/blob/master/LICENSE)
youtube-dlc - download videos from youtube.com or other video platforms. youtube-dlc - download videos from youtube.com or other video platforms.
youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://web.archive.org/web/20201014194602/https://github.com/ytdl-org/youtube-dl/issues/26462) youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://web.archive.org/web/20201014194602/https://github.com/ytdl-org/youtube-dl/issues/26462)
- [INSTALLATION](#installation) - [INSTALLATION](#installation)
- [UPDATE](#update)
- [DESCRIPTION](#description) - [DESCRIPTION](#description)
- [OPTIONS](#options) - [OPTIONS](#options)
- [Network Options:](#network-options) - [Network Options:](#network-options)
@ -44,6 +44,10 @@ You may want to use `python3` instead of `python`
python -m pip install --upgrade youtube-dlc python -m pip install --upgrade youtube-dlc
If you want to install the current master branch
python -m pip install git+https://github.com/blackjack4494/yt-dlc
**UNIX** (Linux, macOS, etc.) **UNIX** (Linux, macOS, etc.)
Using wget: Using wget:

@ -66,7 +66,7 @@ setup(
description=DESCRIPTION, description=DESCRIPTION,
long_description=LONG_DESCRIPTION, long_description=LONG_DESCRIPTION,
# long_description_content_type="text/markdown", # long_description_content_type="text/markdown",
url="https://github.com/blackjack4494/youtube-dlc", url="https://github.com/blackjack4494/yt-dlc",
packages=find_packages(exclude=("youtube_dl","test",)), packages=find_packages(exclude=("youtube_dl","test",)),
#packages=[ #packages=[
# 'youtube_dlc', # 'youtube_dlc',

@ -364,8 +364,10 @@ class FileDownloader(object):
else '%.2f' % sleep_interval)) else '%.2f' % sleep_interval))
time.sleep(sleep_interval) time.sleep(sleep_interval)
else: else:
if self.params.get('sleep_interval_subtitles') > 0: sleep_interval_sub = 0
if type(self.params.get('sleep_interval_subtitles')) is int:
sleep_interval_sub = self.params.get('sleep_interval_subtitles') sleep_interval_sub = self.params.get('sleep_interval_subtitles')
if sleep_interval_sub > 0:
self.to_screen( self.to_screen(
'[download] Sleeping %s seconds...' % ( '[download] Sleeping %s seconds...' % (
sleep_interval_sub)) sleep_interval_sub))

@ -115,8 +115,10 @@ class CurlFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '--location', '-o', tmpfilename] cmd = [self.exe, '--location', '-o', tmpfilename]
for key, val in info_dict['http_headers'].items(): if info_dict.get('http_headers') is not None:
cmd += ['--header', '%s: %s' % (key, val)] for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0') cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
cmd += self._valueless_option('--silent', 'noprogress') cmd += self._valueless_option('--silent', 'noprogress')
cmd += self._valueless_option('--verbose', 'verbose') cmd += self._valueless_option('--verbose', 'verbose')
@ -150,8 +152,9 @@ class AxelFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-o', tmpfilename] cmd = [self.exe, '-o', tmpfilename]
for key, val in info_dict['http_headers'].items(): if info_dict.get('http_headers') is not None:
cmd += ['-H', '%s: %s' % (key, val)] for key, val in info_dict['http_headers'].items():
cmd += ['-H', '%s: %s' % (key, val)]
cmd += self._configuration_args() cmd += self._configuration_args()
cmd += ['--', info_dict['url']] cmd += ['--', info_dict['url']]
return cmd return cmd
@ -162,8 +165,9 @@ class WgetFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
for key, val in info_dict['http_headers'].items(): if info_dict.get('http_headers') is not None:
cmd += ['--header', '%s: %s' % (key, val)] for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._option('--limit-rate', 'ratelimit') cmd += self._option('--limit-rate', 'ratelimit')
retry = self._option('--tries', 'retries') retry = self._option('--tries', 'retries')
if len(retry) == 2: if len(retry) == 2:
@ -189,8 +193,9 @@ class Aria2cFD(ExternalFD):
if dn: if dn:
cmd += ['--dir', dn] cmd += ['--dir', dn]
cmd += ['--out', os.path.basename(tmpfilename)] cmd += ['--out', os.path.basename(tmpfilename)]
for key, val in info_dict['http_headers'].items(): if info_dict.get('http_headers') is not None:
cmd += ['--header', '%s: %s' % (key, val)] for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._option('--interface', 'source_address') cmd += self._option('--interface', 'source_address')
cmd += self._option('--all-proxy', 'proxy') cmd += self._option('--all-proxy', 'proxy')
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
@ -206,8 +211,10 @@ class HttpieFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']] cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
for key, val in info_dict['http_headers'].items():
cmd += ['%s:%s' % (key, val)] if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['%s:%s' % (key, val)]
return cmd return cmd
@ -253,7 +260,7 @@ class FFmpegFD(ExternalFD):
# if end_time: # if end_time:
# args += ['-t', compat_str(end_time - start_time)] # args += ['-t', compat_str(end_time - start_time)]
if info_dict['http_headers'] and re.match(r'^https?://', url): if info_dict.get('http_headers') is not None and re.match(r'^https?://', url):
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
headers = handle_youtubedl_headers(info_dict['http_headers']) headers = handle_youtubedl_headers(info_dict['http_headers'])

@ -82,7 +82,10 @@ class YoutubeLiveChatReplayFD(FragmentFD):
offset = int(replay_chat_item_action['videoOffsetTimeMsec']) offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
processed_fragment.extend( processed_fragment.extend(
json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n') json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation'] try:
continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']
except KeyError:
continuation_id = None
self._append_fragment(ctx, processed_fragment) self._append_fragment(ctx, processed_fragment)

@ -1175,6 +1175,7 @@ from .theweatherchannel import TheWeatherChannelIE
from .thisamericanlife import ThisAmericanLifeIE from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE from .thisav import ThisAVIE
from .thisoldhouse import ThisOldHouseIE from .thisoldhouse import ThisOldHouseIE
from .thisvid import ThisVidIE
from .threeqsdn import ThreeQSDNIE from .threeqsdn import ThreeQSDNIE
from .tiktok import TikTokIE from .tiktok import TikTokIE
from .tinypic import TinyPicIE from .tinypic import TinyPicIE
@ -1541,4 +1542,5 @@ from .zattoo import (
) )
from .zdf import ZDFIE, ZDFChannelIE from .zdf import ZDFIE, ZDFChannelIE
from .zingmp3 import ZingMp3IE from .zingmp3 import ZingMp3IE
from .zoom import ZoomIE
from .zype import ZypeIE from .zype import ZypeIE

@ -36,6 +36,9 @@ class LA7IE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
if not url.startswith('http'):
url = '%s//%s' % (self.http_scheme(), url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
player_data = self._search_regex( player_data = self._search_regex(

@ -12,6 +12,7 @@ from ..utils import (
parse_duration, parse_duration,
remove_end, remove_end,
try_get, try_get,
urljoin,
) )
@ -93,6 +94,14 @@ class MailRuIE(InfoExtractor):
{ {
'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html', 'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html',
'only_matching': True, 'only_matching': True,
},
{
'url': 'https://my.mail.ru/mail/cloud-strife/video/embed/Games/2009',
'only_matching': True,
},
{
'url': 'https://videoapi.my.mail.ru/videos/embed/mail/cloud-strife/Games/2009.html',
'only_matching': True,
} }
] ]
@ -110,7 +119,7 @@ class MailRuIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
page_config = self._parse_json(self._search_regex([ page_config = self._parse_json(self._search_regex([
r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>', r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
r'(?s)"video":\s*(\{.+?\}),'], r'(?s)"video":\s*({.+?}),'],
webpage, 'page config', default='{}'), video_id, fatal=False) webpage, 'page config', default='{}'), video_id, fatal=False)
if page_config: if page_config:
meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') or page_config.get('metadataUrl') meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') or page_config.get('metadataUrl')
@ -121,7 +130,7 @@ class MailRuIE(InfoExtractor):
# fix meta_url if missing the host address # fix meta_url if missing the host address
if re.match(r'^\/\+\/', meta_url): if re.match(r'^\/\+\/', meta_url):
meta_url = 'https://my.mail.ru' + meta_url meta_url = urljoin('https://my.mail.ru', meta_url)
if meta_url: if meta_url:
video_data = self._download_json( video_data = self._download_json(

@ -13,6 +13,7 @@ class SkyItaliaBaseIE(InfoExtractor):
'high': [854, 480], 'high': [854, 480],
'hd': [1280, 720] 'hd': [1280, 720]
} }
_GEO_BYPASS = False
def _extract_video_id(self, url): def _extract_video_id(self, url):
webpage = self._download_webpage(url, 'skyitalia') webpage = self._download_webpage(url, 'skyitalia')
@ -43,6 +44,9 @@ class SkyItaliaBaseIE(InfoExtractor):
'height': r[1] 'height': r[1]
}) })
if not formats and video_data.get('geob') == 1:
self.raise_geo_restricted(countries=['IT'])
self._sort_formats(formats) self._sort_formats(formats)
title = video_data.get('title') title = video_data.get('title')
thumb = video_data.get('thumb') thumb = video_data.get('thumb')

@ -0,0 +1,97 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class ThisVidIE(InfoExtractor):
    """Information extractor for thisvid.com video and embed pages.

    The page exposes an obfuscated ``video_url`` together with a
    ``license_code``; the playable URL is recovered from those two values
    with the module-level ``getrealurl`` helper.
    """
    _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+/?)'
    _TESTS = [{
        'url': 'https://thisvid.com/videos/french-boy-pantsed/',
        'md5': '3397979512c682f6b85b3b04989df224',
        'info_dict': {
            'id': '2400174',
            'ext': 'mp4',
            'title': 'French Boy Pantsed',
            'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
            'age_limit': 18,
        }
    }, {
        'url': 'https://thisvid.com/embed/2400174/',
        'md5': '3397979512c682f6b85b3b04989df224',
        'info_dict': {
            'id': '2400174',
            'ext': 'mp4',
            'title': 'French Boy Pantsed',
            'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
            'age_limit': 18,
        }
    }]

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video.

        Returns a dict with the keys ``id``, ``display_id``, ``title``,
        ``url``, ``thumbnail`` and ``age_limit``. ``display_id`` and
        ``thumbnail`` are optional and may be None when they cannot be
        found on the page.
        """
        main_id = self._match_id(url)
        webpage = self._download_webpage(url, main_id)

        # URL decryptor was reversed from version 4.0.4, later verified working with 5.2.0 and may change in the future.
        kvs_version = self._html_search_regex(r'<script [^>]+?src="https://thisvid\.com/player/kt_player\.js\?v=(\d+(\.\d+)+)">', webpage, 'kvs_version', fatal=False)
        # The lookup is non-fatal, so the version may be None; only warn
        # when a version was actually found and it is not a 5.x release.
        if kvs_version and not kvs_version.startswith("5."):
            self.report_warning("Major version change (" + kvs_version + ") in player engine--Download may fail.")

        title = self._html_search_regex(r'<title>(?:Video: )?(.+?)(?: - (?:\w+ porn at )?ThisVid(?:.com| tube))?</title>', webpage, 'title')

        # video_id, video_url and license_code from the 'flashvars' JSON object:
        video_id = self._html_search_regex(r"video_id: '([0-9]+)',", webpage, 'video_id')
        video_url = self._html_search_regex(r"video_url: '(function/0/.+?)',", webpage, 'video_url')
        license_code = self._html_search_regex(r"license_code: '([0-9$]{16})',", webpage, 'license_code')

        thumbnail = self._html_search_regex(r"preview_url: '((?:https?:)?//media.thisvid.com/.+?.jpg)',", webpage, 'thumbnail', fatal=False)
        # The thumbnail is optional (non-fatal lookup), so guard against
        # None before completing a protocol-relative URL.
        if thumbnail and thumbnail.startswith("//"):
            thumbnail = "https:" + thumbnail

        if re.match(self._VALID_URL, url).group('type') == "videos":
            display_id = main_id
        else:
            # Embed URLs only carry the numeric id; take the slug from the
            # canonical link. _VALID_URL has two named groups, so the 'id'
            # group must be requested explicitly, otherwise the first
            # matching group ('type') would be returned.
            display_id = self._search_regex(
                r'<link rel="canonical" href="' + self._VALID_URL + r'">',
                webpage, 'display_id', fatal=False, group='id')
        # The 'id' group of _VALID_URL also captures an optional trailing
        # slash, which does not belong in a display id.
        if display_id:
            display_id = display_id.rstrip('/')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'url': getrealurl(video_url, license_code),
            'thumbnail': thumbnail,
            'age_limit': 18,
        }
def getrealurl(video_url, license_code):
    """Return the playable URL hidden inside an obfuscated *video_url*.

    The first two '/'-separated components of *video_url* are dropped.
    Of the remaining components, the first 32 characters of the one at
    index 5 are unscrambled by a sequence of character swaps whose
    targets are derived from the token that getlicensetoken() computes
    for *license_code*. The components are then joined back with '/'.
    """
    urlparts = video_url.split('/')[2:]
    token = getlicensetoken(license_code)
    scrambled = list(urlparts[5][:32])
    # Walk from the last position to the first; each position is swapped
    # with the one selected by the sum of the token digits from that
    # position onwards.
    for src in reversed(range(len(scrambled))):
        dest = (src + sum(int(digit) for digit in token[src:])) % 32
        scrambled[src], scrambled[dest] = scrambled[dest], scrambled[src]
    urlparts[5] = "".join(scrambled) + urlparts[5][32:]
    return "/".join(urlparts)
def getlicensetoken(license):
    """Derive the digit string used to unscramble URLs from *license*.

    *license* is the raw license code; '$' characters are removed and
    every '0' is replaced by '1' before two overlapping halves of the
    result are read as integers. Four times their absolute difference
    supplies one key digit per output group; each group is made of four
    consecutive digits of the raw *license* (starting one character
    further in for every group), each shifted by the key digit modulo 10.

    Returns the concatenated digits as a string.
    """
    cleaned = license.replace("$", "").replace("0", "1")
    center = len(cleaned) // 2
    front = int(cleaned[:center + 1])
    back = int(cleaned[center:])
    key = str(4 * abs(front - back))
    digits = []
    for pos in range(center + 1):
        for step in range(1, 5):
            # Index the raw license (not the cleaned copy): pos + step is
            # always >= 1, so index 0 is never read.
            digits.append(str((int(license[pos + step]) + int(key[pos])) % 10))
    return "".join(digits)

@ -308,17 +308,26 @@ class VikiIE(VikiBaseIE):
'url': thumbnail.get('url'), 'url': thumbnail.get('url'),
}) })
new_video = self._download_json(
'https://www.viki.com/api/videos/%s' % video_id, video_id,
'Downloading new video JSON to get subtitles', headers={'x-viki-app-ver': '2.2.5.1428709186'}, expected_status=[200, 400, 404])
subtitles = {} subtitles = {}
for sub in new_video.get('streamSubtitles').get('dash'): try:
subtitles[sub.get('srclang')] = [{ # New way to fetch subtitles
'ext': 'vtt', new_video = self._download_json(
'url': sub.get('src'), 'https://www.viki.com/api/videos/%s' % video_id, video_id,
'completion': sub.get('percentage'), 'Downloading new video JSON to get subtitles', headers={'x-viki-app-ver': '2.2.5.1428709186'}, expected_status=[200, 400, 404])
}] for sub in new_video.get('streamSubtitles').get('dash'):
subtitles[sub.get('srclang')] = [{
'ext': 'vtt',
'url': sub.get('src'),
'completion': sub.get('percentage'),
}]
except AttributeError:
# fall-back to the old way if there isn't a streamSubtitles attribute
for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
subtitles[subtitle_lang] = [{
'ext': subtitles_format,
'url': self._prepare_call(
'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
} for subtitles_format in ('srt', 'vtt')]
result = { result = {
'id': video_id, 'id': video_id,

@ -11,7 +11,6 @@ from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
merge_dicts, merge_dicts,
remove_start,
try_get, try_get,
urlencode_postdata, urlencode_postdata,
) )
@ -19,10 +18,10 @@ from ..utils import (
class VLiveIE(NaverBaseIE): class VLiveIE(NaverBaseIE):
IE_NAME = 'vlive' IE_NAME = 'vlive'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|post)/(?P<id>(?:\d-)?[0-9]+)'
_NETRC_MACHINE = 'vlive' _NETRC_MACHINE = 'vlive'
_TESTS = [{ _TESTS = [{
'url': 'http://www.vlive.tv/video/1326', 'url': 'https://www.vlive.tv/video/1326',
'md5': 'cc7314812855ce56de70a06a27314983', 'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': { 'info_dict': {
'id': '1326', 'id': '1326',
@ -32,8 +31,21 @@ class VLiveIE(NaverBaseIE):
'view_count': int, 'view_count': int,
'uploader_id': 'muploader_a', 'uploader_id': 'muploader_a',
}, },
}, { },
'url': 'http://www.vlive.tv/video/16937', {
'url': 'https://vlive.tv/post/1-18244258',
'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': {
'id': '1326',
'ext': 'mp4',
'title': "[V LIVE] Girl's Day's Broadcast",
'creator': "Girl's Day",
'view_count': int,
'uploader_id': 'muploader_a',
},
},
{
'url': 'https://www.vlive.tv/video/16937',
'info_dict': { 'info_dict': {
'id': '16937', 'id': '16937',
'ext': 'mp4', 'ext': 'mp4',
@ -96,50 +108,69 @@ class VLiveIE(NaverBaseIE):
raise ExtractorError('Unable to log in', expected=True) raise ExtractorError('Unable to log in', expected=True)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) # url may match on a post or a video url with a post_id potentially matching a video_id
working_id = self._match_id(url)
webpage = self._download_webpage( webpage = self._download_webpage(url, working_id)
'https://www.vlive.tv/video/%s' % video_id, video_id)
PARAMS_RE = r'window\.__PRELOADED_STATE__\s*=\s*({.*});?\s*</script>'
VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)' PARAMS_FIELD = 'params'
VIDEO_PARAMS_FIELD = 'video params'
params = self._search_regex(
params = self._parse_json(self._search_regex( PARAMS_RE, webpage, PARAMS_FIELD, default='', flags=re.DOTALL)
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id, params = self._parse_json(params, working_id, fatal=False)
transform_source=lambda s: '[' + s + ']', fatal=False)
video_params = try_get(params, lambda x: x["postDetail"]["post"]["officialVideo"], dict)
if not params or len(params) < 7:
params = self._search_regex( if video_params is None:
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD) error = try_get(params, lambda x: x["postDetail"]["error"], dict)
params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)] error_data = try_get(error, lambda x: x["data"], dict)
error_video = try_get(error_data, lambda x: x["officialVideo"], dict)
status, long_video_id, key = params[2], params[5], params[6] error_msg = try_get(error, lambda x: x["message"], compat_str)
status = remove_start(status, 'PRODUCT_') product_type = try_get(error_data,
[lambda x: x["officialVideo"]["productType"],
if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'): lambda x: x["board"]["boardType"]],
return self._live(video_id, webpage) compat_str)
elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
return self._replay(video_id, webpage, long_video_id, key) if error_video is not None:
if product_type in ('VLIVE_PLUS', 'VLIVE+'):
if status == 'LIVE_END': self.raise_login_required('This video is only available with V LIVE+.')
raise ExtractorError('Uploading for replay. Please wait...', elif error_msg is not None:
expected=True) raise ExtractorError('V LIVE reported the following error: %s' % error_msg)
elif status == 'COMING_SOON': else:
raise ExtractorError('Coming soon!', expected=True) raise ExtractorError('Failed to extract video parameters.')
elif status == 'CANCELED': elif 'post' in url:
raise ExtractorError('We are sorry, ' raise ExtractorError('Url does not appear to be a video post.', expected=True)
'but the live broadcast has been canceled.', else:
expected=True) raise ExtractorError('Failed to extract video parameters.')
elif status == 'ONLY_APP':
raise ExtractorError('Unsupported video type', expected=True) video_id = working_id if 'video' in url else str(video_params["videoSeq"])
video_type = video_params["type"]
if video_type in ('VOD'):
encoding_status = video_params["encodingStatus"]
if encoding_status == 'COMPLETE':
return self._replay(video_id, webpage, params, video_params)
else:
raise ExtractorError('VOD encoding not yet complete. Please try again later.',
expected=True)
elif video_type in ('LIVE'):
video_status = video_params["status"]
if video_status in ('RESERVED'):
raise ExtractorError('Coming soon!', expected=True)
elif video_status in ('ENDED', 'END'):
raise ExtractorError('Uploading for replay. Please wait...', expected=True)
else:
return self._live(video_id, webpage, params)
else: else:
raise ExtractorError('Unknown status %s' % status) raise ExtractorError('Unknown video type %s' % video_type)
def _get_common_fields(self, webpage): def _get_common_fields(self, webpage, params):
title = self._og_search_title(webpage) title = self._og_search_title(webpage)
creator = self._html_search_regex( description = self._html_search_meta(
r'<div[^>]+class="info_area"[^>]*>\s*(?:<em[^>]*>.*?</em\s*>\s*)?<a\s+[^>]*>([^<]+)', ['og:description', 'description', 'twitter:description'],
webpage, 'creator', fatal=False) webpage, 'description', default=None)
creator = (try_get(params, lambda x: x["channel"]["channel"]["channelName"], compat_str)
or self._search_regex(r'on (.*) channel', description or '', 'creator', fatal=False))
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)
return { return {
'title': title, 'title': title,
@ -147,24 +178,21 @@ class VLiveIE(NaverBaseIE):
'thumbnail': thumbnail, 'thumbnail': thumbnail,
} }
def _live(self, video_id, webpage): def _live(self, video_id, webpage, params):
init_page = self._download_init_page(video_id) LIVE_INFO_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/old/v3/live/%s/playInfo' % video_id
play_info = self._download_json(LIVE_INFO_ENDPOINT, video_id,
headers={"referer": "https://www.vlive.tv"})
live_params = self._search_regex( streams = try_get(play_info, lambda x: x["result"]["streamList"], list) or []
r'"liveStreamInfo"\s*:\s*(".*"),',
init_page, 'live stream info')
live_params = self._parse_json(live_params, video_id)
live_params = self._parse_json(live_params, video_id)
formats = [] formats = []
for vid in live_params.get('resolutions', []): for stream in streams:
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
vid['cdnUrl'], video_id, 'mp4', stream['serviceUrl'], video_id, 'mp4',
m3u8_id=vid.get('name'),
fatal=False, live=True)) fatal=False, live=True))
self._sort_formats(formats) self._sort_formats(formats)
info = self._get_common_fields(webpage) info = self._get_common_fields(webpage, params)
info.update({ info.update({
'title': self._live_title(info['title']), 'title': self._live_title(info['title']),
'id': video_id, 'id': video_id,
@ -173,44 +201,37 @@ class VLiveIE(NaverBaseIE):
}) })
return info return info
def _replay(self, video_id, webpage, long_video_id, key): def _replay(self, video_id, webpage, params, video_params):
if '' in (long_video_id, key): long_video_id = video_params["vodId"]
init_page = self._download_init_page(video_id)
video_info = self._parse_json(self._search_regex( VOD_KEY_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/%s/inkey' % video_id
(r'(?s)oVideoStatus\s*=\s*({.+?})\s*</script', key_json = self._download_json(VOD_KEY_ENDPOINT, video_id,
r'(?s)oVideoStatus\s*=\s*({.+})'), init_page, 'video info'), headers={"referer": "https://www.vlive.tv"})
video_id) key = key_json["inkey"]
if video_info.get('status') == 'NEED_CHANNEL_PLUS':
self.raise_login_required(
'This video is only available for CH+ subscribers')
long_video_id, key = video_info['vid'], video_info['inkey']
return merge_dicts( return merge_dicts(
self._get_common_fields(webpage), self._get_common_fields(webpage, params),
self._extract_video_info(video_id, long_video_id, key)) self._extract_video_info(video_id, long_video_id, key))
def _download_init_page(self, video_id):
return self._download_webpage(
'https://www.vlive.tv/video/init/view',
video_id, note='Downloading live webpage',
data=urlencode_postdata({'videoSeq': video_id}),
headers={
'Referer': 'https://www.vlive.tv/video/%s' % video_id,
'Content-Type': 'application/x-www-form-urlencoded'
})
class VLiveChannelIE(InfoExtractor): class VLiveChannelIE(InfoExtractor):
IE_NAME = 'vlive:channel' IE_NAME = 'vlive:channel'
_VALID_URL = r'https?://channels\.vlive\.tv/(?P<id>[0-9A-Z]+)' _VALID_URL = r'https?://(?:(?:www|m)\.)?(?:channels\.vlive\.tv/|vlive\.tv/channels?/)(?P<id>[0-9A-Z]+)'
_TEST = { _TESTS = [{
'url': 'http://channels.vlive.tv/FCD4B', 'url': 'https://channels.vlive.tv/FCD4B',
'info_dict': {
'id': 'FCD4B',
'title': 'MAMAMOO',
},
'playlist_mincount': 110
}, {
'url': 'https://www.vlive.tv/channel/FCD4B',
'info_dict': { 'info_dict': {
'id': 'FCD4B', 'id': 'FCD4B',
'title': 'MAMAMOO', 'title': 'MAMAMOO',
}, },
'playlist_mincount': 110 'playlist_mincount': 110
} }]
_APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b' _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
def _real_extract(self, url): def _real_extract(self, url):

@ -36,6 +36,7 @@ from ..utils import (
get_element_by_attribute, get_element_by_attribute,
get_element_by_id, get_element_by_id,
int_or_none, int_or_none,
js_to_json,
mimetype2ext, mimetype2ext,
orderedSet, orderedSet,
parse_codecs, parse_codecs,
@ -70,6 +71,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
_LOGIN_REQUIRED = False _LOGIN_REQUIRED = False
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}' _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
_INITIAL_DATA_RE = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
_YTCFG_DATA_RE = r"ytcfg.set\(({.*?})\)"
_YOUTUBE_CLIENT_HEADERS = { _YOUTUBE_CLIENT_HEADERS = {
'x-youtube-client-name': '1', 'x-youtube-client-name': '1',
@ -274,11 +277,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _download_webpage_handle(self, *args, **kwargs): def _download_webpage_handle(self, *args, **kwargs):
query = kwargs.get('query', {}).copy() query = kwargs.get('query', {}).copy()
query['disable_polymer'] = 'true'
kwargs['query'] = query kwargs['query'] = query
return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle( return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
*args, **compat_kwargs(kwargs)) *args, **compat_kwargs(kwargs))
def _get_yt_initial_data(self, video_id, webpage):
config = self._search_regex(
(r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
webpage, 'ytInitialData', default=None)
if config:
return self._parse_json(
uppercase_escape(config), video_id, fatal=False)
def _real_initialize(self): def _real_initialize(self):
if self._downloader is None: if self._downloader is None:
return return
@ -288,15 +299,61 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor): class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
# Extract entries from page with "Load more" button
def _entries(self, page, playlist_id): def _find_entries_in_json(self, extracted):
more_widget_html = content_html = page entries = []
for page_num in itertools.count(1): c = {}
for entry in self._process_page(content_html):
def _real_find(obj):
if obj is None or isinstance(obj, str):
return
if type(obj) is list:
for elem in obj:
_real_find(elem)
if type(obj) is dict:
if self._is_entry(obj):
entries.append(obj)
return
if 'continuationCommand' in obj:
c['continuation'] = obj
return
for _, o in obj.items():
_real_find(o)
_real_find(extracted)
return entries, try_get(c, lambda x: x["continuation"])
def _entries(self, page, playlist_id, max_pages=None):
seen = []
yt_conf = {}
for m in re.finditer(self._YTCFG_DATA_RE, page):
parsed = self._parse_json(m.group(1), playlist_id,
transform_source=js_to_json, fatal=False)
if parsed:
yt_conf.update(parsed)
data_json = self._parse_json(self._search_regex(self._INITIAL_DATA_RE, page, 'ytInitialData'), None)
for page_num in range(1, max_pages + 1) if max_pages is not None else itertools.count(1):
entries, continuation = self._find_entries_in_json(data_json)
processed = self._process_entries(entries, seen)
if not processed:
break
for entry in processed:
yield entry yield entry
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html) if not continuation or not yt_conf:
if not mobj: break
continuation_token = try_get(continuation, lambda x: x['continuationCommand']['token'])
continuation_url = try_get(continuation, lambda x: x['commandMetadata']['webCommandMetadata']['apiUrl'])
if not continuation_token or not continuation_url:
break break
count = 0 count = 0
@ -305,12 +362,23 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
try: try:
# Downloading page may result in intermittent 5xx HTTP error # Downloading page may result in intermittent 5xx HTTP error
# that is usually worked around with a retry # that is usually worked around with a retry
more = self._download_json( data_json = self._download_json(
'https://www.youtube.com/%s' % mobj.group('more'), playlist_id, 'https://www.youtube.com%s' % continuation_url,
'Downloading page #%s%s' playlist_id,
% (page_num, ' (retry #%d)' % count if count else ''), 'Downloading continuation page #%s%s' % (page_num, ' (retry #%d)' % count if count else ''),
transform_source=uppercase_escape, transform_source=uppercase_escape,
headers=self._YOUTUBE_CLIENT_HEADERS) query={
'key': try_get(yt_conf, lambda x: x['INNERTUBE_API_KEY'])
},
data=str(json.dumps({
'context': try_get(yt_conf, lambda x: x['INNERTUBE_CONTEXT']),
'continuation': continuation_token
})).encode(encoding='UTF-8', errors='strict'),
headers={
'Content-Type': 'application/json'
}
)
break break
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503): if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
@ -319,31 +387,30 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
continue continue
raise raise
content_html = more['content_html'] def _extract_title(self, renderer):
if not content_html.strip(): title = try_get(renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
# Some webpages show a "Load more" button but they don't if title:
# have more videos return title
break return try_get(renderer, lambda x: x['title']['simpleText'], compat_str)
more_widget_html = more['load_more_widget_html']
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
def _process_page(self, content): def _is_entry(self, obj):
for video_id, video_title in self.extract_videos_from_page(content): return 'videoId' in obj
yield self.url_result(video_id, 'Youtube', video_id, video_title)
def _process_entries(self, entries, seen):
ids_in_page = []
titles_in_page = []
for renderer in entries:
video_id = try_get(renderer, lambda x: x['videoId'])
video_title = self._extract_title(renderer)
def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page): if video_id is None or video_title is None:
for mobj in re.finditer(video_re, page): # we do not have a videoRenderer or title extraction broke
# The link with index 0 is not the first video of the playlist (not sure if still actual)
if 'index' in mobj.groupdict() and mobj.group('id') == '0':
continue continue
video_id = mobj.group('id')
video_title = unescapeHTML( video_title = video_title.strip()
mobj.group('title')) if 'title' in mobj.groupdict() else None
if video_title:
video_title = video_title.strip()
if video_title == '► Play all':
video_title = None
try: try:
idx = ids_in_page.index(video_id) idx = ids_in_page.index(video_id)
if video_title and not titles_in_page[idx]: if video_title and not titles_in_page[idx]:
@ -352,19 +419,17 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
ids_in_page.append(video_id) ids_in_page.append(video_id)
titles_in_page.append(video_title) titles_in_page.append(video_title)
def extract_videos_from_page(self, page): for video_id, video_title in zip(ids_in_page, titles_in_page):
ids_in_page = [] yield self.url_result(video_id, 'Youtube', video_id, video_title)
titles_in_page = []
self.extract_videos_from_page_impl(
self._VIDEO_RE, page, ids_in_page, titles_in_page)
return zip(ids_in_page, titles_in_page)
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
def _process_page(self, content): def _is_entry(self, obj):
for playlist_id in orderedSet(re.findall( return 'playlistId' in obj
r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
content)): def _process_entries(self, entries, seen):
for playlist_id in orderedSet(try_get(r, lambda x: x['playlistId']) for r in entries):
yield self.url_result( yield self.url_result(
'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist') 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
@ -1390,6 +1455,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# https://github.com/ytdl-org/youtube-dl/pull/7599) # https://github.com/ytdl-org/youtube-dl/pull/7599)
r';ytplayer\.config\s*=\s*({.+?});ytplayer', r';ytplayer\.config\s*=\s*({.+?});ytplayer',
r';ytplayer\.config\s*=\s*({.+?});', r';ytplayer\.config\s*=\s*({.+?});',
r'ytInitialPlayerResponse\s*=\s*({.+?});var meta'
) )
config = self._search_regex( config = self._search_regex(
patterns, webpage, 'ytplayer.config', default=None) patterns, webpage, 'ytplayer.config', default=None)
@ -1397,15 +1463,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return self._parse_json( return self._parse_json(
uppercase_escape(config), video_id, fatal=False) uppercase_escape(config), video_id, fatal=False)
def _get_yt_initial_data(self, video_id, webpage):
config = self._search_regex(
(r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
webpage, 'ytInitialData', default=None)
if config:
return self._parse_json(
uppercase_escape(config), video_id, fatal=False)
def _get_music_metadata_from_yt_initial(self, yt_initial): def _get_music_metadata_from_yt_initial(self, yt_initial):
music_metadata = [] music_metadata = []
key_map = { key_map = {
@ -1454,10 +1511,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._downloader.report_warning(err_msg) self._downloader.report_warning(err_msg)
return {} return {}
try: try:
args = player_config['args'] if "args" in player_config and "ttsurl" in player_config["args"]:
caption_url = args.get('ttsurl') args = player_config['args']
if caption_url: caption_url = args['ttsurl']
timestamp = args['timestamp'] timestamp = args['timestamp']
# We get the available subtitles # We get the available subtitles
list_params = compat_urllib_parse_urlencode({ list_params = compat_urllib_parse_urlencode({
'type': 'list', 'type': 'list',
@ -1513,40 +1571,50 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return captions return captions
# New captions format as of 22.06.2017 # New captions format as of 22.06.2017
player_response = args.get('player_response') if "args" in player_config:
if player_response and isinstance(player_response, compat_str): player_response = player_config["args"].get('player_response')
player_response = self._parse_json( else:
player_response, video_id, fatal=False) # New player system (ytInitialPlayerResponse) as of October 2020
if player_response: player_response = player_config
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
caption_tracks = renderer['captionTracks'] if player_response:
for caption_track in caption_tracks: if isinstance(player_response, compat_str):
if 'kind' not in caption_track: player_response = self._parse_json(
# not an automatic transcription player_response, video_id, fatal=False)
continue
base_url = caption_track['baseUrl'] renderer = player_response['captions']['playerCaptionsTracklistRenderer']
sub_lang_list = [] caption_tracks = renderer['captionTracks']
for lang in renderer['translationLanguages']: for caption_track in caption_tracks:
lang_code = lang.get('languageCode') if 'kind' not in caption_track:
if lang_code: # not an automatic transcription
sub_lang_list.append(lang_code) continue
return make_captions(base_url, sub_lang_list) base_url = caption_track['baseUrl']
sub_lang_list = []
self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id) for lang in renderer['translationLanguages']:
return {} lang_code = lang.get('languageCode')
# Some videos don't provide ttsurl but rather caption_tracks and if lang_code:
# caption_translation_languages (e.g. 20LmZk1hakA) sub_lang_list.append(lang_code)
# Does not used anymore as of 22.06.2017 return make_captions(base_url, sub_lang_list)
caption_tracks = args['caption_tracks']
caption_translation_languages = args['caption_translation_languages'] self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0] return {}
sub_lang_list = []
for lang in caption_translation_languages.split(','): if "args" in player_config:
lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang)) args = player_config["args"]
sub_lang = lang_qs.get('lc', [None])[0]
if sub_lang: # Some videos don't provide ttsurl but rather caption_tracks and
sub_lang_list.append(sub_lang) # caption_translation_languages (e.g. 20LmZk1hakA)
return make_captions(caption_url, sub_lang_list) # Does not used anymore as of 22.06.2017
caption_tracks = args['caption_tracks']
caption_translation_languages = args['caption_translation_languages']
caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
sub_lang_list = []
for lang in caption_translation_languages.split(','):
lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
sub_lang = lang_qs.get('lc', [None])[0]
if sub_lang:
sub_lang_list.append(sub_lang)
return make_captions(caption_url, sub_lang_list)
# An extractor error can be raise by the download process if there are # An extractor error can be raise by the download process if there are
# no automatic captions but there are subtitles # no automatic captions but there are subtitles
except (KeyError, IndexError, ExtractorError): except (KeyError, IndexError, ExtractorError):
@ -1822,21 +1890,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Try looking directly into the video webpage # Try looking directly into the video webpage
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage) ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
if ytplayer_config: if ytplayer_config:
args = ytplayer_config['args'] args = ytplayer_config.get("args")
if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'): if args is not None:
# Convert to the same format returned by compat_parse_qs if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
video_info = dict((k, [v]) for k, v in args.items()) # Convert to the same format returned by compat_parse_qs
add_dash_mpd(video_info) video_info = dict((k, [v]) for k, v in args.items())
# Rental video is not rented but preview is available (e.g. add_dash_mpd(video_info)
# https://www.youtube.com/watch?v=yYr8q0y5Jfg, # Rental video is not rented but preview is available (e.g.
# https://github.com/ytdl-org/youtube-dl/issues/10532) # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
if not video_info and args.get('ypc_vid'): # https://github.com/ytdl-org/youtube-dl/issues/10532)
return self.url_result( if not video_info and args.get('ypc_vid'):
args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid']) return self.url_result(
if args.get('livestream') == '1' or args.get('live_playback') == 1: args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
is_live = True if args.get('livestream') == '1' or args.get('live_playback') == 1:
if not player_response: is_live = True
player_response = extract_player_response(args.get('player_response'), video_id) if not player_response:
player_response = extract_player_response(args.get('player_response'), video_id)
elif not player_response:
player_response = ytplayer_config
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
add_dash_mpd_pr(player_response) add_dash_mpd_pr(player_response)
else: else:
@ -1866,8 +1937,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
age_gate = False age_gate = False
# Try looking directly into the video webpage # Try looking directly into the video webpage
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage) ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
if ytplayer_config: args = ytplayer_config.get("args")
args = ytplayer_config['args'] if args is not None:
if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'): if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
# Convert to the same format returned by compat_parse_qs # Convert to the same format returned by compat_parse_qs
video_info = dict((k, [v]) for k, v in args.items()) video_info = dict((k, [v]) for k, v in args.items())
@ -1882,6 +1953,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
is_live = True is_live = True
if not player_response: if not player_response:
player_response = extract_player_response(args.get('player_response'), video_id) player_response = extract_player_response(args.get('player_response'), video_id)
elif not player_response:
player_response = ytplayer_config
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
add_dash_mpd_pr(player_response) add_dash_mpd_pr(player_response)
@ -2614,6 +2687,12 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
_VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?' _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
_VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})' _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
IE_NAME = 'youtube:playlist' IE_NAME = 'youtube:playlist'
_YTM_PLAYLIST_PREFIX = 'RDCLAK5uy_'
_YTM_CHANNEL_INFO = {
'uploader': 'Youtube Music',
'uploader_id': 'music', # or "UC-9-kyTW8ZkZNDHQJ6FgpwQ"
'uploader_url': 'https://www.youtube.com/music'
}
_TESTS = [{ _TESTS = [{
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
'info_dict': { 'info_dict': {
@ -2811,10 +2890,21 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
return zip(ids_in_page, titles_in_page) return zip(ids_in_page, titles_in_page)
def _extract_mix_ids_from_yt_initial(self, yt_initial):
ids = []
playlist_contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['contents'], list)
if playlist_contents:
for item in playlist_contents:
videoId = try_get(item, lambda x: x['playlistPanelVideoRenderer']['videoId'], compat_str)
if videoId:
ids.append(videoId)
return ids
def _extract_mix(self, playlist_id): def _extract_mix(self, playlist_id):
# The mixes are generated from a single video # The mixes are generated from a single video
# the id of the playlist is just 'RD' + video_id # the id of the playlist is just 'RD' + video_id
ids = [] ids = []
yt_initial = None
last_id = playlist_id[-11:] last_id = playlist_id[-11:]
for n in itertools.count(1): for n in itertools.count(1):
url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id) url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
@ -2824,6 +2914,13 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
r'''(?xs)data-video-username=".*?".*? r'''(?xs)data-video-username=".*?".*?
href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id), href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
webpage)) webpage))
# if no ids in html of page, try using embedded json
if (len(new_ids) == 0):
yt_initial = self._get_yt_initial_data(playlist_id, webpage)
if yt_initial:
new_ids = self._extract_mix_ids_from_yt_initial(yt_initial)
# Fetch new pages until all the videos are repeated, it seems that # Fetch new pages until all the videos are repeated, it seems that
# there are always 51 unique videos. # there are always 51 unique videos.
new_ids = [_id for _id in new_ids if _id not in ids] new_ids = [_id for _id in new_ids if _id not in ids]
@ -2841,6 +2938,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
or search_title('title')) or search_title('title'))
title = clean_html(title_span) title = clean_html(title_span)
if not title:
title = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['title'], compat_str)
return self.playlist_result(url_results, playlist_id, title) return self.playlist_result(url_results, playlist_id, title)
def _extract_playlist(self, playlist_id): def _extract_playlist(self, playlist_id):
@ -2902,6 +3002,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'uploader_url': uploader_url, 'uploader_url': uploader_url,
}) })
if playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
playlist.update(self._YTM_CHANNEL_INFO)
return has_videos, playlist return has_videos, playlist
@ -2932,8 +3034,10 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
return video return video
if playlist_id.startswith(('RD', 'UL', 'PU')): if playlist_id.startswith(('RD', 'UL', 'PU')):
# Mixes require a custom extraction process if not playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
return self._extract_mix(playlist_id) # Mixes require a custom extraction process,
# Youtube Music playlists act like normal playlists (with randomized order)
return self._extract_mix(playlist_id)
has_videos, playlist = self._extract_playlist(playlist_id) has_videos, playlist = self._extract_playlist(playlist_id)
if has_videos or not video_id: if has_videos or not video_id:
@ -3192,11 +3296,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
}] }]
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor): class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistBaseInfoExtractor):
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
IE_DESC = 'YouTube.com searches' IE_DESC = 'YouTube.com searches'
# there doesn't appear to be a real limit, for example if you search for # there doesn't appear to be a real limit, for example if you search for
# 'python' you get more than 8.000.000 results # 'python' you get more than 8.000.000 results
@ -3293,11 +3393,10 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
_SEARCH_PARAMS = 'CAI%3D' _SEARCH_PARAMS = 'CAI%3D'
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor): class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
IE_DESC = 'YouTube.com search URLs' IE_DESC = 'YouTube.com search URLs'
IE_NAME = 'youtube:search_url' IE_NAME = 'youtube:search_url'
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)' _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
_SEARCH_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
_TESTS = [{ _TESTS = [{
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
'playlist_mincount': 5, 'playlist_mincount': 5,
@ -3309,63 +3408,20 @@ class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
def _find_videos_in_json(self, extracted): def _process_json_dict(self, obj, videos, c):
videos = [] if "videoId" in obj:
videos.append(obj)
def _real_find(obj): return
if obj is None or isinstance(obj, str):
return
if type(obj) is list:
for elem in obj:
_real_find(elem)
if type(obj) is dict:
if "videoId" in obj:
videos.append(obj)
return
for _, o in obj.items():
_real_find(o)
_real_find(extracted)
return videos
def extract_videos_from_page_impl(self, page, ids_in_page, titles_in_page):
search_response = self._parse_json(self._search_regex(self._SEARCH_DATA, page, 'ytInitialData'), None)
result_items = self._find_videos_in_json(search_response)
for renderer in result_items:
video_id = try_get(renderer, lambda x: x['videoId'])
video_title = try_get(renderer, lambda x: x['title']['runs'][0]['text']) or try_get(renderer, lambda x: x['title']['simpleText'])
if video_id is None or video_title is None:
# we do not have a videoRenderer or title extraction broke
continue
video_title = video_title.strip()
try:
idx = ids_in_page.index(video_id)
if video_title and not titles_in_page[idx]:
titles_in_page[idx] = video_title
except ValueError:
ids_in_page.append(video_id)
titles_in_page.append(video_title)
def extract_videos_from_page(self, page): if "nextContinuationData" in obj:
ids_in_page = [] c["continuation"] = obj["nextContinuationData"]
titles_in_page = [] return
self.extract_videos_from_page_impl(page, ids_in_page, titles_in_page)
return zip(ids_in_page, titles_in_page)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
query = compat_urllib_parse_unquote_plus(mobj.group('query')) query = compat_urllib_parse_unquote_plus(mobj.group('query'))
webpage = self._download_webpage(url, query) webpage = self._download_webpage(url, query)
return self.playlist_result(self._process_page(webpage), playlist_title=query) return self.playlist_result(self._entries(webpage, query, max_pages=5), playlist_title=query)
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor): class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
@ -3387,14 +3443,12 @@ class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
'https://www.youtube.com/show/%s/playlists' % playlist_id) 'https://www.youtube.com/show/%s/playlists' % playlist_id)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): class YoutubeFeedsInfoExtractor(YoutubePlaylistBaseInfoExtractor):
""" """
Base class for feed extractors Base class for feed extractors
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties. Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
""" """
_LOGIN_REQUIRED = True _LOGIN_REQUIRED = True
_FEED_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
_YTCFG_DATA = r"ytcfg.set\(({.*?})\)"
@property @property
def IE_NAME(self): def IE_NAME(self):
@ -3403,96 +3457,35 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
def _find_videos_in_json(self, extracted): def _process_entries(self, entries, seen):
videos = [] new_info = []
c = {} for v in entries:
v_id = try_get(v, lambda x: x['videoId'])
def _real_find(obj): if not v_id:
if obj is None or isinstance(obj, str): continue
return
if type(obj) is list:
for elem in obj:
_real_find(elem)
if type(obj) is dict:
if "videoId" in obj:
videos.append(obj)
return
if "nextContinuationData" in obj:
c["continuation"] = obj["nextContinuationData"]
return
for _, o in obj.items():
_real_find(o)
_real_find(extracted)
return videos, try_get(c, lambda x: x["continuation"])
def _entries(self, page):
info = []
yt_conf = self._parse_json(self._search_regex(self._YTCFG_DATA, page, 'ytcfg.set', default="null"), None, fatal=False)
search_response = self._parse_json(self._search_regex(self._FEED_DATA, page, 'ytInitialData'), None)
for page_num in itertools.count(1):
video_info, continuation = self._find_videos_in_json(search_response)
new_info = []
for v in video_info:
v_id = try_get(v, lambda x: x['videoId'])
if not v_id:
continue
have_video = False
for old in info:
if old['videoId'] == v_id:
have_video = True
break
if not have_video:
new_info.append(v)
if not new_info:
break
info.extend(new_info) have_video = False
for old in seen:
if old['videoId'] == v_id:
have_video = True
break
for video in new_info: if not have_video:
yield self.url_result(try_get(video, lambda x: x['videoId']), YoutubeIE.ie_key(), video_title=try_get(video, lambda x: x['title']['runs'][0]['text']) or try_get(video, lambda x: x['title']['simpleText'])) new_info.append(v)
if not continuation or not yt_conf: if not new_info:
break return
search_response = self._download_json( seen.extend(new_info)
'https://www.youtube.com/browse_ajax', self._PLAYLIST_TITLE, for video in new_info:
'Downloading page #%s' % page_num, yield self.url_result(try_get(video, lambda x: x['videoId']), YoutubeIE.ie_key(), video_title=self._extract_title(video))
transform_source=uppercase_escape,
query={
"ctoken": try_get(continuation, lambda x: x["continuation"]),
"continuation": try_get(continuation, lambda x: x["continuation"]),
"itct": try_get(continuation, lambda x: x["clickTrackingParams"])
},
headers={
"X-YouTube-Client-Name": try_get(yt_conf, lambda x: x["INNERTUBE_CONTEXT_CLIENT_NAME"]),
"X-YouTube-Client-Version": try_get(yt_conf, lambda x: x["INNERTUBE_CONTEXT_CLIENT_VERSION"]),
"X-Youtube-Identity-Token": try_get(yt_conf, lambda x: x["ID_TOKEN"]),
"X-YouTube-Device": try_get(yt_conf, lambda x: x["DEVICE"]),
"X-YouTube-Page-CL": try_get(yt_conf, lambda x: x["PAGE_CL"]),
"X-YouTube-Page-Label": try_get(yt_conf, lambda x: x["PAGE_BUILD_LABEL"]),
"X-YouTube-Variants-Checksum": try_get(yt_conf, lambda x: x["VARIANTS_CHECKSUM"]),
})
def _real_extract(self, url): def _real_extract(self, url):
page = self._download_webpage( page = self._download_webpage(
'https://www.youtube.com/feed/%s' % self._FEED_NAME, 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
self._PLAYLIST_TITLE) self._PLAYLIST_TITLE)
return self.playlist_result( return self.playlist_result(self._entries(page, self._PLAYLIST_TITLE),
self._entries(page), playlist_title=self._PLAYLIST_TITLE) playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE): class YoutubeWatchLaterIE(YoutubePlaylistIE):

@ -0,0 +1,82 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
url_or_none,
parse_filesize,
urlencode_postdata
)
class ZoomIE(InfoExtractor):
    """Extractor for Zoom cloud recordings.

    Handles ``/rec/play``, ``/rec/share``, ``/recording/play`` and
    ``/recording/share`` pages on ``zoom.us`` and its subdomains.  The
    recording page embeds the MP4 location and some metadata as JavaScript
    properties, which are scraped with regular expressions.  Password
    protected recordings are unlocked with the ``videopassword`` downloader
    parameter before the page is fetched again.
    """

    IE_NAME = 'zoom'
    # The host must be zoom.us itself or one of its subdomains.  The dots are
    # escaped and the host part may not contain a slash, so URLs that merely
    # contain the text "zoom.us" somewhere are not claimed by this extractor.
    _VALID_URL = r'https://(?:[\w-]+\.)*zoom\.us/rec(?:ording)?/(?:play|share)/(?P<id>[A-Za-z0-9\-_.]+)'
    _TEST = {
        'url': 'https://zoom.us/recording/play/SILVuCL4bFtRwWTtOCFQQxAsBQsJljFtm9e4Z_bvo-A8B-nzUSYZRNuPl3qW5IGK',
        # File checksum: belongs next to 'url', not inside 'info_dict'.
        'md5': '031a5b379f1547a8b29c5c4c837dccf2',
        'info_dict': {
            'title': "GAZ Transformational Tuesdays W/ Landon & Stapes",
            'id': "SILVuCL4bFtRwWTtOCFQQxAsBQsJljFtm9e4Z_bvo-A8B-nzUSYZRNuPl3qW5IGK",
            'ext': "mp4"
        }
    }

    @staticmethod
    def _base_url(url):
        """Return the part of *url* up to and including ``zoom.us/``."""
        return url.split("zoom.us")[0] + "zoom.us/"

    def _real_extract(self, url):
        """Build the info dict (id, title, single MP4 format) for *url*.

        Raises ExtractorError when the page does not yield a usable video
        URL, or (via _verify_video_password) when a required password is
        missing or rejected.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        password_form = self._search_regex(
            r'<form[^>]+?id="(password_form)"', webpage, 'password field',
            fatal=False, default=None)
        if password_form is not None:
            self._verify_video_password(url, display_id, webpage)
            # The recording page has to be requested again once the
            # password has been validated.
            webpage = self._download_webpage(url, display_id)

        video_url = url_or_none(self._search_regex(
            r"viewMp4Url: \'(.*)\'", webpage, 'video url'))
        if not video_url:
            # Fail with a clear message instead of emitting a format whose
            # 'url' is None.
            raise ExtractorError('Unable to extract a valid video url')

        title = self._html_search_regex(
            [r"topic: \"(.*)\",", r"<title>(.*) - Zoom</title>"],
            webpage, 'title')
        # NOTE(review): the "Resolvtions" spelling is taken verbatim from the
        # original patterns and presumably mirrors the page's own property
        # names - confirm before "correcting" it.
        width = self._search_regex(
            r"viewResolvtionsWidth: (\d*)", webpage, 'res width', fatal=False)
        height = self._search_regex(
            r"viewResolvtionsHeight: (\d*)", webpage, 'res height', fatal=False)
        file_size = parse_filesize(self._search_regex(
            r"fileSize: \'(.+)\'", webpage, 'fileSize', fatal=False))

        formats = [{
            'url': video_url,
            'width': int_or_none(width),
            'height': int_or_none(height),
            'http_headers': {
                'Accept': 'video/webm,video/ogg,video/*;q=0.9,application/ogg;q=0.7,audio/*;q=0.6,*/*;q=0.5',
                'Referer': self._base_url(url),
            },
            'ext': "mp4",
            'filesize_approx': int_or_none(file_size),
        }]
        self._sort_formats(formats)

        return {
            'id': display_id,
            'title': title,
            'formats': formats
        }

    def _verify_video_password(self, url, video_id, webpage):
        """Submit the user-supplied password for a protected recording.

        Reads the password from the ``videopassword`` downloader parameter
        and posts it, together with the ``meetId`` found in *webpage*, to the
        ``rec/validate_meet_passwd`` endpoint of the same host.

        Raises ExtractorError (expected) when no password was supplied or
        when the response does not report ``errorCode`` 0.
        """
        password = self._downloader.params.get('videopassword')
        if password is None:
            raise ExtractorError(
                'This video is protected by a password, use the --video-password option',
                expected=True)

        meet_id = self._search_regex(
            r'<input[^>]+?id="meetId" value="([^\"]+)"', webpage, 'meetId')
        data = urlencode_postdata({
            'id': meet_id,
            'passwd': password,
            'action': "viewdetailedpage",
            'recaptcha': ""
        })
        validation_url = self._base_url(url) + "rec/validate_meet_passwd"
        validation_response = self._download_json(
            validation_url, video_id,
            note='Validating Password...',
            errnote='Wrong password?',
            data=data)

        # Use .get() so that an unexpected response shape is reported as a
        # failed login rather than as a bare KeyError.
        if validation_response.get('errorCode') != 0:
            raise ExtractorError(
                'Login failed, %s said: %r' % (
                    self.IE_NAME, validation_response.get('errorMessage')),
                expected=True)

@ -412,7 +412,9 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
for lang, sub_info in subtitles.items(): for lang, sub_info in subtitles.items():
sub_ext = sub_info['ext'] sub_ext = sub_info['ext']
if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': if sub_ext == 'json':
self._downloader.to_screen('[ffmpeg] JSON subtitles cannot be embedded')
elif ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
sub_langs.append(lang) sub_langs.append(lang)
sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext)) sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext))
else: else:
@ -643,13 +645,18 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
self._downloader.to_screen( self._downloader.to_screen(
'[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext) '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
continue continue
elif ext == 'json':
self._downloader.to_screen(
'[ffmpeg] You have requested to convert json subtitles into another format, '
'which is currently not possible')
continue
old_file = subtitles_filename(filename, lang, ext, info.get('ext')) old_file = subtitles_filename(filename, lang, ext, info.get('ext'))
sub_filenames.append(old_file) sub_filenames.append(old_file)
new_file = subtitles_filename(filename, lang, new_ext, info.get('ext')) new_file = subtitles_filename(filename, lang, new_ext, info.get('ext'))
if ext in ('dfxp', 'ttml', 'tt'): if ext in ('dfxp', 'ttml', 'tt'):
self._downloader.report_warning( self._downloader.report_warning(
'You have requested to convert dfxp (TTML) subtitles into another format, ' '[ffmpeg] You have requested to convert dfxp (TTML) subtitles into another format, '
'which results in style information loss') 'which results in style information loss')
dfxp_file = old_file dfxp_file = old_file

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2020.10.25' __version__ = '2020.11.11-2'

Loading…
Cancel
Save