mirror of https://github.com/yt-dlp/yt-dlp
Merge remote-tracking branch 'origin' into yt-live-from-start-range
commit
2741b5827d
@ -0,0 +1,21 @@
|
||||
import functools
|
||||
import inspect
|
||||
|
||||
import pytest
|
||||
|
||||
from yt_dlp.networking import RequestHandler
|
||||
from yt_dlp.networking.common import _REQUEST_HANDLERS
|
||||
from yt_dlp.utils._utils import _YDLLogger as FakeLogger
|
||||
|
||||
|
||||
@pytest.fixture
def handler(request):
    """Resolve the parametrized request handler and pre-bind the fake logger.

    ``request.param`` is either a ``RequestHandler`` subclass, which is used
    directly, or a key looked up in ``_REQUEST_HANDLERS``. When it is neither,
    the requesting test is skipped.

    Returns a ``functools.partial`` of the handler class with ``logger`` set
    to ``FakeLogger``.
    """
    key = request.param
    is_handler_class = inspect.isclass(key) and issubclass(key, RequestHandler)
    if not is_handler_class and key not in _REQUEST_HANDLERS:
        pytest.skip(f'{key} request handler is not available')

    handler_cls = key if is_handler_class else _REQUEST_HANDLERS[key]
    return functools.partial(handler_cls, logger=FakeLogger)
|
@ -0,0 +1,139 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import http.cookiejar
|
||||
|
||||
from test.helper import FakeYDL
|
||||
from yt_dlp.downloader.external import (
|
||||
Aria2cFD,
|
||||
AxelFD,
|
||||
CurlFD,
|
||||
FFmpegFD,
|
||||
HttpieFD,
|
||||
WgetFD,
|
||||
)
|
||||
|
||||
# Keyword arguments for http.cookiejar.Cookie(); the tests below expand this
# with ** to put a cookie into the downloader's cookiejar.
TEST_COOKIE = dict(
    version=0,
    name='test',
    value='ytdlp',
    port=None,
    port_specified=False,
    domain='.example.com',
    domain_specified=True,
    domain_initial_dot=False,
    path='/',
    path_specified=True,
    secure=False,
    expires=None,
    discard=False,
    comment=None,
    comment_url=None,
    rest={},
)

# Info dict passed to every downloader under test
TEST_INFO = dict(url='http://www.example.com/')
|
||||
|
||||
|
||||
class TestHttpieFD(unittest.TestCase):
    """Checks the command line HttpieFD builds, with and without cookies."""

    def test_make_cmd(self):
        base_cmd = ['http', '--download', '--output', 'test', 'http://www.example.com/']
        with FakeYDL() as ydl:
            fd = HttpieFD(ydl, {})
            self.assertEqual(fd._make_cmd('test', TEST_INFO), base_cmd)

            # Test cookie header is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            self.assertEqual(
                fd._make_cmd('test', TEST_INFO),
                [*base_cmd, 'Cookie:test=ytdlp'])
|
||||
|
||||
|
||||
class TestAxelFD(unittest.TestCase):
    """Checks the command line AxelFD builds, with and without cookies."""

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            fd = AxelFD(ydl, {})
            plain_cmd = fd._make_cmd('test', TEST_INFO)
            self.assertEqual(plain_cmd, ['axel', '-o', 'test', '--', 'http://www.example.com/'])

            # Test cookie header is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            cookie_cmd = fd._make_cmd('test', TEST_INFO)
            self.assertEqual(cookie_cmd, [
                'axel', '-o', 'test',
                '-H', 'Cookie: test=ytdlp', '--max-redirect=0',
                '--', 'http://www.example.com/',
            ])
|
||||
|
||||
|
||||
class TestWgetFD(unittest.TestCase):
    """Checks that WgetFD only passes --load-cookies once a cookie is set."""

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            fd = WgetFD(ydl, {})
            cmd_without_cookies = fd._make_cmd('test', TEST_INFO)
            self.assertNotIn('--load-cookies', cmd_without_cookies)

            # Test cookiejar tempfile arg is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            cmd_with_cookies = fd._make_cmd('test', TEST_INFO)
            self.assertIn('--load-cookies', cmd_with_cookies)
|
||||
|
||||
|
||||
class TestCurlFD(unittest.TestCase):
    """Checks that CurlFD only passes --cookie once a cookie is set."""

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            fd = CurlFD(ydl, {})
            self.assertNotIn('--cookie', fd._make_cmd('test', TEST_INFO))

            # Test cookie header is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            # The command is rebuilt for each expectation, as in the original test
            for expected_arg in ('--cookie', 'test=ytdlp'):
                self.assertIn(expected_arg, fd._make_cmd('test', TEST_INFO))
|
||||
|
||||
|
||||
class TestAria2cFD(unittest.TestCase):
    """Checks when Aria2cFD sets `_cookies_tempfile` and passes it on."""

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            fd = Aria2cFD(ydl, {})

            # With an empty cookiejar, building the command must not set the attribute
            fd._make_cmd('test', TEST_INFO)
            has_tempfile = hasattr(fd, '_cookies_tempfile')
            self.assertFalse(has_tempfile)

            # Test cookiejar tempfile arg is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            cmd = fd._make_cmd('test', TEST_INFO)
            expected_arg = f'--load-cookies={fd._cookies_tempfile}'
            self.assertIn(expected_arg, cmd)
|
||||
|
||||
|
||||
@unittest.skipUnless(FFmpegFD.available(), 'ffmpeg not found')
class TestFFmpegFD(unittest.TestCase):
    """Checks the ffmpeg argument list FFmpegFD hands to `_debug_cmd`."""

    # Most recent argument list captured by _test_cmd
    _args = []

    def _test_cmd(self, args):
        """Stand-in for the downloader's `_debug_cmd`; records *args*."""
        self._args = args

    def test_make_cmd(self):
        prefix = ['ffmpeg', '-y', '-hide_banner']
        output_args = ['-c', 'copy', '-f', 'mp4', 'file:test']
        url = 'http://www.example.com/'

        with FakeYDL() as ydl:
            fd = FFmpegFD(ydl, {})
            fd._debug_cmd = self._test_cmd

            # A fresh info dict is built for every call, as in the original test
            fd._call_downloader('test', dict(TEST_INFO, ext='mp4'))
            self.assertEqual(self._args, [*prefix, '-i', url, *output_args])

            # Test cookies arg is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            fd._call_downloader('test', dict(TEST_INFO, ext='mp4'))
            self.assertEqual(self._args, [
                *prefix,
                '-cookies', 'test=ytdlp; path=/; domain=.example.com;\r\n',
                '-i', url,
                *output_args,
            ])

            # Test with non-url input (ffmpeg reads from stdin '-' for websockets)
            fd._call_downloader('test', {'url': 'x', 'ext': 'mp4'})
            self.assertEqual(self._args, [*prefix, '-i', 'x', *output_args])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -1,500 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import gzip
|
||||
import http.cookiejar
|
||||
import http.server
|
||||
import io
|
||||
import pathlib
|
||||
import ssl
|
||||
import tempfile
|
||||
import threading
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import zlib
|
||||
|
||||
from test.helper import http_server_port
|
||||
from yt_dlp import YoutubeDL
|
||||
from yt_dlp.dependencies import brotli
|
||||
from yt_dlp.utils import sanitized_Request, urlencode_postdata
|
||||
|
||||
from .helper import FakeYDL
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
    """Request handler for the local servers used by the HTTP tests.

    Routes, matched on ``self.path``:

    * ``/video.html``, ``/%E4%B8%AD%E6%96%87.html``, ``/%c7%9f`` (GET):
      a fixed HTML page.
    * ``/vid.mp4`` (GET): a few fixed bytes served as ``video/mp4``.
    * ``/redirect_<code>``: responds with status ``<code>`` and
      ``Location: /method``.
    * ``/method``: reports the request method in a ``Method`` response header;
      POST and PUT also echo the request body.
    * ``/headers`` (GET, POST): echoes the request headers as the body.
    * ``/trailing_garbage`` (GET): gzip body followed by extra bytes.
    * ``/302-non-ascii-redirect`` (GET): 301 redirect whose ``Location``
      contains non-percent-encoded non-ASCII characters.
    * ``/content-encoding`` (GET): the HTML page encoded as listed in the
      ``ytdl-encoding`` request header.

    Anything else is answered with 404.
    """
    protocol_version = 'HTTP/1.1'

    # Page shared by all HTML routes and by /content-encoding
    _HTML_PAGE = b'<html><video src="/vid.mp4" /></html>'
    # GET paths that serve _HTML_PAGE unmodified
    _HTML_PATHS = ('/video.html', '/%E4%B8%AD%E6%96%87.html', '/%c7%9f')

    def log_message(self, format, *args):
        """Discard the access log so test output stays clean."""

    def _send_body(self, payload, headers=()):
        """Send a 200 response carrying *payload*.

        *headers* is an iterable of ``(keyword, value)`` pairs sent in order,
        followed by ``Content-Length``.
        """
        self.send_response(200)
        for keyword, value in headers:
            self.send_header(keyword, value)
        # Content-Length is required for persistent connections
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    @staticmethod
    def _gzip(data):
        """Return *data* compressed as a gzip stream."""
        buf = io.BytesIO()
        with gzip.GzipFile(fileobj=buf, mode='wb') as f:
            f.write(data)
        return buf.getvalue()

    def _headers(self):
        """Echo the request headers back as the response body."""
        payload = str(self.headers).encode('utf-8')
        self._send_body(payload, [('Content-Type', 'application/json')])

    def _redirect(self):
        """Redirect to /method using the status code embedded in the path."""
        self.send_response(int(self.path[len('/redirect_'):]))
        self.send_header('Location', '/method')
        self.send_header('Content-Length', '0')
        self.end_headers()

    def _method(self, method, payload=None):
        """Report *method* in a ``Method`` header and echo *payload*, if any."""
        self.send_response(200)
        self.send_header('Content-Length', str(len(payload or '')))
        self.send_header('Method', method)
        self.end_headers()
        if payload:
            self.wfile.write(payload)

    def _status(self, status):
        """Send a small HTML body with the given response *status*."""
        payload = f'<html>{status} NOT FOUND</html>'.encode()
        self.send_response(int(status))
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _read_data(self):
        """Return the request body, or None when no Content-Length was sent."""
        if 'Content-Length' in self.headers:
            return self.rfile.read(int(self.headers['Content-Length']))

    def do_POST(self):
        # The body is always read first, even on routes that do not echo it
        data = self._read_data()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('POST', data)
        elif self.path.startswith('/headers'):
            self._headers()
        else:
            self._status(404)

    def do_HEAD(self):
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('HEAD')
        else:
            self._status(404)

    def do_PUT(self):
        data = self._read_data()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('PUT', data)
        else:
            self._status(404)

    def do_GET(self):
        html_headers = [('Content-Type', 'text/html; charset=utf-8')]

        if self.path in self._HTML_PATHS:
            self._send_body(self._HTML_PAGE, html_headers)
        elif self.path == '/vid.mp4':
            self._send_body(
                b'\x00\x00\x00\x00\x20\x66\x74[video]',
                [('Content-Type', 'video/mp4')])
        elif self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('GET')
        elif self.path.startswith('/headers'):
            self._headers()
        elif self.path == '/trailing_garbage':
            compressed = self._gzip(self._HTML_PAGE) + b'trailing garbage'
            self._send_body(compressed, [*html_headers, ('Content-Encoding', 'gzip')])
        elif self.path == '/302-non-ascii-redirect':
            new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
            self.send_response(301)
            self.send_header('Location', new_url)
            self.send_header('Content-Length', '0')
            self.end_headers()
        elif self.path == '/content-encoding':
            encodings = self.headers.get('ytdl-encoding', '')
            payload = self._HTML_PAGE
            # Encodings are applied in the order listed
            for encoding in filter(None, (e.strip() for e in encodings.split(','))):
                if encoding == 'br' and brotli:
                    payload = brotli.compress(payload)
                elif encoding == 'gzip':
                    payload = self._gzip(payload)
                elif encoding == 'deflate':
                    payload = zlib.compress(payload)
                elif encoding == 'unsupported':
                    payload = b'raw'
                    break
                else:
                    self._status(415)
                    return
            # The header value is echoed back exactly as the client sent it
            self._send_body(payload, [('Content-Encoding', encodings)])
        else:
            self._status(404)

    def send_header(self, keyword, value):
        """
        Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
        This is against what is defined in RFC 3986, however we need to test we support this
        since some sites incorrectly do this.
        """
        if keyword.lower() == 'connection':
            return super().send_header(keyword, value)

        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []

        # str.encode() defaults to UTF-8, so non-ASCII values are buffered as-is
        self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
|
||||
|
||||
|
||||
class FakeLogger:
    """Logger stand-in whose debug/warning/error methods discard the message."""

    def debug(self, msg):
        return None

    def warning(self, msg):
        return None

    def error(self, msg):
        return None
|
||||
|
||||
|
||||
class TestHTTP(unittest.TestCase):
    """Exercises `ydl.urlopen` against local HTTP and HTTPS test servers."""

    # Body served by the HTML and /content-encoding routes of the test server
    _PAGE = b'<html><video src="/vid.mp4" /></html>'

    @staticmethod
    def _serve_in_background(httpd):
        """Run ``httpd.serve_forever`` on a started daemon thread and return it."""
        thread = threading.Thread(target=httpd.serve_forever, daemon=True)
        thread.start()
        return thread

    def _fetch_encoded(self, ydl, encodings):
        """Request /content-encoding with *encodings* in the ytdl-encoding header."""
        return ydl.urlopen(
            sanitized_Request(
                f'http://127.0.0.1:{self.http_port}/content-encoding',
                headers={'ytdl-encoding': encodings}))

    def setUp(self):
        # HTTP server
        self.http_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        self.http_port = http_server_port(self.http_httpd)
        # FIXME: we should probably stop the http server thread after each test
        # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
        self.http_server_thread = self._serve_in_background(self.http_httpd)

        # HTTPS server
        cert_path = os.path.join(TEST_DIR, 'testcert.pem')
        self.https_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        tls_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        tls_context.load_cert_chain(cert_path, None)
        self.https_httpd.socket = tls_context.wrap_socket(self.https_httpd.socket, server_side=True)
        self.https_port = http_server_port(self.https_httpd)
        self.https_server_thread = self._serve_in_background(self.https_httpd)

    def test_nocheckcertificate(self):
        url = f'https://127.0.0.1:{self.https_port}/headers'

        # With default settings the request to the HTTPS test server must fail
        with FakeYDL({'logger': FakeLogger()}) as ydl:
            with self.assertRaises(urllib.error.URLError):
                ydl.urlopen(sanitized_Request(url))

        with FakeYDL({'logger': FakeLogger(), 'nocheckcertificate': True}) as ydl:
            response = ydl.urlopen(sanitized_Request(url))
            self.assertEqual(response.status, 200)
            response.close()

    def test_percent_encode(self):
        paths = (
            '中文.html',  # Unicode characters should be encoded with uppercase percent-encoding
            '%c7%9f',  # don't normalize existing percent encodings
        )
        with FakeYDL() as ydl:
            for path in paths:
                response = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/{path}'))
                self.assertEqual(response.status, 200)
                response.close()

    def test_unicode_path_redirection(self):
        with FakeYDL() as ydl:
            response = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
            expected_url = f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
            self.assertEqual(response.url, expected_url)
            response.close()

    def test_redirect(self):
        # (redirect status, request method, (response body, method seen by /method))
        expectations = (
            # A 303 must either use GET or HEAD for subsequent request
            (303, 'POST', ('', 'GET')),
            (303, 'HEAD', ('', 'HEAD')),
            (303, 'PUT', ('', 'GET')),
            # 301 and 302 turn POST only into a GET
            (301, 'POST', ('', 'GET')),
            (301, 'HEAD', ('', 'HEAD')),
            (302, 'POST', ('', 'GET')),
            (302, 'HEAD', ('', 'HEAD')),
            (301, 'PUT', ('testdata', 'PUT')),
            (302, 'PUT', ('testdata', 'PUT')),
            # 307 and 308 should not change method
            (307, 'POST', ('testdata', 'POST')),
            (308, 'POST', ('testdata', 'POST')),
            (307, 'PUT', ('testdata', 'PUT')),
            (308, 'PUT', ('testdata', 'PUT')),
            (307, 'HEAD', ('', 'HEAD')),
            (308, 'HEAD', ('', 'HEAD')),
        )

        with FakeYDL() as ydl:
            def do_req(redirect_status, method):
                body = b'testdata' if method in ('POST', 'PUT') else None
                response = ydl.urlopen(sanitized_Request(
                    f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=body))
                return response.read().decode('utf-8'), response.headers.get('method', '')

            for status, method, expected in expectations:
                self.assertEqual(do_req(status, method), expected)

            # These should not redirect and instead raise an HTTPError
            for status in (300, 304, 305, 306):
                with self.assertRaises(urllib.error.HTTPError):
                    do_req(status, 'GET')

    def test_content_type(self):
        # https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28
        with FakeYDL({'nocheckcertificate': True}) as ydl:
            # method should be auto-detected as POST; https first, then http
            for url in (f'https://localhost:{self.https_port}/headers',
                        f'http://localhost:{self.http_port}/headers'):
                request = sanitized_Request(url, data=urlencode_postdata({'test': 'test'}))
                echoed_headers = ydl.urlopen(request).read().decode('utf-8')
                self.assertIn('Content-Type: application/x-www-form-urlencoded', echoed_headers)

    def test_cookiejar(self):
        cookie = http.cookiejar.Cookie(
            version=0, name='test', value='ytdlp',
            port=None, port_specified=False,
            domain='127.0.0.1', domain_specified=True, domain_initial_dot=False,
            path='/headers', path_specified=True,
            secure=False, expires=None, discard=False,
            comment=None, comment_url=None, rest={})
        with FakeYDL() as ydl:
            ydl.cookiejar.set_cookie(cookie)
            echoed_headers = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
            self.assertIn(b'Cookie: test=ytdlp', echoed_headers)

    def test_no_compression_compat_header(self):
        with FakeYDL() as ydl:
            request = sanitized_Request(
                f'http://127.0.0.1:{self.http_port}/headers',
                headers={'Youtubedl-no-compression': True})
            echoed_headers = ydl.urlopen(request).read()
            self.assertIn(b'Accept-Encoding: identity', echoed_headers)
            self.assertNotIn(b'youtubedl-no-compression', echoed_headers.lower())

    def test_gzip_trailing_garbage(self):
        # https://github.com/ytdl-org/youtube-dl/commit/aa3e950764337ef9800c936f4de89b31c00dfcf5
        # https://github.com/ytdl-org/youtube-dl/commit/6f2ec15cee79d35dba065677cad9da7491ec6e6f
        with FakeYDL() as ydl:
            response = ydl.urlopen(sanitized_Request(f'http://localhost:{self.http_port}/trailing_garbage'))
            text = response.read().decode('utf-8')
            self.assertEqual(text, '<html><video src="/vid.mp4" /></html>')

    @unittest.skipUnless(brotli, 'brotli support is not installed')
    def test_brotli(self):
        with FakeYDL() as ydl:
            response = self._fetch_encoded(ydl, 'br')
            self.assertEqual(response.headers.get('Content-Encoding'), 'br')
            self.assertEqual(response.read(), self._PAGE)

    def test_deflate(self):
        with FakeYDL() as ydl:
            response = self._fetch_encoded(ydl, 'deflate')
            self.assertEqual(response.headers.get('Content-Encoding'), 'deflate')
            self.assertEqual(response.read(), self._PAGE)

    def test_gzip(self):
        with FakeYDL() as ydl:
            response = self._fetch_encoded(ydl, 'gzip')
            self.assertEqual(response.headers.get('Content-Encoding'), 'gzip')
            self.assertEqual(response.read(), self._PAGE)

    def test_multiple_encodings(self):
        # https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4
        combinations = ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate')
        with FakeYDL() as ydl:
            for combination in combinations:
                response = self._fetch_encoded(ydl, combination)
                self.assertEqual(response.headers.get('Content-Encoding'), combination)
                self.assertEqual(response.read(), self._PAGE)

    def test_unsupported_encoding(self):
        # it should return the raw content
        with FakeYDL() as ydl:
            response = self._fetch_encoded(ydl, 'unsupported')
            self.assertEqual(response.headers.get('Content-Encoding'), 'unsupported')
            self.assertEqual(response.read(), b'raw')
|
||||
|
||||
|
||||
class TestClientCert(unittest.TestCase):
    """Checks client-certificate options against a server that requires one."""

    def setUp(self):
        server_cert = os.path.join(TEST_DIR, 'testcert.pem')
        self.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
        ca_cert = os.path.join(self.certdir, 'ca.crt')

        self.httpd = http.server.HTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)
        tls_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        # The server requires a client certificate verified against ca.crt
        tls_context.verify_mode = ssl.CERT_REQUIRED
        tls_context.load_verify_locations(cafile=ca_cert)
        tls_context.load_cert_chain(server_cert, None)
        self.httpd.socket = tls_context.wrap_socket(self.httpd.socket, server_side=True)
        self.port = http_server_port(self.httpd)

        self.server_thread = threading.Thread(target=self.httpd.serve_forever, daemon=True)
        self.server_thread.start()

    def _cert(self, filename):
        """Return the path of *filename* inside the certificate test data directory."""
        return os.path.join(self.certdir, filename)

    def _run_test(self, **params):
        """Extract /video.html with *params* added to the YoutubeDL options."""
        options = {
            'logger': FakeLogger(),
            # Disable client-side validation of unacceptable self-signed testcert.pem
            # The test is of a check on the server side, so unaffected
            'nocheckcertificate': True,
            **params,
        }
        ydl = YoutubeDL(options)
        info = ydl.extract_info(f'https://127.0.0.1:{self.port}/video.html')
        self.assertEqual(info['url'], f'https://127.0.0.1:{self.port}/vid.mp4')

    def test_certificate_combined_nopass(self):
        self._run_test(client_certificate=self._cert('clientwithkey.crt'))

    def test_certificate_nocombined_nopass(self):
        self._run_test(
            client_certificate=self._cert('client.crt'),
            client_certificate_key=self._cert('client.key'))

    def test_certificate_combined_pass(self):
        self._run_test(
            client_certificate=self._cert('clientwithencryptedkey.crt'),
            client_certificate_password='foobar')

    def test_certificate_nocombined_pass(self):
        self._run_test(
            client_certificate=self._cert('client.crt'),
            client_certificate_key=self._cert('clientencrypted.key'),
            client_certificate_password='foobar')
|
||||
|
||||
|
||||
def _build_proxy_handler(name):
    """Build a handler class whose GET reply body is ``'<name>: <request path>'``."""

    class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
        # Label prefixed to every reply body
        proxy_name = name

        def log_message(self, format, *args):
            """Discard the access log so test output stays clean."""

        def do_GET(self):
            self.send_response(200)
            self.send_header('Content-Type', 'text/plain; charset=utf-8')
            self.end_headers()
            reply = f'{self.proxy_name}: {self.path}'
            self.wfile.write(reply.encode())

    return HTTPTestRequestHandler
|
||||
|
||||
|
||||
class TestProxy(unittest.TestCase):
    """Checks which local proxy ('normal' or 'geo') a request is sent through."""

    @staticmethod
    def _serve_in_background(server):
        """Run ``server.serve_forever`` on a started daemon thread and return it."""
        thread = threading.Thread(target=server.serve_forever, daemon=True)
        thread.start()
        return thread

    def setUp(self):
        self.proxy = http.server.HTTPServer(
            ('127.0.0.1', 0), _build_proxy_handler('normal'))
        self.port = http_server_port(self.proxy)
        self.proxy_thread = self._serve_in_background(self.proxy)

        self.geo_proxy = http.server.HTTPServer(
            ('127.0.0.1', 0), _build_proxy_handler('geo'))
        self.geo_port = http_server_port(self.geo_proxy)
        self.geo_proxy_thread = self._serve_in_background(self.geo_proxy)

    def test_proxy(self):
        target = 'http://foo.com/bar'
        geo_address = f'127.0.0.1:{self.geo_port}'
        ydl = YoutubeDL({
            'proxy': f'127.0.0.1:{self.port}',
            'geo_verification_proxy': geo_address,
        })

        # A plain request is answered by the 'normal' proxy
        body = ydl.urlopen(target).read().decode()
        self.assertEqual(body, f'normal: {target}')

        # With the Ytdl-request-proxy header set, the 'geo' proxy answers instead
        geo_request = urllib.request.Request(target)
        geo_request.add_header('Ytdl-request-proxy', geo_address)
        body = ydl.urlopen(geo_request).read().decode()
        self.assertEqual(body, f'geo: {target}')

    def test_proxy_with_idn(self):
        ydl = YoutubeDL({'proxy': f'127.0.0.1:{self.port}'})
        body = ydl.urlopen('http://中文.tw/').read().decode()
        # b'xn--fiq228c' is '中文'.encode('idna')
        self.assertEqual(body, 'normal: http://xn--fiq228c.tw/')
|
||||
|
||||
|
||||
class TestFileURL(unittest.TestCase):
    """Checks that file:// URLs open only when `enable_file_urls` is set."""

    # See https://github.com/ytdl-org/youtube-dl/issues/8227
    def test_file_urls(self):
        # delete=False keeps the file on disk after the handle is closed, so it
        # can be reopened by URL below; it is removed in the finally clause.
        with tempfile.NamedTemporaryFile(delete=False) as tf:
            tf.write(b'foobar')

        try:
            url = pathlib.Path(tf.name).as_uri()

            with FakeYDL() as ydl:
                self.assertRaisesRegex(
                    urllib.error.URLError,
                    'file:// URLs are explicitly disabled in yt-dlp for security reasons',
                    ydl.urlopen, url)

            with FakeYDL({'enable_file_urls': True}) as ydl:
                res = ydl.urlopen(url)
                try:
                    self.assertEqual(res.read(), b'foobar')
                finally:
                    # Close the response even when the assertion fails
                    res.close()
        finally:
            # Remove the temp file even when an assertion above fails
            os.unlink(tf.name)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,282 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import contextlib
|
||||
import io
|
||||
import platform
|
||||
import random
|
||||
import ssl
|
||||
import urllib.error
|
||||
import warnings
|
||||
|
||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||
from yt_dlp.dependencies import certifi
|
||||
from yt_dlp.networking import Response
|
||||
from yt_dlp.networking._helper import (
|
||||
InstanceStoreMixin,
|
||||
add_accept_encoding_header,
|
||||
get_redirect_method,
|
||||
make_socks_proxy_opts,
|
||||
select_proxy,
|
||||
ssl_load_certs,
|
||||
)
|
||||
from yt_dlp.networking.exceptions import (
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
_CompatHTTPError,
|
||||
)
|
||||
from yt_dlp.socks import ProxyType
|
||||
from yt_dlp.utils.networking import HTTPHeaderDict
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
class TestNetworkingUtils:
    """Tests for the helper functions imported from ``yt_dlp.networking._helper``."""

    def test_select_proxy(self):
        fallback_proxy = 'socks5://example.com'
        http_proxy = 'http://example.com:1080'
        proxies = {
            'all': fallback_proxy,
            'http': http_proxy,
            'no': 'bypass.example.com,yt-dl.org',
        }

        assert select_proxy('https://example.com', proxies) == fallback_proxy
        assert select_proxy('http://example.com', proxies) == http_proxy
        # Hosts listed under 'no' get no proxy
        for bypassed_url in ('http://bypass.example.com', 'https://yt-dl.org'):
            assert select_proxy(bypassed_url, proxies) is None

    @pytest.mark.parametrize('socks_proxy,expected', [
        ('socks5h://example.com', dict(
            proxytype=ProxyType.SOCKS5,
            addr='example.com',
            port=1080,
            rdns=True,
            username=None,
            password=None,
        )),
        ('socks5://user:@example.com:5555', dict(
            proxytype=ProxyType.SOCKS5,
            addr='example.com',
            port=5555,
            rdns=False,
            username='user',
            password='',
        )),
        ('socks4://u%40ser:pa%20ss@127.0.0.1:1080', dict(
            proxytype=ProxyType.SOCKS4,
            addr='127.0.0.1',
            port=1080,
            rdns=False,
            username='u@ser',
            password='pa ss',
        )),
        ('socks4a://:pa%20ss@127.0.0.1', dict(
            proxytype=ProxyType.SOCKS4A,
            addr='127.0.0.1',
            port=1080,
            rdns=True,
            username='',
            password='pa ss',
        )),
    ])
    def test_make_socks_proxy_opts(self, socks_proxy, expected):
        parsed = make_socks_proxy_opts(socks_proxy)
        assert parsed == expected

    def test_make_socks_proxy_unknown(self):
        with pytest.raises(ValueError, match='Unknown SOCKS proxy version: socks'):
            make_socks_proxy_opts('socks://127.0.0.1')

    @pytest.mark.skipif(not certifi, reason='certifi is not installed')
    def test_load_certifi(self):
        def new_context():
            return ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)

        # use_certifi=True must load the same CA set as certifi's bundle
        certifi_reference = new_context()
        certifi_reference.load_verify_locations(cafile=certifi.where())
        loaded = new_context()
        ssl_load_certs(loaded, use_certifi=True)
        assert loaded.get_ca_certs() == certifi_reference.get_ca_certs()

        # use_certifi=False must load the same CA set as load_default_certs()
        default_reference = new_context()
        default_reference.load_default_certs()
        loaded = new_context()
        ssl_load_certs(loaded, use_certifi=False)
        assert loaded.get_ca_certs() == default_reference.get_ca_certs()

        if default_reference.get_ca_certs() == certifi_reference.get_ca_certs():
            pytest.skip('System uses certifi as default. The test is not valid')

    @pytest.mark.parametrize('method,status,expected', [
        ('GET', 303, 'GET'),
        ('HEAD', 303, 'HEAD'),
        ('PUT', 303, 'GET'),
        ('POST', 301, 'GET'),
        ('HEAD', 301, 'HEAD'),
        ('POST', 302, 'GET'),
        ('HEAD', 302, 'HEAD'),
        ('PUT', 302, 'PUT'),
        ('POST', 308, 'POST'),
        ('POST', 307, 'POST'),
        ('HEAD', 308, 'HEAD'),
        ('HEAD', 307, 'HEAD'),
    ])
    def test_get_redirect_method(self, method, status, expected):
        redirected_method = get_redirect_method(method, status)
        assert redirected_method == expected

    @pytest.mark.parametrize('headers,supported_encodings,expected', [
        ({'Accept-Encoding': 'br'}, ['gzip', 'br'], {'Accept-Encoding': 'br'}),
        ({}, ['gzip', 'br'], {'Accept-Encoding': 'gzip, br'}),
        ({'Content-type': 'application/json'}, [], {'Content-type': 'application/json', 'Accept-Encoding': 'identity'}),
    ])
    def test_add_accept_encoding_header(self, headers, supported_encodings, expected):
        header_dict = HTTPHeaderDict(headers)
        # The helper's return value is not used; the result is read from header_dict
        add_accept_encoding_header(header_dict, supported_encodings)
        assert header_dict == HTTPHeaderDict(expected)
|
||||
|
||||
|
||||
class TestInstanceStoreMixin:
    """Tests how InstanceStoreMixin reuses instances per keyword arguments."""

    class FakeInstanceStoreMixin(InstanceStoreMixin):
        # Every created "instance" is a random integer, so two lookups comparing
        # equal means the stored instance was reused (barring a random collision).
        def _create_instance(self, **kwargs):
            return random.randint(0, 1000000)

        def _close_instance(self, instance):
            pass

    def test_mixin(self):
        mixin = self.FakeInstanceStoreMixin()

        # Same nested keyword arguments -> same instance
        assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) == mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}})

        # A differing nested value -> different instance
        assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'e', 4}}) != mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}})

        # A differing nested key -> different instance.
        # The comparison used to sit inside the first call's parentheses, which
        # passed a bool as `d` and never compared the two instances.
        assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) != mixin._get_instance(d={'a': 1, 'b': 2, 'g': {'d', 4}})

        assert mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) == mixin._get_instance(d={'a': 1}, e=[1, 2, 3])

        assert mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) != mixin._get_instance(d={'a': 1}, e=[1, 2, 3, 4])

        # The same cookiejar object reuses the instance; a new jar does not
        cookiejar = YoutubeDLCookieJar()
        assert mixin._get_instance(b=[1, 2], c=cookiejar) == mixin._get_instance(b=[1, 2], c=cookiejar)

        assert mixin._get_instance(b=[1, 2], c=cookiejar) != mixin._get_instance(b=[1, 2], c=YoutubeDLCookieJar())

        # Different order
        assert mixin._get_instance(c=cookiejar, b=[1, 2]) == mixin._get_instance(b=[1, 2], c=cookiejar)

        # After clearing the store, the same arguments give a new instance
        m = mixin._get_instance(t=1234)
        assert mixin._get_instance(t=1234) == m
        mixin._clear_instances()
        assert mixin._get_instance(t=1234) != m
|
||||
|
||||
|
||||
class TestNetworkingExceptions:
    """Tests for `HTTPError`, its `_CompatHTTPError` wrapper and `IncompleteRead`."""

    @staticmethod
    def create_response(status):
        """Build a small in-memory `Response` with the given HTTP status."""
        return Response(fp=io.BytesIO(b'test'), url='http://example.com', headers={'tesT': 'test'}, status=status)

    @pytest.mark.parametrize('http_error_class', [HTTPError, lambda r: _CompatHTTPError(HTTPError(r))])
    def test_http_error(self, http_error_class):
        """The error exposes status, message, reason and the untouched response."""
        response = self.create_response(403)
        error = http_error_class(response)

        assert error.status == 403
        assert str(error) == error.msg == 'HTTP Error 403: Forbidden'
        assert error.reason == response.reason
        assert error.response is response

        body = error.response.read()
        assert body == b'test'
        assert repr(error) == '<HTTPError 403: Forbidden>'

    @pytest.mark.parametrize('http_error_class', [HTTPError, lambda *args, **kwargs: _CompatHTTPError(HTTPError(*args, **kwargs))])
    def test_redirect_http_error(self, http_error_class):
        """`redirect_loop=True` is reflected in the message but not in the reason."""
        response = self.create_response(301)
        error = http_error_class(response, redirect_loop=True)
        assert str(error) == error.msg == 'HTTP Error 301: Moved Permanently (redirect loop detected)'
        assert error.reason == 'Moved Permanently'

    def test_compat_http_error(self):
        """The compat wrapper is both error types and warns on legacy attribute access."""
        response = self.create_response(403)
        error = _CompatHTTPError(HTTPError(response))
        assert isinstance(error, HTTPError)
        assert isinstance(error, urllib.error.HTTPError)

        @contextlib.contextmanager
        def raises_deprecation_warning():
            # Passes only if the wrapped code emitted exactly one warning
            # and that warning is a DeprecationWarning.
            with warnings.catch_warnings(record=True) as caught:
                warnings.simplefilter('always')
                yield

                if not caught:
                    pytest.fail('Did not raise DeprecationWarning')
                elif len(caught) > 1:
                    pytest.fail(f'Raised multiple warnings: {caught}')
                elif not issubclass(caught[-1].category, DeprecationWarning):
                    pytest.fail(f'Expected DeprecationWarning, got {caught[-1].category}')
                caught.clear()

        with raises_deprecation_warning():
            assert error.code == 403

        with raises_deprecation_warning():
            assert error.getcode() == 403

        with raises_deprecation_warning():
            assert error.hdrs is error.response.headers

        with raises_deprecation_warning():
            assert error.info() is error.response.headers

        with raises_deprecation_warning():
            assert error.headers is error.response.headers

        with raises_deprecation_warning():
            assert error.filename == error.response.url

        with raises_deprecation_warning():
            assert error.url == error.response.url

        with raises_deprecation_warning():
            assert error.geturl() == error.response.url

        # Passthrough file operations
        with raises_deprecation_warning():
            assert error.read() == b'test'

        with raises_deprecation_warning():
            assert not error.closed

        with raises_deprecation_warning():
            # Technically Response operations are also passed through, which should not be used.
            assert error.get_header('test') == 'test'

        # Should not raise a warning
        error.close()

    @pytest.mark.skipif(
        platform.python_implementation() == 'PyPy', reason='garbage collector works differently in pypy')
    def test_compat_http_error_autoclose(self):
        """Dropping a compat error must leave the wrapped response open."""
        # Compat HTTPError should not autoclose response
        response = self.create_response(403)
        # Deliberately not bound to a name, so the error object is discarded right away
        _CompatHTTPError(HTTPError(response))
        assert not response.closed

    def test_incomplete_read_error(self):
        """`IncompleteRead` formats its message with and without an expected count."""
        error = IncompleteRead(4, 3, cause='test')
        assert isinstance(error, IncompleteRead)
        assert repr(error) == '<IncompleteRead: 4 bytes read, 3 more expected>'
        assert str(error) == error.msg == '4 bytes read, 3 more expected'
        assert error.partial == 4
        assert error.expected == 3
        assert error.cause == 'test'

        # Only the number of bytes read is given
        error = IncompleteRead(3)
        assert repr(error) == '<IncompleteRead: 3 bytes read>'
        assert str(error) == '3 bytes read'
|
@ -0,0 +1,13 @@
|
||||
# flake8: noqa: F405
# Re-export everything from the standard `types` module under this name.
from types import *  # noqa: F403

from .compat_utils import passthrough_module

passthrough_module(__name__, 'types')
# The helper is only needed during import; do not leave it in this namespace.
del passthrough_module

# Provide `NoneType` where `types` does not export it.
try:
    # NB: pypy has builtin NoneType, so checking NameError won't work
    from types import NoneType  # >= 3.10
except ImportError:
    NoneType = type(None)
|
@ -1,196 +0,0 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
parse_age_limit,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class AsianCrushBaseIE(InfoExtractor):
    """Shared API access and metadata parsing for the AsianCrush family of sites."""

    _VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))'
    # Fields of a video object that are searched for a Kaltura URL
    _KALTURA_KEYS = [
        'video_url', 'progressive_url', 'download_url', 'thumbnail_url',
        'widescreen_thumbnail_url', 'screencap_widescreen',
    ]
    # Per-host suffix inserted after "api" in the API host name
    _API_SUFFIX = {'retrocrush.tv': '-ott'}

    def _call_api(self, host, endpoint, video_id, query, resource):
        """Query `endpoint` of the API for `host` and return the response's 'objects' value."""
        api_url = 'https://api%s.%s/%s' % (self._API_SUFFIX.get(host, ''), host, endpoint)
        response = self._download_json(
            api_url, video_id,
            'Downloading %s JSON metadata' % resource, query=query,
            headers=self.geo_verification_headers())
        return response['objects']

    def _download_object_data(self, host, object_id, resource):
        """Return the first object the 'search' endpoint yields for `object_id`."""
        objects = self._call_api(
            host, 'search', object_id, {'id': object_id}, resource)
        return objects[0]

    def _get_object_description(self, obj):
        """Return the stripped long description, falling back to the short one."""
        description = obj.get('long_description') or obj.get('short_description')
        return strip_or_none(description)

    def _parse_video_data(self, video):
        """Turn an API video object into a url_transparent result pointing at Kaltura."""
        title = video['name']

        # Use the first candidate URL that embeds a Kaltura partner and entry ID
        entry_id = partner_id = None
        for key in self._KALTURA_KEYS:
            candidate_url = video.get(key)
            if not candidate_url:
                continue
            match = re.search(r'/p/(\d+)/.+?/entryId/([^/]+)/', candidate_url)
            if not match:
                continue
            partner_id, entry_id = match.groups()
            break

        meta_categories = try_get(video, lambda x: x['meta']['categories'], list) or []
        category_names = (category.get('name') for category in meta_categories)
        categories = [name for name in category_names if name]

        show_info = video.get('show_info') or {}
        rating = video.get('mpaa_rating') or video.get('tv_rating')

        return {
            '_type': 'url_transparent',
            'url': 'kaltura:%s:%s' % (partner_id, entry_id),
            'ie_key': KalturaIE.ie_key(),
            'id': entry_id,
            'title': title,
            'description': self._get_object_description(video),
            'age_limit': parse_age_limit(rating),
            'categories': categories,
            'series': show_info.get('show_name'),
            'season_number': int_or_none(show_info.get('season_num')),
            'season_id': show_info.get('season_id'),
            'episode_number': int_or_none(show_info.get('episode_num')),
        }
|
||||
|
||||
|
||||
class AsianCrushIE(AsianCrushBaseIE):
    """Extractor for single video pages on the AsianCrush family of sites."""

    _VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt',
        'md5': 'c3b740e48d0ba002a42c0b72857beae6',
        'info_dict': {
            'id': '1_y4tmjm5r',
            'ext': 'mp4',
            'title': 'Women Who Flirt',
            'description': 'md5:b65c7e0ae03a85585476a62a186f924c',
            'timestamp': 1496936429,
            'upload_date': '20170608',
            'uploader_id': 'craig@crifkin.com',
            'age_limit': 13,
            'categories': 'count:5',
            'duration': 5812,
        },
    }, {
        'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
        'only_matching': True,
    }, {
        'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
        'only_matching': True,
    }, {
        'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
        'only_matching': True,
    }, {
        'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
        'only_matching': True,
    }, {
        'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
        'only_matching': True,
    }, {
        'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
        'only_matching': True,
    }, {
        'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Look the video up through the API and hand the result over to Kaltura."""
        host, video_id = self._match_valid_url(url).groups()

        if host == 'cocoro.tv':
            # On cocoro.tv prefer the entry ID embedded in the page, if there is one
            webpage = self._download_webpage(url, video_id)
            raw_embed_vars = self._search_regex(
                r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
                default='{}')
            embed_vars = self._parse_json(raw_embed_vars, video_id, fatal=False) or {}
            video_id = embed_vars.get('entry_id') or video_id

        video = self._download_object_data(host, video_id, 'video')
        return self._parse_video_data(video)
|
||||
|
||||
|
||||
class AsianCrushPlaylistIE(AsianCrushBaseIE):
    """Extractor for series pages on the AsianCrush family of sites."""

    _VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai',
        'info_dict': {
            'id': '6447',
            'title': 'Fruity Samurai',
            'description': 'md5:7535174487e4a202d3872a7fc8f2f154',
        },
        'playlist_count': 13,
    }, {
        'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
        'only_matching': True,
    }, {
        'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
        'only_matching': True,
    }, {
        'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
        'only_matching': True,
    }, {
        'url': 'https://www.retrocrush.tv/series/012355s/true-tears',
        'only_matching': True,
    }]
    _PAGE_SIZE = 1000000000

    def _fetch_page(self, domain, parent_id, page):
        """Yield a parsed result for every video on API page `page` (0-based) of `parent_id`."""
        query = {
            'max': self._PAGE_SIZE,
            'object_type': 'video',
            'parent_id': parent_id,
            'start': page * self._PAGE_SIZE,
        }
        videos = self._call_api(
            domain, 'getreferencedobjects', parent_id, query, 'page %d' % (page + 1))
        yield from map(self._parse_video_data, videos)

    def _real_extract(self, url):
        """Build the playlist from the API, or from the web page for cocoro.tv."""
        host, playlist_id = self._match_valid_url(url).groups()

        if host != 'cocoro.tv':
            show = self._download_object_data(host, playlist_id, 'show')
            title = show.get('name')
            description = self._get_object_description(show)
            entries = OnDemandPagedList(
                functools.partial(self._fetch_page, host, playlist_id),
                self._PAGE_SIZE)
            return self.playlist_result(entries, playlist_id, title, description)

        # cocoro.tv: collect the episode links from the series page itself
        webpage = self._download_webpage(url, playlist_id)

        entries = []
        link_re = r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL
        for link in re.finditer(link_re, webpage):
            attrs = extract_attributes(link.group(0))
            if attrs.get('class') != 'clearfix':
                continue
            entries.append(self.url_result(
                link.group('url'), ie=AsianCrushIE.ie_key()))

        # Try the title sources in order; each later one is only consulted if needed
        title = self._html_search_regex(
            r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
            'title', default=None)
        if not title:
            title = self._og_search_title(webpage, default=None)
        if not title:
            title = self._html_search_meta(
                'twitter:title', webpage, 'title', default=None)
        if not title:
            title = self._html_extract_title(webpage)
        if title:
            # Cut everything from the first " | " separator onwards
            title = re.sub(r'\s*\|\s*.+?$', '', title)

        description = self._og_search_description(webpage, default=None)
        if not description:
            description = self._html_search_meta(
                'twitter:description', webpage, 'description', fatal=False)

        return self.playlist_result(entries, playlist_id, title, description)
|
@ -0,0 +1,87 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AxsIE(InfoExtractor):
    """Extractor for videos on axs.tv."""

    IE_NAME = 'axs.tv'
    _VALID_URL = r'https?://(?:www\.)?axs\.tv/(?:channel/(?:[^/?#]+/)+)?video/(?P<id>[^/?#]+)'

    _TESTS = [{
        'url': 'https://www.axs.tv/video/5f4dc776b70e4f1c194f22ef/',
        'md5': '8d97736ae8e50c64df528e5e676778cf',
        'info_dict': {
            'id': '5f4dc776b70e4f1c194f22ef',
            'title': 'Small Town',
            'ext': 'mp4',
            'description': 'md5:e314d28bfaa227a4d7ec965fae19997f',
            'upload_date': '20230602',
            'timestamp': 1685729564,
            'duration': 1284.216,
            'series': 'Rock & Roll Road Trip with Sammy Hagar',
            'season': 2,
            'episode': '3',
            'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394',
        },
    }, {
        'url': 'https://www.axs.tv/channel/rock-star-interview/video/daryl-hall',
        'md5': '300ae795cd8f9984652c0949734ffbdc',
        'info_dict': {
            'id': '5f488148b70e4f392572977c',
            'display_id': 'daryl-hall',
            'title': 'Daryl Hall',
            'ext': 'mp4',
            'description': 'md5:e54ecaa0f4b5683fc9259e9e4b196628',
            'upload_date': '20230214',
            'timestamp': 1676403615,
            'duration': 2570.668,
            'series': 'The Big Interview with Dan Rather',
            'season': 3,
            'episode': '5',
            'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32',
        },
    }]

    def _real_extract(self, url):
        """Resolve the page's video/company IDs, then fetch metadata, formats and subtitles."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The page assigns an object holding both IDs to `mountObj`
        mount_obj = self._search_json(
            r'mountObj\s*=', webpage, 'video ID data', display_id,
            transform_source=js_to_json)
        video_id = mount_obj['video_id']
        company_id = mount_obj['company_id']

        api_response = self._download_json(
            f'https://api.myspotlight.tv/dotplayer/video/{company_id}/{video_id}',
            video_id, query={'device_type': 'desktop_web'})
        meta = api_response['video']

        formats = self._extract_m3u8_formats(
            meta['video_m3u8'], video_id, 'mp4', m3u8_id='hls')

        # Only captions whose 'srtPath' is a usable URL are kept; language defaults to 'en'
        subtitles = {}
        for caption in traverse_obj(meta, ('closeCaption', lambda _, v: url_or_none(v['srtPath']))):
            language = caption.get('srtShortLang') or 'en'
            subtitles.setdefault(language, []).append(
                {'ext': caption.get('srtExt'), 'url': caption['srtPath']})

        info = {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
        }
        info.update(traverse_obj(meta, {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'series': ('seriestitle', {str}),
            'season': ('season', {int}),
            'episode': ('episode', {str}),
            'duration': ('duration', {float_or_none}),
            'timestamp': ('updated_at', {parse_iso8601}),
            'thumbnail': ('thumb', {url_or_none}),
        }))
        info['subtitles'] = subtitles
        return info
|
@ -1,56 +1,170 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_text_and_html_by_tag,
|
||||
get_elements_by_class,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
determine_ext,
|
||||
mimetype2ext,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
def html_get_element(tag=None, cls=None):
    """Return a one-argument function that picks the first matching element out of HTML.

    With `cls` given the lookup goes through `get_elements_by_class` (with
    `tag` forwarded as a keyword argument); otherwise it goes through
    `get_element_text_and_html_by_tag`. The returned function yields item 0
    of the (variadic-wrapped) lookup result.
    """
    assert tag or cls, 'One of tag or class is required'

    lookup = (
        functools.partial(get_elements_by_class, cls, tag=tag) if cls
        else functools.partial(get_element_text_and_html_by_tag, tag))

    def html_get_element_wrapper(html):
        found = variadic(lookup(html))
        return found[0]

    return html_get_element_wrapper
|
||||
|
||||
|
||||
class BpbIE(InfoExtractor):
|
||||
IE_DESC = 'Bundeszentrale für politische Bildung'
|
||||
_VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?bpb\.de/(?:[^/?#]+/)*(?P<id>\d+)(?:[/?#]|$)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
||||
'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
|
||||
'info_dict': {
|
||||
'id': '297',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Kooperative Berlin',
|
||||
'description': 'md5:f4f75885ba009d3e2b156247a8941ce6',
|
||||
'release_date': '20160115',
|
||||
'series': 'Interview auf dem Geschichtsforum 1989 | 2009',
|
||||
'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D',
|
||||
'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
|
||||
'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/video/522184/krieg-flucht-und-falschmeldungen-wirstattdesinformation-2/',
|
||||
'info_dict': {
|
||||
'id': '522184',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||
'description': 'md5:f83c795ff8f825a69456a9e51fc15903',
|
||||
'release_date': '20230621',
|
||||
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB',
|
||||
'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/lernen/bewegtbild-und-politische-bildung/webvideo/518789/krieg-flucht-und-falschmeldungen-wirstattdesinformation-1/',
|
||||
'info_dict': {
|
||||
'id': '518789',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||
'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8',
|
||||
'release_date': '20230302',
|
||||
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D',
|
||||
'title': 'md5:3e956f264bb501f6383f10495a401da4',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/podcasts/apuz-podcast/539727/apuz-20-china/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/audio/315813/folge-1-eine-einfuehrung/',
|
||||
'info_dict': {
|
||||
'id': '315813',
|
||||
'ext': 'mp3',
|
||||
'creator': 'Axel Schröder',
|
||||
'description': 'md5:eda9d1af34e5912efef5baf54fba4427',
|
||||
'release_date': '20200921',
|
||||
'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager',
|
||||
'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94',
|
||||
'title': 'Folge 1: Eine Einführung',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/517806/die-weltanschauung-der-neuen-rechten/',
|
||||
'info_dict': {
|
||||
'id': '517806',
|
||||
'ext': 'mp3',
|
||||
'creator': 'Bundeszentrale für politische Bildung',
|
||||
'description': 'md5:594689600e919912aade0b2871cc3fed',
|
||||
'release_date': '20230127',
|
||||
'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"',
|
||||
'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0',
|
||||
'title': 'Die Weltanschauung der "Neuen Rechten"',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/reihen/zahlen-und-fakten-soziale-situation-filme/520153/zahlen-und-fakten-die-soziale-situation-in-deutschland-migration/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TITLE_RE = re.compile('(?P<title>[^<]*)<[^>]+>(?P<series>[^<]*)')
|
||||
|
||||
def _parse_vue_attributes(self, name, string, video_id):
    """Extract the attributes of the first `<name ...>` tag found in `string`.

    Values of attributes whose name starts with ':' are replaced by the result
    of parsing them as JSON (after `js_to_json`, non-fatally).
    """
    opening_tag = self._search_regex(rf'(<{name}(?:"[^"]*?"|[^>])*>)', string, name)
    attributes = extract_attributes(opening_tag)

    bound_keys = [key for key in attributes if key.startswith(':')]
    for key in bound_keys:
        attributes[key] = self._parse_json(
            attributes[key], video_id, transform_source=js_to_json, fatal=False)

    return attributes
|
||||
|
||||
@staticmethod
def _process_source(source):
    """Convert one player source entry into a format dict, or None without a valid URL."""
    media_url = url_or_none(source['src'])
    if not media_url:
        return None

    mime_type = source.get('type', '')
    extension = mimetype2ext(mime_type)
    is_video = mime_type.startswith('video')

    if is_video:
        # For videos the note is the part after the last '_' of the URL minus its final '.suffix'
        stem = media_url.rpartition('.')[0]
        note = stem.rpartition('_')[2]
    else:
        note = None

    return {
        'url': media_url,
        'ext': extension,
        'vcodec': None if is_video else 'none',
        'quality': 10 if note == 'high' else 0,
        'format_note': note,
        'format_id': join_nonempty(extension, note),
    }
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
|
||||
video_info_dicts = re.findall(
|
||||
r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)
|
||||
|
||||
formats = []
|
||||
for video_info in video_info_dicts:
|
||||
video_info = self._parse_json(
|
||||
video_info, video_id, transform_source=js_to_json, fatal=False)
|
||||
if not video_info:
|
||||
continue
|
||||
video_url = video_info.get('src')
|
||||
if not video_url:
|
||||
continue
|
||||
quality = 'high' if '_high' in video_url else 'low'
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'quality': 10 if quality == 'high' else 0,
|
||||
'format_note': quality,
|
||||
'format_id': '%s-%s' % (quality, determine_ext(video_url)),
|
||||
})
|
||||
title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
|
||||
json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'title': traverse_obj(title_result, ('title', {str.strip})) or None,
|
||||
# This metadata could be interpreted otherwise, but it fits "series" the most
|
||||
'series': traverse_obj(title_result, ('series', {str.strip})) or None,
|
||||
'description': join_nonempty(*traverse_obj(webpage, [(
|
||||
{html_get_element(cls='opening-intro')},
|
||||
[{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}],
|
||||
), {clean_html}]), delim='\n\n') or None,
|
||||
'creator': self._html_search_meta('author', webpage),
|
||||
'uploader': self._html_search_meta('publisher', webpage),
|
||||
'release_date': unified_strdate(self._html_search_meta('date', webpage)),
|
||||
'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
|
||||
**traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
|
||||
'formats': (':sources', ..., {self._process_source}),
|
||||
'thumbnail': ('poster', {lambda x: urljoin(url, x)}),
|
||||
}),
|
||||
}
|
||||
|
@ -0,0 +1,127 @@
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
traverse_obj,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BrilliantpalaBaseIE(InfoExtractor):
    """Shared login handling and extraction for the brilliantpala.org subdomains.

    Subclasses set `_DOMAIN` to a concrete host and provide a `_VALID_URL`
    with `course_id` and `content_id` groups.
    """

    _NETRC_MACHINE = 'brilliantpala'
    _DOMAIN = '{subdomain}.brilliantpala.org'

    def _initialize_pre_login(self):
        """Derive the per-domain endpoint URLs from `_DOMAIN`."""
        self._HOMEPAGE = f'https://{self._DOMAIN}'
        self._LOGIN_API = f'{self._HOMEPAGE}/login/'
        self._LOGOUT_DEVICES_API = f'{self._HOMEPAGE}/logout_devices/?next=/'
        # `{content_id}` is left as a placeholder to be filled in per request
        self._CONTENT_API = f'{self._HOMEPAGE}/api/v2.4/contents/{{content_id}}/'
        self._HLS_AES_URI = f'{self._HOMEPAGE}/api/v2.5/video_contents/{{content_id}}/key/'

    def _get_logged_in_username(self, url, video_id):
        """Return the username embedded in the page at `url`.

        Raises the login-required error when the request ends up on the login
        page, and fails extraction when the page contains no username.
        """
        webpage, urlh = self._download_webpage_handle(url, video_id)
        if self._LOGIN_API == urlh.url:
            self.raise_login_required()
        # NOTE: 'username' used to be passed as the 4th positional argument,
        # which is `default` rather than `group`; a page without a username
        # then silently produced the literal string 'username'.
        return self._html_search_regex(
            r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'stream page info',
            group='username')

    def _perform_login(self, username, password):
        """Submit the login form and, if offered, log out the account's other devices."""
        login_form = self._hidden_inputs(self._download_webpage(
            self._LOGIN_API, None, 'Downloading login page'))
        login_form.update({
            'username': username,
            'password': password,
        })
        # Mirror the form's CSRF token into the cookie of the same purpose
        self._set_cookie(self._DOMAIN, 'csrftoken', login_form['csrfmiddlewaretoken'])

        logged_page = self._download_webpage(
            self._LOGIN_API, None, note='Logging in', headers={'Referer': self._LOGIN_API},
            data=urlencode_postdata(login_form))

        if self._html_search_regex(
                r'(Your username / email and password)', logged_page, 'auth fail', default=None):
            raise ExtractorError('wrong username or password', expected=True)

        # the maximum number of logins is one
        if self._html_search_regex(
                r'(Logout Other Devices)', logged_page, 'logout devices button', default=None):
            logout_device_form = self._hidden_inputs(logged_page)
            self._download_webpage(
                self._LOGOUT_DEVICES_API, None, headers={'Referer': self._LOGIN_API},
                note='Logging out other devices', data=urlencode_postdata(logout_device_form))

    def _real_extract(self, url):
        """Return a playlist with one entry per stream of the requested content."""
        course_id, content_id = self._match_valid_url(url).group('course_id', 'content_id')
        video_id = f'{course_id}-{content_id}'

        username = self._get_logged_in_username(url, video_id)

        content_json = self._download_json(
            self._CONTENT_API.format(content_id=content_id), video_id,
            note='Fetching content info', errnote='Unable to fetch content info')

        entries = []
        # Only streams carrying both an ID and a URL are considered
        for stream in traverse_obj(content_json, ('video', 'streams', lambda _, v: v['id'] and v['url'])):
            formats = self._extract_m3u8_formats(stream['url'], video_id, fatal=False)
            if not formats:
                continue
            entries.append({
                'id': str(stream['id']),
                'title': content_json.get('title'),
                'formats': formats,
                'hls_aes': {'uri': self._HLS_AES_URI.format(content_id=content_id)},
                # The X-Key header is the SHA-256 hex digest of the username
                'http_headers': {'X-Key': hashlib.sha256(username.encode('ascii')).hexdigest()},
                'thumbnail': content_json.get('cover_image'),
            })

        return self.playlist_result(
            entries, playlist_id=video_id, playlist_title=content_json.get('title'))
|
||||
|
||||
|
||||
class BrilliantpalaElearnIE(BrilliantpalaBaseIE):
    """Extractor for course contents on elearn.brilliantpala.org."""

    IE_NAME = 'Brilliantpala:Elearn'
    IE_DESC = 'VoD on elearn.brilliantpala.org'
    # Fill the base class' domain template with this site's subdomain
    _DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='elearn')
    _VALID_URL = r'https?://elearn\.brilliantpala\.org/courses/(?P<course_id>\d+)/contents/(?P<content_id>\d+)/?'
    _TESTS = [{
        'url': 'https://elearn.brilliantpala.org/courses/42/contents/12345/',
        'only_matching': True,
    }, {
        'url': 'https://elearn.brilliantpala.org/courses/98/contents/36683/',
        'info_dict': {
            'id': '23577',
            'ext': 'mp4',
            'title': 'Physical World, Units and Measurements - 1',
            'thumbnail': 'https://d1j3vi2u94ebt0.cloudfront.net/institute/brilliantpalalms/chapter_contents/26237/e657f81b90874be19795c7ea081f8d5c.png',
            'live_status': 'not_live',
        },
        'params': {
            'skip_download': True,
        },
    }]
|
||||
|
||||
|
||||
class BrilliantpalaClassesIE(BrilliantpalaBaseIE):
    """Extractor for course contents on classes.brilliantpala.org."""

    IE_NAME = 'Brilliantpala:Classes'
    IE_DESC = 'VoD on classes.brilliantpala.org'
    # Fill the base class' domain template with this site's subdomain
    _DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='classes')
    _VALID_URL = r'https?://classes\.brilliantpala\.org/courses/(?P<course_id>\d+)/contents/(?P<content_id>\d+)/?'
    _TESTS = [{
        'url': 'https://classes.brilliantpala.org/courses/42/contents/12345/',
        'only_matching': True,
    }, {
        'url': 'https://classes.brilliantpala.org/courses/416/contents/25445/',
        'info_dict': {
            'id': '9128',
            'ext': 'mp4',
            'title': 'Motion in a Straight Line - Class 1',
            'thumbnail': 'https://d3e4y8hquds3ek.cloudfront.net/institute/brilliantpalaelearn/chapter_contents/ff5ba838d0ec43419f67387fe1a01fa8.png',
            'live_status': 'not_live',
        },
        'params': {
            'skip_download': True,
        },
    }]
|
@ -0,0 +1,39 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class Canal1IE(InfoExtractor):
    """Extractor for canal1.com.co pages; delegates to the embed URL found in the page."""

    _VALID_URL = r'https?://(?:www\.|noticias\.)?canal1\.com\.co/(?:[^?#&])+/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://canal1.com.co/noticias/napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco/',
        'info_dict': {
            'id': '63b39f6b354977084b85ab54',
            'display_id': 'napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco',
            'title': 'Ñapa I Una cadena de producción de arroz que se quedó en veremos y abandonada en el departamento del Chocó',
            'description': 'md5:bc49c6d64d20610ea1e7daf079a0d013',
            'thumbnail': r're:^https?://[^?#]+63b39f6b354977084b85ab54',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://noticias.canal1.com.co/noticias/tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter/',
        'info_dict': {
            'id': '63b39e93f5fd223aa32250fb',
            'display_id': 'tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter',
            'title': 'Tres I El triste récord que impuso Elon Musk, el dueño de Tesla y de Twitter',
            'description': 'md5:d9f691f131a21ce6767ca6c05d17d791',
            'thumbnail': r're:^https?://[^?#]+63b39e93f5fd223aa32250fb',
            'ext': 'mp4',
        },
    }, {
        # Geo-restricted to Colombia
        'url': 'https://canal1.com.co/programas/guerreros-canal-1/video-inedito-guerreros-despedida-kewin-zarate/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Find the page's "embedUrl" and return it as a url_transparent result."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        embed_url = self._search_regex(
            r'"embedUrl"\s*:\s*"([^"]+)', webpage, 'embed url')
        return self.url_result(
            embed_url, display_id=display_id, url_transparent=True)
|
@ -0,0 +1,136 @@
|
||||
import base64
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
traverse_obj,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class CaracolTvPlayIE(InfoExtractor):
    # The id is the opaque token following /videoDetails/; it is sent verbatim
    # to the feed API as ``include_ids``.
    _VALID_URL = r'https?://play\.caracoltv\.com/videoDetails/(?P<id>[^/?#]+)'
    _NETRC_MACHINE = 'caracoltv-play'

    _TESTS = [{
        'url': 'https://play.caracoltv.com/videoDetails/OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
        'info_dict': {
            'id': 'OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
            'title': 'La teoría del promedio',
            'description': 'md5:1cdd6d2c13f19ef0d9649ab81a023ac3',
        },
        'playlist_count': 6,
    }, {
        'url': 'https://play.caracoltv.com/videoDetails/OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==/ella?season=0',
        'info_dict': {
            'id': 'OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==',
            'title': 'Ella',
            'description': 'md5:a639b1feb5ddcc0cff92a489b4e544b8',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://play.caracoltv.com/videoDetails/OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==/la-vuelta-al-mundo-en-80-risas-2022?season=0',
        'info_dict': {
            'id': 'OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==',
            'title': 'La vuelta al mundo en 80 risas 2022',
            'description': 'md5:e97aac36106e5c37ebf947b3350106a4',
        },
        'playlist_count': 17,
    }, {
        'url': 'https://play.caracoltv.com/videoDetails/MzoxX3BwbjRmNjB1',
        'only_matching': True,
    }]

    # Token sent as "Authorization: Bearer ..." to the feed API; set by _perform_login().
    _USER_TOKEN = None

    def _extract_app_token(self, webpage):
        """Build the HTTP Basic credential used to request an API bearer token.

        The key/secret pair is read from the ``mediation.live`` section of the
        site's coreConfig JS when it can be located and parsed; otherwise the
        hard-coded pair below is used.

        @param webpage  HTML of the site's landing page, or a falsy value if it
                        could not be downloaded
        @returns        base64-encoded ``key:secret`` string
        """
        # A non-fatal download yields a falsy value on failure; skip the regex
        # search entirely in that case instead of feeding it a non-string.
        config_js_path = None
        if webpage:
            config_js_path = self._search_regex(
                r'<script[^>]+src\s*=\s*"([^"]+coreConfig.js[^"]+)', webpage, 'config js url', fatal=False)

        mediation_config = {}
        if config_js_path:
            config_js = self._download_webpage(
                urljoin('https://play.caracoltv.com/', config_js_path), None, fatal=False, note='Extracting JS config')
            # Same guard as above: only parse the config if it was actually fetched.
            if config_js:
                mediation_config = self._search_json(
                    r'mediation\s*:', config_js, 'mediation_config', None,
                    transform_source=js_to_json, fatal=False)

        # Fall back to the known key/secret when the config did not provide them.
        key = traverse_obj(
            mediation_config, ('live', 'key')) or '795cd9c089a1fc48094524a5eba85a3fca1331817c802f601735907c8bbb4f50'
        secret = traverse_obj(
            mediation_config, ('live', 'secret')) or '64dec00a6989ba83d087621465b5e5d38bdac22033b0613b659c442c78976fa0'

        return base64.b64encode(f'{key}:{secret}'.encode()).decode()

    def _perform_login(self, email, password):
        """Log in with the given credentials and store the user token in ``_USER_TOKEN``.

        Two requests are made: the app token (HTTP Basic) is exchanged for a
        bearer token, which then authorizes the actual login request.
        """
        # Best-effort: if the landing page cannot be fetched, _extract_app_token()
        # falls back to its built-in credentials.
        webpage = self._download_webpage('https://play.caracoltv.com/', None, fatal=False)
        app_token = self._extract_app_token(webpage)

        bearer_token = self._download_json(
            'https://eu-gateway.inmobly.com/applications/oauth', None, data=b'', note='Retrieving bearer token',
            headers={'Authorization': f'Basic {app_token}'})['token']

        self._USER_TOKEN = self._download_json(
            'https://eu-gateway.inmobly.com/user/login', None, note='Performing login', headers={
                'Content-Type': 'application/json',
                'Authorization': f'Bearer {bearer_token}',
            }, data=json.dumps({
                'device_data': {
                    # A fresh random device id is generated for every login
                    'device_id': str(uuid.uuid4()),
                    'device_token': '',
                    'device_type': 'web',
                },
                'login_data': {
                    'enabled': True,
                    'email': email,
                    'password': password,
                },
            }).encode())['user_token']

    def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None):
        """Turn one feed item into an info dict.

        @param video_data     feed item; must contain ``id`` and ``stream_url``
        @param series_id      id of the containing series, if any (also used to
                              label the m3u8 download)
        @param season_id      id of the containing season, if any
        @param season_number  season ordinal, if known
        """
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['stream_url'], series_id, 'mp4')

        return {
            'id': video_data['id'],
            'title': video_data.get('name'),
            'description': video_data.get('description'),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': traverse_obj(
                video_data, ('extra_thumbs', ..., {'url': 'thumb_url', 'height': 'height', 'width': 'width'})),
            'series_id': series_id,
            'season_id': season_id,
            'season_number': int_or_none(season_number),
            'episode_number': int_or_none(video_data.get('item_order')),
            # entry_type 3 is treated as a live stream
            'is_live': video_data.get('entry_type') == 3,
        }

    def _extract_series_seasons(self, seasons, series_id):
        """Lazily yield an info dict for every episode of every given season."""
        for season in seasons:
            api_response = self._download_json(
                'https://eu-gateway.inmobly.com/feed', series_id, query={'season_id': season['id']},
                headers={'Authorization': f'Bearer {self._USER_TOKEN}'})

            season_number = season.get('order')
            for episode in api_response['items']:
                yield self._extract_video(episode, series_id, season['id'], season_number)

    def _real_extract(self, url):
        series_id = self._match_id(url)

        # No token has been obtained yet: log in with the service's guest account.
        if self._USER_TOKEN is None:
            self._perform_login('guest@inmobly.com', 'Test@gus1')

        api_response = self._download_json(
            'https://eu-gateway.inmobly.com/feed', series_id, query={'include_ids': series_id},
            headers={'Authorization': f'Bearer {self._USER_TOKEN}'})['items'][0]

        # Items without seasons are single videos; everything else is a playlist.
        if not api_response.get('seasons'):
            return self._extract_video(api_response)

        return self.playlist_result(
            self._extract_series_seasons(api_response['seasons'], series_id),
            series_id, **traverse_obj(api_response, {
                'title': 'name',
                'description': 'description',
            }))
|
@ -0,0 +1,136 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
filter_dict,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CineverseBaseIE(InfoExtractor):
    # Common URL prefix for all supported sites. The matched domain is exposed
    # as the ``host`` group; each domain is regex-escaped before being joined
    # into the alternation.
    _VALID_URL_BASE = r'https://www\.(?P<host>%s)' % '|'.join(
        re.escape(domain) for domain in (
            'cineverse.com',
            'asiancrush.com',
            'dovechannel.com',
            'screambox.com',
            'midnightpulp.com',
            'fandor.com',
            'retrocrush.tv',
        ))
|
||||
|
||||
|
||||
class CineverseIE(CineverseBaseIE):
    # Single-video pages: <site>/watch/<ID>/...
    _VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/watch/(?P<id>[A-Z0-9]+)'
    _TESTS = [{
        'url': 'https://www.asiancrush.com/watch/DMR00018919/Women-Who-Flirt',
        'skip': 'geo-blocked',
        'info_dict': {
            'title': 'Women Who Flirt',
            'ext': 'mp4',
            'id': 'DMR00018919',
            'modified_timestamp': 1678744575289,
            'cast': ['Xun Zhou', 'Xiaoming Huang', 'Yi-Lin Sie', 'Sonia Sui', 'Quniciren'],
            'duration': 5811.597,
            'description': 'md5:892fd62a05611d394141e8394ace0bc6',
            'age_limit': 13,
        }
    }, {
        'url': 'https://www.retrocrush.tv/watch/1000000023016/Archenemy! Crystal Bowie',
        'skip': 'geo-blocked',
        'info_dict': {
            'title': 'Archenemy! Crystal Bowie',
            'ext': 'mp4',
            'id': '1000000023016',
            'episode_number': 3,
            'season_number': 1,
            'cast': ['Nachi Nozawa', 'Yoshiko Sakakibara', 'Toshiko Fujita'],
            'age_limit': 0,
            'episode': 'Episode 3',
            'season': 'Season 1',
            'duration': 1485.067,
            'description': 'Cobra meets a beautiful bounty hunter by the name of Jane Royal.',
            'series': 'Space Adventure COBRA (Original Japanese)',
        }
    }]

    def _real_extract(self, url):
        # The details extractor may smuggle the list of allowed countries into
        # the URL; use it to set up geo bypass before any request is made.
        url, smuggled_data = unsmuggle_url(url, default={})
        geo_countries = smuggled_data.get('geo_countries')
        self._initialize_geo_bypass({'countries': geo_countries})

        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        idetails = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['idetails']

        # err_code 1200 is handled as a geo restriction
        if idetails.get('err_code') == 1200:
            self.raise_geo_restricted(
                'This video is not available from your location due to geo restriction. '
                'You may be able to bypass it by using the /details/ page instead of the /watch/ page',
                countries=geo_countries)

        # Either of the two subtitle fields may carry a URL; invalid ones are dropped
        english_subs = traverse_obj(
            idetails, (('cc_url_vtt', 'subtitle_url'), {'url': {url_or_none}}))
        formats = self._extract_m3u8_formats(idetails['url'], video_id)
        metadata = traverse_obj(idetails, {
            'title': 'title',
            'id': ('details', 'item_id'),
            'description': ('details', 'description'),
            # the page reports the duration in thousandths of the output unit
            'duration': ('duration', {lambda x: x / 1000}),
            'cast': ('details', 'cast', {lambda x: x.split(', ')}),
            'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}),
            'season_number': ('details', 'season', {int_or_none}),
            'episode_number': ('details', 'episode', {int_or_none}),
            'age_limit': ('details', 'rating_code', {parse_age_limit}),
            'series': ('details', 'series_details', 'title'),
        })

        return {
            # filter_dict removes the 'en' key when no subtitle URL was found
            'subtitles': filter_dict({'en': english_subs or None}),
            'formats': formats,
            **metadata,
        }
|
||||
|
||||
|
||||
class CineverseDetailsIE(CineverseBaseIE):
    # Details pages: <site>/details/<ID>/... (either a series or a single title)
    _VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/details/(?P<id>[A-Z0-9]+)'
    _TESTS = [{
        'url': 'https://www.retrocrush.tv/details/1000000023012/Space-Adventure-COBRA-(Original-Japanese)',
        'playlist_mincount': 30,
        'info_dict': {
            'title': 'Space Adventure COBRA (Original Japanese)',
            'id': '1000000023012',
        }
    }, {
        'url': 'https://www.asiancrush.com/details/NNVG4938/Hansel-and-Gretel',
        'info_dict': {
            'id': 'NNVG4938',
            'ext': 'mp4',
            'title': 'Hansel and Gretel',
            'description': 'md5:e3e4c35309c2e82aee044f972c2fb05d',
            'cast': ['Jeong-myeong Cheon', 'Eun Won-jae', 'Shim Eun-gyeong', 'Ji-hee Jin', 'Hee-soon Park', 'Lydia Park', 'Kyeong-ik Kim'],
            'duration': 7030.732,
        },
    }]

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        host, series_id = mobj.group('host'), mobj.group('id')
        webpage = self._download_webpage(url, series_id)
        page_props = self._search_nextjs_data(webpage, series_id)['props']['pageProps']

        # Countries the title is available in, as listed by the page
        allowed_countries = traverse_obj(
            page_props, ('itemDetailsData', 'geo_country', {lambda x: x.split(', ')}))
        blocked_message = traverse_obj(page_props, ('itemDetailsData', 'playback_err_msg'))
        is_geoblocked = blocked_message == 'This title is not available in your location.'

        def watch_page_result(item):
            # Point at the matching /watch/ page on the same site; when blocked,
            # smuggle the allowed countries along for CineverseIE to use.
            watch_url = f'https://www.{host}/watch/{item["item_id"]}/{item["title"]}'
            if is_geoblocked:
                watch_url = smuggle_url(watch_url, {'geo_countries': allowed_countries})
            return self.url_result(watch_url, CineverseIE)

        # Flatten all seasons, keeping only episodes with both an id and a title
        episodes = traverse_obj(
            page_props, ('seasonEpisodes', ..., 'episodes', lambda _, v: v['item_id'] and v['title']))
        if not episodes:
            # No episode list: the details page describes a single title
            return watch_page_result(page_props['itemDetailsData'])

        return self.playlist_result(
            list(map(watch_page_result, episodes)), playlist_id=series_id,
            playlist_title=traverse_obj(page_props, ('itemDetailsData', 'title')))
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue