mirror of https://github.com/yt-dlp/yt-dlp
Merge branch 'yt-dlp:master' into fix/iamf
commit
d60baa052f
@ -0,0 +1,41 @@
|
||||
name: Signature Tests
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- .github/workflows/signature-tests.yml
|
||||
- test/test_youtube_signature.py
|
||||
- yt_dlp/jsinterp.py
|
||||
pull_request:
|
||||
paths:
|
||||
- .github/workflows/signature-tests.yml
|
||||
- test/test_youtube_signature.py
|
||||
- yt_dlp/jsinterp.py
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: signature-tests-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
jobs:
|
||||
tests:
|
||||
name: Signature Tests
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest]
|
||||
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.10, pypy-3.11]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install test requirements
|
||||
run: python3 ./devscripts/install_deps.py --only-optional --include test
|
||||
- name: Run tests
|
||||
timeout-minutes: 15
|
||||
run: |
|
||||
python3 -m yt_dlp -v || true # Print debug head
|
||||
python3 ./devscripts/run_tests.py test/test_youtube_signature.py
|
@ -0,0 +1,235 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
import datetime as dt
|
||||
import json
|
||||
import math
|
||||
import re
|
||||
import unittest
|
||||
|
||||
from yt_dlp.utils.jslib import devalue
|
||||
|
||||
|
||||
TEST_CASES_EQUALS = [{
|
||||
'name': 'int',
|
||||
'unparsed': [-42],
|
||||
'parsed': -42,
|
||||
}, {
|
||||
'name': 'str',
|
||||
'unparsed': ['woo!!!'],
|
||||
'parsed': 'woo!!!',
|
||||
}, {
|
||||
'name': 'Number',
|
||||
'unparsed': [['Object', 42]],
|
||||
'parsed': 42,
|
||||
}, {
|
||||
'name': 'String',
|
||||
'unparsed': [['Object', 'yar']],
|
||||
'parsed': 'yar',
|
||||
}, {
|
||||
'name': 'Infinity',
|
||||
'unparsed': -4,
|
||||
'parsed': math.inf,
|
||||
}, {
|
||||
'name': 'negative Infinity',
|
||||
'unparsed': -5,
|
||||
'parsed': -math.inf,
|
||||
}, {
|
||||
'name': 'negative zero',
|
||||
'unparsed': -6,
|
||||
'parsed': -0.0,
|
||||
}, {
|
||||
'name': 'RegExp',
|
||||
'unparsed': [['RegExp', 'regexp', 'gim']], # XXX: flags are ignored
|
||||
'parsed': re.compile('regexp'),
|
||||
}, {
|
||||
'name': 'Date',
|
||||
'unparsed': [['Date', '2001-09-09T01:46:40.000Z']],
|
||||
'parsed': dt.datetime.fromtimestamp(1e9, tz=dt.timezone.utc),
|
||||
}, {
|
||||
'name': 'Array',
|
||||
'unparsed': [[1, 2, 3], 'a', 'b', 'c'],
|
||||
'parsed': ['a', 'b', 'c'],
|
||||
}, {
|
||||
'name': 'Array (empty)',
|
||||
'unparsed': [[]],
|
||||
'parsed': [],
|
||||
}, {
|
||||
'name': 'Array (sparse)',
|
||||
'unparsed': [[-2, 1, -2], 'b'],
|
||||
'parsed': [None, 'b', None],
|
||||
}, {
|
||||
'name': 'Object',
|
||||
'unparsed': [{'foo': 1, 'x-y': 2}, 'bar', 'z'],
|
||||
'parsed': {'foo': 'bar', 'x-y': 'z'},
|
||||
}, {
|
||||
'name': 'Set',
|
||||
'unparsed': [['Set', 1, 2, 3], 1, 2, 3],
|
||||
'parsed': [1, 2, 3],
|
||||
}, {
|
||||
'name': 'Map',
|
||||
'unparsed': [['Map', 1, 2], 'a', 'b'],
|
||||
'parsed': [['a', 'b']],
|
||||
}, {
|
||||
'name': 'BigInt',
|
||||
'unparsed': [['BigInt', '1']],
|
||||
'parsed': 1,
|
||||
}, {
|
||||
'name': 'Uint8Array',
|
||||
'unparsed': [['Uint8Array', 'AQID']],
|
||||
'parsed': [1, 2, 3],
|
||||
}, {
|
||||
'name': 'ArrayBuffer',
|
||||
'unparsed': [['ArrayBuffer', 'AQID']],
|
||||
'parsed': [1, 2, 3],
|
||||
}, {
|
||||
'name': 'str (repetition)',
|
||||
'unparsed': [[1, 1], 'a string'],
|
||||
'parsed': ['a string', 'a string'],
|
||||
}, {
|
||||
'name': 'None (repetition)',
|
||||
'unparsed': [[1, 1], None],
|
||||
'parsed': [None, None],
|
||||
}, {
|
||||
'name': 'dict (repetition)',
|
||||
'unparsed': [[1, 1], {}],
|
||||
'parsed': [{}, {}],
|
||||
}, {
|
||||
'name': 'Object without prototype',
|
||||
'unparsed': [['null']],
|
||||
'parsed': {},
|
||||
}, {
|
||||
'name': 'cross-realm POJO',
|
||||
'unparsed': [{}],
|
||||
'parsed': {},
|
||||
}]
|
||||
|
||||
TEST_CASES_IS = [{
|
||||
'name': 'bool',
|
||||
'unparsed': [True],
|
||||
'parsed': True,
|
||||
}, {
|
||||
'name': 'Boolean',
|
||||
'unparsed': [['Object', False]],
|
||||
'parsed': False,
|
||||
}, {
|
||||
'name': 'undefined',
|
||||
'unparsed': -1,
|
||||
'parsed': None,
|
||||
}, {
|
||||
'name': 'null',
|
||||
'unparsed': [None],
|
||||
'parsed': None,
|
||||
}, {
|
||||
'name': 'NaN',
|
||||
'unparsed': -3,
|
||||
'parsed': math.nan,
|
||||
}]
|
||||
|
||||
TEST_CASES_INVALID = [{
|
||||
'name': 'empty string',
|
||||
'unparsed': '',
|
||||
'error': ValueError,
|
||||
'pattern': r'expected int or list as input',
|
||||
}, {
|
||||
'name': 'hole',
|
||||
'unparsed': -2,
|
||||
'error': ValueError,
|
||||
'pattern': r'invalid integer input',
|
||||
}, {
|
||||
'name': 'string',
|
||||
'unparsed': 'hello',
|
||||
'error': ValueError,
|
||||
'pattern': r'expected int or list as input',
|
||||
}, {
|
||||
'name': 'number',
|
||||
'unparsed': 42,
|
||||
'error': ValueError,
|
||||
'pattern': r'invalid integer input',
|
||||
}, {
|
||||
'name': 'boolean',
|
||||
'unparsed': True,
|
||||
'error': ValueError,
|
||||
'pattern': r'expected int or list as input',
|
||||
}, {
|
||||
'name': 'null',
|
||||
'unparsed': None,
|
||||
'error': ValueError,
|
||||
'pattern': r'expected int or list as input',
|
||||
}, {
|
||||
'name': 'object',
|
||||
'unparsed': {},
|
||||
'error': ValueError,
|
||||
'pattern': r'expected int or list as input',
|
||||
}, {
|
||||
'name': 'empty array',
|
||||
'unparsed': [],
|
||||
'error': ValueError,
|
||||
'pattern': r'expected a non-empty list as input',
|
||||
}, {
|
||||
'name': 'Python negative indexing',
|
||||
'unparsed': [[1, 2, 3, 4, 5, 6, 7, -7], 1, 2, 3, 4, 5, 6, 7],
|
||||
'error': IndexError,
|
||||
'pattern': r'invalid index: -7',
|
||||
}]
|
||||
|
||||
|
||||
class TestDevalue(unittest.TestCase):
|
||||
def test_devalue_parse_equals(self):
|
||||
for tc in TEST_CASES_EQUALS:
|
||||
self.assertEqual(devalue.parse(tc['unparsed']), tc['parsed'], tc['name'])
|
||||
|
||||
def test_devalue_parse_is(self):
|
||||
for tc in TEST_CASES_IS:
|
||||
self.assertIs(devalue.parse(tc['unparsed']), tc['parsed'], tc['name'])
|
||||
|
||||
def test_devalue_parse_invalid(self):
|
||||
for tc in TEST_CASES_INVALID:
|
||||
with self.assertRaisesRegex(tc['error'], tc['pattern'], msg=tc['name']):
|
||||
devalue.parse(tc['unparsed'])
|
||||
|
||||
def test_devalue_parse_cyclical(self):
|
||||
name = 'Map (cyclical)'
|
||||
result = devalue.parse([['Map', 1, 0], 'self'])
|
||||
self.assertEqual(result[0][0], 'self', name)
|
||||
self.assertIs(result, result[0][1], name)
|
||||
|
||||
name = 'Set (cyclical)'
|
||||
result = devalue.parse([['Set', 0, 1], 42])
|
||||
self.assertEqual(result[1], 42, name)
|
||||
self.assertIs(result, result[0], name)
|
||||
|
||||
result = devalue.parse([[0]])
|
||||
self.assertIs(result, result[0], 'Array (cyclical)')
|
||||
|
||||
name = 'Object (cyclical)'
|
||||
result = devalue.parse([{'self': 0}])
|
||||
self.assertIs(result, result['self'], name)
|
||||
|
||||
name = 'Object with null prototype (cyclical)'
|
||||
result = devalue.parse([['null', 'self', 0]])
|
||||
self.assertIs(result, result['self'], name)
|
||||
|
||||
name = 'Objects (cyclical)'
|
||||
result = devalue.parse([[1, 2], {'second': 2}, {'first': 1}])
|
||||
self.assertIs(result[0], result[1]['first'], name)
|
||||
self.assertIs(result[1], result[0]['second'], name)
|
||||
|
||||
def test_devalue_parse_revivers(self):
|
||||
self.assertEqual(
|
||||
devalue.parse([['indirect', 1], {'a': 2}, 'b'], revivers={'indirect': lambda x: x}),
|
||||
{'a': 'b'}, 'revivers (indirect)')
|
||||
|
||||
self.assertEqual(
|
||||
devalue.parse([['parse', 1], '{"a":0}'], revivers={'parse': lambda x: json.loads(x)}),
|
||||
{'a': 0}, 'revivers (parse)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
Binary file not shown.
Before Width: | Height: | Size: 3.8 KiB |
@ -1,33 +0,0 @@
|
||||
from .brightcove import BrightcoveNewBaseIE
|
||||
from ..utils import extract_attributes
|
||||
|
||||
|
||||
class BandaiChannelIE(BrightcoveNewBaseIE):
|
||||
IE_NAME = 'bandaichannel'
|
||||
_VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P<id>\d+/\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.b-ch.com/titles/514/001',
|
||||
'md5': 'a0f2d787baa5729bed71108257f613a4',
|
||||
'info_dict': {
|
||||
'id': '6128044564001',
|
||||
'ext': 'mp4',
|
||||
'title': 'メタルファイターMIKU 第1話',
|
||||
'timestamp': 1580354056,
|
||||
'uploader_id': '5797077852001',
|
||||
'upload_date': '20200130',
|
||||
'duration': 1387.733,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
attrs = extract_attributes(self._search_regex(
|
||||
r'(<video-js[^>]+\bid="bcplayer"[^>]*>)', webpage, 'player'))
|
||||
bc = self._download_json(
|
||||
'https://pbifcd.b-ch.com/v1/playbackinfo/ST/70/' + attrs['data-info'],
|
||||
video_id, headers={'X-API-KEY': attrs['data-auth'].strip()})['bc']
|
||||
return self._parse_brightcove_metadata(bc, bc['id'])
|
@ -1,91 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BellMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?
|
||||
(?P<domain>
|
||||
(?:
|
||||
ctv|
|
||||
tsn|
|
||||
bnn(?:bloomberg)?|
|
||||
thecomedynetwork|
|
||||
discovery|
|
||||
discoveryvelocity|
|
||||
sciencechannel|
|
||||
investigationdiscovery|
|
||||
animalplanet|
|
||||
bravo|
|
||||
mtv|
|
||||
space|
|
||||
etalk|
|
||||
marilyn
|
||||
)\.ca|
|
||||
(?:much|cp24)\.com
|
||||
)/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
|
||||
'md5': '3e5b8e38370741d5089da79161646635',
|
||||
'info_dict': {
|
||||
'id': '1403070',
|
||||
'ext': 'flv',
|
||||
'title': 'David Cockfield\'s Top Picks',
|
||||
'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
|
||||
'upload_date': '20180525',
|
||||
'timestamp': 1527288600,
|
||||
'season_id': '73997',
|
||||
'season': '2018',
|
||||
'thumbnail': 'http://images2.9c9media.com/image_asset/2018_5_25_baf30cbd-b28d-4a18-9903-4bb8713b00f5_PNG_956x536.jpg',
|
||||
'tags': [],
|
||||
'categories': ['ETFs'],
|
||||
'season_number': 8,
|
||||
'duration': 272.038,
|
||||
'series': 'Market Call Tonight',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.etalk.ca/video?videoid=663455',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cp24.com/video?clipId=1982548',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_DOMAINS = {
|
||||
'thecomedynetwork': 'comedy',
|
||||
'discoveryvelocity': 'discvel',
|
||||
'sciencechannel': 'discsci',
|
||||
'investigationdiscovery': 'invdisc',
|
||||
'animalplanet': 'aniplan',
|
||||
'etalk': 'ctv',
|
||||
'bnnbloomberg': 'bnn',
|
||||
'marilyn': 'ctv_marilyn',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, video_id = self._match_valid_url(url).groups()
|
||||
domain = domain.split('.')[0]
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': f'9c9media:{self._DOMAINS.get(domain, domain)}_web:{video_id}',
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
@ -1,49 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P<id>(?:show|movie)s/[^/]+/[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88',
|
||||
'info_dict': {
|
||||
'id': '2102249',
|
||||
'ext': 'flv',
|
||||
'title': 'Wednesday, December 23, 2020',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.',
|
||||
'timestamp': 1608732000,
|
||||
'upload_date': '20201223',
|
||||
'series': 'Your Morning',
|
||||
'season': '2020-2021',
|
||||
'season_number': 5,
|
||||
'episode_number': 88,
|
||||
'tags': ['Your Morning'],
|
||||
'categories': ['Talk Show'],
|
||||
'duration': 7467.126,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
content = self._download_json(
|
||||
'https://www.ctv.ca/space-graphql/graphql', display_id, query={
|
||||
'query': '''{
|
||||
resolvedPath(path: "/%s") {
|
||||
lastSegment {
|
||||
content {
|
||||
... on AxisContent {
|
||||
axisId
|
||||
videoPlayerDestCode
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % display_id, # noqa: UP031
|
||||
})['data']['resolvedPath']['lastSegment']['content']
|
||||
video_id = content['axisId']
|
||||
return self.url_result(
|
||||
'9c9media:{}:{}'.format(content['videoPlayerDestCode'], video_id),
|
||||
'NineCNineMedia', video_id)
|
@ -1,32 +1,66 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json, traverse_obj
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import subs_list_to_dict, traverse_obj
|
||||
|
||||
|
||||
class MonsterSirenHypergryphMusicIE(InfoExtractor):
|
||||
IE_NAME = 'monstersiren'
|
||||
IE_DESC = '塞壬唱片'
|
||||
_API_BASE = 'https://monster-siren.hypergryph.com/api'
|
||||
_VALID_URL = r'https?://monster-siren\.hypergryph\.com/music/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://monster-siren.hypergryph.com/music/514562',
|
||||
'info_dict': {
|
||||
'id': '514562',
|
||||
'ext': 'wav',
|
||||
'artists': ['塞壬唱片-MSR'],
|
||||
'album': 'Flame Shadow',
|
||||
'title': 'Flame Shadow',
|
||||
'album': 'Flame Shadow',
|
||||
'artists': ['塞壬唱片-MSR'],
|
||||
'description': 'md5:19e2acfcd1b65b41b29e8079ab948053',
|
||||
'thumbnail': r're:https?://web\.hycdn\.cn/siren/pic/.+\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://monster-siren.hypergryph.com/music/514518',
|
||||
'info_dict': {
|
||||
'id': '514518',
|
||||
'ext': 'wav',
|
||||
'title': 'Heavenly Me (Instrumental)',
|
||||
'album': 'Heavenly Me',
|
||||
'artists': ['塞壬唱片-MSR', 'AIYUE blessed : 理名'],
|
||||
'description': 'md5:ce790b41c932d1ad72eb791d1d8ae598',
|
||||
'thumbnail': r're:https?://web\.hycdn\.cn/siren/pic/.+\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
audio_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, audio_id)
|
||||
json_data = self._search_json(
|
||||
r'window\.g_initialProps\s*=', webpage, 'data', audio_id, transform_source=js_to_json)
|
||||
song = self._download_json(f'{self._API_BASE}/song/{audio_id}', audio_id)
|
||||
if traverse_obj(song, 'code') != 0:
|
||||
msg = traverse_obj(song, ('msg', {str}, filter))
|
||||
raise ExtractorError(
|
||||
msg or 'API returned an error response', expected=bool(msg))
|
||||
|
||||
album = None
|
||||
if album_id := traverse_obj(song, ('data', 'albumCid', {str})):
|
||||
album = self._download_json(
|
||||
f'{self._API_BASE}/album/{album_id}/detail', album_id, fatal=False)
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'title': traverse_obj(json_data, ('player', 'songDetail', 'name')),
|
||||
'url': traverse_obj(json_data, ('player', 'songDetail', 'sourceUrl')),
|
||||
'ext': 'wav',
|
||||
'vcodec': 'none',
|
||||
'artists': traverse_obj(json_data, ('player', 'songDetail', 'artists', ...)),
|
||||
'album': traverse_obj(json_data, ('musicPlay', 'albumDetail', 'name')),
|
||||
**traverse_obj(song, ('data', {
|
||||
'title': ('name', {str}),
|
||||
'artists': ('artists', ..., {str}),
|
||||
'subtitles': ({'url': 'lyricUrl'}, all, {subs_list_to_dict(lang='en')}),
|
||||
'url': ('sourceUrl', {url_or_none}),
|
||||
})),
|
||||
**traverse_obj(album, ('data', {
|
||||
'album': ('name', {str}),
|
||||
'description': ('intro', {clean_html}),
|
||||
'thumbnail': ('coverUrl', {url_or_none}),
|
||||
})),
|
||||
}
|
||||
|
@ -1,408 +0,0 @@
|
||||
import base64
|
||||
import itertools
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
jwt_decode_hs256,
|
||||
parse_age_limit,
|
||||
try_call,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class JioCinemaBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'jiocinema'
|
||||
_GEO_BYPASS = False
|
||||
_ACCESS_TOKEN = None
|
||||
_REFRESH_TOKEN = None
|
||||
_GUEST_TOKEN = None
|
||||
_USER_ID = None
|
||||
_DEVICE_ID = None
|
||||
_API_HEADERS = {'Origin': 'https://www.jiocinema.com', 'Referer': 'https://www.jiocinema.com/'}
|
||||
_APP_NAME = {'appName': 'RJIL_JioCinema'}
|
||||
_APP_VERSION = {'appVersion': '5.0.0'}
|
||||
_API_SIGNATURES = 'o668nxgzwff'
|
||||
_METADATA_API_BASE = 'https://content-jiovoot.voot.com/psapi'
|
||||
_ACCESS_HINT = 'the `accessToken` from your browser local storage'
|
||||
_LOGIN_HINT = (
|
||||
'Log in with "-u phone -p <PHONE_NUMBER>" to authenticate with OTP, '
|
||||
f'or use "-u token -p <ACCESS_TOKEN>" to log in with {_ACCESS_HINT}. '
|
||||
'If you have previously logged in with yt-dlp and your session '
|
||||
'has been cached, you can use "-u device -p <DEVICE_ID>"')
|
||||
|
||||
def _cache_token(self, token_type):
|
||||
assert token_type in ('access', 'refresh', 'all')
|
||||
if token_type in ('access', 'all'):
|
||||
self.cache.store(
|
||||
JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-access', JioCinemaBaseIE._ACCESS_TOKEN)
|
||||
if token_type in ('refresh', 'all'):
|
||||
self.cache.store(
|
||||
JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh', JioCinemaBaseIE._REFRESH_TOKEN)
|
||||
|
||||
def _call_api(self, url, video_id, note='Downloading API JSON', headers={}, data={}):
|
||||
return self._download_json(
|
||||
url, video_id, note, data=json.dumps(data, separators=(',', ':')).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Accept': 'application/json',
|
||||
**self._API_HEADERS,
|
||||
**headers,
|
||||
}, expected_status=(400, 403, 474))
|
||||
|
||||
def _call_auth_api(self, service, endpoint, note, headers={}, data={}):
|
||||
return self._call_api(
|
||||
f'https://auth-jiocinema.voot.com/{service}service/apis/v4/{endpoint}',
|
||||
None, note=note, headers=headers, data=data)
|
||||
|
||||
def _refresh_token(self):
|
||||
if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._DEVICE_ID:
|
||||
raise ExtractorError('User token has expired', expected=True)
|
||||
response = self._call_auth_api(
|
||||
'token', 'refreshtoken', 'Refreshing token',
|
||||
headers={'accesstoken': self._ACCESS_TOKEN}, data={
|
||||
**self._APP_NAME,
|
||||
'deviceId': self._DEVICE_ID,
|
||||
'refreshToken': self._REFRESH_TOKEN,
|
||||
**self._APP_VERSION,
|
||||
})
|
||||
refresh_token = response.get('refreshTokenId')
|
||||
if refresh_token and refresh_token != JioCinemaBaseIE._REFRESH_TOKEN:
|
||||
JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
|
||||
self._cache_token('refresh')
|
||||
JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']
|
||||
self._cache_token('access')
|
||||
|
||||
def _fetch_guest_token(self):
|
||||
JioCinemaBaseIE._DEVICE_ID = ''.join(random.choices(string.digits, k=10))
|
||||
guest_token = self._call_auth_api(
|
||||
'token', 'guest', 'Downloading guest token', data={
|
||||
**self._APP_NAME,
|
||||
'deviceType': 'phone',
|
||||
'os': 'ios',
|
||||
'deviceId': self._DEVICE_ID,
|
||||
'freshLaunch': False,
|
||||
'adId': self._DEVICE_ID,
|
||||
**self._APP_VERSION,
|
||||
})
|
||||
self._GUEST_TOKEN = guest_token['authToken']
|
||||
self._USER_ID = guest_token['userId']
|
||||
|
||||
def _call_login_api(self, endpoint, guest_token, data, note):
|
||||
return self._call_auth_api(
|
||||
'user', f'loginotp/{endpoint}', note, headers={
|
||||
**self.geo_verification_headers(),
|
||||
'accesstoken': self._GUEST_TOKEN,
|
||||
**self._APP_NAME,
|
||||
**traverse_obj(guest_token, 'data', {
|
||||
'deviceType': ('deviceType', {str}),
|
||||
'os': ('os', {str}),
|
||||
})}, data=data)
|
||||
|
||||
def _is_token_expired(self, token):
|
||||
return (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 180)
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if self._ACCESS_TOKEN and not self._is_token_expired(self._ACCESS_TOKEN):
|
||||
return
|
||||
|
||||
UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
|
||||
|
||||
if username.lower() == 'token':
|
||||
if try_call(lambda: jwt_decode_hs256(password)):
|
||||
JioCinemaBaseIE._ACCESS_TOKEN = password
|
||||
refresh_hint = 'the `refreshToken` UUID from your browser local storage'
|
||||
refresh_token = self._configuration_arg('refresh_token', [''], ie_key=JioCinemaIE)[0]
|
||||
if not refresh_token:
|
||||
self.to_screen(
|
||||
'To extend the life of your login session, in addition to your access token, '
|
||||
'you can pass --extractor-args "jiocinema:refresh_token=REFRESH_TOKEN" '
|
||||
f'where REFRESH_TOKEN is {refresh_hint}')
|
||||
elif re.fullmatch(UUID_RE, refresh_token):
|
||||
JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
|
||||
else:
|
||||
self.report_warning(f'Invalid refresh_token value. Use {refresh_hint}')
|
||||
else:
|
||||
raise ExtractorError(
|
||||
f'The password given could not be decoded as a token; use {self._ACCESS_HINT}', expected=True)
|
||||
|
||||
elif username.lower() == 'device' and re.fullmatch(rf'(?:{UUID_RE}|\d+)', password):
|
||||
JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-refresh')
|
||||
JioCinemaBaseIE._ACCESS_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-access')
|
||||
if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._ACCESS_TOKEN:
|
||||
raise ExtractorError(f'Failed to load cached tokens for device ID "{password}"', expected=True)
|
||||
|
||||
elif username.lower() == 'phone' and re.fullmatch(r'\+?\d+', password):
|
||||
self._fetch_guest_token()
|
||||
guest_token = jwt_decode_hs256(self._GUEST_TOKEN)
|
||||
initial_data = {
|
||||
'number': base64.b64encode(password.encode()).decode(),
|
||||
**self._APP_VERSION,
|
||||
}
|
||||
response = self._call_login_api('send', guest_token, initial_data, 'Requesting OTP')
|
||||
if not traverse_obj(response, ('OTPInfo', {dict})):
|
||||
raise ExtractorError('There was a problem with the phone number login attempt')
|
||||
|
||||
is_iphone = guest_token.get('os') == 'ios'
|
||||
response = self._call_login_api('verify', guest_token, {
|
||||
'deviceInfo': {
|
||||
'consumptionDeviceName': 'iPhone' if is_iphone else 'Android',
|
||||
'info': {
|
||||
'platform': {'name': 'iPhone OS' if is_iphone else 'Android'},
|
||||
'androidId': self._DEVICE_ID,
|
||||
'type': 'iOS' if is_iphone else 'Android',
|
||||
},
|
||||
},
|
||||
**initial_data,
|
||||
'otp': self._get_tfa_info('the one-time password sent to your phone'),
|
||||
}, 'Submitting OTP')
|
||||
if traverse_obj(response, 'code') == 1043:
|
||||
raise ExtractorError('Wrong OTP', expected=True)
|
||||
JioCinemaBaseIE._REFRESH_TOKEN = response['refreshToken']
|
||||
JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']
|
||||
|
||||
else:
|
||||
raise ExtractorError(self._LOGIN_HINT, expected=True)
|
||||
|
||||
user_token = jwt_decode_hs256(JioCinemaBaseIE._ACCESS_TOKEN)['data']
|
||||
JioCinemaBaseIE._USER_ID = user_token['userId']
|
||||
JioCinemaBaseIE._DEVICE_ID = user_token['deviceId']
|
||||
if JioCinemaBaseIE._REFRESH_TOKEN and username != 'device':
|
||||
self._cache_token('all')
|
||||
if self.get_param('cachedir') is not False:
|
||||
self.to_screen(
|
||||
f'NOTE: For subsequent logins you can use "-u device -p {JioCinemaBaseIE._DEVICE_ID}"')
|
||||
elif not JioCinemaBaseIE._REFRESH_TOKEN:
|
||||
JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(
|
||||
JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh')
|
||||
if JioCinemaBaseIE._REFRESH_TOKEN:
|
||||
self._cache_token('access')
|
||||
self.to_screen(f'Logging in as device ID "{JioCinemaBaseIE._DEVICE_ID}"')
|
||||
if self._is_token_expired(JioCinemaBaseIE._ACCESS_TOKEN):
|
||||
self._refresh_token()
|
||||
|
||||
|
||||
class JioCinemaIE(JioCinemaBaseIE):
|
||||
IE_NAME = 'jiocinema'
|
||||
_VALID_URL = r'https?://(?:www\.)?jiocinema\.com/?(?:movies?/[^/?#]+/|tv-shows/(?:[^/?#]+/){3})(?P<id>\d{3,})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiocinema.com/tv-shows/agnisakshi-ek-samjhauta/1/pradeep-to-stop-the-wedding/3759931',
|
||||
'info_dict': {
|
||||
'id': '3759931',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pradeep to stop the wedding?',
|
||||
'description': 'md5:75f72d1d1a66976633345a3de6d672b1',
|
||||
'episode': 'Pradeep to stop the wedding?',
|
||||
'episode_number': 89,
|
||||
'season': 'Agnisakshi…Ek Samjhauta-S1',
|
||||
'season_number': 1,
|
||||
'series': 'Agnisakshi Ek Samjhauta',
|
||||
'duration': 1238.0,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'age_limit': 13,
|
||||
'season_id': '3698031',
|
||||
'upload_date': '20230606',
|
||||
'timestamp': 1686009600,
|
||||
'release_date': '20230607',
|
||||
'genres': ['Drama'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.jiocinema.com/movies/bhediya/3754021/watch',
|
||||
'info_dict': {
|
||||
'id': '3754021',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bhediya',
|
||||
'description': 'md5:a6bf2900371ac2fc3f1447401a9f7bb0',
|
||||
'episode': 'Bhediya',
|
||||
'duration': 8500.0,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'age_limit': 13,
|
||||
'upload_date': '20230525',
|
||||
'timestamp': 1685026200,
|
||||
'release_date': '20230524',
|
||||
'genres': ['Comedy'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _extract_formats_and_subtitles(self, playback, video_id):
|
||||
m3u8_url = traverse_obj(playback, (
|
||||
'data', 'playbackUrls', lambda _, v: v['streamtype'] == 'hls', 'url', {url_or_none}, any))
|
||||
if not m3u8_url: # DRM-only content only serves dash urls
|
||||
self.report_drm(video_id)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, m3u8_id='hls')
|
||||
self._remove_duplicate_formats(formats)
|
||||
|
||||
return {
|
||||
# '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p
|
||||
'formats': traverse_obj(formats, (
|
||||
lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
if not self._ACCESS_TOKEN and self._is_token_expired(self._GUEST_TOKEN):
|
||||
self._fetch_guest_token()
|
||||
elif self._ACCESS_TOKEN and self._is_token_expired(self._ACCESS_TOKEN):
|
||||
self._refresh_token()
|
||||
|
||||
playback = self._call_api(
|
||||
f'https://apis-jiovoot.voot.com/playbackjv/v3/{video_id}', video_id,
|
||||
'Downloading playback JSON', headers={
|
||||
**self.geo_verification_headers(),
|
||||
'accesstoken': self._ACCESS_TOKEN or self._GUEST_TOKEN,
|
||||
**self._APP_NAME,
|
||||
'deviceid': self._DEVICE_ID,
|
||||
'uniqueid': self._USER_ID,
|
||||
'x-apisignatures': self._API_SIGNATURES,
|
||||
'x-platform': 'androidweb',
|
||||
'x-platform-token': 'web',
|
||||
}, data={
|
||||
'4k': False,
|
||||
'ageGroup': '18+',
|
||||
'appVersion': '3.4.0',
|
||||
'bitrateProfile': 'xhdpi',
|
||||
'capability': {
|
||||
'drmCapability': {
|
||||
'aesSupport': 'yes',
|
||||
'fairPlayDrmSupport': 'none',
|
||||
'playreadyDrmSupport': 'none',
|
||||
'widevineDRMSupport': 'none',
|
||||
},
|
||||
'frameRateCapability': [{
|
||||
'frameRateSupport': '30fps',
|
||||
'videoQuality': '1440p',
|
||||
}],
|
||||
},
|
||||
'continueWatchingRequired': False,
|
||||
'dolby': False,
|
||||
'downloadRequest': False,
|
||||
'hevc': False,
|
||||
'kidsSafe': False,
|
||||
'manufacturer': 'Windows',
|
||||
'model': 'Windows',
|
||||
'multiAudioRequired': True,
|
||||
'osVersion': '10',
|
||||
'parentalPinValid': True,
|
||||
'x-apisignatures': self._API_SIGNATURES,
|
||||
})
|
||||
|
||||
status_code = traverse_obj(playback, ('code', {int}))
|
||||
if status_code == 474:
|
||||
self.raise_geo_restricted(countries=['IN'])
|
||||
elif status_code == 1008:
|
||||
error_msg = 'This content is only available for premium users'
|
||||
if self._ACCESS_TOKEN:
|
||||
raise ExtractorError(error_msg, expected=True)
|
||||
self.raise_login_required(f'{error_msg}. {self._LOGIN_HINT}', method=None)
|
||||
elif status_code == 400:
|
||||
raise ExtractorError('The requested content is not available', expected=True)
|
||||
elif status_code is not None and status_code != 200:
|
||||
raise ExtractorError(
|
||||
f'JioCinema says: {traverse_obj(playback, ("message", {str})) or status_code}')
|
||||
|
||||
metadata = self._download_json(
|
||||
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/query/asset-details',
|
||||
video_id, fatal=False, query={
|
||||
'ids': f'include:{video_id}',
|
||||
'responseType': 'common',
|
||||
'devicePlatformType': 'desktop',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'http_headers': self._API_HEADERS,
|
||||
**self._extract_formats_and_subtitles(playback, video_id),
|
||||
**traverse_obj(playback, ('data', {
|
||||
# fallback metadata
|
||||
'title': ('name', {str}),
|
||||
'description': ('fullSynopsis', {str}),
|
||||
'series': ('show', 'name', {str}, filter),
|
||||
'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}),
|
||||
'season_number': ('episode', 'season', {int_or_none}, filter),
|
||||
'episode': ('fullTitle', {str}),
|
||||
'episode_number': ('episode', 'episodeNo', {int_or_none}, filter),
|
||||
'age_limit': ('ageNemonic', {parse_age_limit}),
|
||||
'duration': ('totalDuration', {float_or_none}),
|
||||
'thumbnail': ('images', {url_or_none}),
|
||||
})),
|
||||
**traverse_obj(metadata, ('result', 0, {
|
||||
'title': ('fullTitle', {str}),
|
||||
'description': ('fullSynopsis', {str}),
|
||||
'series': ('showName', {str}, filter),
|
||||
'season': ('seasonName', {str}, filter),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'season_id': ('seasonId', {str}, filter),
|
||||
'episode': ('fullTitle', {str}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
'timestamp': ('uploadTime', {int_or_none}),
|
||||
'release_date': ('telecastDate', {str}),
|
||||
'age_limit': ('ageNemonic', {parse_age_limit}),
|
||||
'duration': ('duration', {float_or_none}),
|
||||
'genres': ('genres', ..., {str}),
|
||||
'thumbnail': ('seo', 'ogImage', {url_or_none}),
|
||||
})),
|
||||
}
|
||||
|
||||
|
||||
class JioCinemaSeriesIE(JioCinemaBaseIE):
|
||||
IE_NAME = 'jiocinema:series'
|
||||
_VALID_URL = r'https?://(?:www\.)?jiocinema\.com/tv-shows/(?P<slug>[\w-]+)/(?P<id>\d{3,})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiocinema.com/tv-shows/naagin/3499917',
|
||||
'info_dict': {
|
||||
'id': '3499917',
|
||||
'title': 'naagin',
|
||||
},
|
||||
'playlist_mincount': 120,
|
||||
}, {
|
||||
'url': 'https://www.jiocinema.com/tv-shows/mtv-splitsvilla-x5/3499820',
|
||||
'info_dict': {
|
||||
'id': '3499820',
|
||||
'title': 'mtv-splitsvilla-x5',
|
||||
},
|
||||
'playlist_mincount': 310,
|
||||
}]
|
||||
|
||||
def _entries(self, series_id):
|
||||
seasons = traverse_obj(self._download_json(
|
||||
f'{self._METADATA_API_BASE}/voot/v1/voot-web/view/show/{series_id}', series_id,
|
||||
'Downloading series metadata JSON', query={'responseType': 'common'}), (
|
||||
'trays', lambda _, v: v['trayId'] == 'season-by-show-multifilter',
|
||||
'trayTabs', lambda _, v: v['id']))
|
||||
|
||||
for season_num, season in enumerate(seasons, start=1):
|
||||
season_id = season['id']
|
||||
label = season.get('label') or season_num
|
||||
for page_num in itertools.count(1):
|
||||
episodes = traverse_obj(self._download_json(
|
||||
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode',
|
||||
season_id, f'Downloading season {label} page {page_num} JSON', query={
|
||||
'sort': 'episode:asc',
|
||||
'id': season_id,
|
||||
'responseType': 'common',
|
||||
'page': page_num,
|
||||
}), ('result', lambda _, v: v['id'] and url_or_none(v['slug'])))
|
||||
if not episodes:
|
||||
break
|
||||
for episode in episodes:
|
||||
yield self.url_result(
|
||||
episode['slug'], JioCinemaIE, **traverse_obj(episode, {
|
||||
'video_id': 'id',
|
||||
'video_title': ('fullTitle', {str}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
}))
|
||||
|
||||
def _real_extract(self, url):
|
||||
slug, series_id = self._match_valid_url(url).group('slug', 'id')
|
||||
return self.playlist_result(self._entries(series_id), series_id, slug)
|
@ -1,358 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
unsmuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class LimelightBaseIE(InfoExtractor):
|
||||
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
|
||||
|
||||
@classmethod
|
||||
def _extract_embed_urls(cls, url, webpage):
|
||||
lm = {
|
||||
'Media': 'media',
|
||||
'Channel': 'channel',
|
||||
'ChannelList': 'channel_list',
|
||||
}
|
||||
|
||||
def smuggle(url):
|
||||
return smuggle_url(url, {'source_url': url})
|
||||
|
||||
entries = []
|
||||
for kind, video_id in re.findall(
|
||||
r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
|
||||
webpage):
|
||||
entries.append(cls.url_result(
|
||||
smuggle(f'limelight:{lm[kind]}:{video_id}'),
|
||||
f'Limelight{kind}', video_id))
|
||||
for mobj in re.finditer(
|
||||
# As per [1] class attribute should be exactly equal to
|
||||
# LimelightEmbeddedPlayerFlash but numerous examples seen
|
||||
# that don't exactly match it (e.g. [2]).
|
||||
# 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
|
||||
# 2. http://www.sedona.com/FacilitatorTraining2017
|
||||
r'''(?sx)
|
||||
<object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
|
||||
<param[^>]+
|
||||
name=(["\'])flashVars\2[^>]+
|
||||
value=(["\'])(?:(?!\3).)*(?P<kind>media|channel(?:List)?)Id=(?P<id>[a-z0-9]{32})
|
||||
''', webpage):
|
||||
kind, video_id = mobj.group('kind'), mobj.group('id')
|
||||
entries.append(cls.url_result(
|
||||
smuggle(f'limelight:{kind}:{video_id}'),
|
||||
f'Limelight{kind.capitalize()}', video_id))
|
||||
# http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page)
|
||||
for video_id in re.findall(
|
||||
r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P<id>[a-z0-9]{32})',
|
||||
webpage):
|
||||
entries.append(cls.url_result(
|
||||
smuggle(f'limelight:media:{video_id}'),
|
||||
LimelightMediaIE.ie_key(), video_id))
|
||||
return entries
|
||||
|
||||
def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
|
||||
headers = {}
|
||||
if referer:
|
||||
headers['Referer'] = referer
|
||||
try:
|
||||
return self._download_json(
|
||||
self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
|
||||
item_id, f'Downloading PlaylistService {method} JSON',
|
||||
fatal=fatal, headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
error = self._parse_json(e.cause.response.read().decode(), item_id)['detail']['contentAccessPermission']
|
||||
if error == 'CountryDisabled':
|
||||
self.raise_geo_restricted()
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise
|
||||
|
||||
def _extract(self, item_id, pc_method, mobile_method, referer=None):
|
||||
pc = self._call_playlist_service(item_id, pc_method, referer=referer)
|
||||
mobile = self._call_playlist_service(
|
||||
item_id, mobile_method, fatal=False, referer=referer)
|
||||
return pc, mobile
|
||||
|
||||
def _extract_info(self, pc, mobile, i, referer):
|
||||
get_item = lambda x, y: try_get(x, lambda x: x[y][i], dict) or {}
|
||||
pc_item = get_item(pc, 'playlistItems')
|
||||
mobile_item = get_item(mobile, 'mediaList')
|
||||
video_id = pc_item.get('mediaId') or mobile_item['mediaId']
|
||||
title = pc_item.get('title') or mobile_item['title']
|
||||
|
||||
formats = []
|
||||
urls = []
|
||||
for stream in pc_item.get('streams', []):
|
||||
stream_url = stream.get('url')
|
||||
if not stream_url or stream_url in urls:
|
||||
continue
|
||||
if not self.get_param('allow_unplayable_formats') and stream.get('drmProtected'):
|
||||
continue
|
||||
urls.append(stream_url)
|
||||
ext = determine_ext(stream_url)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream_url, video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
fmt = {
|
||||
'url': stream_url,
|
||||
'abr': float_or_none(stream.get('audioBitRate')),
|
||||
'fps': float_or_none(stream.get('videoFrameRate')),
|
||||
'ext': ext,
|
||||
}
|
||||
width = int_or_none(stream.get('videoWidthInPixels'))
|
||||
height = int_or_none(stream.get('videoHeightInPixels'))
|
||||
vbr = float_or_none(stream.get('videoBitRate'))
|
||||
if width or height or vbr:
|
||||
fmt.update({
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
})
|
||||
else:
|
||||
fmt['vcodec'] = 'none'
|
||||
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', stream_url)
|
||||
if rtmp:
|
||||
format_id = 'rtmp'
|
||||
if stream.get('videoBitRate'):
|
||||
format_id += '-%d' % int_or_none(stream['videoBitRate'])
|
||||
http_format_id = format_id.replace('rtmp', 'http')
|
||||
|
||||
CDN_HOSTS = (
|
||||
('delvenetworks.com', 'cpl.delvenetworks.com'),
|
||||
('video.llnw.net', 's2.content.video.llnw.net'),
|
||||
)
|
||||
for cdn_host, http_host in CDN_HOSTS:
|
||||
if cdn_host not in rtmp.group('host').lower():
|
||||
continue
|
||||
http_url = 'http://{}/{}'.format(http_host, rtmp.group('playpath')[4:])
|
||||
urls.append(http_url)
|
||||
if self._is_valid_url(http_url, video_id, http_format_id):
|
||||
http_fmt = fmt.copy()
|
||||
http_fmt.update({
|
||||
'url': http_url,
|
||||
'format_id': http_format_id,
|
||||
})
|
||||
formats.append(http_fmt)
|
||||
break
|
||||
|
||||
fmt.update({
|
||||
'url': rtmp.group('url'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
'app': rtmp.group('app'),
|
||||
'ext': 'flv',
|
||||
'format_id': format_id,
|
||||
})
|
||||
formats.append(fmt)
|
||||
|
||||
for mobile_url in mobile_item.get('mobileUrls', []):
|
||||
media_url = mobile_url.get('mobileUrl')
|
||||
format_id = mobile_url.get('targetMediaPlatform')
|
||||
if not media_url or media_url in urls:
|
||||
continue
|
||||
if (format_id in ('Widevine', 'SmoothStreaming')
|
||||
and not self.get_param('allow_unplayable_formats', False)):
|
||||
continue
|
||||
urls.append(media_url)
|
||||
ext = determine_ext(media_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream_url, video_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': media_url,
|
||||
'format_id': format_id,
|
||||
'quality': -10,
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
for flag in mobile_item.get('flags'):
|
||||
if flag == 'ClosedCaptions':
|
||||
closed_captions = self._call_playlist_service(
|
||||
video_id, 'getClosedCaptionsDetailsByMediaId',
|
||||
False, referer) or []
|
||||
for cc in closed_captions:
|
||||
cc_url = cc.get('webvttFileUrl')
|
||||
if not cc_url:
|
||||
continue
|
||||
lang = cc.get('languageCode') or self._search_regex(r'/([a-z]{2})\.vtt', cc_url, 'lang', default='en')
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': cc_url,
|
||||
})
|
||||
break
|
||||
|
||||
get_meta = lambda x: pc_item.get(x) or mobile_item.get(x)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': get_meta('description'),
|
||||
'formats': formats,
|
||||
'duration': float_or_none(get_meta('durationInMilliseconds'), 1000),
|
||||
'thumbnail': get_meta('previewImageUrl') or get_meta('thumbnailImageUrl'),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class LimelightMediaIE(LimelightBaseIE):
|
||||
IE_NAME = 'limelight'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
limelight:media:|
|
||||
https?://
|
||||
(?:
|
||||
link\.videoplatform\.limelight\.com/media/|
|
||||
assets\.delvenetworks\.com/player/loader\.swf
|
||||
)
|
||||
\?.*?\bmediaId=
|
||||
)
|
||||
(?P<id>[a-z0-9]{32})
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
|
||||
'info_dict': {
|
||||
'id': '3ffd040b522b4485b6d84effc750cd86',
|
||||
'ext': 'mp4',
|
||||
'title': 'HaP and the HB Prince Trailer',
|
||||
'description': 'md5:8005b944181778e313d95c1237ddb640',
|
||||
'thumbnail': r're:^https?://.*\.jpeg$',
|
||||
'duration': 144.23,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# video with subtitles
|
||||
'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335',
|
||||
'md5': '2fa3bad9ac321e23860ca23bc2c69e3d',
|
||||
'info_dict': {
|
||||
'id': 'a3e00274d4564ec4a9b29b9466432335',
|
||||
'ext': 'mp4',
|
||||
'title': '3Play Media Overview Video',
|
||||
'thumbnail': r're:^https?://.*\.jpeg$',
|
||||
'duration': 78.101,
|
||||
# TODO: extract all languages that were accessible via API
|
||||
# 'subtitles': 'mincount:9',
|
||||
'subtitles': 'mincount:1',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PLAYLIST_SERVICE_PATH = 'media'
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
video_id = self._match_id(url)
|
||||
source_url = smuggled_data.get('source_url')
|
||||
self._initialize_geo_bypass({
|
||||
'countries': smuggled_data.get('geo_countries'),
|
||||
})
|
||||
|
||||
pc, mobile = self._extract(
|
||||
video_id, 'getPlaylistByMediaId',
|
||||
'getMobilePlaylistByMediaId', source_url)
|
||||
|
||||
return self._extract_info(pc, mobile, 0, source_url)
|
||||
|
||||
|
||||
class LimelightChannelIE(LimelightBaseIE):
|
||||
IE_NAME = 'limelight:channel'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
limelight:channel:|
|
||||
https?://
|
||||
(?:
|
||||
link\.videoplatform\.limelight\.com/media/|
|
||||
assets\.delvenetworks\.com/player/loader\.swf
|
||||
)
|
||||
\?.*?\bchannelId=
|
||||
)
|
||||
(?P<id>[a-z0-9]{32})
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
|
||||
'info_dict': {
|
||||
'id': 'ab6a524c379342f9b23642917020c082',
|
||||
'title': 'Javascript Sample Code',
|
||||
'description': 'Javascript Sample Code - http://www.delvenetworks.com/sample-code/playerCode-demo.html',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PLAYLIST_SERVICE_PATH = 'channel'
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
channel_id = self._match_id(url)
|
||||
source_url = smuggled_data.get('source_url')
|
||||
|
||||
pc, mobile = self._extract(
|
||||
channel_id, 'getPlaylistByChannelId',
|
||||
'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
|
||||
source_url)
|
||||
|
||||
entries = [
|
||||
self._extract_info(pc, mobile, i, source_url)
|
||||
for i in range(len(pc['playlistItems']))]
|
||||
|
||||
return self.playlist_result(
|
||||
entries, channel_id, pc.get('title'), mobile.get('description'))
|
||||
|
||||
|
||||
class LimelightChannelListIE(LimelightBaseIE):
|
||||
IE_NAME = 'limelight:channel_list'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
limelight:channel_list:|
|
||||
https?://
|
||||
(?:
|
||||
link\.videoplatform\.limelight\.com/media/|
|
||||
assets\.delvenetworks\.com/player/loader\.swf
|
||||
)
|
||||
\?.*?\bchannelListId=
|
||||
)
|
||||
(?P<id>[a-z0-9]{32})
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
|
||||
'info_dict': {
|
||||
'id': '301b117890c4465c8179ede21fd92e2b',
|
||||
'title': 'Website - Hero Player',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}, {
|
||||
'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PLAYLIST_SERVICE_PATH = 'channel_list'
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_list_id = self._match_id(url)
|
||||
|
||||
channel_list = self._call_playlist_service(
|
||||
channel_list_id, 'getMobileChannelListById')
|
||||
|
||||
entries = [
|
||||
self.url_result('limelight:channel:{}'.format(channel['id']), 'LimelightChannel')
|
||||
for channel in channel_list['channelList']]
|
||||
|
||||
return self.playlist_result(
|
||||
entries, channel_list_id, channel_list['title'])
|
@ -0,0 +1,134 @@
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import int_or_none, parse_iso8601, url_or_none, urlhandle_detect_ext
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class MixlrIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<username>[\w-]+)\.mixlr\.com/events/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://suncity-104-9fm.mixlr.com/events/4387115',
|
||||
'info_dict': {
|
||||
'id': '4387115',
|
||||
'ext': 'mp3',
|
||||
'title': r're:SUNCITY 104.9FM\'s live audio \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
|
||||
'uploader': 'suncity-104-9fm',
|
||||
'like_count': int,
|
||||
'thumbnail': r're:https://imagecdn\.mixlr\.com/cdn-cgi/image/[^/?#]+/cd5b34d05fa2cee72d80477724a2f02e.png',
|
||||
'timestamp': 1751943773,
|
||||
'upload_date': '20250708',
|
||||
'release_timestamp': 1751943764,
|
||||
'release_date': '20250708',
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://brcountdown.mixlr.com/events/4395480',
|
||||
'info_dict': {
|
||||
'id': '4395480',
|
||||
'ext': 'aac',
|
||||
'title': r're:Beats Revolution Countdown Episodio 461 \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
|
||||
'description': 'md5:5cacd089723f7add3f266bd588315bb3',
|
||||
'uploader': 'brcountdown',
|
||||
'like_count': int,
|
||||
'thumbnail': r're:https://imagecdn\.mixlr\.com/cdn-cgi/image/[^/?#]+/c48727a59f690b87a55d47d123ba0d6d.jpg',
|
||||
'timestamp': 1752354007,
|
||||
'upload_date': '20250712',
|
||||
'release_timestamp': 1752354000,
|
||||
'release_date': '20250712',
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.brcountdown.mixlr.com/events/4395480',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
username, event_id = self._match_valid_url(url).group('username', 'id')
|
||||
|
||||
broadcast_info = self._download_json(
|
||||
f'https://api.mixlr.com/v3/channels/{username}/events/{event_id}', event_id)
|
||||
|
||||
formats = []
|
||||
format_url = traverse_obj(
|
||||
broadcast_info, ('included', 0, 'attributes', 'progressive_stream_url', {url_or_none}))
|
||||
if format_url:
|
||||
urlh = self._request_webpage(
|
||||
HEADRequest(format_url), event_id, fatal=False, note='Checking stream')
|
||||
if urlh and urlh.status == 200:
|
||||
ext = urlhandle_detect_ext(urlh)
|
||||
if ext == 'octet-stream':
|
||||
self.report_warning(
|
||||
'The server did not return a valid file extension for the stream URL. '
|
||||
'Assuming an mp3 stream; postprocessing may fail if this is incorrect')
|
||||
ext = 'mp3'
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'ext': ext,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
release_timestamp = traverse_obj(
|
||||
broadcast_info, ('data', 'attributes', 'starts_at', {str}))
|
||||
if not formats and release_timestamp:
|
||||
self.raise_no_formats(f'This event will start at {release_timestamp}', expected=True)
|
||||
|
||||
return {
|
||||
'id': event_id,
|
||||
'uploader': username,
|
||||
'formats': formats,
|
||||
'release_timestamp': parse_iso8601(release_timestamp),
|
||||
**traverse_obj(broadcast_info, ('included', 0, 'attributes', {
|
||||
'title': ('title', {str}),
|
||||
'timestamp': ('started_at', {parse_iso8601}),
|
||||
'concurrent_view_count': ('concurrent_view_count', {int_or_none}),
|
||||
'like_count': ('heart_count', {int_or_none}),
|
||||
'is_live': ('live', {bool}),
|
||||
})),
|
||||
**traverse_obj(broadcast_info, ('data', 'attributes', {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'timestamp': ('started_at', {parse_iso8601}),
|
||||
'concurrent_view_count': ('concurrent_view_count', {int_or_none}),
|
||||
'like_count': ('heart_count', {int_or_none}),
|
||||
'thumbnail': ('artwork_url', {url_or_none}),
|
||||
'uploader_id': ('broadcaster_id', {str}),
|
||||
})),
|
||||
}
|
||||
|
||||
|
||||
class MixlrRecoringIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<username>[\w-]+)\.mixlr\.com/recordings/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://biblewayng.mixlr.com/recordings/2375193',
|
||||
'info_dict': {
|
||||
'id': '2375193',
|
||||
'ext': 'mp3',
|
||||
'title': "God's Jewels and Their Resting Place Bro. Adeniji",
|
||||
'description': 'Preached February 21, 2024 in the evening',
|
||||
'uploader_id': '8659190',
|
||||
'duration': 10968,
|
||||
'thumbnail': r're:https://imagecdn\.mixlr\.com/cdn-cgi/image/[^/?#]+/ceca120ef707f642abeea6e29cd74238.jpg',
|
||||
'timestamp': 1708544542,
|
||||
'upload_date': '20240221',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
username, recording_id = self._match_valid_url(url).group('username', 'id')
|
||||
|
||||
recording_info = self._download_json(
|
||||
f'https://api.mixlr.com/v3/channels/{username}/recordings/{recording_id}', recording_id)
|
||||
|
||||
return {
|
||||
'id': recording_id,
|
||||
**traverse_obj(recording_info, ('data', 'attributes', {
|
||||
'ext': ('file_format', {str}),
|
||||
'url': ('url', {url_or_none}),
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'timestamp': ('created_at', {parse_iso8601}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'thumbnail': ('artwork_url', {url_or_none}),
|
||||
'uploader_id': ('user_id', {str}),
|
||||
})),
|
||||
}
|
@ -1,53 +1,72 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class NewsPicksIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://newspicks\.com/movie-series/(?P<channel_id>\d+)\?movieId=(?P<id>\d+)'
|
||||
|
||||
_VALID_URL = r'https?://newspicks\.com/movie-series/(?P<id>[^?/#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://newspicks.com/movie-series/11?movieId=1813',
|
||||
'url': 'https://newspicks.com/movie-series/11/?movieId=1813',
|
||||
'info_dict': {
|
||||
'id': '1813',
|
||||
'ext': 'mp4',
|
||||
'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】',
|
||||
'cast': 'count:4',
|
||||
'description': 'md5:09397aad46d6ded6487ff13f138acadf',
|
||||
'channel': 'HORIE ONE',
|
||||
'channel_id': '11',
|
||||
'duration': 2940,
|
||||
'release_date': '20220117',
|
||||
'thumbnail': r're:https://.+jpg',
|
||||
'release_timestamp': 1642424400,
|
||||
'series': 'HORIE ONE',
|
||||
'series_id': '11',
|
||||
'thumbnail': r're:https?://resources\.newspicks\.com/.+\.(?:jpe?g|png)',
|
||||
'timestamp': 1642424420,
|
||||
'upload_date': '20220117',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://newspicks.com/movie-series/158/?movieId=3932',
|
||||
'info_dict': {
|
||||
'id': '3932',
|
||||
'ext': 'mp4',
|
||||
'title': '【検証】専門家は、KADOKAWAをどう見るか',
|
||||
'cast': 'count:3',
|
||||
'description': 'md5:2c2d4bf77484a4333ec995d676f9a91d',
|
||||
'duration': 1320,
|
||||
'release_date': '20240622',
|
||||
'release_timestamp': 1719088080,
|
||||
'series': 'NPレポート',
|
||||
'series_id': '158',
|
||||
'thumbnail': r're:https?://resources\.newspicks\.com/.+\.(?:jpe?g|png)',
|
||||
'timestamp': 1719086400,
|
||||
'upload_date': '20240622',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, channel_id = self._match_valid_url(url).group('id', 'channel_id')
|
||||
series_id = self._match_id(url)
|
||||
video_id = traverse_obj(parse_qs(url), ('movieId', -1, {str}, {require('movie ID')}))
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
entries = self._parse_html5_media_entries(
|
||||
url, webpage.replace('movie-for-pc', 'movie'), video_id, 'hls')
|
||||
if not entries:
|
||||
raise ExtractorError('No HTML5 media elements found')
|
||||
info = entries[0]
|
||||
|
||||
title = self._html_search_meta('og:title', webpage, fatal=False)
|
||||
description = self._html_search_meta(
|
||||
('og:description', 'twitter:title'), webpage, fatal=False)
|
||||
channel = self._html_search_regex(
|
||||
r'value="11".+?<div\s+class="title">(.+?)</div', webpage, 'channel name', fatal=False)
|
||||
if not title or not channel:
|
||||
title, channel = re.split(r'\s*|\s*', self._html_extract_title(webpage))
|
||||
|
||||
release_date = self._search_regex(
|
||||
r'<span\s+class="on-air-date">\s*(\d+)年(\d+)月(\d+)日\s*</span>',
|
||||
webpage, 'release date', fatal=False, group=(1, 2, 3))
|
||||
fragment = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['fragment']
|
||||
m3u8_url = traverse_obj(fragment, ('movie', 'movieUrl', {url_or_none}, {require('m3u8 URL')}))
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
|
||||
|
||||
info.update({
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'channel': channel,
|
||||
'channel_id': channel_id,
|
||||
'release_date': ('%04d%02d%02d' % tuple(map(int, release_date))) if release_date else None,
|
||||
})
|
||||
return info
|
||||
'formats': formats,
|
||||
'series': traverse_obj(fragment, ('series', 'title', {str})),
|
||||
'series_id': series_id,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(fragment, ('movie', {
|
||||
'title': ('title', {str}),
|
||||
'cast': ('relatedUsers', ..., 'displayName', {str}, filter, all, filter),
|
||||
'description': ('explanation', {clean_html}),
|
||||
'release_timestamp': ('onAirStartDate', {parse_iso8601}),
|
||||
'thumbnail': (('image', 'coverImageUrl'), {url_or_none}, any),
|
||||
'timestamp': ('published', {parse_iso8601}),
|
||||
})),
|
||||
}
|
||||
|
@ -1,59 +1,57 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
get_element_by_attribute,
|
||||
UnsupportedError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
update_url_query,
|
||||
parse_duration,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
update_url,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class NobelPrizeIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?nobelprize\.org/mediaplayer.*?\bid=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.nobelprize.org/mediaplayer/?id=2636',
|
||||
'md5': '04c81e5714bb36cc4e2232fee1d8157f',
|
||||
_VALID_URL = r'https?://(?:(?:mediaplayer|www)\.)?nobelprize\.org/mediaplayer/'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nobelprize.org/mediaplayer/?id=2636',
|
||||
'info_dict': {
|
||||
'id': '2636',
|
||||
'ext': 'mp4',
|
||||
'title': 'Announcement of the 2016 Nobel Prize in Physics',
|
||||
'description': 'md5:05beba57f4f5a4bbd4cf2ef28fcff739',
|
||||
'description': 'md5:1a2d8a6ca80c88fb3b9a326e0b0e8e43',
|
||||
'duration': 1560.0,
|
||||
'thumbnail': r're:https?://www\.nobelprize\.org/images/.+\.jpg',
|
||||
'timestamp': 1504883793,
|
||||
'upload_date': '20170908',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://mediaplayer.nobelprize.org/mediaplayer/?qid=12693',
|
||||
'info_dict': {
|
||||
'id': '12693',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nobel Lecture by Peter Higgs',
|
||||
'description': 'md5:9b12e275dbe3a8138484e70e00673a05',
|
||||
'duration': 1800.0,
|
||||
'thumbnail': r're:https?://www\.nobelprize\.org/images/.+\.jpg',
|
||||
'timestamp': 1504883793,
|
||||
'upload_date': '20170908',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
media = self._parse_json(self._search_regex(
|
||||
r'(?s)var\s*config\s*=\s*({.+?});', webpage,
|
||||
'config'), video_id, js_to_json)['media']
|
||||
title = media['title']
|
||||
|
||||
formats = []
|
||||
for source in media.get('source', []):
|
||||
source_src = source.get('src')
|
||||
if not source_src:
|
||||
continue
|
||||
ext = mimetype2ext(source.get('type')) or determine_ext(source_src)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_src, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(source_src, {'hdcore': '3.7.0'}),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': source_src,
|
||||
})
|
||||
video_id = traverse_obj(parse_qs(url), (
|
||||
('id', 'qid'), -1, {int_or_none}, {str_or_none}, any))
|
||||
if not video_id:
|
||||
raise UnsupportedError(url)
|
||||
webpage = self._download_webpage(
|
||||
update_url(url, netloc='mediaplayer.nobelprize.org'), video_id)
|
||||
|
||||
return {
|
||||
**self._search_json_ld(webpage, video_id),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': get_element_by_attribute('itemprop', 'description', webpage),
|
||||
'duration': int_or_none(media.get('duration')),
|
||||
'formats': formats,
|
||||
'title': self._html_search_meta('caption', webpage),
|
||||
'description': traverse_obj(webpage, (
|
||||
{find_element(tag='span', attr='itemprop', value='description')}, {clean_html})),
|
||||
'duration': parse_duration(self._html_search_meta('duration', webpage)),
|
||||
}
|
||||
|
@ -1,100 +0,0 @@
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class NoovoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?noovo\.ca/videos/(?P<id>[^/]+/[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
# clip
|
||||
'url': 'http://noovo.ca/videos/rpm-plus/chrysler-imperial',
|
||||
'info_dict': {
|
||||
'id': '5386045029001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chrysler Imperial',
|
||||
'description': 'md5:de3c898d1eb810f3e6243e08c8b4a056',
|
||||
'timestamp': 1491399228,
|
||||
'upload_date': '20170405',
|
||||
'uploader_id': '618566855001',
|
||||
'series': 'RPM+',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# episode
|
||||
'url': 'http://noovo.ca/videos/l-amour-est-dans-le-pre/episode-13-8',
|
||||
'info_dict': {
|
||||
'id': '5395865725001',
|
||||
'title': 'Épisode 13 : Les retrouvailles',
|
||||
'description': 'md5:888c3330f0c1b4476c5bc99a1c040473',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1492019320,
|
||||
'upload_date': '20170412',
|
||||
'uploader_id': '618566855001',
|
||||
'series': "L'amour est dans le pré",
|
||||
'season_number': 5,
|
||||
'episode': 'Épisode 13',
|
||||
'episode_number': 13,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/618566855001/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
brightcove_id = self._search_regex(
|
||||
r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)dataLayer\.push\(\s*({.+?})\s*\);', webpage, 'data',
|
||||
default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
|
||||
title = try_get(
|
||||
data, lambda x: x['video']['nom'],
|
||||
str) or self._html_search_meta(
|
||||
'dcterms.Title', webpage, 'title', fatal=True)
|
||||
|
||||
description = self._html_search_meta(
|
||||
('dcterms.Description', 'description'), webpage, 'description')
|
||||
|
||||
series = try_get(
|
||||
data, lambda x: x['emission']['nom']) or self._search_regex(
|
||||
r'<div[^>]+class="banner-card__subtitle h4"[^>]*>([^<]+)',
|
||||
webpage, 'series', default=None)
|
||||
|
||||
season_el = try_get(data, lambda x: x['emission']['saison'], dict) or {}
|
||||
season = try_get(season_el, lambda x: x['nom'], str)
|
||||
season_number = int_or_none(try_get(season_el, lambda x: x['numero']))
|
||||
|
||||
episode_el = try_get(season_el, lambda x: x['episode'], dict) or {}
|
||||
episode = try_get(episode_el, lambda x: x['nom'], str)
|
||||
episode_number = int_or_none(try_get(episode_el, lambda x: x['numero']))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': BrightcoveNewIE.ie_key(),
|
||||
'url': smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
{'geo_countries': ['CA']}),
|
||||
'id': brightcove_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'series': series,
|
||||
'season': season,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
}
|
@ -1,55 +1,82 @@
|
||||
from .common import InfoExtractor
|
||||
from .streaks import StreaksBaseIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class NTVCoJpCUIE(InfoExtractor):
|
||||
class NTVCoJpCUIE(StreaksBaseIE):
|
||||
IE_NAME = 'cu.ntv.co.jp'
|
||||
IE_DESC = 'Nippon Television Network'
|
||||
_VALID_URL = r'https?://cu\.ntv\.co\.jp/(?!program)(?P<id>[^/?&#]+)'
|
||||
_TEST = {
|
||||
'url': 'https://cu.ntv.co.jp/televiva-chill-gohan_181031/',
|
||||
IE_DESC = '日テレ無料TADA!'
|
||||
_VALID_URL = r'https?://cu\.ntv\.co\.jp/(?!program-list|search)(?P<id>[\w-]+)/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://cu.ntv.co.jp/gaki_20250525/',
|
||||
'info_dict': {
|
||||
'id': '5978891207001',
|
||||
'id': 'gaki_20250525',
|
||||
'ext': 'mp4',
|
||||
'title': '桜エビと炒り卵がポイント! 「中華風 エビチリおにぎり」──『美虎』五十嵐美幸',
|
||||
'upload_date': '20181213',
|
||||
'description': 'md5:1985b51a9abc285df0104d982a325f2a',
|
||||
'uploader_id': '3855502814001',
|
||||
'timestamp': 1544669941,
|
||||
'title': '放送開始36年!方正ココリコが選ぶ神回&地獄回!',
|
||||
'cast': 'count:2',
|
||||
'description': 'md5:1e1db556224d627d4d2f74370c650927',
|
||||
'display_id': 'ref:gaki_20250525',
|
||||
'duration': 1450,
|
||||
'episode': '放送開始36年!方正ココリコが選ぶ神回&地獄回!',
|
||||
'episode_id': '000000010172808',
|
||||
'episode_number': 255,
|
||||
'genres': ['variety'],
|
||||
'live_status': 'not_live',
|
||||
'modified_date': '20250525',
|
||||
'modified_timestamp': 1748145537,
|
||||
'release_date': '20250525',
|
||||
'release_timestamp': 1748145539,
|
||||
'series': 'ダウンタウンのガキの使いやあらへんで!',
|
||||
'series_id': 'gaki',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'timestamp': 1748145197,
|
||||
'upload_date': '20250525',
|
||||
'uploader': '日本テレビ放送網',
|
||||
'uploader_id': '0x7FE2',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
player_config = self._search_nuxt_data(webpage, display_id)
|
||||
video_id = traverse_obj(player_config, ('movie', 'video_id'))
|
||||
if not video_id:
|
||||
raise ExtractorError('Failed to extract video ID for Brightcove')
|
||||
account_id = traverse_obj(player_config, ('player', 'account')) or '3855502814001'
|
||||
title = traverse_obj(player_config, ('movie', 'name'))
|
||||
if not title:
|
||||
og_title = self._og_search_title(webpage, fatal=False) or traverse_obj(player_config, ('player', 'title'))
|
||||
if og_title:
|
||||
title = og_title.split('(', 1)[0].strip()
|
||||
description = (traverse_obj(player_config, ('movie', 'description'))
|
||||
or self._html_search_meta(['description', 'og:description'], webpage))
|
||||
|
||||
info = self._search_json(
|
||||
r'window\.app\s*=', webpage, 'video info',
|
||||
display_id)['falcorCache']['catalog']['episode'][display_id]['value']
|
||||
media_id = traverse_obj(info, (
|
||||
'streaks_data', 'mediaid', {str_or_none}, {require('Streaks media ID')}))
|
||||
non_phonetic = (lambda _, v: v['is_phonetic'] is False, 'value', {str})
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id), {'geo_countries': ['JP']}),
|
||||
'ie_key': 'BrightcoveNew',
|
||||
**self._extract_from_streaks_api('ntv-tada', media_id, headers={
|
||||
'X-Streaks-Api-Key': 'df497719056b44059a0483b8faad1f4a',
|
||||
}),
|
||||
**traverse_obj(info, {
|
||||
'id': ('content_id', {str_or_none}),
|
||||
'title': ('title', *non_phonetic, any),
|
||||
'age_limit': ('is_adult_only_content', {lambda x: 18 if x else None}),
|
||||
'cast': ('credit', ..., 'name', *non_phonetic),
|
||||
'genres': ('genre', ..., {str}),
|
||||
'release_timestamp': ('pub_date', {parse_iso8601}),
|
||||
'tags': ('tags', ..., {str}),
|
||||
'thumbnail': ('artwork', ..., 'url', any, {url_or_none}),
|
||||
}),
|
||||
**traverse_obj(info, ('tv_episode_info', {
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'episode_number': ('episode_number', {int}),
|
||||
'series': ('parent_show_title', *non_phonetic, any),
|
||||
'series_id': ('show_content_id', {str}),
|
||||
})),
|
||||
**traverse_obj(info, ('custom_data', {
|
||||
'description': ('program_detail', {str}),
|
||||
'episode': ('episode_title', {str}),
|
||||
'episode_id': ('episode_id', {str_or_none}),
|
||||
'uploader': ('network_name', {str}),
|
||||
'uploader_id': ('network_id', {str}),
|
||||
})),
|
||||
}
|
||||
|
@ -0,0 +1,41 @@
|
||||
from .floatplane import FloatplaneBaseIE
|
||||
|
||||
|
||||
class SaucePlusIE(FloatplaneBaseIE):
|
||||
IE_DESC = 'Sauce+'
|
||||
_VALID_URL = r'https?://(?:(?:www|beta)\.)?sauceplus\.com/post/(?P<id>\w+)'
|
||||
_BASE_URL = 'https://www.sauceplus.com'
|
||||
_HEADERS = {
|
||||
'Origin': _BASE_URL,
|
||||
'Referer': f'{_BASE_URL}/',
|
||||
}
|
||||
_IMPERSONATE_TARGET = True
|
||||
_TESTS = [{
|
||||
'url': 'https://www.sauceplus.com/post/YbBwIa2A5g',
|
||||
'info_dict': {
|
||||
'id': 'eit4Ugu5TL',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'YbBwIa2A5g',
|
||||
'title': 'Scare the Coyote - Episode 3',
|
||||
'description': '',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'duration': 2975,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'release_date': '20250627',
|
||||
'release_timestamp': 1750993500,
|
||||
'uploader': 'Scare The Coyote',
|
||||
'uploader_id': '683e0a3269688656a5a49a44',
|
||||
'uploader_url': 'https://www.sauceplus.com/channel/ScareTheCoyote/home',
|
||||
'channel': 'Scare The Coyote',
|
||||
'channel_id': '683e0a326968866ceba49a45',
|
||||
'channel_url': 'https://www.sauceplus.com/channel/ScareTheCoyote/home/main',
|
||||
'availability': 'subscriber_only',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._get_cookies(self._BASE_URL).get('__Host-sp-sess'):
|
||||
self.raise_login_required()
|
@ -1,57 +1,102 @@
|
||||
from .ard import ARDMediathekBaseIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
get_element_by_attribute,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
parse_duration,
|
||||
parse_qs,
|
||||
unified_strdate,
|
||||
)
|
||||
from ..utils.traversal import (
|
||||
find_element,
|
||||
require,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class SRMediathekIE(ARDMediathekBaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'sr:mediathek'
|
||||
IE_DESC = 'Saarländischer Rundfunk'
|
||||
_VALID_URL = r'https?://sr-mediathek(?:\.sr-online)?\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
|
||||
|
||||
_CLS_COMMON = 'teaser__image__caption__text teaser__image__caption__text--'
|
||||
_VALID_URL = r'https?://(?:www\.)?sr-mediathek\.de/index\.php\?.*?&id=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455',
|
||||
'url': 'https://www.sr-mediathek.de/index.php?seite=7&id=141317',
|
||||
'info_dict': {
|
||||
'id': '28455',
|
||||
'id': '141317',
|
||||
'ext': 'mp4',
|
||||
'title': 'sportarena (26.10.2014)',
|
||||
'description': 'Ringen: KSV Köllerbach gegen Aachen-Walheim; Frauen-Fußball: 1. FC Saarbrücken gegen Sindelfingen; Motorsport: Rallye in Losheim; dazu: Interview mit Timo Bernhard; Turnen: TG Saar; Reitsport: Deutscher Voltigier-Pokal; Badminton: Interview mit Michael Fuchs ',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'title': 'Kärnten, da will ich hin!',
|
||||
'channel': 'SR Fernsehen',
|
||||
'description': 'md5:7732e71e803379a499732864a572a456',
|
||||
'duration': 1788.0,
|
||||
'release_date': '20250525',
|
||||
'series': 'da will ich hin!',
|
||||
'series_id': 'DWIH',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
},
|
||||
'skip': 'no longer available',
|
||||
}, {
|
||||
'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=37682',
|
||||
'url': 'https://www.sr-mediathek.de/index.php?seite=7&id=153853',
|
||||
'info_dict': {
|
||||
'id': '37682',
|
||||
'ext': 'mp4',
|
||||
'title': 'Love, Cakes and Rock\'n\'Roll',
|
||||
'description': 'md5:18bf9763631c7d326c22603681e1123d',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'id': '153853',
|
||||
'ext': 'mp3',
|
||||
'title': 'Kappes, Klöße, Kokosmilch: Bruschetta mit Nduja',
|
||||
'channel': 'SR 3',
|
||||
'description': 'md5:3935798de3562b10c4070b408a15e225',
|
||||
'duration': 139.0,
|
||||
'release_date': '20250523',
|
||||
'series': 'Kappes, Klöße, Kokosmilch',
|
||||
'series_id': 'SR3_KKK_A',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://sr-mediathek.de/index.php?seite=7&id=7480',
|
||||
'only_matching': True,
|
||||
'url': 'https://www.sr-mediathek.de/index.php?seite=7&id=31406&pnr=&tbl=pf',
|
||||
'info_dict': {
|
||||
'id': '31406',
|
||||
'ext': 'mp3',
|
||||
'title': 'Das Leben schwer nehmen, ist einfach zu anstrengend',
|
||||
'channel': 'SR 1',
|
||||
'description': 'md5:3e03fd556af831ad984d0add7175fb0c',
|
||||
'duration': 1769.0,
|
||||
'release_date': '20230717',
|
||||
'series': 'Abendrot',
|
||||
'series_id': 'SR1_AB_P',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
if '>Der gewünschte Beitrag ist leider nicht mehr verfügbar.<' in webpage:
|
||||
if description == 'Der gewünschte Beitrag ist leider nicht mehr vorhanden.':
|
||||
raise ExtractorError(f'Video {video_id} is no longer available', expected=True)
|
||||
|
||||
media_collection_url = self._search_regex(
|
||||
r'data-mediacollection-ardplayer="([^"]+)"', webpage, 'media collection url')
|
||||
info = self._extract_media_info(media_collection_url, webpage, video_id)
|
||||
info.update({
|
||||
player_url = traverse_obj(webpage, (
|
||||
{find_element(tag='div', id=f'player{video_id}', html=True)},
|
||||
{extract_attributes}, 'data-mediacollection-ardplayer',
|
||||
{self._proto_relative_url}, {require('player URL')}))
|
||||
article = traverse_obj(webpage, (
|
||||
{find_element(cls='article__content')},
|
||||
{find_element(tag='p')}, {clean_html}))
|
||||
|
||||
return {
|
||||
**self._extract_media_info(player_url, webpage, video_id),
|
||||
'id': video_id,
|
||||
'title': get_element_by_attribute('class', 'ardplayer-title', webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'title': traverse_obj(webpage, (
|
||||
{find_element(cls='ardplayer-title')}, {clean_html})),
|
||||
'channel': traverse_obj(webpage, (
|
||||
{find_element(cls=f'{self._CLS_COMMON}subheadline')},
|
||||
{lambda x: x.split('|')[0]}, {clean_html})),
|
||||
'description': description,
|
||||
'duration': parse_duration(self._search_regex(
|
||||
r'(\d{2}:\d{2}:\d{2})', article, 'duration')),
|
||||
'release_date': unified_strdate(self._search_regex(
|
||||
r'(\d{2}\.\d{2}\.\d{4})', article, 'release_date')),
|
||||
'series': traverse_obj(webpage, (
|
||||
{find_element(cls=f'{self._CLS_COMMON}headline')}, {clean_html})),
|
||||
'series_id': traverse_obj(webpage, (
|
||||
{find_element(cls='teaser__link', html=True)},
|
||||
{extract_attributes}, 'href', {parse_qs}, 'sen', ..., {str}, any)),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
})
|
||||
return info
|
||||
}
|
||||
|
@ -1,76 +1,76 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, urljoin
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
parse_iso8601,
|
||||
update_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import subs_list_to_dict, traverse_obj
|
||||
|
||||
|
||||
class StarTrekIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'(?P<base>https?://(?:intl|www)\.startrek\.com)/videos/(?P<id>[^/]+)'
|
||||
IE_NAME = 'startrek'
|
||||
IE_DESC = 'STAR TREK'
|
||||
_VALID_URL = r'https?://(?:www\.)?startrek\.com(?:/en-(?:ca|un))?/videos/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://intl.startrek.com/videos/watch-welcoming-jess-bush-to-the-ready-room',
|
||||
'md5': '491df5035c9d4dc7f63c79caaf9c839e',
|
||||
'url': 'https://www.startrek.com/en-un/videos/official-trailer-star-trek-lower-decks-season-4',
|
||||
'info_dict': {
|
||||
'id': 'watch-welcoming-jess-bush-to-the-ready-room',
|
||||
'id': 'official-trailer-star-trek-lower-decks-season-4',
|
||||
'ext': 'mp4',
|
||||
'title': 'WATCH: Welcoming Jess Bush to The Ready Room',
|
||||
'duration': 1888,
|
||||
'timestamp': 1655388000,
|
||||
'upload_date': '20220616',
|
||||
'description': 'md5:1ffee884e3920afbdd6dd04e926a1221',
|
||||
'thumbnail': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/styles/video_1920x1080/public/images/2022-06/pp_14794_rr_thumb_107_yt_16x9\.jpg(?:\?.+)?',
|
||||
'subtitles': {'en-US': [{
|
||||
'url': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/video/captions/2022-06/TRR_SNW_107_v4\.vtt',
|
||||
}, {
|
||||
'url': 'https://media.startrek.com/2022/06/16/2043801155561/1069981_hls/trr_snw_107_v4-c4bfc25d/stream_vtt.m3u8',
|
||||
}]},
|
||||
'title': 'Official Trailer | Star Trek: Lower Decks - Season 4',
|
||||
'alt_title': 'md5:dd7e3191aaaf9e95db16fc3abd5ef68b',
|
||||
'categories': ['TRAILERS'],
|
||||
'description': 'md5:563d7856ddab99bee7a5e50f45531757',
|
||||
'release_date': '20230722',
|
||||
'release_timestamp': 1690033200,
|
||||
'series': 'Star Trek: Lower Decks',
|
||||
'series_id': 'star-trek-lower-decks',
|
||||
'thumbnail': r're:https?://.+\.(?:jpg|png)',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.startrek.com/videos/watch-ethan-peck-and-gia-sandhu-beam-down-to-the-ready-room',
|
||||
'md5': 'f5ad74fbb86e91e0882fc0a333178d1d',
|
||||
'url': 'https://www.startrek.com/en-ca/videos/my-first-contact-senator-cory-booker',
|
||||
'info_dict': {
|
||||
'id': 'watch-ethan-peck-and-gia-sandhu-beam-down-to-the-ready-room',
|
||||
'id': 'my-first-contact-senator-cory-booker',
|
||||
'ext': 'mp4',
|
||||
'title': 'WATCH: Ethan Peck and Gia Sandhu Beam Down to The Ready Room',
|
||||
'duration': 1986,
|
||||
'timestamp': 1654221600,
|
||||
'upload_date': '20220603',
|
||||
'description': 'md5:b3aa0edacfe119386567362dec8ed51b',
|
||||
'thumbnail': r're:https://www\.startrek\.com/sites/default/files/styles/video_1920x1080/public/images/2022-06/pp_14792_rr_thumb_105_yt_16x9_1.jpg(?:\?.+)?',
|
||||
'subtitles': {'en-US': [{
|
||||
'url': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/video/captions/2022-06/TRR_SNW_105_v5\.vtt',
|
||||
}]},
|
||||
'title': 'My First Contact: Senator Cory Booker',
|
||||
'alt_title': 'md5:fe74a8bdb0afab421c6e159a7680db4d',
|
||||
'categories': ['MY FIRST CONTACT'],
|
||||
'description': 'md5:a3992ab3b3e0395925d71156bbc018ce',
|
||||
'release_date': '20250401',
|
||||
'release_timestamp': 1743512400,
|
||||
'series': 'Star Trek: The Original Series',
|
||||
'series_id': 'star-trek-the-original-series',
|
||||
'thumbnail': r're:https?://.+\.(?:jpg|png)',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
urlbase, video_id = self._match_valid_url(url).group('base', 'id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player = self._search_regex(
|
||||
r'(<\s*div\s+id\s*=\s*"cvp-player-[^<]+<\s*/div\s*>)', webpage, 'player')
|
||||
page_props = self._search_nextjs_data(webpage, video_id)['props']['pageProps']
|
||||
video_data = page_props['video']['data']
|
||||
if youtube_id := video_data.get('youtube_video_id'):
|
||||
return self.url_result(youtube_id, YoutubeIE)
|
||||
|
||||
hls = self._html_search_regex(r'\bdata-hls\s*=\s*"([^"]+)"', player, 'HLS URL')
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls, video_id, 'mp4')
|
||||
|
||||
captions = self._html_search_regex(
|
||||
r'\bdata-captions-url\s*=\s*"([^"]+)"', player, 'captions URL', fatal=False)
|
||||
if captions:
|
||||
subtitles.setdefault('en-US', [])[:0] = [{'url': urljoin(urlbase, captions)}]
|
||||
|
||||
# NB: Most of the data in the json_ld is undesirable
|
||||
json_ld = self._search_json_ld(webpage, video_id, fatal=False)
|
||||
series_id = traverse_obj(video_data, (
|
||||
'series_and_movies', ..., 'series_or_movie', 'slug', {str}, any))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._html_search_regex(
|
||||
r'\bdata-title\s*=\s*"([^"]+)"', player, 'title', json_ld.get('title')),
|
||||
'description': self._html_search_regex(
|
||||
r'(?s)<\s*div\s+class\s*=\s*"header-body"\s*>(.+?)<\s*/div\s*>',
|
||||
webpage, 'description', fatal=False),
|
||||
'duration': int_or_none(self._html_search_regex(
|
||||
r'\bdata-duration\s*=\s*"(\d+)"', player, 'duration', fatal=False)),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': urljoin(urlbase, self._html_search_regex(
|
||||
r'\bdata-poster-url\s*=\s*"([^"]+)"', player, 'thumbnail', fatal=False)),
|
||||
'timestamp': json_ld.get('timestamp'),
|
||||
'series': traverse_obj(page_props, (
|
||||
'queried', 'header', 'tab3', 'slices', ..., 'items',
|
||||
lambda _, v: v['link']['slug'] == series_id, 'link_copy', {str}, any)),
|
||||
'series_id': series_id,
|
||||
**traverse_obj(video_data, {
|
||||
'title': ('title', ..., 'text', {clean_html}, any),
|
||||
'alt_title': ('subhead', ..., 'text', {clean_html}, any),
|
||||
'categories': ('category', 'data', 'category_name', {str.upper}, filter, all),
|
||||
'description': ('slices', ..., 'primary', 'content', ..., 'text', {clean_html}, any),
|
||||
'release_timestamp': ('published', {parse_iso8601}),
|
||||
'subtitles': ({'url': 'legacy_subtitle_file'}, all, {subs_list_to_dict(lang='en')}),
|
||||
'thumbnail': ('poster_frame', 'url', {url_or_none}, {update_url(query=None)}),
|
||||
'url': ('legacy_video_url', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
|
@ -0,0 +1,43 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import (
|
||||
find_element,
|
||||
require,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class TheHighWireIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?thehighwire\.com/ark-videos/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://thehighwire.com/ark-videos/the-deposition-of-stanley-plotkin/',
|
||||
'info_dict': {
|
||||
'id': 'the-deposition-of-stanley-plotkin',
|
||||
'ext': 'mp4',
|
||||
'title': 'THE DEPOSITION OF STANLEY PLOTKIN',
|
||||
'description': 'md5:6d0be4f1181daaa10430fd8b945a5e54',
|
||||
'thumbnail': r're:https?://static\.arkengine\.com/video/.+\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
embed_url = traverse_obj(webpage, (
|
||||
{find_element(cls='ark-video-embed', html=True)},
|
||||
{extract_attributes}, 'src', {url_or_none}, {require('embed URL')}))
|
||||
embed_page = self._download_webpage(embed_url, display_id)
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
**traverse_obj(webpage, {
|
||||
'title': ({find_element(cls='section-header')}, {clean_html}),
|
||||
'description': ({find_element(cls='episode-description__copy')}, {clean_html}),
|
||||
}),
|
||||
**self._parse_html5_media_entries(embed_url, embed_page, display_id, m3u8_id='hls')[0],
|
||||
}
|
@ -1,98 +1,53 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
parse_iso8601,
|
||||
)
|
||||
from ..utils import clean_html
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class UMGDeIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
IE_NAME = 'umg:de'
|
||||
IE_DESC = 'Universal Music Deutschland'
|
||||
_VALID_URL = r'https?://(?:www\.)?universal-music\.de/[^/]+/videos/[^/?#]+-(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?universal-music\.de/[^/?#]+/videos/(?P<slug>[^/?#]+-(?P<id>\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.universal-music.de/sido/videos/jedes-wort-ist-gold-wert-457803',
|
||||
'md5': 'ebd90f48c80dcc82f77251eb1902634f',
|
||||
'info_dict': {
|
||||
'id': '457803',
|
||||
'ext': 'mp4',
|
||||
'title': 'Jedes Wort ist Gold wert',
|
||||
'artists': ['Sido'],
|
||||
'description': 'md5:df2dbffcff1a74e0a7c9bef4b497aeec',
|
||||
'display_id': 'jedes-wort-ist-gold-wert-457803',
|
||||
'duration': 210.0,
|
||||
'thumbnail': r're:https?://images\.universal-music\.de/img/assets/.+\.jpg',
|
||||
'timestamp': 1513591800,
|
||||
'upload_date': '20171218',
|
||||
'view_count': int,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.universal-music.de/alexander-eder/videos/der-doktor-hat-gesagt-609533',
|
||||
'info_dict': {
|
||||
'id': '609533',
|
||||
'ext': 'mp4',
|
||||
'title': 'Der Doktor hat gesagt',
|
||||
'artists': ['Alexander Eder'],
|
||||
'display_id': 'der-doktor-hat-gesagt-609533',
|
||||
'duration': 146.0,
|
||||
'thumbnail': r're:https?://images\.universal-music\.de/img/assets/.+\.jpg',
|
||||
'timestamp': 1742982100,
|
||||
'upload_date': '20250326',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'https://graphql.universal-music.de/',
|
||||
video_id, query={
|
||||
'query': '''{
|
||||
universalMusic(channel:16) {
|
||||
video(id:%s) {
|
||||
headline
|
||||
formats {
|
||||
formatId
|
||||
url
|
||||
type
|
||||
width
|
||||
height
|
||||
mimeType
|
||||
fileSize
|
||||
}
|
||||
duration
|
||||
createdDate
|
||||
}
|
||||
}
|
||||
}''' % video_id})['data']['universalMusic']['video'] # noqa: UP031
|
||||
|
||||
title = video_data['headline']
|
||||
hls_url_template = 'http://mediadelivery.universal-music-services.de/vod/mp4:autofill/storage/' + '/'.join(list(video_id)) + '/content/%s/file/playlist.m3u8'
|
||||
|
||||
thumbnails = []
|
||||
formats = []
|
||||
|
||||
def add_m3u8_format(format_id):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
hls_url_template % format_id, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
|
||||
for f in video_data.get('formats', []):
|
||||
f_url = f.get('url')
|
||||
mime_type = f.get('mimeType')
|
||||
if not f_url or mime_type == 'application/mxf':
|
||||
continue
|
||||
fmt = {
|
||||
'url': f_url,
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'filesize': parse_filesize(f.get('fileSize')),
|
||||
}
|
||||
f_type = f.get('type')
|
||||
if f_type == 'Image':
|
||||
thumbnails.append(fmt)
|
||||
elif f_type == 'Video':
|
||||
format_id = f.get('formatId')
|
||||
if format_id:
|
||||
fmt['format_id'] = format_id
|
||||
if mime_type == 'video/mp4':
|
||||
add_m3u8_format(format_id)
|
||||
urlh = self._request_webpage(f_url, video_id, fatal=False)
|
||||
if urlh:
|
||||
first_byte = urlh.read(1)
|
||||
if first_byte not in (b'F', b'\x00'):
|
||||
continue
|
||||
formats.append(fmt)
|
||||
if not formats:
|
||||
for format_id in (867, 836, 940):
|
||||
add_m3u8_format(format_id)
|
||||
display_id, video_id = self._match_valid_url(url).group('slug', 'id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
return {
|
||||
**self._search_json_ld(webpage, display_id),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': parse_iso8601(video_data.get('createdDate'), ' '),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'artists': traverse_obj(self._html_search_meta('umg-artist-screenname', webpage), (filter, all)),
|
||||
# The JSON LD description duplicates the title
|
||||
'description': traverse_obj(webpage, ({find_element(cls='_3Y0Lj')}, {clean_html})),
|
||||
'display_id': display_id,
|
||||
'formats': self._extract_m3u8_formats(
|
||||
'https://hls.universal-music.de/get', display_id, 'mp4', query={'id': video_id}),
|
||||
}
|
||||
|
@ -0,0 +1,32 @@
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
|
||||
|
||||
class UnitedNationsWebTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://webtv\.un\.org/(?:ar|zh|en|fr|ru|es)/asset/\w+/(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://webtv.un.org/en/asset/k1o/k1o7stmi6p',
|
||||
'md5': 'b2f8b3030063298ae841b4b7ddc01477',
|
||||
'info_dict': {
|
||||
'id': '1_o7stmi6p',
|
||||
'ext': 'mp4',
|
||||
'title': 'António Guterres (Secretary-General) on Israel and Iran - Security Council, 9939th meeting',
|
||||
'thumbnail': 'http://cfvod.kaltura.com/p/2503451/sp/250345100/thumbnail/entry_id/1_o7stmi6p/version/100021',
|
||||
'uploader_id': 'evgeniia.alisova@un.org',
|
||||
'upload_date': '20250620',
|
||||
'timestamp': 1750430976,
|
||||
'duration': 234,
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
partner_id = self._html_search_regex(
|
||||
r'partnerId:\s*(\d+)', webpage, 'partner_id')
|
||||
entry_id = self._html_search_regex(
|
||||
r'const\s+kentryID\s*=\s*["\'](\w+)["\']', webpage, 'kentry_id')
|
||||
|
||||
return self.url_result(f'kaltura:{partner_id}:{entry_id}', KalturaIE)
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue