diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 2fe15f6d2..96040b58c 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2027,6 +2027,7 @@ from .dlive import ( ) from .drooble import DroobleIE from .umg import UMGDeIE +from .underline import UnderlineIE from .unistra import UnistraIE from .unity import UnityIE from .unscripted import UnscriptedNewsVideoIE diff --git a/yt_dlp/extractor/underline.py b/yt_dlp/extractor/underline.py new file mode 100644 index 000000000..6d3d1588b --- /dev/null +++ b/yt_dlp/extractor/underline.py @@ -0,0 +1,103 @@ +from .common import InfoExtractor + +DEBUG_P = False +if DEBUG_P: + import json + from icecream import ic + from IPython import embed + + +def gen_dict_extract(var, key): + if hasattr(var, "items"): + for k, v in var.items(): + if k == key: + yield v + if isinstance(v, dict): + for result in gen_dict_extract(v, key): + yield result + elif isinstance(v, list): + for d in v: + for result in gen_dict_extract(d, key): + yield result + + +class UnderlineIE(InfoExtractor): + _VALID_URL = r"https?://(?:www\.)?underline\.io/events/(?P[^?]+).*" + + _TESTS = [ + { + "params": { + "skip_download": True, + }, + "url": "https://underline.io/events/342/posters/12863/poster/66463-mbti-personality-prediction-approach-on-persian-twitter?tab=video", + "md5": "md5:eaa894161adaef6efd6008681e1cd2c5", + # md5 sum of the first 10241 bytes of the video file (use --test) + "info_dict": { + "id": "342/posters/12863/poster/66463-mbti-personality-prediction-approach-on-persian-twitter", + "ext": "mp4", + "title": "MBTI Personality Prediction Approach on Persian Twitter", + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type, e.g. int or float + }, + } + ] + + def _real_extract(self, url): + # cookies = self._get_cookies(url) + # if DEBUG_P: + # ic(cookies) + + # if not cookies: + # self.raise_login_required('Cookies are needed to download from this website', method='cookies') + + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + webpage_info = self._search_json( + r'', + webpage, + "idk_what_this_arg_does", + video_id, + end_pattern=r"", + ) + + if DEBUG_P: + # ic(webpage_info) + with open("./tmp.json", "w") as f: + json.dump(webpage_info, f) + + # ic(webpage_info["props"]["pageProps"]["snapshot"]["models"][10]["title"]) + # embed() + + title = list(gen_dict_extract(webpage_info, "title")) + if DEBUG_P: + ic(title) + + if len(title) == 0: + title = None + else: + title = title[0] + + playlist_urls = list(gen_dict_extract(webpage_info, "playlist")) + if DEBUG_P: + ic(playlist_urls) + + if len(playlist_urls) == 0: + url = None + else: + url = playlist_urls[0] + + formats = [] + + m3u8_url = url + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native')) + + return { + "id": video_id, + "title": title, + "formats": formats, + }