Fix extractor

11 months ago · 308e713d9e
parent d1c4d88b2d
commit 308e713d9e
1 changed file with 89 additions and 54 deletions
--- a/yt_dlp/extractor/fptplay.py
+++ b/yt_dlp/extractor/fptplay.py
@@ -2,78 +2,114 @@ import hashlib
 import time
 import urllib.parse
 from .common import InfoExtractor
 from ..utils import (
-    clean_html,
+    ExtractorError,
-    join_nonempty,
+    int_or_none,
    strip_or_none,
 )
 from .common import InfoExtractor
 class FptplayIE(InfoExtractor):
-    _VALID_URL = r'https?://fptplay\.vn/xem-video/[^/]+\-(?P<id>\w+)(?:/tap-(?P<episode>\d+)?/?(?:[?#]|$)|)'
+    _VALID_URL = r'https?://fptplay\.vn/xem-video/[^/]+\-(?P<id>[a-f0-9]+)'
    _GEO_COUNTRIES = ['VN']
    IE_NAME = 'fptplay'
    IE_DESC = 'fptplay.vn'
    _TESTS = [{
-        'url': 'https://fptplay.vn/xem-video/nhan-duyen-dai-nhan-xin-dung-buoc-621a123016f369ebbde55945',
+        'url': 'https://fptplay.vn/xem-video/jumanji-tro-choi-ky-ao-615c9b232089bd0509bfbf42',
        'md5': 'ca0ee9bc63446c0c3e9a90186f7d6b33',
        'info_dict': {
            'id': '621a123016f369ebbde55945',
            'ext': 'mp4',
            'title': 'Nhân Duyên Đại Nhân Xin Dừng Bước - Tập 1A',
            'description': 'md5:23cf7d1ce0ade8e21e76ae482e6a8c6c',
        },
    }, {
        'url': 'https://fptplay.vn/xem-video/ma-toi-la-dai-gia-61f3aa8a6b3b1d2e73c60eb5/tap-3',
        'md5': 'b35be968c909b3e4e1e20ca45dd261b1',
        'info_dict': {
-            'id': '61f3aa8a6b3b1d2e73c60eb5',
+            'id': '615c9b232089bd0509bfbf42',
            'ext': 'mp4',
-            'title': 'Má Tôi Là Đại Gia - Tập 3',
+            'title': 'Jumanji: Welcome To The Jungle',
-            'description': 'md5:ff8ba62fb6e98ef8875c42edff641d1c',
+            'description': 'Phim theo chân một nhóm bốn học sinh phổ thông bị phạt dọn dẹp tầng hầm trường học. Tại đó, họ phát hiện ra trò chơi cổ mang tên Jumanji.',
            'thumbnail': 'https://images.fptplay.net/media/OTT/VOD/2023/03/13/jumanji-tro-choi-ky-ao-fpt-play-1678685776013_Background_1920x1080_over.jpg',
            'release_year': '2017',
        },
    }, {
-        'url': 'https://fptplay.vn/xem-video/lap-toi-do-giam-under-the-skin-6222d9684ec7230fa6e627a2/tap-4',
+        'url': 'https://fptplay.vn/xem-video/sang-nhu-trang-trong-may-6156d8292089bd2184e26238',
        'md5': 'bcb06c55ec14786d7d4eda07fa1ccbb9',
        'info_dict': {
-            'id': '6222d9684ec7230fa6e627a2',
+            'id': '346034',
            'ext': 'mp4',
-            'title': 'Lạp Tội Đồ Giám - Tập 2B',
+            'title': 'Bright As The Moon',
-            'description': 'md5:e5a47e9d35fbf7e9479ca8a77204908b',
+            'description': '',
            'release_year': '2021',
            'season_number': '1',
            'episode': 'Tập 1',
            'episode_number': '1',
            'duration': '2665'
        },
-    }, {
+    }, ]
-        'url': 'https://fptplay.vn/xem-video/nha-co-chuyen-hi-alls-well-ends-well-1997-6218995f6af792ee370459f0',
+
        'only_matching': True,
    }]
    def _real_extract(self, url):
-        video_id, slug_episode = self._match_valid_url(url).group('id', 'episode')
+        contentId = self._match_id(url)
-        webpage = self._download_webpage(url, video_id=video_id, fatal=False) or ''
+
-        title = self._search_regex(
+        # Need valid cookie with Bearer token, else it won't work
-            r'(?s)<h4\s+class="mb-1 text-2xl text-white"[^>]*>(.+)</h4>', webpage, 'title', fatal=False)
+        token = self._get_cookies(url).get("token")
-        real_episode = slug_episode if not title else self._search_regex(
+    
-            r'<p.+title="(?P<episode>[^">]+)"\s+class="epi-title active"', webpage, 'episode', fatal=False)
+        res = self._download_json(self.get_api_with_st_token(contentId), contentId, expected_status=406)
        title = strip_or_none(title) or self._html_search_meta(('og:title', 'twitter:title'), webpage)
        info = self._download_json(
            self.get_api_with_st_token(video_id, int(slug_episode) - 1 if slug_episode else 0), video_id)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(info['data']['url'], video_id, 'mp4')
        return {
            'id': video_id,
            'title': join_nonempty(title, real_episode, delim=' - '),
            'description': (
                clean_html(self._search_regex(r'<p\s+class="overflow-hidden"[^>]*>(.+)</p>', webpage, 'description'))
                or self._html_search_meta(('og:description', 'twitter:description'), webpage)),
            'formats': formats,
            'subtitles': subtitles,
        }
    def get_api_with_st_token(self, video_id, episode):
        path = f'/api/v6.2_w/stream/vod/{video_id}/{episode}/auto_vip'
        timestamp = int(time.time()) + 10800
-        t = hashlib.md5(f'WEBv6Dkdsad90dasdjlALDDDS{timestamp}{path}'.encode()).hexdigest().upper()
+        if res["result"]["episode_type"] == 0:
            # movie or single video
            manifest = self._download_json(self.get_api_with_st_token(contentId, 0), contentId, headers={'authorization': f'Bearer {token.value}'}, expected_status=406)
            if manifest.get("msg") != "success":
                raise ExtractorError(f' - Got an error, response: {manifest.get("msg")}', expected=True)
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(manifest["data"]["url"], contentId)
            return {
                'id': contentId,
                'title': res["result"]["title_origin"] if res["result"]["title_origin"] else res["result"]["title_vie"],
                'description': res["result"]["description"],
                'thumbnail': res["result"]["thumb"],
                'release_year': int_or_none(res["result"]["movie_release_date"]),
                'duration': int_or_none(res["result"]["duration"]),
                'formats': formats,
                'subtitles': subtitles
            }
        else:
            # playlist
            entries = []
            for episode in res["result"]["episodes"]:
                if episode["is_trailer"] == 1:
                    continue
                manifest = self._download_json(self.get_api_with_st_token(contentId, episode["_id"]), episode["_id"], headers={'authorization': f'Bearer {token.value}'}, expected_status=406)
                if manifest.get("msg") != "success":
                    raise ExtractorError(f' - Got an error, response: {manifest.get("msg")}', expected=True)
                formats, subtitles = self._extract_m3u8_formats_and_subtitles(manifest["data"]["url"], episode["_id"])
                entry = {
                    'id': episode["ref_episode_id"],
                    'title': res["result"]["title_origin"] if res["result"]["title_origin"] else res["result"]["title_vie"],
                    'description': episode["description"],
                    'thumbnail': episode["thumb"],
                    'release_year': int_or_none(res["result"]["movie_release_date"]),
                    'season_number': 1,  # Assuming season 1 for simplicity
                    'episode': episode["title"],
                    'episode_number': episode["_id"] + 1,
                    'duration': int_or_none(episode["duration"]),
                    'formats': formats,
                    'subtitles': subtitles
                }
                entries.append(entry)
            return {
                '_type': 'playlist',
                'id': contentId,
                'title': res["result"]["title_origin"] if res["result"]["title_origin"] else res["result"]["title_vie"],
                'entries': entries
            }
    def get_api_with_st_token(self, video_id, episode=None):
        if episode is not None:
            path = f'/api/v7.1_w/stream/vod/{video_id}/{0 if episode is None else episode}/adaptive_bitrate'
        else:
            path = f'/api/v7.1_w/vod/detail/{video_id}'
        timestamp = int(time.time()) + 10800
        t = hashlib.md5(f'6ea6d2a4e2d3a4bd5e275401aa086d{timestamp}{path}'.encode()).hexdigest().upper()
        r = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
        n = [int(f'0x{t[2 * o: 2 * o + 2]}', 16) for o in range(len(t) // 2)]
@@ -89,7 +125,7 @@ class FptplayIE(InfoExtractor):
                    i[n] = e[c]
                n += 1
                c += 1
-                if 3 == n:
+                if n == 3:
                    a[0] = (252 & i[0]) >> 2
                    a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
                    a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
@@ -100,14 +136,13 @@ class FptplayIE(InfoExtractor):
            if n:
                for o in range(n, 3):
                    i[o] = 0
                for o in range(n + 1):
                    a[0] = (252 & i[0]) >> 2
                    a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
                    a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
                    a[3] = (63 & i[2])
                    t += r[a[o]]
-                n += 1
+                    n += 1
                while n < 3:
                    t += ''
                    n += 1