diff --git a/README.md b/README.md index ad98af7c4..1029d1a6d 100644 --- a/README.md +++ b/README.md @@ -1815,6 +1815,7 @@ The following extractors use this feature: * `manifest_app_version`: Default numeric app version to use with mobile API calls, e.g. `2023401020` * `aid`: Default app ID to use with mobile API calls, e.g. `1180` * `app_info`: Enable mobile API extraction with one or more app info strings in the format of `/[app_name]/[app_version]/[manifest_app_version]/[aid]`, where `iid` is the unique app install ID. `iid` is the only required value; all other values and their `/` separators can be omitted, e.g. `tiktok:app_info=1234567890123456789` or `tiktok:app_info=123,456/trill///1180,789//34.0.1/340001` +* `device_id`: Enable mobile API extraction with a genuine device ID to be used with mobile API calls. Default is a random 19-digit string #### rokfinchannel * `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks` diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 2fb41ba79..6d0d7eea3 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -1,8 +1,8 @@ +import functools import itertools import json import random import re -import string import time import uuid @@ -15,6 +15,7 @@ from ..utils import ( UnsupportedError, UserNotLive, determine_ext, + filter_dict, format_field, int_or_none, join_nonempty, @@ -49,11 +50,21 @@ class TikTokBaseIE(InfoExtractor): _APP_INFO = None _APP_USER_AGENT = None - @property + @functools.cached_property def _KNOWN_APP_INFO(self): - return self._configuration_arg('app_info', ie_key=TikTokIE) + # If we have a genuine device ID, we may not need any IID + default = [''] if self._KNOWN_DEVICE_ID else [] + return self._configuration_arg('app_info', default, ie_key=TikTokIE) - @property + @functools.cached_property + def _KNOWN_DEVICE_ID(self): + return self._configuration_arg('device_id', [None], ie_key=TikTokIE)[0] + + @functools.cached_property + def _DEVICE_ID(self): + return self._KNOWN_DEVICE_ID or str(random.randint(7250000000000000000, 7351147085025500000)) + + @functools.cached_property def _API_HOSTNAME(self): return self._configuration_arg( 'api_hostname', ['api16-normal-c-useast1a.tiktokv.com'], ie_key=TikTokIE)[0] @@ -115,7 +126,7 @@ class TikTokBaseIE(InfoExtractor): }, query=query) def _build_api_query(self, query): - return { + return filter_dict({ **query, 'device_platform': 'android', 'os': 'android', @@ -156,10 +167,10 @@ class TikTokBaseIE(InfoExtractor): 'build_number': self._APP_INFO['app_version'], 'region': 'US', 'ts': int(time.time()), - 'iid': self._APP_INFO['iid'], - 'device_id': random.randint(7250000000000000000, 7351147085025500000), + 'iid': self._APP_INFO.get('iid'), + 'device_id': self._DEVICE_ID, 'openudid': ''.join(random.choices('0123456789abcdef', k=16)), - } + }) def _call_api(self, ep, query, video_id, fatal=True, note='Downloading API JSON', errnote='Unable to download API page'): @@ -848,7 +859,7 @@ class TikTokUserIE(TikTokBaseIE): 'max_cursor': 0, 'min_cursor': 0, 'retry_type': 'no_retry', - 'device_id': ''.join(random.choices(string.digits, k=19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. + 'device_id': self._DEVICE_ID, # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. } for page in itertools.count(1): @@ -896,7 +907,7 @@ class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes shoul 'cursor': 0, 'count': 20, 'type': 5, - 'device_id': ''.join(random.choices(string.digits, k=19)) + 'device_id': self._DEVICE_ID, } for page in itertools.count(1):