@ -1,3 +1,4 @@
import base64
import calendar
import copy
import datetime
@ -2199,7 +2200,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
' description ' : ' md5:2ef1d002cad520f65825346e2084e49d ' ,
} ,
' params ' : { ' skip_download ' : True }
} ,
} , {
# Story. Requires specific player params to work.
# Note: stories get removed after some period of time
' url ' : ' https://www.youtube.com/watch?v=yN3x1t3sieA ' ,
' info_dict ' : {
' id ' : ' yN3x1t3sieA ' ,
' ext ' : ' mp4 ' ,
' uploader ' : ' Linus Tech Tips ' ,
' duration ' : 13 ,
' channel ' : ' Linus Tech Tips ' ,
' playable_in_embed ' : True ,
' tags ' : [ ] ,
' age_limit ' : 0 ,
' uploader_url ' : ' http://www.youtube.com/user/LinusTechTips ' ,
' upload_date ' : ' 20220402 ' ,
' thumbnail ' : ' https://i.ytimg.com/vi_webp/yN3x1t3sieA/maxresdefault.webp ' ,
' title ' : ' Story ' ,
' live_status ' : ' not_live ' ,
' uploader_id ' : ' LinusTechTips ' ,
' view_count ' : int ,
' description ' : ' ' ,
' channel_id ' : ' UCXuqSBlHAE6Xw-yeJA0Tunw ' ,
' categories ' : [ ' Science & Technology ' ] ,
' channel_url ' : ' https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw ' ,
' availability ' : ' unlisted ' ,
}
}
]
@classmethod
@ -2831,12 +2858,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
lambda p : int_or_none ( p , default = sys . maxsize ) , self . _configuration_arg ( ' max_comments ' , ) + [ ' ' ] * 4 )
continuation = self . _extract_continuation ( root_continuation_data )
message = self . _get_text ( root_continuation_data , ( ' contents ' , . . . , ' messageRenderer ' , ' text ' ) , max_runs = 1 )
if message and not parent :
self . report_warning ( message , video_id = video_id )
response = None
is_forced_continuation = False
is_first_continuation = parent is None
if is_first_continuation and not continuation :
# Sometimes you can get comments by generating the continuation yourself,
# even if YouTube initially reports them being disabled - e.g. stories comments.
# Note: if the comment section is actually disabled, YouTube may return a response with
# required check_get_keys missing. So we will disable that check initially in this case.
continuation = self . _build_api_continuation_query ( self . _generate_comment_continuation ( video_id ) )
is_forced_continuation = True
for page_num in itertools . count ( 0 ) :
if not continuation :
@ -2857,8 +2889,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
response = self . _extract_response (
item_id = None , query = continuation ,
ep = ' next ' , ytcfg = ytcfg , headers = headers , note = note_prefix ,
check_get_keys = ' onResponseReceivedEndpoints ' )
check_get_keys = ' onResponseReceivedEndpoints ' if not is_forced_continuation else None )
is_forced_continuation = False
continuation_contents = traverse_obj (
response , ' onResponseReceivedEndpoints ' , expected_type = list , default = [ ] )
@ -2883,6 +2915,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if continuation :
break
message = self . _get_text ( root_continuation_data , ( ' contents ' , . . . , ' messageRenderer ' , ' text ' ) , max_runs = 1 )
if message and not parent and tracker [ ' running_total ' ] == 0 :
self . report_warning ( f ' Youtube said: { message } ' , video_id = video_id , only_once = True )
@staticmethod
def _generate_comment_continuation(video_id):
    """
    Generates initial comment section continuation token from given video id

    The token is a fixed binary template with the video id embedded twice,
    returned as base64 text.

    @param video_id  The video id (str). The hard-coded length prefixes in
                     the template (\\x0b == 11 before each id, and the
                     enclosing \\r / \\x11 / ' lengths) only add up for an
                     11-character ASCII id.
    @returns         The base64-encoded token as a str.
    """
    # NOTE(review): the layout looks like a serialized protobuf message
    # (tag byte, length byte, payload) - confirm before changing any byte.
    # No whitespace may appear between the escape sequences: every byte below
    # is significant and the length prefixes are computed without padding.
    token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
    return base64.b64encode(token.encode()).decode()
def _get_comments ( self , ytcfg , video_id , contents , webpage ) :
""" Entry for comment extraction """
def _real_comment_extract ( contents ) :
@ -2936,7 +2980,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
headers = self . generate_api_headers (
ytcfg = player_ytcfg , account_syncid = syncid , session_index = session_index , default_client = client )
yt_query = { ' videoId ' : video_id }
yt_query = {
' videoId ' : video_id ,
' params ' : ' 8AEB ' # enable stories
}
yt_query . update ( self . _generate_player_context ( sts ) )
return self . _extract_response (
item_id = video_id , ep = ' player ' , query = yt_query ,
@ -3251,7 +3298,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
webpage = None
if ' webpage ' not in self . _configuration_arg ( ' player_skip ' ) :
webpage = self . _download_webpage (
webpage_url + ' &bpctr=9999999999&has_verified=1 ' , video_id , fatal = False )
webpage_url + ' &bpctr=9999999999&has_verified=1 &pp=8AEB ' , video_id , fatal = False )
master_ytcfg = self . extract_ytcfg ( video_id , webpage ) or self . _get_default_ytcfg ( )
@ -3696,7 +3743,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
unified_strdate ( get_first ( microformats , ' uploadDate ' ) )
or unified_strdate ( search_meta ( ' uploadDate ' ) ) )
if not upload_date or ( not info . get ( ' is_live ' ) and not info . get ( ' was_live ' ) and info . get ( ' live_status ' ) != ' is_upcoming ' ) :
upload_date = strftime_or_none ( self . _extract_time_text ( vpir , ' dateText ' ) [ 0 ] , ' % Y % m %d ' )
upload_date = strftime_or_none ( self . _extract_time_text ( vpir , ' dateText ' ) [ 0 ] , ' % Y % m %d ' ) or upload_date
info [ ' upload_date ' ] = upload_date
for to , frm in fallbacks . items ( ) :
@ -4211,7 +4258,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
self . _extract_visitor_data ( data , ytcfg ) ) ,
* * metadata )
def _extract_ mix _playlist( self , playlist , playlist_id , data , ytcfg ) :
def _extract_ inline _playlist( self , playlist , playlist_id , data , ytcfg ) :
first_id = last_id = response = None
for page_num in itertools . count ( 1 ) :
videos = list ( self . _playlist_entries ( playlist ) )
@ -4221,9 +4268,6 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
if start > = len ( videos ) :
return
for video in videos [ start : ] :
if video [ ' id ' ] == first_id :
self . to_screen ( ' First video %s found again; Assuming end of Mix ' % first_id )
return
yield video
first_id = first_id or videos [ 0 ] [ ' id ' ]
last_id = videos [ - 1 ] [ ' id ' ]
@ -4255,13 +4299,18 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
playlist_url = urljoin ( url , try_get (
playlist , lambda x : x [ ' endpoint ' ] [ ' commandMetadata ' ] [ ' webCommandMetadata ' ] [ ' url ' ] ,
compat_str ) )
if playlist_url and playlist_url != url :
# Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
# [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
is_known_unviewable = re . fullmatch ( r ' MLCT|RLTD[ \ w-] {22} ' , playlist_id )
if playlist_url and playlist_url != url and not is_known_unviewable :
return self . url_result (
playlist_url , ie = YoutubeTabIE . ie_key ( ) , video_id = playlist_id ,
video_title = title )
return self . playlist_result (
self . _extract_ mix _playlist( playlist , playlist_id , data , ytcfg ) ,
self . _extract_ inline _playlist( playlist , playlist_id , data , ytcfg ) ,
playlist_id = playlist_id , playlist_title = title )
def _extract_availability ( self , data ) :
@ -5798,6 +5847,22 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
} ]
class YoutubeStoriesIE(InfoExtractor):
    # Extractor for the "ytstories:<channel id>" pseudo-URL scheme. It does no
    # downloading itself: it rewrites the channel id into a playlist URL and
    # hands that to YoutubeTabIE.
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    # The "UC" prefix is matched but deliberately left outside the `id` group;
    # only the remaining 22 characters are captured.
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """
        Map a "ytstories:" URL to the corresponding stories playlist

        @param url  A URL matching _VALID_URL
        @returns    A url_result pointing at the "RLTD"-prefixed playlist,
                    to be handled by YoutubeTabIE
        """
        # Playlist id = "RLTD" + the value returned by _match_id
        # (presumably the `id` group of _VALID_URL, i.e. the channel id
        # without its "UC" prefix).
        playlist_id = f'RLTD{self._match_id(url)}'
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
            ie=YoutubeTabIE, video_id=playlist_id)
class YoutubeTruncatedURLIE ( InfoExtractor ) :
IE_NAME = ' youtube:truncated_url '
IE_DESC = False # Do not list