@ -5,6 +5,7 @@ import base64
import hashlib
import hashlib
import json
import json
import random
import random
import re
import time
import time
from . common import InfoExtractor
from . common import InfoExtractor
@ -16,6 +17,7 @@ from ..utils import (
intlist_to_bytes ,
intlist_to_bytes ,
int_or_none ,
int_or_none ,
strip_jsonp ,
strip_jsonp ,
unescapeHTML ,
)
)
@ -26,6 +28,8 @@ def md5_text(s):
class AnvatoIE ( InfoExtractor ) :
class AnvatoIE ( InfoExtractor ) :
# Internal URL scheme handled by this extractor: the literal prefix "anvato:",
# then an access key or MCP identifier (named group `access_key_or_mcp`, any
# run of non-colon characters), then a numeric video id (named group `id`).
_VALID_URL = r ' anvato:(?P<access_key_or_mcp>[^:]+):(?P<id> \ d+) '
# Copied from anvplayer.min.js
# Copied from anvplayer.min.js
_ANVACK_TABLE = {
_ANVACK_TABLE = {
' nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6 ' : ' NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ ' ,
' nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6 ' : ' NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ ' ,
@ -114,6 +118,22 @@ class AnvatoIE(InfoExtractor):
' nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure ' : ' NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ '
' nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure ' : ' NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ '
}
}
# Maps a short MCP identifier to the full access key used for it. Consulted by
# _extract_urls and _real_extract when only an MCP identifier is available.
# Several identifiers deliberately share one key (e.g. 'sps' and 'spsstg').
# NOTE(review): 'univison' looks like a misspelling of 'univision', but it sits
# next to 'uni' for the same key -- presumably carried over as-is from the
# upstream player table; confirm before "fixing" it.
_MCP_TO_ACCESS_KEY_TABLE = {
' qa ' : ' anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922 ' ,
' lin ' : ' anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749 ' ,
' univison ' : ' anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa ' ,
' uni ' : ' anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa ' ,
' dev ' : ' anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a ' ,
' sps ' : ' anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336 ' ,
' spsstg ' : ' anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336 ' ,
' anv ' : ' anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3 ' ,
' gray ' : ' anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900 ' ,
' hearst ' : ' anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99 ' ,
' cbs ' : ' anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe ' ,
' telemundo ' : ' anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582 '
}
# Matches a <script> tag carrying a `data-anvp` attribute. The attribute value
# may be delimited by either quote character and is captured as the named
# group `anvp`; the backreference makes the closing quote match the opening one.
_ANVP_RE = r ' <script[^>]+ \ bdata-anvp \ s*= \ s*([ " \' ])(?P<anvp>(?:(?! \ 1).)+) \ 1 '
_AUTH_KEY = b ' \x31 \xc2 \x42 \x84 \x9e \x73 \xa0 \xce '
_AUTH_KEY = b ' \x31 \xc2 \x42 \x84 \x9e \x73 \xa0 \xce '
def __init__ ( self , * args , * * kwargs ) :
def __init__ ( self , * args , * * kwargs ) :
@ -217,9 +237,42 @@ class AnvatoIE(InfoExtractor):
' subtitles ' : subtitles ,
' subtitles ' : subtitles ,
}
}
@staticmethod
def _extract_urls(ie, webpage, video_id):
    """Collect the Anvato embeds found in a webpage.

    Every ``_ANVP_RE`` match in ``webpage`` has its ``anvp`` group parsed as
    JSON. An embed is kept only when it carries a digit-only string
    ``video`` and either an ``accessKey`` or an ``mcp`` identifier that is
    present in ``_MCP_TO_ACCESS_KEY_TABLE``.

    :param ie: extractor instance whose ``_parse_json`` and ``url_result``
        helpers are used.
    :param webpage: HTML text to scan.
    :param video_id: identifier forwarded to ``_parse_json``.
    :return: list of ``url_result`` entries using the ``anvato:`` URL
        scheme; empty when nothing usable is found.
    """
    entries = []
    for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage):
        # fatal=False: an unparsable blob is skipped rather than aborting
        # the scan of the remaining embeds.
        anvplayer_data = ie._parse_json(
            mobj.group('anvp'), video_id, transform_source=unescapeHTML,
            fatal=False)
        # The page is untrusted input: syntactically valid JSON may still be
        # a list, string or number, none of which supports .get().
        if not isinstance(anvplayer_data, dict):
            continue
        video = anvplayer_data.get('video')
        if not isinstance(video, compat_str) or not video.isdigit():
            continue
        access_key = anvplayer_data.get('accessKey')
        if not access_key:
            mcp = anvplayer_data.get('mcp')
            # Only a string identifier can be normalised and looked up.
            if isinstance(mcp, compat_str):
                access_key = AnvatoIE._MCP_TO_ACCESS_KEY_TABLE.get(
                    mcp.lower())
        if not access_key:
            continue
        entries.append(ie.url_result(
            'anvato:%s:%s' % (access_key, video), ie=AnvatoIE.ie_key(),
            video_id=video))
    return entries
def _extract_anvato_videos(self, webpage, video_id):
    """Extract the Anvato video embedded in ``webpage``.

    Searches ``webpage`` with the shared ``_ANVP_RE`` pattern, parses the
    captured ``anvp`` group as JSON, and passes its ``accessKey`` and
    ``video`` entries to ``_get_anvato_videos``.

    :param webpage: HTML text containing the player ``<script>`` tag.
    :param video_id: identifier forwarded to ``_parse_json``.
    :return: whatever ``_get_anvato_videos`` returns.
    :raises KeyError: if the player data lacks ``accessKey`` or ``video``.
    """
    # Use the class-level pattern (either quote style) instead of a
    # hard-coded single-quote regex, so this agrees with _extract_urls.
    anvp_json = self._html_search_regex(
        self._ANVP_RE, webpage, 'Anvato player data', group='anvp')
    anvplayer_data = self._parse_json(anvp_json, video_id)
    return self._get_anvato_videos(
        anvplayer_data['accessKey'], anvplayer_data['video'])
def _real_extract(self, url):
    """Resolve an ``anvato:<access key or MCP id>:<video id>`` URL.

    The first URL component is used directly when it is a key of
    ``_ANVACK_TABLE``; otherwise it is treated as an MCP identifier and
    translated through ``_MCP_TO_ACCESS_KEY_TABLE``.

    :param url: URL matching ``_VALID_URL``.
    :return: whatever ``_get_anvato_videos`` returns.
    :raises KeyError: if the component is neither a known access key nor a
        known MCP identifier.
    """
    mobj = re.match(self._VALID_URL, url)
    access_key, video_id = mobj.group('access_key_or_mcp', 'id')
    if access_key not in self._ANVACK_TABLE:
        mcp_table = self._MCP_TO_ACCESS_KEY_TABLE
        # Exact match first, then lower-cased: _extract_urls lower-cases
        # MCP ids before lookup, so accept the same spellings here.
        resolved = mcp_table.get(access_key) or mcp_table.get(
            access_key.lower())
        if not resolved:
            # Same exception type as the plain subscript lookup, but with
            # enough context to diagnose the bad URL.
            raise KeyError(
                'Unknown Anvato access key or MCP id: %r' % access_key)
        access_key = resolved
    return self._get_anvato_videos(access_key, video_id)