@ -1,23 +1,43 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from . common import InfoExtractor
from . . compat import compat_str
from . . utils import (
determine_ext ,
float_or_none ,
int_or_none ,
mimetype2ext ,
try_get ,
url encode_postdata ,
url join ,
)
class YandexDiskIE ( InfoExtractor ) :
_VALID_URL = r ' https?://yadi \ .sk/[di]/(?P<id>[^/?#&]+) '
_VALID_URL = r ''' (?x)https?://
( ? P < domain >
yadi \. sk |
disk \. yandex \.
( ? :
az |
by |
co ( ? : m ( ? : \. ( ? : am | ge | tr ) ) ? | \. il ) |
ee |
fr |
k [ gz ] |
l [ tv ] |
md |
t [ jm ] |
u [ az ] |
ru
)
) / ( ? : [ di ] / | public . * ? \bhash = ) ( ? P < id > [ ^ / ? #&]+)'''
_TESTS = [ {
' url ' : ' https://yadi.sk/i/VdOeDou8eZs6Y ' ,
' md5 ' : ' 33955d7ae052f15853dc41f35f17581c ' ,
' md5 ' : ' a4a8d52958c8fddcf9845935070402ae ' ,
' info_dict ' : {
' id ' : ' VdOeDou8eZs6Y ' ,
' ext ' : ' mp4 ' ,
@ -27,92 +47,101 @@ class YandexDiskIE(InfoExtractor):
' uploader_id ' : ' 300043621 ' ,
' view_count ' : int ,
} ,
' expected_warnings ' : [ ' Unable to download JSON metadata ' ] ,
} , {
' url ' : ' https://yadi.sk/d/h3WAXvDS3Li3Ce ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://yadi.sk/public?hash=5DZ296JK9GWCLp02f6jrObjnctjRxMs8L6 % 2B %2F uhNqk38 % 3D ' ,
' only_matching ' : True ,
} ]
def _real_extract ( self , url ) :
video_id = self . _match_id ( url )
status = self . _download_webpage (
' https://disk.yandex.com/auth/status ' , video_id , query = {
' urlOrigin ' : url ,
' source ' : ' public ' ,
' md5 ' : ' false ' ,
} )
sk = self . _search_regex (
r ' ([ " \' ])sk(?:External)? \ 1 \ s*: \ s*([ " \' ])(?P<value>(?:(?! \ 2).)+) \ 2 ' ,
status , ' sk ' , group = ' value ' )
domain , video_id = re . match ( self . _VALID_URL , url ) . groups ( )
webpage = self . _download_webpage ( url , video_id )
models = self . _parse_json (
self . _search_regex (
r ' <script[^>]+id=[ " \' ]models-client[^>]+> \ s*( \ [.+? \ ]) \ s*</script ' ,
webpage , ' video JSON ' ) ,
video_id )
data = next (
model [ ' data ' ] for model in models
if model . get ( ' model ' ) == ' resource ' )
video_hash = data [ ' id ' ]
title = data [ ' name ' ]
models = self . _download_json (
' https://disk.yandex.com/models/ ' , video_id ,
data = urlencode_postdata ( {
' _model.0 ' : ' videoInfo ' ,
' id.0 ' : video_hash ,
' _model.1 ' : ' do-get-resource-url ' ,
' id.1 ' : video_hash ,
' version ' : ' 13.6 ' ,
' sk ' : sk ,
} ) , query = { ' _m ' : ' videoInfo ' } ) [ ' models ' ]
videos = try_get ( models , lambda x : x [ 0 ] [ ' data ' ] [ ' videos ' ] , list ) or [ ]
source_url = try_get (
models , lambda x : x [ 1 ] [ ' data ' ] [ ' file ' ] , compat_str )
store = self . _parse_json ( self . _search_regex (
r ' <script[^>]+id= " store-prefetch " [^>]*> \ s*( { .+?}) \ s*</script> ' ,
webpage , ' store ' ) , video_id )
resource = store [ ' resources ' ] [ store [ ' rootResourceId ' ] ]
title = resource [ ' name ' ]
meta = resource . get ( ' meta ' ) or { }
public_url = meta . get ( ' short_url ' )
if public_url :
video_id = self . _match_id ( public_url )
source_url = ( self . _download_json (
' https://cloud-api.yandex.net/v1/disk/public/resources/download ' ,
video_id , query = { ' public_key ' : url } , fatal = False ) or { } ) . get ( ' href ' )
video_streams = resource . get ( ' videoStreams ' ) or { }
video_hash = resource . get ( ' hash ' ) or url
environment = store . get ( ' environment ' ) or { }
sk = environment . get ( ' sk ' )
yandexuid = environment . get ( ' yandexuid ' )
if sk and yandexuid and not ( source_url and video_streams ) :
self . _set_cookie ( domain , ' yandexuid ' , yandexuid )
def call_api ( action ) :
return ( self . _download_json (
urljoin ( url , ' /public/api/ ' ) + action , video_id , data = json . dumps ( {
' hash ' : video_hash ,
' sk ' : sk ,
} ) . encode ( ) , headers = {
' Content-Type ' : ' text/plain ' ,
} , fatal = False ) or { } ) . get ( ' data ' ) or { }
if not source_url :
# TODO: figure out how to detect if download limit has
# been reached and then avoid unnecessary source format
# extraction requests
source_url = call_api ( ' download-url ' ) . get ( ' url ' )
if not video_streams :
video_streams = call_api ( ' get-video-streams ' )
formats = [ ]
if source_url :
formats . append ( {
' url ' : source_url ,
' format_id ' : ' source ' ,
' ext ' : determine_ext ( title , ' mp4 ' ) ,
' ext ' : determine_ext ( title , meta . get ( ' ext ' ) or mimetype2ext ( meta . get ( ' mime_type ' ) ) or ' mp4 ' ) ,
' quality ' : 1 ,
' filesize ' : int_or_none ( meta . get ( ' size ' ) )
} )
for video in videos :
for video in ( video_streams . get ( ' videos ' ) or [ ] ) :
format_url = video . get ( ' url ' )
if not format_url :
continue
if determine_ext( format_url ) == ' m3u8 ' :
if video. get ( ' dimension ' ) == ' adaptive ' :
formats . extend ( self . _extract_m3u8_formats (
format_url , video_id , ' mp4 ' , entry_protocol = ' m3u8_native ' ,
format_url , video_id , ' mp4 ' , ' m3u8_native ' ,
m3u8_id = ' hls ' , fatal = False ) )
else :
size = video . get ( ' size ' ) or { }
height = int_or_none ( size . get ( ' height ' ) )
format_id = ' hls '
if height :
format_id + = ' - %d p ' % height
formats . append ( {
' ext ' : ' mp4 ' ,
' format_id ' : format_id ,
' height ' : height ,
' protocol ' : ' m3u8_native ' ,
' url ' : format_url ,
' width ' : int_or_none ( size . get ( ' width ' ) ) ,
} )
self . _sort_formats ( formats )
duration = float_or_none ( try_get (
models , lambda x : x [ 0 ] [ ' data ' ] [ ' duration ' ] ) , 1000 )
uploader = try_get (
data , lambda x : x [ ' user ' ] [ ' display_name ' ] , compat_str )
uploader_id = try_get (
data , lambda x : x [ ' user ' ] [ ' uid ' ] , compat_str )
view_count = int_or_none ( try_get (
data , lambda x : x [ ' meta ' ] [ ' views_counter ' ] ) )
uid = resource . get ( ' uid ' )
display_name = try_get ( store , lambda x : x [ ' users ' ] [ uid ] [ ' displayName ' ] )
return {
' id ' : video_id ,
' title ' : title ,
' duration ' : duration,
' uploader ' : uploader ,
' uploader_id ' : u ploader_ id,
' view_count ' : view_count,
' duration ' : float_or_none ( video_streams . get ( ' duration ' ) , 1000 ) ,
' uploader ' : display_name ,
' uploader_id ' : u id,
' view_count ' : int_or_none( meta . get ( ' views _counter' ) ) ,
' formats ' : formats ,
}