@ -1,17 +1,12 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from . common import InfoExtractor
from . . utils import (
js_to_json ,
remove_end ,
)
class SBSIE ( InfoExtractor ) :
IE_DESC = ' sbs.com.au '
# Matches video pages under both /ondemand/ and /news/, with an optional
# "single/" path segment; the numeric video id is captured as group "id".
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand|news)/video/(?:single/)?(?P<id>[0-9]+)'
_TESTS = [ {
# Original URL is handled by the generic IE which finds the iframe:
@ -21,39 +16,36 @@ class SBSIE(InfoExtractor):
' info_dict ' : {
' id ' : ' 320403011771 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Dingo Conservation ' ,
' description ' : ' Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction ' ,
' title ' : ' Dingo Conservation (The Feed) ' ,
' description ' : ' md5:f250a9856fca50d22dec0b5b8015f8a5 ' ,
' thumbnail ' : ' re:http://.* \ .jpg ' ,
' duration ' : 308 ,
} ,
' add_ies ' : [ ' generic ' ] ,
} , {
' url ' : ' http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed ' ,
' only_matching ' : True ,
} , {
' url ' : ' http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9 ' ,
' only_matching ' : True ,
} ]
def _real_extract(self, url):
    """Resolve an SBS video page URL to the media URL exposed by its player.

    The video id is taken from ``url``; the standalone player page for that
    id is then downloaded and the ``playerParams`` JavaScript object embedded
    in it is parsed.

    Returns a ``url_transparent`` result dict holding the video id and the
    first available of the ``progressive``, ``standard`` and ``html`` release
    URLs, falling back to ``relatedItemsURL``.

    Presumably raises ``KeyError`` when ``playerParams`` has no
    ``releaseUrls`` entry, or when every release URL is missing and there is
    no ``relatedItemsURL`` (assumes ``_parse_json`` returns a plain dict --
    TODO confirm).
    """
    video_id = self._match_id(url)

    # Always fetch the canonical single-video page instead of the URL given,
    # so /news/ and /ondemand/ links are handled by the same code path and
    # the page is downloaded only once.
    webpage = self._download_webpage(
        'http://www.sbs.com.au/ondemand/video/single/%s?context=web' % video_id,
        video_id)

    player_params = self._parse_json(
        self._search_regex(
            r'(?s)var\s+playerParams\s*=\s*({.+?});', webpage, 'playerParams'),
        video_id)

    release_urls = player_params['releaseUrls']
    # Preference order: progressive, standard, html, then the related-items
    # URL as a last resort.
    theplatform_url = (
        release_urls.get('progressive') or release_urls.get('standard') or
        release_urls.get('html') or player_params['relatedItemsURL'])

    # No title/description/thumbnail overrides are set here: the test
    # expectation in this file keeps the " (The Feed)" suffix in the title,
    # so it must not be stripped. NOTE(review): with a url_transparent result
    # the metadata presumably comes from the extractor handling
    # theplatform_url -- confirm.
    return {
        '_type': 'url_transparent',
        'id': video_id,
        'url': theplatform_url,
    }