@ -1,4 +1,6 @@
# coding: utf-8
# coding: utf-8
from __future__ import unicode_literals
import re
import re
from . common import InfoExtractor
from . common import InfoExtractor
@ -6,16 +8,17 @@ from .common import InfoExtractor
class RadioFranceIE ( InfoExtractor ) :
class RadioFranceIE ( InfoExtractor ) :
_VALID_URL = r ' ^https?://maison \ .radiofrance \ .fr/radiovisions/(?P<id>[^?#]+) '
_VALID_URL = r ' ^https?://maison \ .radiofrance \ .fr/radiovisions/(?P<id>[^?#]+) '
IE_NAME = u ' radiofrance '
IE_NAME = ' radiofrance '
_TEST = {
_TEST = {
u ' url ' : u ' http://maison.radiofrance.fr/radiovisions/one-one ' ,
' url ' : ' http://maison.radiofrance.fr/radiovisions/one-one ' ,
u ' file ' : u ' one-one.ogg ' ,
' md5 ' : ' bdbb28ace95ed0e04faab32ba3160daf ' ,
u ' md5 ' : u ' bdbb28ace95ed0e04faab32ba3160daf ' ,
' info_dict ' : {
u ' info_dict ' : {
' id ' : ' one-one ' ,
u " title " : u " One to one " ,
' ext ' : ' ogg ' ,
u " description " : u " Plutôt que d ' imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j ' ai l ' intime conviction que la radio continuera d ' être un grand média de proximité pour les auditeurs. " ,
" title " : " One to one " ,
u " uploader " : u " Thomas Hercouët " ,
" description " : " Plutôt que d ' imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j ' ai l ' intime conviction que la radio continuera d ' être un grand média de proximité pour les auditeurs. " ,
" uploader " : " Thomas Hercouët " ,
} ,
} ,
}
}
@ -24,27 +27,28 @@ class RadioFranceIE(InfoExtractor):
video_id = m . group ( ' id ' )
video_id = m . group ( ' id ' )
webpage = self . _download_webpage ( url , video_id )
webpage = self . _download_webpage ( url , video_id )
title = self . _html_search_regex ( r ' <h1>(.*?)</h1> ' , webpage , u ' title ' )
title = self . _html_search_regex ( r ' <h1>(.*?)</h1> ' , webpage , ' title ' )
description = self . _html_search_regex (
description = self . _html_search_regex (
r ' <div class= " bloc_page_wrapper " ><div class= " text " >(.*?)</div> ' ,
r ' <div class= " bloc_page_wrapper " ><div class= " text " >(.*?)</div> ' ,
webpage , u ' description ' , fatal = False )
webpage , ' description ' , fatal = False )
uploader = self . _html_search_regex (
uploader = self . _html_search_regex (
r ' <div class= " credit " > © (.*?)</div> ' ,
r ' <div class= " credit " > © (.*?)</div> ' ,
webpage , u ' uploader ' , fatal = False )
webpage , ' uploader ' , fatal = False )
formats_str = self . _html_search_regex (
formats_str = self . _html_search_regex (
r ' class= " jp-jplayer[^ " ]* " data-source= " ([^ " ]+) " > ' ,
r ' class= " jp-jplayer[^ " ]* " data-source= " ([^ " ]+) " > ' ,
webpage , u ' audio URLs ' )
webpage , ' audio URLs ' )
formats = [
formats = [
{
{
' format_id ' : fm [ 0 ] ,
' format_id ' : fm [ 0 ] ,
' url ' : fm [ 1 ] ,
' url ' : fm [ 1 ] ,
' vcodec ' : ' none ' ,
' vcodec ' : ' none ' ,
' preference ' : i ,
}
}
for fm in
for i, fm in
re . findall ( r " ([a-z0-9]+) \ s*: \ s* ' ([^ ' ]+) ' " , formats_str )
enumerate ( re . findall ( r " ([a-z0-9]+) \ s*: \ s* ' ([^ ' ]+) ' " , formats_str ) )
]
]
# No sorting, we don't know any more about these formats
self . _sort_formats ( formats )
return {
return {
' id ' : video_id ,
' id ' : video_id ,