@ -404,7 +404,7 @@ class InfoExtractor(object):
video_info [ ' title ' ] = playlist_title
video_info [ ' title ' ] = playlist_title
return video_info
return video_info
def _search_regex ( self , pattern , string , name , default = _NO_DEFAULT , fatal = True , flags = 0 ):
def _search_regex ( self , pattern , string , name , default = _NO_DEFAULT , fatal = True , flags = 0 , group = None ):
"""
"""
Perform a regex search on the given string , using a single or a list of
Perform a regex search on the given string , using a single or a list of
patterns returning the first matching group .
patterns returning the first matching group .
@ -425,8 +425,11 @@ class InfoExtractor(object):
_name = name
_name = name
if mobj :
if mobj :
if group is None :
# return the first matching group
# return the first matching group
return next ( g for g in mobj . groups ( ) if g is not None )
return next ( g for g in mobj . groups ( ) if g is not None )
else :
return mobj . group ( group )
elif default is not _NO_DEFAULT :
elif default is not _NO_DEFAULT :
return default
return default
elif fatal :
elif fatal :
@ -436,11 +439,11 @@ class InfoExtractor(object):
' please report this issue on http://yt-dl.org/bug ' % _name )
' please report this issue on http://yt-dl.org/bug ' % _name )
return None
return None
def _html_search_regex ( self , pattern , string , name , default = _NO_DEFAULT , fatal = True , flags = 0 ):
def _html_search_regex ( self , pattern , string , name , default = _NO_DEFAULT , fatal = True , flags = 0 , group = None ):
"""
"""
Like _search_regex , but strips HTML tags and unescapes entities .
Like _search_regex , but strips HTML tags and unescapes entities .
"""
"""
res = self . _search_regex ( pattern , string , name , default , fatal , flags )
res = self . _search_regex ( pattern , string , name , default , fatal , flags , group )
if res :
if res :
return clean_html ( res ) . strip ( )
return clean_html ( res ) . strip ( )
else :
else :
@ -534,9 +537,9 @@ class InfoExtractor(object):
display_name = name
display_name = name
return self . _html_search_regex (
return self . _html_search_regex (
r ''' (?ix)<meta
r ''' (?ix)<meta
( ? = [ ^ > ] + ( ? : itemprop | name | property ) = [" \' ]? %s [ " \' ]? )
( ? = [ ^ > ] + ( ? : itemprop | name | property ) = ([ " \' ]?) %s \1 )
[ ^ > ] + content = [" \' ]([^ " \' ]+)[ " \' ] ' ' ' % re . escape ( name ) ,
[ ^ > ] + content = ([ " \' ])(?P<content>.*?) \1 ' ' ' % r e.escape(name) ,
html , display_name , fatal = fatal , * * kwargs )
html , display_name , fatal = fatal , group = ' content ' , * * kwargs )
# Thin wrapper: forwards to self . _html_search_meta with the literal
# ' dc.creator ' as the first argument, the caller's `html` as the second and
# the literal ' uploader ' as the third, then returns that helper's result
# unchanged.
# NOTE(review): the third argument is presumably the display name used in
# messages -- confirm against the _html_search_meta signature.
# NOTE(review): the def/return lines appear twice and the quoted strings carry
# padding spaces; this looks like a whitespace-tokenized diff whose old and new
# sides are identical here -- confirm against the real file before relying on
# the exact literal bytes.
def _dc_search_uploader ( self , html ) :
def _dc_search_uploader ( self , html ) :
return self . _html_search_meta ( ' dc.creator ' , html , ' uploader ' )
return self . _html_search_meta ( ' dc.creator ' , html , ' uploader ' )