@ -3481,20 +3481,20 @@ class XVideosIE(InfoExtractor):
self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
class SoundcloudIE(Information Extractor):
class SoundcloudIE(InfoExtractor):
"""Information extractor for soundcloud.com
"""Information extractor for soundcloud.com
To access the media, the uid of the song and a stream token
To access the media, the uid of the song and a stream token
must be extracted from the page source and the script must make
must be extracted from the page source and the script must make
a request to media.soundcloud.com/crossdomain.xml. Then
a request to media.soundcloud.com/crossdomain.xml. Then
the media can be grabbed by requesting from an url composed
the media can be grabbed by requesting from an url composed
of the stream token and uid
of the stream token and uid
"""
"""
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
IE_NAME = u'soundcloud'
IE_NAME = u'soundcloud'
def __init__(self, downloader=None):
def __init__(self, downloader=None):
InfoExtractor.__init__(self, downloader)
InfoExtractor.__init__(self, downloader)
def report_webpage(self, video_id):
def report_webpage(self, video_id):
"""Report information extraction."""
"""Report information extraction."""
@ -3504,8 +3504,8 @@ class SoundcloudIE(InformationExtractor):
"""Report information extraction."""
"""Report information extraction."""
self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
def _real_initialize(self):
def _real_initialize(self):
return
return
def _real_extract(self, url):
def _real_extract(self, url):
htmlParser = HTMLParser.HTMLParser()
htmlParser = HTMLParser.HTMLParser()
@ -3515,10 +3515,10 @@ class SoundcloudIE(InformationExtractor):
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
return
return
# extract uploader (which is in the url)
# extract uploader (which is in the url)
uploader = mobj.group(3 ).decode('utf-8')
uploader = mobj.group(1 ).decode('utf-8')
# extract simple title (uploader + slug of song title)
# extract simple title (uploader + slug of song title)
slug_title = mobj.group(4 ).decode('utf-8')
slug_title = mobj.group(2 ).decode('utf-8')
simple_title = uploader + '-' + slug_title
simple_title = uploader + '-' + slug_title
self.report_webpage('%s/%s' % (uploader, slug_title))
self.report_webpage('%s/%s' % (uploader, slug_title))
@ -3532,32 +3532,36 @@ class SoundcloudIE(InformationExtractor):
self.report_extraction('%s/%s' % (uploader, slug_title))
self.report_extraction('%s/%s' % (uploader, slug_title))
# extract uid and access token
# extract uid and access token
mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', page)
mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', page)
if mobj:
if mobj:
video_id = match.group(1)
video_id = match.group(1)
stream_token = match.group(2)
stream_token = match.group(2)
# construct media url (with uid/token) to request song
# construct media url (with uid/token) to request song
mediaURL = "http://media.soundcloud.com/stream/%s?stream_token=%s"
mediaURL = "http://media.soundcloud.com/stream/%s?stream_token=%s"
mediaURL = mediaURL % (video_id, stream_token)
mediaURL = mediaURL % (video_id, stream_token)
# description
# description
description = u'No description available'
description = u'No description available'
mobj = re.search('track-description-value"><p>(.*?)</p>', page)
mobj = re.search('track-description-value"><p>(.*?)</p>', page)
if mobj:
if mobj:
description = mobj.group(1)
description = mobj.group(1)
# upload date
# upload date
mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", page)
mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", page)
if mobj:
if mobj:
try:
try:
upload_date = datetime.datetime.strptime(match.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
upload_date = datetime.datetime.strptime(match.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
except:
except:
pass
pass
try:
# for soundcloud, a request must be made to a cross domain to establish
self._download.process_info({
# needed cookies
request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
try:
self._downloader.process_info({
'id': video_id,
'id': video_id,
'url': video_url,
'url': video_url,
'uploader': uploader,
'uploader': uploader,
@ -3567,8 +3571,10 @@ class SoundcloudIE(InformationExtractor):
'ext': u'mp3',
'ext': u'mp3',
'format': u'NA',
'format': u'NA',
'player_url': None,
'player_url': None,
'description': description
'description': description
})
})
except UnavailableVideoError:
self._downloader.trouble(u'\nERROR: unable to download video')
class PostProcessor(object):
class PostProcessor(object):
"""Post Processor class.
"""Post Processor class.
@ -3966,6 +3972,7 @@ def gen_extractors():
EscapistIE(),
EscapistIE(),
CollegeHumorIE(),
CollegeHumorIE(),
XVideosIE(),
XVideosIE(),
SoundcloudIE(),
GenericIE()
GenericIE()
]
]