@ -11,14 +11,13 @@ from .common import (
from . . compat import (
compat_str ,
compat_urlparse ,
compat_urllib_parse_urlencode ,
)
from . . utils import (
ExtractorError ,
float_or_none ,
HEADRequest ,
int_or_none ,
KNOWN_EXTENSIONS ,
merge_dicts ,
mimetype2ext ,
str_or_none ,
try_get ,
@ -28,6 +27,20 @@ from ..utils import (
)
class SoundcloudEmbedIE ( InfoExtractor ) :
_VALID_URL = r ' https?://(?:w|player|p) \ .soundcloud \ .com/player/?.*?url=(?P<id>.*) '
@staticmethod
def _extract_urls ( webpage ) :
return [ m . group ( ' url ' ) for m in re . finditer (
r ' <iframe[^>]+src=([ " \' ])(?P<url>(?:https?://)?(?:w \ .)?soundcloud \ .com/player.+?) \ 1 ' ,
webpage ) ]
def _real_extract ( self , url ) :
return self . url_result ( compat_urlparse . parse_qs (
compat_urlparse . urlparse ( url ) . query ) [ ' url ' ] [ 0 ] )
class SoundcloudIE ( InfoExtractor ) :
""" Information extractor for soundcloud.com
To access the media , the uid of the song and a stream token
@ -44,9 +57,8 @@ class SoundcloudIE(InfoExtractor):
( ? ! ( ? : tracks | albums | sets ( ? : / . + ? ) ? | reposts | likes | spotlight ) / ? ( ? : $ | [ ? #]))
( ? P < title > [ \w \d - ] + ) / ?
( ? P < token > [ ^ ? ] + ? ) ? ( ? : [ ? ] . * ) ? $ )
| ( ? : api \. soundcloud \. com / tracks / ( ? P < track_id > \d + )
| ( ? : api ( ? : - v2 ) ? \. soundcloud \. com / tracks / ( ? P < track_id > \d + )
( ? : / ? \? secret_token = ( ? P < secret_token > [ ^ & ] + ) ) ? )
| ( ? P < player > ( ? : w | player | p . ) \. soundcloud \. com / player / ? . * ? url = . * )
)
'''
IE_NAME = ' soundcloud '
@ -60,6 +72,7 @@ class SoundcloudIE(InfoExtractor):
' title ' : ' Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1 ' ,
' description ' : ' No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o \' d ' ,
' uploader ' : ' E.T. ExTerrestrial Music ' ,
' uploader_id ' : ' 1571244 ' ,
' timestamp ' : 1349920598 ,
' upload_date ' : ' 20121011 ' ,
' duration ' : 143.216 ,
@ -79,6 +92,7 @@ class SoundcloudIE(InfoExtractor):
' title ' : ' Goldrushed ' ,
' description ' : ' From Stockholm Sweden \r \n Povel / Magnus / Filip / David \r \n www.theroyalconcept.com ' ,
' uploader ' : ' The Royal Concept ' ,
' uploader_id ' : ' 9615865 ' ,
' timestamp ' : 1337635207 ,
' upload_date ' : ' 20120521 ' ,
' duration ' : 30 ,
@ -92,6 +106,7 @@ class SoundcloudIE(InfoExtractor):
# rtmp
' skip_download ' : True ,
} ,
' skip ' : ' Preview ' ,
} ,
# private link
{
@ -103,6 +118,7 @@ class SoundcloudIE(InfoExtractor):
' title ' : ' Youtube - Dl Test Video \' \' Ä↭ ' ,
' description ' : ' test chars: \" \' / \\ ä↭ ' ,
' uploader ' : ' jaimeMF ' ,
' uploader_id ' : ' 69767071 ' ,
' timestamp ' : 1386604920 ,
' upload_date ' : ' 20131209 ' ,
' duration ' : 9.927 ,
@ -123,6 +139,7 @@ class SoundcloudIE(InfoExtractor):
' title ' : ' Youtube - Dl Test Video \' \' Ä↭ ' ,
' description ' : ' test chars: \" \' / \\ ä↭ ' ,
' uploader ' : ' jaimeMF ' ,
' uploader_id ' : ' 69767071 ' ,
' timestamp ' : 1386604920 ,
' upload_date ' : ' 20131209 ' ,
' duration ' : 9.927 ,
@ -143,6 +160,7 @@ class SoundcloudIE(InfoExtractor):
' title ' : ' Bus Brakes ' ,
' description ' : ' md5:0053ca6396e8d2fd7b7e1595ef12ab66 ' ,
' uploader ' : ' oddsamples ' ,
' uploader_id ' : ' 73680509 ' ,
' timestamp ' : 1389232924 ,
' upload_date ' : ' 20140109 ' ,
' duration ' : 17.346 ,
@ -163,6 +181,7 @@ class SoundcloudIE(InfoExtractor):
' title ' : ' Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav] ' ,
' description ' : ' md5:fa20ee0fca76a3d6df8c7e57f3715366 ' ,
' uploader ' : ' Ori Uplift Music ' ,
' uploader_id ' : ' 12563093 ' ,
' timestamp ' : 1504206263 ,
' upload_date ' : ' 20170831 ' ,
' duration ' : 7449.096 ,
@ -183,6 +202,7 @@ class SoundcloudIE(InfoExtractor):
' title ' : ' Sideways (Prod. Mad Real) ' ,
' description ' : ' md5:d41d8cd98f00b204e9800998ecf8427e ' ,
' uploader ' : ' garyvee ' ,
' uploader_id ' : ' 2366352 ' ,
' timestamp ' : 1488152409 ,
' upload_date ' : ' 20170226 ' ,
' duration ' : 207.012 ,
@ -207,6 +227,7 @@ class SoundcloudIE(InfoExtractor):
' title ' : ' Mezzo Valzer ' ,
' description ' : ' md5:4138d582f81866a530317bae316e8b61 ' ,
' uploader ' : ' Giovanni Sarani ' ,
' uploader_id ' : ' 3352531 ' ,
' timestamp ' : 1551394171 ,
' upload_date ' : ' 20190228 ' ,
' duration ' : 180.157 ,
@ -221,114 +242,81 @@ class SoundcloudIE(InfoExtractor):
}
]
_API_BASE = ' https://api.soundcloud.com/ '
_API_V2_BASE = ' https://api-v2.soundcloud.com/ '
_BASE_URL = ' https://soundcloud.com/ '
_CLIENT_ID = ' BeGVhOrGmfboy1LtiHTQF6Ejpt9ULJCI '
@staticmethod
def _extract_urls ( webpage ) :
return [ m . group ( ' url ' ) for m in re . finditer (
r ' <iframe[^>]+src=([ " \' ])(?P<url>(?:https?://)?(?:w \ .)?soundcloud \ .com/player.+?) \ 1 ' ,
webpage ) ]
_IMAGE_REPL_RE = r ' -([0-9a-z]+) \ .jpg '
_ARTWORK_MAP = {
' mini ' : 16 ,
' tiny ' : 20 ,
' small ' : 32 ,
' badge ' : 47 ,
' t67x67 ' : 67 ,
' large ' : 100 ,
' t300x300 ' : 300 ,
' crop ' : 400 ,
' t500x500 ' : 500 ,
' original ' : 0 ,
}
@classmethod
def _resolv_url ( cls , url ) :
return ' https://api.soundcloud.com/resolve.json?url= ' + url + ' &client_id= ' + cls . _CLIENT_ID
return SoundcloudIE . _API_V2_BASE + ' resolve ?url=' + url + ' &client_id= ' + cls . _CLIENT_ID
def _extract_info_dict ( self , info , full_title = None , quiet = False , secret_token = None ) :
def _extract_info_dict ( self , info , full_title = None , secret_token= None , version = 2 ) :
track_id = compat_str ( info [ ' id ' ] )
title = info [ ' title ' ]
name = full_title or track_id
if quiet :
self . report_extraction ( name )
thumbnail = info . get ( ' artwork_url ' ) or info . get ( ' user ' , { } ) . get ( ' avatar_url ' )
if isinstance ( thumbnail , compat_str ) :
thumbnail = thumbnail . replace ( ' -large ' , ' -t500x500 ' )
username = try_get ( info , lambda x : x [ ' user ' ] [ ' username ' ] , compat_str )
def extract_count ( key ) :
return int_or_none ( info . get ( ' %s _count ' % key ) )
like_count = extract_count ( ' favoritings ' )
if like_count is None :
like_count = extract_count ( ' likes ' )
result = {
' id ' : track_id ,
' uploader ' : username ,
' timestamp ' : unified_timestamp ( info . get ( ' created_at ' ) ) ,
' title ' : title ,
' description ' : info . get ( ' description ' ) ,
' thumbnail ' : thumbnail ,
' duration ' : float_or_none ( info . get ( ' duration ' ) , 1000 ) ,
' webpage_url ' : info . get ( ' permalink_url ' ) ,
' license ' : info . get ( ' license ' ) ,
' view_count ' : extract_count ( ' playback ' ) ,
' like_count ' : like_count ,
' comment_count ' : extract_count ( ' comment ' ) ,
' repost_count ' : extract_count ( ' reposts ' ) ,
' genre ' : info . get ( ' genre ' ) ,
}
track_base_url = self . _API_BASE + ' tracks/ %s ' % track_id
format_urls = set ( )
formats = [ ]
query = { ' client_id ' : self . _CLIENT_ID }
if secret_token is not None :
if secret_token :
query [ ' secret_token ' ] = secret_token
if info . get ( ' downloadable ' , False ) :
# We can build a direct link to the song
if info . get ( ' downloadable ' ) :
format_url = update_url_query (
' https://api.soundcloud.com/tracks/ %s /download ' % track_id , query )
info . get ( ' download_url ' ) or track_base_url + ' /download ' , query )
format_urls . add ( format_url )
if version == 2 :
v1_info = self . _download_json (
track_base_url , track_id , query = query , fatal = False ) or { }
else :
v1_info = info
formats . append ( {
' format_id ' : ' download ' ,
' ext ' : info . get ( ' original_format ' , ' mp3 ' ) ,
' ext ' : v1_info . get ( ' original_format ' ) or ' mp3 ' ,
' filesize ' : int_or_none ( v1_info . get ( ' original_content_size ' ) ) ,
' url ' : format_url ,
' vcodec ' : ' none ' ,
' preference ' : 10 ,
} )
# Old API, does not work for some tracks (e.g.
# https://soundcloud.com/giovannisarani/mezzo-valzer)
format_dict = self . _download_json (
' https://api.soundcloud.com/i1/tracks/ %s /streams ' % track_id ,
track_id , ' Downloading track url ' , query = query , fatal = False )
if format_dict :
for key , stream_url in format_dict . items ( ) :
if stream_url in format_urls :
continue
format_urls . add ( stream_url )
ext , abr = ' mp3 ' , None
mobj = re . search ( r ' _([^_]+)_( \ d+)_url ' , key )
if mobj :
ext , abr = mobj . groups ( )
abr = int ( abr )
if key . startswith ( ' http ' ) :
stream_formats = [ {
' format_id ' : key ,
' ext ' : ext ,
' url ' : stream_url ,
} ]
elif key . startswith ( ' rtmp ' ) :
# The url doesn't have an rtmp app, we have to extract the playpath
url , path = stream_url . split ( ' mp3: ' , 1 )
stream_formats = [ {
' format_id ' : key ,
' url ' : url ,
' play_path ' : ' mp3: ' + path ,
' ext ' : ' flv ' ,
} ]
elif key . startswith ( ' hls ' ) :
stream_formats = self . _extract_m3u8_formats (
stream_url , track_id , ext , entry_protocol = ' m3u8_native ' ,
m3u8_id = key , fatal = False )
else :
continue
if abr :
for f in stream_formats :
f [ ' abr ' ] = abr
def invalid_url ( url ) :
return not url or url in format_urls or re . search ( r ' /(?:preview|playlist)/0/30/ ' , url )
formats . extend ( stream_formats )
def add_format ( f , protocol ) :
mobj = re . search ( r ' \ .(?P<abr> \ d+) \ .(?P<ext>[0-9a-z] { 3,4})(?=[/?]) ' , stream_url )
if mobj :
for k , v in mobj . groupdict ( ) . items ( ) :
if not f . get ( k ) :
f [ k ] = v
format_id_list = [ ]
if protocol :
format_id_list . append ( protocol )
for k in ( ' ext ' , ' abr ' ) :
v = f . get ( k )
if v :
format_id_list . append ( v )
abr = f . get ( ' abr ' )
if abr :
f [ ' abr ' ] = int ( abr )
f . update ( {
' format_id ' : ' _ ' . join ( format_id_list ) ,
' protocol ' : ' m3u8_native ' if protocol == ' hls ' else ' http ' ,
} )
formats . append ( f )
# New API
transcodings = try_get (
@ -337,129 +325,165 @@ class SoundcloudIE(InfoExtractor):
if not isinstance ( t , dict ) :
continue
format_url = url_or_none ( t . get ( ' url ' ) )
if not format_url :
if not format_url or t . get ( ' snipped ' ) or ' /preview/ ' in format_url :
continue
stream = self . _download_json (
update_url_query( format_url , query ) , track_id , fatal = False )
format_url, track_id , query = query , fatal = False )
if not isinstance ( stream , dict ) :
continue
stream_url = url_or_none ( stream . get ( ' url ' ) )
if not stream_url :
continue
if stream_url in format_urls :
if invalid_url ( stream_url ) :
continue
format_urls . add ( stream_url )
protocol = try_get ( t , lambda x : x [ ' format ' ] [ ' protocol ' ] , compat_str )
stream_format = t . get ( ' format ' ) or { }
protocol = stream_format . get ( ' protocol ' )
if protocol != ' hls ' and ' /hls ' in format_url :
protocol = ' hls '
ext = None
preset = str_or_none ( t . get ( ' preset ' ) )
if preset :
ext = preset . split ( ' _ ' ) [ 0 ]
if ext not in KNOWN_EXTENSIONS :
mimetype = try_get (
t , lambda x : x [ ' format ' ] [ ' mime_type ' ] , compat_str )
ext = mimetype2ext ( mimetype ) or ' mp3 '
format_id_list = [ ]
if protocol :
format_id_list . append ( protocol )
format_id_list . append ( ext )
format_id = ' _ ' . join ( format_id_list )
formats . append ( {
if ext not in KNOWN_EXTENSIONS :
ext = mimetype2ext ( stream_format . get ( ' mime_type ' ) )
add_format ( {
' url ' : stream_url ,
' format_id ' : format_id ,
' ext ' : ext ,
' protocol ' : ' m3u8_native ' if protocol == ' hls ' else ' http ' ,
} )
} , ' http ' if protocol == ' progressive ' else protocol )
if not formats :
# Old API, does not work for some tracks (e.g.
# https://soundcloud.com/giovannisarani/mezzo-valzer)
# and might serve preview URLs (e.g.
# http://www.soundcloud.com/snbrn/ele)
format_dict = self . _download_json (
track_base_url + ' /streams ' , track_id ,
' Downloading track url ' , query = query , fatal = False ) or { }
for key , stream_url in format_dict . items ( ) :
if invalid_url ( stream_url ) :
continue
format_urls . add ( stream_url )
mobj = re . search ( r ' (http|hls)_([^_]+)_( \ d+)_url ' , key )
if mobj :
protocol , ext , abr = mobj . groups ( )
add_format ( {
' abr ' : abr ,
' ext ' : ext ,
' url ' : stream_url ,
} , protocol )
if not formats :
# We fallback to the stream_url in the original info, this
# cannot be always used, sometimes it can give an HTTP 404 error
formats . append ( {
' format_id ' : ' fallback ' ,
' url ' : update_url_query ( info [ ' stream_url ' ] , query ) ,
' ext ' : ' mp3 ' ,
} )
self . _check_formats ( formats , track_id )
urlh = self . _request_webpage (
HEADRequest ( info . get ( ' stream_url ' ) or track_base_url + ' /stream ' ) ,
track_id , query = query , fatal = False )
if urlh :
stream_url = urlh . geturl ( )
if not invalid_url ( stream_url ) :
add_format ( { ' url ' : stream_url } , ' http ' )
for f in formats :
f [ ' vcodec ' ] = ' none '
self . _sort_formats ( formats )
result [ ' formats ' ] = formats
return result
user = info . get ( ' user ' ) or { }
thumbnails = [ ]
artwork_url = info . get ( ' artwork_url ' )
thumbnail = artwork_url or user . get ( ' avatar_url ' )
if isinstance ( thumbnail , compat_str ) :
if re . search ( self . _IMAGE_REPL_RE , thumbnail ) :
for image_id , size in self . _ARTWORK_MAP . items ( ) :
i = {
' id ' : image_id ,
' url ' : re . sub ( self . _IMAGE_REPL_RE , ' - %s .jpg ' % image_id , thumbnail ) ,
}
if image_id == ' tiny ' and not artwork_url :
size = 18
elif image_id == ' original ' :
i [ ' preference ' ] = 10
if size :
i . update ( {
' width ' : size ,
' height ' : size ,
} )
thumbnails . append ( i )
else :
thumbnails = [ { ' url ' : thumbnail } ]
def extract_count ( key ) :
return int_or_none ( info . get ( ' %s _count ' % key ) )
return {
' id ' : track_id ,
' uploader ' : user . get ( ' username ' ) ,
' uploader_id ' : str_or_none ( user . get ( ' id ' ) ) or user . get ( ' permalink ' ) ,
' uploader_url ' : user . get ( ' permalink_url ' ) ,
' timestamp ' : unified_timestamp ( info . get ( ' created_at ' ) ) ,
' title ' : title ,
' description ' : info . get ( ' description ' ) ,
' thumbnails ' : thumbnails ,
' duration ' : float_or_none ( info . get ( ' duration ' ) , 1000 ) ,
' webpage_url ' : info . get ( ' permalink_url ' ) ,
' license ' : info . get ( ' license ' ) ,
' view_count ' : extract_count ( ' playback ' ) ,
' like_count ' : extract_count ( ' favoritings ' ) or extract_count ( ' likes ' ) ,
' comment_count ' : extract_count ( ' comment ' ) ,
' repost_count ' : extract_count ( ' reposts ' ) ,
' genre ' : info . get ( ' genre ' ) ,
' formats ' : formats
}
def _real_extract ( self , url ) :
mobj = re . match ( self . _VALID_URL , url , flags = re . VERBOSE )
if mobj is None :
raise ExtractorError ( ' Invalid URL: %s ' % url )
mobj = re . match ( self . _VALID_URL , url )
track_id = mobj . group ( ' track_id ' )
new_info = { }
if track_id is not None :
info_json_url = ' https://api.soundcloud.com/tracks/ ' + track_id + ' .json?client_id= ' + self . _CLIENT_ID
query = {
' client_id ' : self . _CLIENT_ID ,
}
if track_id :
info_json_url = self . _API_V2_BASE + ' tracks/ ' + track_id
full_title = track_id
token = mobj . group ( ' secret_token ' )
if token :
info_json_url + = ' &secret_token= ' + token
elif mobj . group ( ' player ' ) :
query = compat_urlparse . parse_qs ( compat_urlparse . urlparse ( url ) . query )
real_url = query [ ' url ' ] [ 0 ]
# If the token is in the query of the original url we have to
# manually add it
if ' secret_token ' in query :
real_url + = ' ?secret_token= ' + query [ ' secret_token ' ] [ 0 ]
return self . url_result ( real_url )
query [ ' secret_token ' ] = token
else :
# extract uploader (which is in the url)
uploader = mobj . group ( ' uploader ' )
# extract simple title (uploader + slug of song title)
slug_title = mobj . group ( ' title ' )
full_title = resolve_title = ' %s / %s ' % mobj . group ( ' uploader ' , ' title ' )
token = mobj . group ( ' token ' )
full_title = resolve_title = ' %s / %s ' % ( uploader , slug_title )
if token :
resolve_title + = ' / %s ' % token
info_json_url = self . _resolv_url ( self . _BASE_URL + resolve_title )
webpage = self . _download_webpage ( url , full_title , fatal = False )
if webpage :
entries = self . _parse_json (
self . _search_regex (
r ' var \ s+c \ s*= \ s*( \ [.+? \ ]) \ s*, \ s*o \ s*=Date \ b ' , webpage ,
' data ' , default = ' [] ' ) , full_title , fatal = False )
if entries :
for e in entries :
if not isinstance ( e , dict ) :
continue
if e . get ( ' id ' ) != 67 :
continue
data = try_get ( e , lambda x : x [ ' data ' ] [ 0 ] , dict )
if data :
new_info = data
break
info_json_url = self . _resolv_url (
' https://soundcloud.com/ %s ' % resolve_title )
# Contains some additional info missing from new_info
version = 2
info = self . _download_json (
info_json_url , full_title , ' Downloading info JSON ' )
info_json_url , full_title , ' Downloading info JSON ' , query = query , fatal = False )
if not info :
info = self . _download_json (
info_json_url . replace ( self . _API_V2_BASE , self . _API_BASE ) ,
full_title , ' Downloading info JSON ' , query = query )
version = 1
return self . _extract_info_dict (
merge_dicts ( info , new_info ) , full_title , secret_token = token )
return self . _extract_info_dict ( info , full_title , token , version )
class SoundcloudPlaylistBaseIE ( SoundcloudIE ) :
@staticmethod
def _extract_id ( e ) :
return compat_str ( e [ ' id ' ] ) if e . get ( ' id ' ) else None
def _extract_track_entries ( self , tracks ) :
return [
self . url_result (
track [ ' permalink_url ' ] , SoundcloudIE . ie_key ( ) ,
video_id = self . _extract_id ( track ) )
for track in tracks if track . get ( ' permalink_url ' ) ]
def _extract_track_entries ( self , tracks , token = None ) :
entries = [ ]
for track in tracks :
track_id = str_or_none ( track . get ( ' id ' ) )
url = track . get ( ' permalink_url ' )
if not url :
if not track_id :
continue
url = self . _API_V2_BASE + ' tracks/ ' + track_id
if token :
url + = ' ?secret_token= ' + token
entries . append ( self . url_result (
url , SoundcloudIE . ie_key ( ) , track_id ) )
return entries
class SoundcloudSetIE ( SoundcloudPlaylistBaseIE ) :
@ -480,41 +504,28 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
def _real_extract ( self , url ) :
mobj = re . match ( self . _VALID_URL , url )
# extract uploader (which is in the url)
uploader = mobj . group ( ' uploader ' )
# extract simple title (uploader + slug of song title)
slug_title = mobj . group ( ' slug_title ' )
full_title = ' %s /sets/ %s ' % ( uploader , slug_title )
url = ' https://soundcloud.com/ %s /sets/ %s ' % ( uploader , slug_title )
full_title = ' %s /sets/ %s ' % mobj . group ( ' uploader ' , ' slug_title ' )
token = mobj . group ( ' token ' )
if token :
full_title + = ' / ' + token
url + = ' / ' + token
resolv_url = self . _resolv_url ( url )
info = self . _download_json ( resolv_url , full_title )
info = self . _download_json ( self . _resolv_url (
self . _BASE_URL + full_title ) , full_title )
if ' errors ' in info :
msgs = ( compat_str ( err [ ' error_message ' ] ) for err in info [ ' errors ' ] )
raise ExtractorError ( ' unable to download video webpage: %s ' % ' , ' . join ( msgs ) )
entries = self . _extract_track_entries ( info [ ' tracks ' ] )
entries = self . _extract_track_entries ( info [ ' tracks ' ] , token )
return {
' _type ' : ' playlist ' ,
' entries ' : entries ,
' id ' : ' %s ' % info [ ' id ' ] ,
' title ' : info [ ' title ' ] ,
}
return self . playlist_result (
entries , str_or_none ( info . get ( ' id ' ) ) , info . get ( ' title ' ) )
class SoundcloudPagedPlaylistBaseIE ( SoundcloudPlaylistBaseIE ) :
_API_V2_BASE = ' https://api-v2.soundcloud.com '
def _extract_playlist ( self , base_url , playlist_id , playlist_title ) :
COMMON_QUERY = {
' limit ' : 5 0,
' limit ' : 200000000 0,
' client_id ' : self . _CLIENT_ID ,
' linked_partitioning ' : ' 1 ' ,
}
@ -522,12 +533,13 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
query = COMMON_QUERY . copy ( )
query [ ' offset ' ] = 0
next_href = base_url + ' ? ' + compat_urllib_parse_urlencode ( query )
next_href = base_url
entries = [ ]
for i in itertools . count ( ) :
response = self . _download_json (
next_href , playlist_id , ' Downloading track page %s ' % ( i + 1 ) )
next_href , playlist_id ,
' Downloading track page %s ' % ( i + 1 ) , query = query )
collection = response [ ' collection ' ]
@ -546,9 +558,8 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
continue
return self . url_result (
permalink_url ,
ie = SoundcloudIE . ie_key ( ) if SoundcloudIE . suitable ( permalink_url ) else None ,
video_id = self . _extract_id ( cand ) ,
video_title = cand . get ( ' title ' ) )
SoundcloudIE . ie_key ( ) if SoundcloudIE . suitable ( permalink_url ) else None ,
str_or_none ( cand . get ( ' id ' ) ) , cand . get ( ' title ' ) )
for e in collection :
entry = resolve_entry ( ( e , e . get ( ' track ' ) , e . get ( ' playlist ' ) ) )
@ -559,11 +570,10 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
if not next_href :
break
parsed_next_href = compat_urlparse . urlparse ( response [ ' next_href ' ] )
qs = compat_urlparse . parse_qs ( parsed_next_href . query )
qs . update ( COMMON_QUERY )
next_href = compat_urlparse . urlunparse (
parsed_next_href . _replace ( query = compat_urllib_parse_urlencode ( qs , True ) ) )
next_href = response [ ' next_href ' ]
parsed_next_href = compat_urlparse . urlparse ( next_href )
query = compat_urlparse . parse_qs ( parsed_next_href . query )
query . update ( COMMON_QUERY )
return {
' _type ' : ' playlist ' ,
@ -609,7 +619,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
' url ' : ' https://soundcloud.com/jcv246/sets ' ,
' info_dict ' : {
' id ' : ' 12982173 ' ,
' title ' : ' Jordi / cv ( Playlis ts)' ,
' title ' : ' Jordi / cv ( Se ts)' ,
} ,
' playlist_mincount ' : 2 ,
} , {
@ -636,39 +646,29 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
} ]
_BASE_URL_MAP = {
' all ' : ' %s /stream/users/ %% s ' % SoundcloudPagedPlaylistBaseIE . _API_V2_BASE ,
' tracks ' : ' %s /users/ %% s/tracks ' % SoundcloudPagedPlaylistBaseIE . _API_V2_BASE ,
' albums ' : ' %s /users/ %% s/albums ' % SoundcloudPagedPlaylistBaseIE . _API_V2_BASE ,
' sets ' : ' %s /users/ %% s/playlists ' % SoundcloudPagedPlaylistBaseIE . _API_V2_BASE ,
' reposts ' : ' %s /stream/users/ %% s/reposts ' % SoundcloudPagedPlaylistBaseIE . _API_V2_BASE ,
' likes ' : ' %s /users/ %% s/likes ' % SoundcloudPagedPlaylistBaseIE . _API_V2_BASE ,
' spotlight ' : ' %s /users/ %% s/spotlight ' % SoundcloudPagedPlaylistBaseIE . _API_V2_BASE ,
}
_TITLE_MAP = {
' all ' : ' All ' ,
' tracks ' : ' Tracks ' ,
' albums ' : ' Albums ' ,
' sets ' : ' Playlists ' ,
' reposts ' : ' Reposts ' ,
' likes ' : ' Likes ' ,
' spotlight ' : ' Spotlight ' ,
' all ' : ' stream/users/ %s ' ,
' tracks ' : ' users/ %s /tracks ' ,
' albums ' : ' users/ %s /albums ' ,
' sets ' : ' users/ %s /playlists ' ,
' reposts ' : ' stream/users/ %s /reposts ' ,
' likes ' : ' users/ %s /likes ' ,
' spotlight ' : ' users/ %s /spotlight ' ,
}
def _real_extract ( self , url ) :
mobj = re . match ( self . _VALID_URL , url )
uploader = mobj . group ( ' user ' )
url = ' https://soundcloud.com/ %s / ' % uploader
resolv_url = self . _resolv_url ( url )
user = self . _download_json (
resolv_url , uploader , ' Downloading user info ' )
self . _resolv_url ( self . _BASE_URL + uploader ) ,
uploader , ' Downloading user info ' )
resource = mobj . group ( ' rsrc ' ) or ' all '
return self . _extract_playlist (
self . _BASE_URL_MAP [ resource ] % user [ ' id ' ] , compat_str ( user [ ' id ' ] ) ,
' %s ( %s ) ' % ( user [ ' username ' ] , self . _TITLE_MAP [ resource ] ) )
self . _API_V2_BASE + self . _BASE_URL_MAP [ resource ] % user [ ' id ' ] ,
str_or_none ( user . get ( ' id ' ) ) ,
' %s ( %s ) ' % ( user [ ' username ' ] , resource . capitalize ( ) ) )
class SoundcloudTrackStationIE ( SoundcloudPagedPlaylistBaseIE ) :
@ -678,7 +678,7 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
' url ' : ' https://soundcloud.com/stations/track/officialsundial/your-text ' ,
' info_dict ' : {
' id ' : ' 286017854 ' ,
' title ' : ' Track station: your - text' ,
' title ' : ' Track station: your text' ,
} ,
' playlist_mincount ' : 47 ,
} ]
@ -686,19 +686,17 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
def _real_extract ( self , url ) :
track_name = self . _match_id ( url )
webpage = self . _download_webpage ( url , track_name )
track = self . _download_json ( self . _resolv_url ( url ) , track_name )
track_id = self . _search_regex (
r ' soundcloud:track-stations:( \ d+) ' , webpage , ' track id ' )
r ' soundcloud:track-stations:( \ d+) ' , track[ ' id ' ] , ' track id ' )
return self . _extract_playlist (
' %s /stations/soundcloud:track-stations: %s /tracks '
% ( self . _API_V2_BASE , track_id ) ,
track_id , ' Track station: %s ' % track_name )
self . _API_V2_BASE + ' stations/ %s /tracks ' % track [ ' id ' ] ,
track_id , ' Track station: %s ' % track [ ' title ' ] )
class SoundcloudPlaylistIE ( SoundcloudPlaylistBaseIE ) :
_VALID_URL = r ' https?://api \ .soundcloud \ .com/playlists/(?P<id>[0-9]+)(?:/? \ ?secret_token=(?P<token>[^&]+?))?$ '
_VALID_URL = r ' https?://api (?:-v2)? \ .soundcloud \ .com/playlists/(?P<id>[0-9]+)(?:/? \ ?secret_token=(?P<token>[^&]+?))?$ '
IE_NAME = ' soundcloud:playlist '
_TESTS = [ {
' url ' : ' https://api.soundcloud.com/playlists/4110309 ' ,
@ -713,29 +711,22 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
def _real_extract ( self , url ) :
mobj = re . match ( self . _VALID_URL , url )
playlist_id = mobj . group ( ' id ' )
base_url = ' %s //api.soundcloud.com/playlists/ %s .json? ' % ( self . http_scheme ( ) , playlist_id )
data_dict = {
query = {
' client_id ' : self . _CLIENT_ID ,
}
token = mobj . group ( ' token ' )
if token :
data_dict [ ' secret_token ' ] = token
query [ ' secret_token ' ] = token
data = compat_urllib_parse_urlencode ( data_dict )
data = self . _download_json (
base_url + data , playlist_id , ' Downloading playlist ' )
self . _API_V2_BASE + ' playlists/ ' + playlist_id ,
playlist_id , ' Downloading playlist ' , query = query )
entries = self . _extract_track_entries ( data [ ' tracks ' ] )
entries = self . _extract_track_entries ( data [ ' tracks ' ] , token )
return {
' _type ' : ' playlist ' ,
' id ' : playlist_id ,
' title ' : data . get ( ' title ' ) ,
' description ' : data . get ( ' description ' ) ,
' entries ' : entries ,
}
return self . playlist_result (
entries , playlist_id , data . get ( ' title ' ) , data . get ( ' description ' ) )
class SoundcloudSearchIE ( SearchInfoExtractor , SoundcloudIE ) :
@ -753,18 +744,18 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
_SEARCH_KEY = ' scsearch '
_MAX_RESULTS_PER_PAGE = 200
_DEFAULT_RESULTS_PER_PAGE = 50
_API_V2_BASE = ' https://api-v2.soundcloud.com '
def _get_collection ( self , endpoint , collection_id , * * query ) :
limit = min (
query . get ( ' limit ' , self . _DEFAULT_RESULTS_PER_PAGE ) ,
self . _MAX_RESULTS_PER_PAGE )
query [ ' limit ' ] = limit
query [ ' client_id ' ] = self . _CLIENT_ID
query [ ' linked_partitioning ' ] = ' 1 '
query [ ' offset ' ] = 0
data = compat_urllib_parse_urlencode ( query )
next_url = ' {0} {1} ? {2} ' . format ( self . _API_V2_BASE , endpoint , data )
query . update ( {
' limit ' : limit ,
' client_id ' : self . _CLIENT_ID ,
' linked_partitioning ' : 1 ,
' offset ' : 0 ,
} )
next_url = update_url_query ( self . _API_V2_BASE + endpoint , query )
collected_results = 0
@ -791,5 +782,5 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
break
def _get_n_results ( self , query , n ) :
tracks = self . _get_collection ( ' / search/tracks' , query , limit = n , q = query )
tracks = self . _get_collection ( ' search/tracks' , query , limit = n , q = query )
return self . playlist_result ( tracks , playlist_title = query )