[chirbit] add profile extractor.

pull/5032/head
Leslie P. Polzer 10 years ago
parent 5da6bd0083
commit 365577f567

@ -63,7 +63,7 @@ from .ccc import CCCIE
from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE
from .chilloutzone import ChilloutzoneIE
from .chirbit import ChirbitIE
from .chirbit import ChirbitIE, ChirbitProfileIE
from .cinchcast import CinchcastIE
from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE

@ -1,7 +1,10 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import clean_html
class ChirbitIE(InfoExtractor):
@ -32,3 +35,63 @@ class ChirbitIE(InfoExtractor):
'title': audio_title,
'url': audio_url
}
class ChirbitProfileIE(InfoExtractor):
    """Extract all audio entries listed on a Chirbit profile as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://chirbit.com/ScarletBeauty',
        'playlist_count': 3,
        'info_dict': {
            '_type': 'playlist',
            'title': 'ScarletBeauty',
            'id': 'ScarletBeauty',
        },
    }

    # Page index requested to discover the profile's oldest entry; see the
    # comment in _real_extract for why this probe is needed.
    _OLDEST_PAGE_PROBE = 24599
    # Dots are escaped so that only the literal host name and the literal
    # ".mp3" suffix match.
    _AUDIO_ID_RE = r'soundFile:\s*"http://audio\.chirbit\.com/(.*?)\.mp3"'
    _TITLE_RE = r'<div\s+class="chirbit_title"\s*>(.*?)</div>'

    def _real_extract(self, url):
        """Return a playlist info dict for the profile addressed by *url*.

        The result has the keys ``_type`` ('playlist'), ``id`` and ``title``
        (both the profile id) and ``entries``, a list of dicts with ``id``,
        ``title`` and ``url`` for each audio file found. ``entries`` is
        empty when no audio entry can be found on the probe page.
        """
        profile_id = self._match_id(url)

        # Rebuild the page URLs from the part of the URL that _VALID_URL
        # matches, so a trailing slash or extra path segment in the input
        # does not end up inside the paginated URLs.
        mobj = re.match(self._VALID_URL, url)
        base_url = mobj.group(0) if mobj else url

        # Chirbit has a pretty weird "Last Page" navigation behavior.
        # We grab the profile's oldest entry to determine when to
        # stop fetching entries.
        oldest_page = self._download_webpage(
            '%s/%d' % (base_url, self._OLDEST_PAGE_PROBE), profile_id)
        oldest_page_ids = re.findall(self._AUDIO_ID_RE, oldest_page)

        entries = []
        if oldest_page_ids:
            oldest_entry = clean_html(oldest_page_ids[-1])
            page_num = 0
            while True:
                page = self._download_webpage(
                    '%s/%d' % (base_url, page_num), profile_id)
                page_ids = re.findall(self._AUDIO_ID_RE, page)
                if not page_ids:
                    # Nothing left to collect; without this guard the loop
                    # would never end if the oldest entry is not seen again.
                    break
                page_titles = re.findall(self._TITLE_RE, page)

                # Ids and titles are paired per page, so one missing title
                # cannot shift the titles of entries from later pages.
                for idx, audio_id in enumerate(page_ids):
                    if idx < len(page_titles):
                        title = page_titles[idx]
                    else:
                        # Fall back to the id rather than failing when the
                        # page yields fewer titles than audio files.
                        title = audio_id
                    entries.append({
                        'id': audio_id,
                        'title': title,
                        'url': 'http://audio.chirbit.com/' + audio_id + '.mp3',
                    })

                if oldest_entry in page_ids:
                    break
                page_num += 1

        return {
            '_type': 'playlist',
            'id': profile_id,
            'title': profile_id,
            'entries': entries,
        }

Loading…
Cancel
Save