[cloudy] Fix extraction (closes #12525)

pull/2/head
Sergey M․ 8 years ago
parent ca5ed022e9
commit 579c99a284
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

@@ -1,97 +1,56 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_HTTPError,
)
from ..utils import ( from ..utils import (
ExtractorError, str_to_int,
HEADRequest, unified_strdate,
remove_end,
) )
class CloudyIE(InfoExtractor): class CloudyIE(InfoExtractor):
_IE_DESC = 'cloudy.ec' _IE_DESC = 'cloudy.ec'
_VALID_URL = r'''(?x) _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
https?://(?:www\.)?cloudy\.ec/ _TESTS = [{
(?:v/|embed\.php\?id=)
(?P<id>[A-Za-z0-9]+)
'''
_EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s'
_API_URL = 'http://www.cloudy.ec/api/player.api.php'
_MAX_TRIES = 2
_TEST = {
'url': 'https://www.cloudy.ec/v/af511e2527aac', 'url': 'https://www.cloudy.ec/v/af511e2527aac',
'md5': '5cb253ace826a42f35b4740539bedf07', 'md5': '29832b05028ead1b58be86bf319397ca',
'info_dict': { 'info_dict': {
'id': 'af511e2527aac', 'id': 'af511e2527aac',
'ext': 'flv', 'ext': 'mp4',
'title': 'Funny Cats and Animals Compilation june 2013', 'title': 'Funny Cats and Animals Compilation june 2013',
'upload_date': '20130913',
'view_count': int,
} }
} }, {
'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac',
def _extract_video(self, video_id, file_key, error_url=None, try_num=0): 'only_matching': True,
}]
if try_num > self._MAX_TRIES - 1:
raise ExtractorError('Unable to extract video URL', expected=True)
form = {
'file': video_id,
'key': file_key,
}
if error_url:
form.update({
'numOfErrors': try_num,
'errorCode': '404',
'errorUrl': error_url,
})
player_data = self._download_webpage(
self._API_URL, video_id, 'Downloading player data', query=form)
data = compat_parse_qs(player_data)
try_num += 1
if 'error' in data:
raise ExtractorError(
'%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])),
expected=True)
title = data.get('title', [None])[0] def _real_extract(self, url):
if title: video_id = self._match_id(url)
title = remove_end(title, '&asdasdas').strip()
video_url = data.get('url', [None])[0] webpage = self._download_webpage(
'http://www.cloudy.ec/embed.php?id=%s' % video_id, video_id)
if video_url: info = self._parse_html5_media_entries(url, webpage, video_id)[0]
try:
self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL')
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
self.report_warning('Invalid video URL, requesting another', video_id)
return self._extract_video(video_id, file_key, video_url, try_num)
return { webpage = self._download_webpage(
'id': video_id, 'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False)
'url': video_url,
'title': title,
}
def _real_extract(self, url): if webpage:
mobj = re.match(self._VALID_URL, url) info.update({
video_id = mobj.group('id') 'title': self._search_regex(
r'<h\d[^>]*>([^<]+)<', webpage, 'title'),
'upload_date': unified_strdate(self._search_regex(
r'>Published at (\d{4}-\d{1,2}-\d{1,2})', webpage,
'upload date', fatal=False)),
'view_count': str_to_int(self._search_regex(
r'([\d,.]+) views<', webpage, 'view count', fatal=False)),
})
url = self._EMBED_URL % video_id if not info.get('title'):
webpage = self._download_webpage(url, video_id) info['title'] = video_id
file_key = self._search_regex( info['id'] = video_id
[r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
webpage, 'file_key')
return self._extract_video(video_id, file_key) return info

Loading…
Cancel
Save