[crooksandliars] Improve embed extractor and remove article extractor

pull/5407/merge
Sergey M․ 10 years ago
parent af14ded75e
commit 7a91d1fc43

@ -90,7 +90,7 @@ from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .condenast import CondeNastIE from .condenast import CondeNastIE
from .cracked import CrackedIE from .cracked import CrackedIE
from .criterion import CriterionIE from .criterion import CriterionIE
from .crooksandliars import CrooksAndLiarsIE, CrooksAndLiarsArticleIE from .crooksandliars import CrooksAndLiarsIE
from .crunchyroll import ( from .crunchyroll import (
CrunchyrollIE, CrunchyrollIE,
CrunchyrollShowPlaylistIE CrunchyrollShowPlaylistIE

@ -1,71 +1,60 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
mimetype2ext, int_or_none,
qualities,
) )
class CrooksAndLiarsIE(InfoExtractor):
    """Extractor for the embed.crooksandliars.com video player.

    Matches both the ``/embed/<id>`` and ``/v/<id>`` URL forms and reads the
    video description from the ``manifest`` JavaScript object found in the
    player page.
    """

    _VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P<id>[A-Za-z0-9]+)'
    _TESTS = [{
        'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi',
        'info_dict': {
            'id': '8RUoRhRi',
            'ext': 'mp4',
            'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
            'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1428207000,
            'upload_date': '20150405',
            'uploader': 'Heather',
            'duration': 236,
        }
    }, {
        'url': 'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        The ``title`` and ``flavors`` manifest entries (and each video
        flavor's ``url`` and ``type``) are required and raise ``KeyError``
        when absent; every other field is optional and is passed through as
        ``None`` when the manifest does not provide it.
        """
        video_id = self._match_id(url)

        # Whichever URL form was matched (/embed/ or /v/), the /embed/ page
        # is the one that gets requested.
        webpage = self._download_webpage(
            'http://embed.crooksandliars.com/embed/%s' % video_id, video_id)

        manifest = self._parse_json(
            self._search_regex(
                r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'),
            video_id)

        # NOTE(review): qualities() presumably ranks a format id by its
        # position in this tuple (later entries preferred) - confirm in utils.
        quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high'))

        # Only flavors with a video/* MIME type become formats; a flavor with
        # a missing or empty MIME type is skipped instead of raising KeyError.
        formats = [{
            'url': item['url'],
            'format_id': item['type'],
            'quality': quality(item['type']),
        } for item in manifest['flavors']
            if (item.get('mime') or '').startswith('video/')]
        self._sort_formats(formats)

        return {
            'url': url,
            'id': video_id,
            'title': manifest['title'],
            'description': manifest.get('description'),
            'thumbnail': self._proto_relative_url(manifest.get('poster')),
            'timestamp': int_or_none(manifest.get('created')),
            'uploader': manifest.get('author'),
            'duration': int_or_none(manifest.get('duration')),
            'formats': formats,
        }
class CrooksAndLiarsArticleIE(InfoExtractor):
    """Extractor for crooksandliars.com article pages.

    Locates the embed.crooksandliars.com player iframe inside the article
    and hands its URL back as a ``url`` result for further extraction.
    """

    # Host dots are escaped so that they only match a literal '.'.
    _VALID_URL = r'(?:https?:)?//crooksandliars\.com/\d+/\d+/(?P<id>[a-z\-]+)(?:/|$)'
    _TESTS = [{
        'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a ``url``-type result pointing at the embedded player."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # [^"]+ stops at the closing quote of the src attribute. The previous
        # greedy '.*' ran on to the last quote on the line and therefore
        # swallowed any attributes that followed src.
        player_url = self._proto_relative_url(self._html_search_regex(
            r'<iframe src="(//embed\.crooksandliars\.com/[^"]+)"',
            webpage, 'embedded player'))

        return {
            '_type': 'url',
            'url': player_url
        }

Loading…
Cancel
Save