[lifenews] Add support for lifenews.ru

pull/8/head
dst 11 years ago
parent 8ccda826d5
commit 659aa21ba1

@ -110,6 +110,7 @@ from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE from .kickstarter import KickStarterIE
from .keek import KeekIE from .keek import KeekIE
from .la7 import LA7IE from .la7 import LA7IE
from .lifenews import LifeNewsIE
from .liveleak import LiveLeakIE from .liveleak import LiveLeakIE
from .livestream import LivestreamIE, LivestreamOriginalIE from .livestream import LivestreamIE, LivestreamOriginalIE
from .lynda import ( from .lynda import (

@ -0,0 +1,63 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import unified_strdate
class LifeNewsIE(InfoExtractor):
IE_NAME = 'lifenews'
IE_DESC = 'LIFE | NEWS'
_VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'
_TEST = {
'url': 'http://lifenews.ru/news/126342',
'file': '126342.mp4',
'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
'info_dict': {
'title': 'МВД разыскивает троих мужчин, оставивших в IKEA сумку с автоматом',
'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg',
'upload_date': '20140130',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')
video_url = self._html_search_regex(
r'<video.*?src="([^"]+)"></video>', webpage, 'video URL')
thumbnail = self._html_search_regex(
r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
title = self._og_search_title(webpage)
TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
if title.endswith(TITLE_SUFFIX):
title = title[:-len(TITLE_SUFFIX)]
description = self._og_search_description(webpage)
view_count = self._html_search_regex(
r'<div class=\'views\'>(\d+)</div>', webpage, 'view count')
comment_count = self._html_search_regex(
r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count')
upload_date = self._html_search_regex(
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date')
return {
'id': video_id,
'url': video_url,
'thumbnail': thumbnail,
'title': title,
'description': description,
'view_count': view_count,
'comment_count': comment_count,
'upload_date': unified_strdate(upload_date),
}
Loading…
Cancel
Save