[telegram] Add extractor (#2922)

Closes #2910

Authored by: hatienl0i261299
pull/2960/head
Ha Tien Loi 3 years ago committed by GitHub
parent ded9f32667
commit 5bcccbfec3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -1593,6 +1593,7 @@ from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE
from .telegram import TelegramEmbedIE
from .telemb import TeleMBIE
from .telemundo import TelemundoIE
from .telequebec import (

@@ -0,0 +1,37 @@
from .common import InfoExtractor
class TelegramEmbedIE(InfoExtractor):
    """Extractor for the video attached to a Telegram post (``t.me/<channel>/<id>``).

    Two pages are fetched: the regular post page, whose ``og:``/``twitter:``
    meta tags supply the title and description, and the ``embed=1`` variant
    of the same page, which contains the ``<video>`` element and thumbnail.
    """

    IE_NAME = 'telegram:embed'
    _VALID_URL = r'https?://t\.me/(?P<channel_name>[^/]+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://t.me/europa_press/613',
        'info_dict': {
            'id': '613',
            'ext': 'mp4',
            'title': 'Europa Press',
            # The 'md5:' prefix makes the test helper compare the MD5 of the
            # extracted description; a bare digest would be compared literally.
            'description': 'md5:6ce2d7e8d56eda16d80607b23db7b252',
            'thumbnail': r're:^https?:\/\/cdn.*?telesco\.pe\/file\/\w+',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the post at *url*.

        Extraction fails if the title or the ``<video>`` source cannot be
        found; description and thumbnail are optional and may be ``None``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # Pass the flag via ``query`` so it is merged into any query string
        # already present on the URL instead of appending a second '?'.
        webpage_embed = self._download_webpage(
            url, video_id, note='Downloading embed page', query={'embed': 1})

        formats = [{
            'url': self._proto_relative_url(self._search_regex(
                r'<video[^>]+src="([^"]+)"', webpage_embed, 'source')),
            'ext': 'mp4',
        }]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._html_search_meta(
                ['og:title', 'twitter:title'], webpage, fatal=True),
            # Optional field: a missing description must not abort extraction.
            'description': self._html_search_meta(
                ['og:description', 'twitter:description'], webpage),
            # Optional field: a missing thumbnail must not abort extraction.
            'thumbnail': self._search_regex(
                r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)',
                webpage_embed, 'thumbnail', fatal=False),
            'formats': formats,
        }
Loading…
Cancel
Save