[ie/vk] Improve metadata extraction (#12510)

Closes #12509
Authored by: seproDev
master
sepro 5 days ago committed by GitHub
parent bd0a668169
commit 05c8023a27
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -116,6 +116,7 @@ class VKIE(VKBaseIE):
'id': '-77521_162222515', 'id': '-77521_162222515',
'ext': 'mp4', 'ext': 'mp4',
'title': 'ProtivoGunz - Хуёвая песня', 'title': 'ProtivoGunz - Хуёвая песня',
'description': 'Видео из официальной группы Noize MC\nhttp://vk.com/noizemc',
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*', 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
'uploader_id': '39545378', 'uploader_id': '39545378',
'duration': 195, 'duration': 195,
@ -165,6 +166,7 @@ class VKIE(VKBaseIE):
'id': '-93049196_456239755', 'id': '-93049196_456239755',
'ext': 'mp4', 'ext': 'mp4',
'title': '8 серия (озвучка)', 'title': '8 серия (озвучка)',
'description': 'Видео из официальной группы Noize MC\nhttp://vk.com/noizemc',
'duration': 8383, 'duration': 8383,
'comment_count': int, 'comment_count': int,
'uploader': 'Dizi2021', 'uploader': 'Dizi2021',
@ -240,6 +242,7 @@ class VKIE(VKBaseIE):
'upload_date': '20221005', 'upload_date': '20221005',
'uploader': 'Шальная Императрица', 'uploader': 'Шальная Императрица',
'uploader_id': '-74006511', 'uploader_id': '-74006511',
'description': 'md5:f9315f7786fa0e84e75e4f824a48b056',
}, },
}, },
{ {
@ -278,6 +281,25 @@ class VKIE(VKBaseIE):
}, },
'skip': 'No formats found', 'skip': 'No formats found',
}, },
{
'note': 'video has chapters',
'url': 'https://vkvideo.ru/video-18403220_456239696',
'info_dict': {
'id': '-18403220_456239696',
'ext': 'mp4',
'title': 'Трамп отменяет гранты // DeepSeek - Революция в ИИ // Илон Маск читер',
'description': 'md5:b112ea9de53683b6d03d29076f62eec2',
'uploader': 'Руслан Усачев',
'uploader_id': '-18403220',
'comment_count': int,
'like_count': int,
'duration': 1983,
'thumbnail': r're:https?://.+\.jpg',
'chapters': 'count:21',
'timestamp': 1738252883,
'upload_date': '20250130',
},
},
{ {
# live stream, hls and rtmp links, most likely already finished live # live stream, hls and rtmp links, most likely already finished live
# stream by the time you are reading this comment # stream by the time you are reading this comment
@ -449,7 +471,6 @@ class VKIE(VKBaseIE):
return self.url_result(opts_url) return self.url_result(opts_url)
data = player['params'][0] data = player['params'][0]
title = unescapeHTML(data['md_title'])
# 2 = live # 2 = live
# 3 = post live (finished live) # 3 = post live (finished live)
@ -507,17 +528,29 @@ class VKIE(VKBaseIE):
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'title': title, 'subtitles': subtitles,
'thumbnail': data.get('jpg'), **traverse_obj(mv_data, {
'uploader': data.get('md_author'), 'title': ('title', {unescapeHTML}),
'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')), 'description': ('desc', {clean_html}, filter),
'duration': int_or_none(data.get('duration') or mv_data.get('duration')), 'duration': ('duration', {int_or_none}),
'like_count': ('likes', {int_or_none}),
'comment_count': ('commcount', {int_or_none}),
}),
**traverse_obj(data, {
'title': ('md_title', {unescapeHTML}),
'description': ('description', {clean_html}, filter),
'thumbnail': ('jpg', {url_or_none}),
'uploader': ('md_author', {str}),
'uploader_id': (('author_id', 'authorId'), {str_or_none}, any),
'duration': ('duration', {int_or_none}),
'chapters': ('time_codes', lambda _, v: isinstance(v['time'], int), {
'title': ('text', {str}),
'start_time': 'time',
}),
}),
'timestamp': timestamp, 'timestamp': timestamp,
'view_count': view_count, 'view_count': view_count,
'like_count': int_or_none(mv_data.get('likes')),
'comment_count': int_or_none(mv_data.get('commcount')),
'is_live': is_live, 'is_live': is_live,
'subtitles': subtitles,
'_format_sort_fields': ('res', 'source'), '_format_sort_fields': ('res', 'source'),
} }

Loading…
Cancel
Save