[porncom] Extract categories and tags (Closes #10510)

pull/10517/head
Sergey M․ 8 years ago
parent 196c6ba067
commit 7a3e849f6e
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

@@ -26,6 +26,8 @@ class PornComIE(InfoExtractor):
'duration': 551, 'duration': 551,
'view_count': int, 'view_count': int,
'age_limit': 18, 'age_limit': 18,
'categories': list,
'tags': list,
}, },
}, { }, {
'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067', 'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067',
@@ -75,7 +77,14 @@ class PornComIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
view_count = str_to_int(self._search_regex( view_count = str_to_int(self._search_regex(
r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage, 'view count')) r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage,
'view count', fatal=False))
# Return the anchor texts found inside the "<Kind>:" paragraph of `webpage`.
# `kind` is the lowercase label ('categories' or 'tags' at the call sites);
# it is capitalized to build the label that the paragraph regex looks for.
def extract_list(kind):
# Capture everything between "<p ...>Kind:" and the closing </p>; (?s) lets
# the lazy capture span multiple lines.
# NOTE(review): with fatal=False the helper presumably returns None instead
# of raising when nothing matches -- confirm against _search_regex.
s = self._search_regex(
r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize(),
webpage, kind, fatal=False)
# `s or ''` keeps re.findall from receiving a falsy non-string, so a missing
# paragraph yields an empty list.
return re.findall(r'<a[^>]+>([^<]+)</a>', s or '')
return { return {
'id': video_id, 'id': video_id,
@@ -86,4 +95,6 @@ class PornComIE(InfoExtractor):
'view_count': view_count, 'view_count': view_count,
'formats': formats, 'formats': formats,
'age_limit': 18, 'age_limit': 18,
'categories': extract_list('categories'),
'tags': extract_list('tags'),
} }

Loading…
Cancel
Save