Merge branch 'ytdl-org:master' into df-bitchute-ovrhaul
commit
7fb0a87d7c
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,173 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from ..utils import (
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BlerpIE(InfoExtractor):
    """Extractor for single sound bites on blerp.com.

    The bite metadata and the MP3 URL are fetched from the site's GraphQL
    endpoint using the same query the bite web page issues.
    """

    IE_NAME = 'blerp'
    _VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [{
        'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a',
        'info_dict': {
            'id': '6320fe8745636cb4dd677a5a',
            'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016',
            'uploader': 'luminousaj',
            'uploader_id': '5fb81e51aa66ae000c395478',
            'ext': 'mp3',
            'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
        }
    }, {
        'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
        'info_dict': {
            'id': '5bc94ef4796001000498429f',
            'title': 'Yee',
            'uploader': '179617322678353920',
            'uploader_id': '5ba99cf71386730004552c42',
            'ext': 'mp3',
            'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
        }
    }]

    _GRAPHQL_OPERATIONNAME = "webBitePageGetBite"
    # The query text is sent verbatim as the `query` member of the request body.
    _GRAPHQL_QUERY = (
        '''query webBitePageGetBite($_id: MongoID!) {
web {
biteById(_id: $_id) {
...bitePageFrag
__typename
}
__typename
}
}

fragment bitePageFrag on Bite {
_id
title
userKeywords
keywords
color
visibility
isPremium
owned
price
extraReview
isAudioExists
image {
filename
original {
url
__typename
}
__typename
}
userReactions {
_id
reactions
createdAt
__typename
}
topReactions
totalSaveCount
saved
blerpLibraryType
license
licenseMetaData
playCount
totalShareCount
totalFavoriteCount
totalAddedToBoardCount
userCategory
userAudioQuality
audioCreationState
transcription
userTranscription
description
createdAt
updatedAt
author
listingType
ownerObject {
_id
username
profileImage {
filename
original {
url
__typename
}
__typename
}
__typename
}
transcription
favorited
visibility
isCurated
sourceUrl
audienceRating
strictAudienceRating
ownerId
reportObject {
reportedContentStatus
__typename
}
giphy {
mp4
gif
__typename
}
audio {
filename
original {
url
__typename
}
mp3 {
url
__typename
}
__typename
}
__typename
}

''')

    def _real_extract(self, url):
        """Query the GraphQL API for the bite in `url` and return its info dict."""
        audio_id = self._match_id(url)

        data = {
            'operationName': self._GRAPHQL_OPERATIONNAME,
            'query': self._GRAPHQL_QUERY,
            'variables': {
                '_id': audio_id
            }
        }

        headers = {
            'Content-Type': 'application/json'
        }

        json_result = self._download_json(
            'https://api.blerp.com/graphql', audio_id,
            data=json.dumps(data).encode('utf-8'), headers=headers)

        bite_json = json_result['data']['web']['biteById']

        # Strip each keyword and drop empty ones. The `or None` fallback has
        # to be applied to the finished list: applying it to the filter()
        # result breaks on Python 2 (list(None)) and is a no-op on Python 3.
        keywords = traverse_obj(bite_json, 'userKeywords', expected_type=list) or []
        tags = [tag for tag in map(strip_or_none, keywords) if tag]

        return {
            'id': bite_json['_id'],
            'url': bite_json['audio']['mp3']['url'],
            'title': bite_json['title'],
            'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
            'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
            'ext': 'mp3',
            'tags': tags or None,
        }
|
@ -0,0 +1,101 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
if not callable(getattr(InfoExtractor, '_match_valid_url', None)):
    # The imported InfoExtractor has no usable `_match_valid_url`, so wrap it
    # in a subclass that supplies one; extractors defined below then inherit
    # from the wrapper under the usual name.
    import re

    BaseInfoExtractor = InfoExtractor

    class InfoExtractor(BaseInfoExtractor):
        """InfoExtractor with a fallback `_match_valid_url` classmethod."""

        @classmethod
        def _match_valid_url(cls, url):
            """Return the `re` match of `cls._VALID_URL` against `url` (or None)."""
            mobj = re.match(cls._VALID_URL, url)
            return mobj
|
||||
|
||||
|
||||
class FifaIE(InfoExtractor):
    """Extractor for videos on the FIFA+ site (www.fifa.com/fifaplus)."""

    # The dots of the host name are escaped so that only the literal host matches.
    _VALID_URL = r'https?://www\.fifa\.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y',
        'info_dict': {
            'id': '7on10qPcnyLajDDU3ntg6y',
            'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay',
            'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b',
            'ext': 'mp4',
            'categories': ['FIFA Tournaments'],
            'thumbnail': 'https://digitalhub.fifa.com/transform/135e2656-3a51-407b-8810-6c34bec5b59b/FMR_2006_Italy_France_Final_Hero',
            'duration': 8165,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV',
        'info_dict': {
            'id': '1cg5r5Qt6Qt12ilkDgb1sV',
            'title': 'Brazil v Germany | Semi-finals | 2014 FIFA World Cup Brazil™ | Extended Highlights',
            'description': 'md5:d908c74ee66322b804ae2e521b02a855',
            'ext': 'mp4',
            'categories': ['FIFA Tournaments', 'Highlights'],
            'thumbnail': 'https://digitalhub.fifa.com/transform/d8fe6f61-276d-4a73-a7fe-6878a35fd082/FIFAPLS_100EXTHL_2014BRAvGER_TMB',
            'duration': 902,
            'release_timestamp': 1404777600,
            'release_date': '20140708',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp',
        'info_dict': {
            'id': '3C6gQH9C2DLwzNx7BMRQdp',
            'title': 'Josimar goal against Northern Ireland | Classic Goals',
            'description': 'md5:cbe7e7bb52f603c9f1fe9a4780fe983b',
            'ext': 'mp4',
            'categories': ['FIFA Tournaments', 'Goal'],
            'duration': 28,
            'thumbnail': 'https://digitalhub.fifa.com/transform/f9301391-f8d9-48b5-823e-c093ac5e3e11/CG_MEN_1986_JOSIMAR',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Resolve the video in `url` to its HLS formats and metadata.

        The API base URL is read from the page's `preconnect` link; the
        player data it returns supplies the parameters for the uplynk
        preplay call, whose `playURL` is the HLS manifest.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        preconnect_link = self._search_regex(
            r'<link\b[^>]+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"',
            webpage, 'Preconnect Link')

        # Metadata only: this request is non-fatal, so a failed download must
        # degrade to an empty dict rather than a falsy value that gets indexed.
        video_details = self._download_json(
            '%s/sections/videoDetails/%s' % (preconnect_link, video_id),
            video_id, 'Downloading Video Details', fatal=False) or {}

        preplay_parameters = self._download_json(
            '%s/videoPlayerData/%s' % (preconnect_link, video_id),
            video_id, 'Downloading Preplay Parameters')['preplayParameters']

        content_data = self._download_json(
            # 1. query string is expected to be sent as-is
            # 2. `sig` must be appended
            # 3. if absent, the call appears to work but the manifest is bad (404)
            'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters),
            video_id, 'Downloading Content Data')

        formats = self._extract_m3u8_formats(
            content_data['playURL'], video_id, ext='mp4', entry_protocol='m3u8_native')
        self._sort_formats(formats)

        return {
            'id': video_id,
            # fall back to the page's OpenGraph title when the details call failed
            'title': video_details.get('title') or self._og_search_title(webpage),
            'description': video_details.get('description'),
            'duration': int_or_none(video_details.get('duration')),
            'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')),
            'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)),
            'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')),
            'formats': formats,
            # subtitles are not extracted from the manifest here
            'subtitles': None,
        }
|
@ -0,0 +1,87 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
get_element_by_id,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
MONTH_NAMES,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class MyVideoGeIE(InfoExtractor):
    """Extractor for videos on myvideo.ge."""

    _VALID_URL = r'https?://(?:www\.)?myvideo\.ge/v/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.myvideo.ge/v/3941048',
        'md5': '8c192a7d2b15454ba4f29dc9c9a52ea9',
        'info_dict': {
            'id': '3941048',
            'ext': 'mp4',
            'title': 'The best prikol',
            'upload_date': '20200611',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'chixa33',
            'description': 'md5:5b067801318e33c2e6eea4ab90b1fdd3',
        },
        # working from local dev system
        'skip': 'site blocks CI servers',
    }
    # Georgian month names, January..December, in the same order as
    # MONTH_NAMES['en'] so the two lists can be paired by position.
    _MONTH_NAMES_KA = ['იანვარი', 'თებერვალი', 'მარტი', 'აპრილი', 'მაისი', 'ივნისი', 'ივლისი', 'აგვისტო', 'სექტემბერი', 'ოქტომბერი', 'ნოემბერი', 'დეკემბერი']

    _quality = staticmethod(qualities(('SD', 'HD')))

    def _translate_ka_months(self, text):
        """Replace any Georgian month name in `text` with its English name."""
        ka_to_en = dict(zip(self._MONTH_NAMES_KA, MONTH_NAMES['en']))
        # The flag is given to re.compile() because the fourth positional
        # argument of re.sub() is `count`, not `flags`.
        month_re = re.compile('|'.join(map(re.escape, self._MONTH_NAMES_KA)), re.I)
        return month_re.sub(
            # leave the text untouched if a case-insensitive match has no entry
            lambda m: ka_to_en.get(m.group(0).lower(), m.group(0)),
            text)

    def _real_extract(self, url):
        """Extract formats from the page's jwplayer setup and metadata from its HTML."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = (
            self._og_search_title(webpage, default=None)
            or clean_html(get_element_by_class('my_video_title', webpage))
            or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title\b', webpage, 'title'))

        jwplayer_sources = self._parse_json(
            self._search_regex(
                r'''(?s)jwplayer\s*\(\s*['"]mvplayer['"]\s*\)\s*\.\s*setup\s*\(.*?\bsources\s*:\s*(\[.*?])\s*[,});]''', webpage, 'jwplayer sources', fatal=False)
            or '',
            video_id, transform_source=js_to_json, fatal=False)

        formats = self._parse_jwplayer_formats(jwplayer_sources or [], video_id)
        for f in formats or []:
            # .get(): a format without an id gets whatever _quality returns
            # for None instead of raising KeyError
            f['preference'] = self._quality(f.get('format_id'))
        self._sort_formats(formats)

        description = (
            self._og_search_description(webpage)
            or get_element_by_id('long_desc_holder', webpage)
            or self._html_search_meta('description', webpage))

        uploader = self._search_regex(r'<a[^>]+class="mv_user_name"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)

        upload_date = get_element_by_class('mv_vid_upl_date', webpage)
        # as ka locale may not be present roll a local date conversion
        upload_date = (
            unified_strdate(self._translate_ka_months(upload_date))
            if upload_date else None)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'uploader': uploader,
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
            'upload_date': upload_date,
            'view_count': int_or_none(get_element_by_class('mv_vid_views', webpage)),
            'like_count': int_or_none(get_element_by_id('likes_count', webpage)),
            'dislike_count': int_or_none(get_element_by_id('dislikes_count', webpage)),
        }
|
@ -0,0 +1,193 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class PeekVidsIE(InfoExtractor):
    """Extractor for peekvids.com video pages and embeds.

    Subclasses for sister sites override `_VALID_URL`, `_DOMAIN` and
    `_get_detail`.
    """

    _VALID_URL = r'''(?x)
        https?://(?:www\.)?peekvids\.com/
        (?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=)
        (?P<id>[^/?&#]*)
    '''
    _TESTS = [{
        'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd',
        'md5': '2ff6a357a9717dc9dc9894b51307e9a2',
        'info_dict': {
            'id': '1262717',
            'display_id': 'BSyLMbN0YCd',
            'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
            'timestamp': 1642579329,
            'upload_date': '20220119',
            'duration': 416,
            'view_count': int,
            'age_limit': 18,
            'uploader': 'SEXYhub.com',
            'categories': list,
            'tags': list,
        },
    }]
    _DOMAIN = 'www.peekvids.com'

    def _get_detail(self, html):
        """Return the inner HTML of the page's video detail block, if any."""
        return get_element_by_class('detail-video-block', html)

    def _real_extract(self, url):
        """Extract formats from the site's `v-alt` JSON and metadata from the page."""
        video_id = self._match_id(url)
        # a 429 response is accepted so that the rate-limit page can be reported
        webpage = self._download_webpage(url, video_id, expected_status=429)
        if '>Rate Limit Exceeded' in webpage:
            raise ExtractorError(
                '[%s] %s: %s' % (self.IE_NAME, video_id, 'You are suspected as a bot. Wait, or pass the captcha test on the site and provide --cookies.'),
                expected=True)

        title = self._html_search_regex(r'(?s)<h1\b[^>]*>(.+?)</h1>', webpage, 'title')

        # the id from the URL becomes the display id; the numeric id in the
        # <video> tag is the one the source list is keyed by
        display_id = video_id
        video_id = self._search_regex(r'(?s)<video\b[^>]+\bdata-id\s*=\s*["\']?([\w-]+)', webpage, 'short video ID')
        srcs = self._download_json(
            'https://%s/v-alt/%s' % (self._DOMAIN, video_id), video_id,
            note='Downloading list of source files')

        formats = []
        for src_key, src_value in srcs.items():
            # keys look like `data-src720`; the digits are taken as the height
            height_match = re.match(r'^data-src(\d{3,})$', src_key)
            src_url = url_or_none(src_value)
            if not (height_match and src_url):
                continue
            format_id = height_match.group(1)
            formats.append({
                'url': src_url,
                'format_id': format_id,
                'height': int_or_none(format_id),
            })

        if not formats:
            # No recognisable keys: fall back to every value that is a URL.
            # The loop variable must not be called `url`: on Python 2 a list
            # comprehension leaks its variable and would clobber the `url`
            # argument that is still needed below.
            formats = [
                {'url': src_url}
                for src_url in map(url_or_none, srcs.values()) if src_url]
        self._sort_formats(formats)

        info = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={})
        info.pop('url', None)
        # may not have found the thumbnail if it was in a list in the ld+json
        info.setdefault('thumbnail', self._og_search_thumbnail(webpage))
        detail = self._get_detail(webpage) or ''
        info['description'] = self._html_search_regex(
            r'(?s)(.+?)(?:%s\s*<|<ul\b)' % (re.escape(info.get('description', '')), ),
            detail, 'description', default=None) or None
        info['title'] = re.sub(r'\s*[,-][^,-]+$', '', info.get('title') or title) or self._generic_title(url)

        def cat_tags(name, html):
            """Return the whitespace-separated items listed after `name:` in `html`."""
            listing = self._html_search_regex(
                r'(?s)<span\b[^>]*>\s*%s\s*:\s*</span>(.+?)</li>' % (re.escape(name), ),
                html, name, default='')
            return [x for x in re.split(r'\s+', listing) if x]

        return merge_dicts({
            'id': video_id,
            'display_id': display_id,
            'age_limit': 18,
            'formats': formats,
            'categories': cat_tags('Categories', detail),
            'tags': cat_tags('Tags', detail),
            'uploader': self._html_search_regex(r'[Uu]ploaded\s+by\s(.+?)"', webpage, 'uploader', default=None),
        }, info)
|
||||
|
||||
|
||||
class PlayVidsIE(PeekVidsIE):
    """Extractor for playvids.com; reuses the PeekVids extraction logic.

    Only the URL pattern, the API host and the CSS class of the detail
    block differ from the parent class.
    """

    _VALID_URL = r'https?://(?:www\.)?playvids\.com/(?:embed/|\w\w?/)?(?P<id>[^/?#]*)'
    _TESTS = [{
        'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
        'md5': '2f12e50213dd65f142175da633c4564c',
        'info_dict': {
            'id': '1978030',
            'display_id': 'U3pBrYhsjXM',
            'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
            'timestamp': 1640435839,
            'upload_date': '20211225',
            'duration': 416,
            'view_count': int,
            'age_limit': 18,
            'uploader': 'SEXYhub.com',
            'categories': list,
            'tags': list,
        },
    }, {
        'url': 'https://www.playvids.com/es/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
        'only_matching': True,
    }, {
        'url': 'https://www.playvids.com/embed/U3pBrYhsjXM',
        'only_matching': True,
    }, {
        'url': 'https://www.playvids.com/bKmGLe3IwjZ/sv/brazzers-800-phone-sex-madison-ivy-always-on-the-line',
        'md5': 'e783986e596cafbf46411a174ab42ba6',
        'info_dict': {
            'id': '762385',
            'display_id': 'bKmGLe3IwjZ',
            'ext': 'mp4',
            'title': 'Brazzers - 1 800 Phone Sex: Madison Ivy Always On The Line 6',
            'description': 'md5:bdcd2db2b8ad85831a491d7c8605dcef',
            'timestamp': 1516958544,
            'upload_date': '20180126',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 480,
            'uploader': 'Brazzers',
            'age_limit': 18,
            'view_count': int,
            'categories': list,
            'tags': list,
        },
    }, {
        'url': 'https://www.playvids.com/v/47iUho33toY',
        'md5': 'b056b5049d34b648c1e86497cf4febce',
        'info_dict': {
            'id': '700621',
            'display_id': '47iUho33toY',
            'ext': 'mp4',
            'title': 'KATEE OWEN STRIPTIASE IN SEXY RED LINGERIE',
            'description': None,
            'timestamp': 1507052209,
            'upload_date': '20171003',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 332,
            'uploader': 'Cacerenele',
            'age_limit': 18,
            'view_count': int,
            'categories': list,
            'tags': list,
        }
    }, {
        'url': 'https://www.playvids.com/z3_7iwWCmqt/sexy-teen-filipina-striptease-beautiful-pinay-bargirl-strips-and-dances',
        'md5': 'efa09be9f031314b7b7e3bc6510cd0df',
        'info_dict': {
            'id': '1523518',
            'display_id': 'z3_7iwWCmqt',
            'ext': 'mp4',
            'title': 'SEXY TEEN FILIPINA STRIPTEASE - Beautiful Pinay Bargirl Strips and Dances',
            'description': None,
            'timestamp': 1607470323,
            'upload_date': '20201208',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 593,
            'uploader': 'yorours',
            'age_limit': 18,
            'view_count': int,
            'categories': list,
            'tags': list,
        },
    }]
    _DOMAIN = 'www.playvids.com'

    def _get_detail(self, html):
        """Return the inner HTML of this site's detail block, if any."""
        return get_element_by_class('detail-block', html)
|
@ -0,0 +1,105 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
import re
|
||||
from ..utils import (
|
||||
merge_dicts,
|
||||
)
|
||||
|
||||
|
||||
class Pr0grammStaticIE(InfoExtractor):
    """Extractor for the static pr0gramm.com page of a post.

    This page embeds the media element directly in its HTML, so it can be
    parsed without running any JavaScript.
    """

    # Possible urls:
    # https://pr0gramm.com/static/5466437
    _VALID_URL = r'https?://pr0gramm\.com/static/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://pr0gramm.com/static/5466437',
        'md5': '52fa540d70d3edc286846f8ca85938aa',
        'info_dict': {
            'id': '5466437',
            'ext': 'mp4',
            'title': 'pr0gramm-5466437 by g11st',
            'uploader': 'g11st',
            'upload_date': '20221221',
        }
    }

    def _real_extract(self, url):
        """Return the first HTML5 media entry of the page plus uploader and date."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Fetch media sources; with no media element at all, continue with an
        # empty entry so that the failure is reported by the formats check
        # below rather than as an IndexError
        entries = self._parse_html5_media_entries(url, webpage, video_id)
        media_info = entries[0] if entries else {}

        # this raises if there are no formats
        self._sort_formats(media_info.get('formats') or [])

        # Fetch author
        uploader = self._html_search_regex(r'by\W+([\w-]+)\W+', webpage, 'uploader')

        # Fetch approx upload timestamp from filename
        # (//img.pr0gramm.com/2022/12/21/62ae8aa5e2da0ebf.mp4)
        # Stays None in case the extraction fails
        upload_date = None
        m = re.search(r'//img\.pr0gramm\.com/(?P<year>[\d]+)/(?P<mon>[\d]+)/(?P<day>[\d]+)/\w+\.\w{,4}', webpage)
        if m:
            # Up to a day of accuracy should suffice...
            # Zero-pad each part so the result is always YYYYMMDD.
            upload_date = '%04d%02d%02d' % tuple(
                int(m.group(part)) for part in ('year', 'mon', 'day'))

        return merge_dicts({
            'id': video_id,
            'title': 'pr0gramm-%s%s' % (video_id, (' by ' + uploader) if uploader else ''),
            'uploader': uploader,
            'upload_date': upload_date
        }, media_info)
|
||||
|
||||
|
||||
# This extractor is for the primary URL (used for sharing, and shown in the
# location bar). Since this page builds the DOM via JS, yt-dl can't find any
# video information here. So let's redirect to a compatibility version of
# the site, which contains the <video> element itself, without requiring
# JS to be run.
|
||||
class Pr0grammIE(InfoExtractor):
    """Extractor for regular pr0gramm.com post URLs.

    It only determines the numeric post id and hands over to
    Pr0grammStaticIE via the post's `/static/<id>` URL.
    """

    # Possible urls:
    # https://pr0gramm.com/new/546637
    # https://pr0gramm.com/new/video/546637
    # https://pr0gramm.com/top/546637
    # https://pr0gramm.com/top/video/546637
    # https://pr0gramm.com/user/g11st/uploads/5466437
    # https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290
    # https://pr0gramm.com/user/froschler/reinziehen-1elf/5232030
    # https://pr0gramm.com/user/froschler/1elf/5232030
    # https://pr0gramm.com/new/5495710:comment62621020 <- this is not the id!
    # https://pr0gramm.com/top/fruher war alles damals/5498175

    _VALID_URL = r'https?:\/\/pr0gramm\.com\/(?!static/\d+).+?\/(?P<id>[\d]+)(:|$)'
    _TEST = {
        'url': 'https://pr0gramm.com/new/video/5466437',
        'info_dict': {
            'id': '5466437',
            'ext': 'mp4',
            'title': 'pr0gramm-5466437 by g11st',
            'uploader': 'g11st',
            'upload_date': '20221221',
        }
    }

    # NOTE: a leftover debugging stub `_generic_title()` (no `self`, returned
    # a dummy string) used to be defined here; it was dropped so that it no
    # longer shadows any method of that name inherited from the base class.

    def _real_extract(self, url):
        """Delegate extraction of the post in `url` to Pr0grammStaticIE."""
        video_id = self._match_id(url)

        return self.url_result(
            'https://pr0gramm.com/static/' + video_id,
            video_id=video_id,
            ie=Pr0grammStaticIE.ie_key())
|
@ -0,0 +1,218 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class ThisVidIE(InfoExtractor):
    """Extractor for single thisvid.com videos (page or embed URLs).

    Title and uploader are read from the page here; the result is a
    `url_transparent` entry that passes the same URL on to the Generic
    extractor.
    """

    _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+)'
    _TESTS = [{
        'url': 'https://thisvid.com/videos/sitting-on-ball-tight-jeans/',
        'md5': '839becb572995687e11a69dc4358a386',
        'info_dict': {
            'id': '3533241',
            'ext': 'mp4',
            'title': 'Sitting on ball tight jeans',
            'description': 'md5:372353bb995883d1b65fddf507489acd',
            'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg',
            'uploader_id': '150629',
            'uploader': 'jeanslevisjeans',
            'age_limit': 18,
        }
    }, {
        'url': 'https://thisvid.com/embed/3533241/',
        'md5': '839becb572995687e11a69dc4358a386',
        'info_dict': {
            'id': '3533241',
            'ext': 'mp4',
            'title': 'Sitting on ball tight jeans',
            'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg',
            'uploader_id': '150629',
            'uploader': 'jeanslevisjeans',
            'age_limit': 18,
        }
    }]

    def _real_extract(self, url):
        """Collect page-level metadata for `url` and defer the media to Generic."""
        url_match = re.match(self._VALID_URL, url)
        main_id = url_match.group('id')
        type_ = url_match.group('type')
        webpage = self._download_webpage(url, main_id)

        title = self._html_search_regex(
            r'<title\b[^>]*?>(?:Video:\s+)?(.+?)(?:\s+-\s+ThisVid(?:\.com| tube))?</title>',
            webpage, 'title')

        if type_ == 'embed':
            # look for more metadata: the embed page names the main video
            # page, whose HTML replaces the embed's if it can be fetched
            video_alt_url = url_or_none(self._search_regex(
                r'''video_alt_url\s*:\s+'(%s/)',''' % (self._VALID_URL, ),
                webpage, 'video_alt_url', default=None))
            if video_alt_url and video_alt_url != url:
                main_page = self._download_webpage(
                    video_alt_url, main_id,
                    note='Redirecting embed to main page', fatal=False)
                webpage = main_page or webpage

        video_holder = get_element_by_class('video-holder', webpage) or ''
        if '>This video is a private video' in video_holder:
            # report the first line of the notice shown in place of the player
            notice = clean_html(video_holder) or 'Private video'
            self.raise_login_required(notice.split('\n', 1)[0])

        added_by = self._html_search_regex(
            r'''(?s)<span\b[^>]*>Added by:\s*</span><a\b[^>]+\bclass\s*=\s*["']author\b[^>]+\bhref\s*=\s*["']https://thisvid\.com/members/([0-9]+/.{3,}?)\s*</a>''',
            webpage, 'uploader', default='')
        # the match spans `<id>/"...>name`; splitting at the end of the
        # opening tag yields the member id and the display name
        uploader_id = uploader = None
        pieces = re.split(r'''/["'][^>]*>\s*''', added_by)
        if len(pieces) == 2:
            # id must be non-empty, uploader could be ''
            uploader_id = pieces[0]
            uploader = pieces[1] or None

        page_info = {
            '_type': 'url_transparent',
            'title': title,
            'age_limit': 18,
            'uploader': uploader,
            'uploader_id': uploader_id,
        }
        return merge_dicts(page_info, self.url_result(url, ie='Generic'))
|
||||
|
||||
|
||||
class ThisVidMemberIE(InfoExtractor):
    """Playlist extractor for the video listings of a thisvid.com member."""

    _VALID_URL = r'https?://thisvid\.com/members/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://thisvid.com/members/2140501/',
        'info_dict': {
            'id': '2140501',
            'title': 'Rafflesia\'s Profile',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://thisvid.com/members/2140501/favourite_videos/',
        'info_dict': {
            'id': '2140501',
            'title': 'Rafflesia\'s Favourite Videos',
        },
        'playlist_mincount': 15,
    }, {
        'url': 'https://thisvid.com/members/636468/public_videos/',
        'info_dict': {
            'id': '636468',
            'title': 'Happymouth\'s Public Videos',
        },
        'playlist_mincount': 196,
    },
    ]

    def _urls(self, html):
        """Yield the URL of every link in `html` that matches ThisVidIE."""
        link_re = r'''<a\b[^>]+\bhref\s*=\s*["'](?P<url>%s\b)[^>]+>''' % (ThisVidIE._VALID_URL, )
        for link in re.finditer(link_re, html):
            yield link.group('url')

    def _real_extract(self, url):
        """Return a playlist of all videos linked from the listing at `url`.

        Further pages are followed lazily while the playlist entries are
        being consumed.
        """
        pl_id = self._match_id(url)
        webpage = self._download_webpage(url, pl_id)

        raw_title = (
            self._og_search_title(webpage, default=None)
            or self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False)
            or '')
        # drop the trailing `| ThisVid.com` site suffix
        title = re.split(r'(?i)\s*\|\s*ThisVid\.com\s*$', raw_title, 1)[0] or None

        def entries(page_url, html=None):
            # `html` is the already downloaded first page; every later page
            # is fetched on demand
            for page_num in itertools.count(1):
                if not html:
                    html = self._download_webpage(
                        page_url, pl_id, note='Downloading page %d' % (page_num, ),
                        fatal=False) or ''
                for video_url in self._urls(html):
                    yield video_url
                next_page = get_element_by_class('pagination-next', html) or ''
                if next_page:
                    # member list page
                    next_page = urljoin(url, self._search_regex(
                        r'''<a\b[^>]+\bhref\s*=\s*("|')(?P<url>(?!#)(?:(?!\1).)+)''',
                        next_page, 'next page link', group='url', default=None))
                # in case a member page should have pagination-next with empty link, not just `else:`
                if next_page is None:
                    # playlist page: the next page is this page's path with
                    # its trailing page number incremented (or `/2` appended)
                    parsed_url = compat_urlparse.urlparse(page_url)
                    base_path, page_no = parsed_url.path.rsplit('/', 1)
                    page_no = int_or_none(page_no)
                    if page_no is None:
                        base_path, page_no = parsed_url.path.rstrip('/'), 1
                    parsed_url = parsed_url._replace(path=base_path + ('/%d' % (page_no + 1, )))
                    next_page = compat_urlparse.urlunparse(parsed_url)
                    if page_url == next_page:
                        next_page = None
                if not next_page:
                    break
                page_url, html = next_page, None

        return self.playlist_from_matches(
            entries(url, webpage), playlist_id=pl_id, playlist_title=title, ie='ThisVid')
|
||||
|
||||
|
||||
class ThisVidPlaylistIE(ThisVidMemberIE):
    """Playlist extractor for thisvid.com playlist URLs.

    A playlist URL always names one featured video; with `noplaylist` set
    only that video is returned, otherwise the listing logic of the parent
    class collects the whole playlist.
    """

    _VALID_URL = r'https?://thisvid\.com/playlist/(?P<id>\d+)/video/(?P<video_id>[A-Za-z0-9-]+)'
    _TESTS = [{
        'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/',
        'info_dict': {
            'id': '6615',
            'title': 'Underwear Stuff',
        },
        'playlist_mincount': 200,
    }, {
        'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/',
        'info_dict': {
            'id': '1072387',
            'ext': 'mp4',
            'title': 'Big Italian Booty 28',
            'description': 'md5:1bccf7b13765e18fb27bf764dba7ede2',
            'uploader_id': '367912',
            'uploader': 'Jcmusclefun',
            'age_limit': 18,
        },
        'params': {
            'noplaylist': True,
        },
    }]

    def _get_video_url(self, pl_url):
        """Map a playlist item URL to the URL of the video's own page."""
        video_id = re.match(self._VALID_URL, pl_url).group('video_id')
        return urljoin(pl_url, '/videos/%s/' % (video_id, ))

    def _urls(self, html):
        """Yield the video page URL for every playlist item link in `html`."""
        for m in re.finditer(r'''<a\b[^>]+\bhref\s*=\s*["'](?P<url>%s\b)[^>]+>''' % (self._VALID_URL, ), html):
            yield self._get_video_url(m.group('url'))

    def _real_extract(self, url):
        """Return the featured video or the whole playlist for `url`."""
        pl_id = self._match_id(url)

        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just the featured video because of --no-playlist')
            return self.url_result(self._get_video_url(url), 'ThisVid')

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to download just the featured video' % (pl_id, ))
        result = super(ThisVidPlaylistIE, self)._real_extract(url)

        # rework title returned as `the title - the title`
        # The parent passes `playlist_title=None` when no title was found, so
        # the key may be absent from the result (presumably - depends on the
        # base class's playlist helper); read it defensively.
        title = result.get('title') or ''
        t_len = len(title)
        # a doubled title has odd length, with the `-` exactly in the middle
        if t_len > 5 and t_len % 2 != 0:
            middle = t_len // 2
            if title[middle] == '-':
                halves = [t.strip() for t in (title[:middle], title[middle + 1:])]
                if halves[0] and halves[0] == halves[1]:
                    result['title'] = halves[0]
        return result
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue