@ -6,8 +6,15 @@ import re
from . common import InfoExtractor
from . . utils import (
clean_html ,
ExtractorError ,
GeoRestrictedError ,
get_element_by_class ,
get_element_by_id ,
orderedSet ,
strip_or_none ,
unified_strdate ,
unified_timestamp ,
urlencode_postdata ,
)
@ -15,16 +22,17 @@ from ..utils import (
class BitChuteIE ( InfoExtractor ) :
_VALID_URL = r ' https?://(?:www \ .)?bitchute \ .com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+) '
_TESTS = [ {
' url ' : ' https://www.bitchute.com/video/ szoMrox2JEI /' ,
' md5 ' : ' 66c4a70e6bfc40dcb6be3eb1d74939e b' ,
' url ' : ' https://www.bitchute.com/video/ UGlrF9o9b-Q /' ,
' md5 ' : ' 7e427d7ed7af5a75b5855705ec750e2 b' ,
' info_dict ' : {
' id ' : ' szoMrox2JEI ' ,
' id ' : ' UGlrF9o9b-Q ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Fuck bitches get money ' ,
' description ' : ' md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a ' ,
' title ' : ' This is the first video on #BitChute ! ' ,
' timestamp ' : 1483425420 ,
' upload_date ' : ' 20170103 ' ,
' description ' : ' md5:a0337e7b1fe39e32336974af8173a034 ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' uploader ' : ' Victoria X Rave ' ,
' upload_date ' : ' 20170813 ' ,
' uploader ' : ' BitChute ' ,
} ,
} , {
' url ' : ' https://www.bitchute.com/embed/lbb5G1hjPhw/ ' ,
@ -34,6 +42,13 @@ class BitChuteIE(InfoExtractor):
' only_matching ' : True ,
} ]
# Locate embedded BitChute players in an arbitrary page.
#
# Scans `webpage` for <script> or <iframe> tags carrying a quoted `src`
# attribute whose value starts with a URL matching BitChuteIE._VALID_URL
# (the class pattern is interpolated into the regex via %s).
#
# Returns a lazy generator (not a list) yielding the matched URL strings,
# taken from the named group `url` of each match.
#
# NOTE(review): the opening quote is captured as group 1 but never
# back-referenced, and the `id` group of _VALID_URL only excludes / ? # &,
# so for a src value without a trailing slash the match could run past the
# closing quote -- confirm against real embed markup.
# NOTE(review): the spaces inside the pattern literal below look like an
# extraction artifact rather than intended regex whitespace -- verify
# against the upstream file before relying on the literal as shown.
@staticmethod
def _extract_urls ( webpage ) :
# finditer is evaluated here, at call time; only the group extraction is lazy.
urls = re . finditer (
r ''' <(?:script|iframe) \ b[^>]+ \ bsrc \ s*= \ s*([ " ' ])(?P<url> %s ) ''' % ( BitChuteIE . _VALID_URL , ) ,
webpage )
return ( mobj . group ( ' url ' ) for mobj in urls )
def _real_extract ( self , url ) :
video_id = self . _match_id ( url )
@ -42,43 +57,55 @@ class BitChuteIE(InfoExtractor):
' User-Agent ' : ' Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36 ' ,
} )
title = self . _html_search_regex (
( r ' <[^>]+ \ bid=[ " \' ]video-title[^>]+>([^<]+) ' , r ' <title>([^<]+) ' ) ,
webpage , ' title ' , default = None ) or self . _html_search_meta (
' description ' , webpage , ' title ' ,
default = None ) or self . _og_search_description ( webpage )
title = (
self . _og_search_title ( webpage , default = None )
or strip_or_none ( clean_html ( get_element_by_id ( ' video-title ' , webpage ) ) )
or self . _html_search_regex ( r ' (?s)<title \ b[^>]*>.*?</title ' , webpage , ' title ' ) )
format_urls = [ ]
for mobj in re . finditer (
r ' addWebSeed \ s* \ ( \ s*([ " \' ])(?P<url>(?:(?! \ 1).)+) \ 1 ' , webpage ) :
format_urls . append ( mobj . group ( ' url ' ) )
format_urls . extend ( re . findall ( r ' as=(https?://[^& " \' ]+) ', webpage ) )
format_urls = [
mobj . group ( ' url ' )
for mobj in re . finditer (
r ''' \ baddWebSeed \ s* \ ( \ s*([ " ' ])(?P<url>(?:(?! \ 1).)+) \ 1 ''' , webpage ) ]
format_urls . extend ( re . findall ( r ' ''as=(https?://[^& " ' ]+) '' ', webpage ) )
formats = [
{ ' url ' : format_url }
for format_url in orderedSet ( format_urls ) ]
if not formats :
formats = self . _parse_html5_media_entries (
url , webpage , video_id ) [ 0 ] [ ' formats ' ]
entries = self . _parse_html5_media_entries (
url , webpage , video_id )
if not entries :
error = strip_or_none ( clean_html ( self . get_element_by_id ( ' video-title ' ) ) ) or ' Cannot find video '
if error == ' Video Unavailable ' :
raise GeoRestrictedError ( error )
raise ExtractorError ( error )
formats = entries [ 0 ] [ ' formats ' ]
self . _check_formats ( formats , video_id )
self . _sort_formats ( formats )
description = self . _html_search_regex (
r ' (?s)<div \ b[^>]+ \ bclass=[ " \' ]full hidden[^>]+>(.+?)</div> ' ,
webpage , ' description ' , fatal = False )
thumbnail = self . _og_search_thumbnail (
webpage , default = None ) or self . _html_search_meta (
' twitter:image:src ' , webpage , ' thumbnail ' )
description = (
self . _og_search_description ( webpage )
or clean_html ( get_element_by_id ( ' video-description ' , webpage ) )
or self . _html_search_regex (
r ' (?s)<div \ b[^>]+ \ bclass=[ " \' ]full hidden[^>]+>(.+?)</div> ' ,
webpage , ' description ' , fatal = False ) )
thumbnail = self . _html_search_meta (
( ' og:image ' , ' twitter:image:src ' ) , webpage , ' thumbnail ' , fatal = False )
uploader = self . _html_search_regex (
( r ' (?s)<div class=[ " \' ]channel-banner.*?<p \ b[^>]+ \ bclass=[ " \' ]name[^>]+>(.+?)</p> ' ,
r ' (?s)<p \ b[^>]+ \ bclass =[" \' ]video-author[^>]+>(.+?)</p> ') ,
( r ' ''(?s)<div \ b[^>]+? \ bclass \ s*= \ s*[ " ' ]channel-banner.*?<p \ b[^>]+ \ bclass \ s*= \ s*[ " ' ]name \ b[^>]+>(.+?)</p> '' ',
r ' '' (?s)<p \ b[^>]+ \ bclass \ s*= \ s*[ " ' ]video-author \ b[^>]+>(.+?)</p> '' ') ,
webpage , ' uploader ' , fatal = False )
upload_date = unified_strdate ( self . _search_regex (
r ' class=[ " \' ]video-publish-date[^>]+>[^<]+ at \ d+: \ d+ UTC on (.+?) \ . ' ,
webpage , ' upload date ' , fatal = False ) )
# Convert a publish-date string to a timestamp by rewriting it into an order
# that unified_timestamp() is then asked to parse.
#
# x: the text to parse; a falsy value (e.g. None) is replaced by a default
#    string literal for the rewriting steps only.
# Returns unified_timestamp() of the rewritten text or, when that result is
# falsy, unified_timestamp() of the original, unmodified x.
def more_unified_timestamp ( x ) :
# ... at hh:mm TZ on month nth.
# Keep only what follows the last " at " separator.
y = re . split ( r ' \ s+at \ s+ ' , x or ' ' ) [ - 1 ]
# Drop leading/trailing whitespace, a trailing run of dots, and ordinal
# suffixes (st/nd/rd/th) that directly follow a digit.
y = re . sub ( r ' (?:^ \ s+| \ s+$| \ .+$|(?<= \ d)(?:st|nd|rd|th)) ' , ' ' , y )
# Split once on " on " and swap the halves, so "hh:mm TZ on month n"
# becomes "month n hh:mm TZ".
y = ' ' . join ( reversed ( re . split ( r ' \ s+on \ s+ ' , y , 1 ) ) )
# Fall back to the raw input if the rewritten form did not produce a result.
return unified_timestamp ( y ) or unified_timestamp ( x )
timestamp = more_unified_timestamp ( get_element_by_class ( ' video-publish-date ' , webpage ) )
return {
' id ' : video_id ,
@ -86,7 +113,7 @@ class BitChuteIE(InfoExtractor):
' description ' : description ,
' thumbnail ' : thumbnail ,
' uploader ' : uploader ,
' upload_da te' : upload_da te,
' tim estamp ' : tim estamp ,
' formats ' : formats ,
}
@ -94,22 +121,22 @@ class BitChuteIE(InfoExtractor):
class BitChuteChannelIE ( InfoExtractor ) :
_VALID_URL = r ' https?://(?:www \ .)?bitchute \ .com/channel/(?P<id>[^/?#&]+) '
_TEST = {
' url ' : ' https://www.bitchute.com/channel/ victoriaxrave /' ,
' playlist_mincount ' : 1 8 5,
' url ' : ' https://www.bitchute.com/channel/ livesonnet /' ,
' playlist_mincount ' : 1 3 5,
' info_dict ' : {
' id ' : ' victoriaxrave ' ,
' id ' : ' livesonnet ' ,
} ,
}
_TOKEN = ' zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7 '
def _entries ( self , channel_id ) :
channel_url = ' https://www.bitchute.com/channel/ %s / ' % channel_id
channel_url = ' https://www.bitchute.com/channel/ %s ' % ( channel_id , )
offset = 0
for page_num in itertools . count ( 1 ) :
data = self . _download_json (
' %s extend/' % channel_url , channel_id ,
' Downloading channel page %d ' % page_num ,
channel_url + ' / extend/' , channel_id ,
' Downloading channel page %d ' % ( page_num , ) ,
data = urlencode_postdata ( {
' csrfmiddlewaretoken ' : self . _TOKEN ,
' name ' : ' ' ,
@ -118,7 +145,7 @@ class BitChuteChannelIE(InfoExtractor):
' Content-Type ' : ' application/x-www-form-urlencoded; charset=UTF-8 ' ,
' Referer ' : channel_url ,
' X-Requested-With ' : ' XMLHttpRequest ' ,
' Cookie ' : ' csrftoken= %s ' % self . _TOKEN ,
' Cookie ' : ' csrftoken= ' + self . _TOKEN ,
} )
if data . get ( ' success ' ) is False :
break
@ -126,14 +153,14 @@ class BitChuteChannelIE(InfoExtractor):
if not html :
break
video_ids = re . findall (
r ' class=[ " \ ']channel-videos-image-container[^>]+> \ s*<a \ b[^>]+ \ bhref =[" \' ]/video/([^ " \' /]+) ',
r ' ''class \ s*= \ s*[ " ']channel-videos-image-container[^>]+> \ s*<a \ b[^>]+ \ bhref \ s*= \ s*[ " ' ]/video/([^ " ' /]+) '' ',
html )
if not video_ids :
break
offset + = len ( video_ids )
for video_id in video_ids :
yield self . url_result (
' https://www.bitchute.com/video/ %s ' % video_id ,
' https://www.bitchute.com/video/ ' + video_id ,
ie = BitChuteIE . ie_key ( ) , video_id = video_id )
def _real_extract ( self , url ) :