@ -4,20 +4,28 @@ from __future__ import unicode_literals
import re
from . common import InfoExtractor
from . . compat import compat_chr
from . . compat import (
compat_chr ,
compat_zip as zip ,
)
from . . utils import (
clean_html ,
decode_packed_codes ,
determine_ext ,
ExtractorError ,
get_element_by_id ,
int_or_none ,
js_to_json ,
merge_dicts ,
T ,
traverse_obj ,
url_or_none ,
urlencode_postdata ,
)
# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
def aa_decode ( aa_code ) :
# Decode text whose characters are spelled with emoticon expressions:
# split `aa_code` on `delim`, rewrite each piece to digits using
# `symbol_table`, turn the digits into one character and join the results.
# NOTE(review): two variants of the body are interleaved below (a `ret`
# accumulator loop and a `chr_from_code` helper), and the hunk marker inside
# the table means some (value, pattern) rows are not visible here.
symbol_table = [
symbol_table = (
( ' 7 ' , ' ((゚ー゚) + (o^_^o)) ' ) ,
( ' 6 ' , ' ((o^_^o) +(o^_^o)) ' ) ,
( ' 5 ' , ' ((゚ー゚) + (゚Θ゚)) ' ) ,
@ -26,84 +34,180 @@ def aa_decode(aa_code):
( ' 3 ' , ' (o^_^o) ' ) ,
( ' 1 ' , ' (゚Θ゚) ' ) ,
( ' 0 ' , ' (c^_^o) ' ) ,
]
( ' + ' , ' ' ) ,
)
# Marker that separates one encoded character from the next.
delim = ' (゚Д゚)[゚ε゚]+ '
ret = ' '
for aa_char in aa_code . split ( delim ) :
def chr_from_code ( c ) :
# Substitute every known emoticon pattern with its replacement value.
for val , pat in symbol_table :
aa_char = aa_char . replace ( pat , val )
aa_char = aa_char . replace ( ' + ' , ' ' )
# Accumulator variant: a leading run of digits is parsed as octal (base 8).
m = re . match ( r ' ^ \ d+ ' , aa_char )
if m :
ret + = compat_chr ( int ( m . group ( 0 ) , 8 ) )
c = c . replace ( pat , val )
# Helper variant: a leading u/U selects hexadecimal and is dropped before parsing.
if c . startswith ( ( ' u ' , ' U ' ) ) :
base = 16
c = c [ 1 : ]
else :
# Accumulator variant: otherwise accept "u" followed by hexadecimal digits.
m = re . match ( r ' ^u([ \ da-f]+) ' , aa_char )
if m :
ret + = compat_chr ( int ( m . group ( 1 ) , 16 ) )
return ret
# NOTE(review): the visible table rows only yield digits 0-7 and the
# accumulator variant parses them with base 8; base 10 here looks
# inconsistent with that - confirm which radix is intended.
base = 10
# int_or_none presumably returns None for text that is not a number in the
# chosen base (verify against utils); such pieces contribute an empty string.
c = int_or_none ( c , base = base )
return ' ' if c is None else compat_chr ( c )
return ' ' . join (
chr_from_code ( aa_char )
for aa_char in aa_code . split ( delim ) )
class XFileShareIE ( InfoExtractor ) :
# Extractor for a family of video hosts that IE_DESC describes as
# "XFileShare based sites".
# _SITES holds (host regex, display name) pairs: the first column is joined
# into _VALID_URL and _EMBED_REGEX, the second column into IE_DESC.
# NOTE(review): superseded and replacement rows are interleaved in this table.
_SITES = (
( r ' aparat \ .cam ' , ' Aparat ' ) ,
( r ' clipwatching \ .com ' , ' ClipWatching ' ) ,
( r ' gounlimited \ .to ' , ' GoUnlimited ' ) ,
( r ' govid \ .me ' , ' GoVid ' ) ,
( r ' holavid \ .com ' , ' HolaVid ' ) ,
( r ' streamty \ .com ' , ' Streamty ' ) ,
( r ' thevideobee \ .to ' , ' TheVideoBee ' ) ,
( r ' uqload \ .com ' , ' Uqload ' ) ,
( r ' vidbom \ .com ' , ' VidBom ' ) ,
( r ' vidlo \ .us ' , ' vidlo ' ) ,
( r ' vidlocker \ .xyz ' , ' VidLocker ' ) ,
( r ' vidshare \ .tv ' , ' VidShare ' ) ,
( r ' vup \ .to ' , ' VUp ' ) ,
# status check 2024-02: site availability, G site: search
( r ' aparat \ .cam ' , ' Aparat ' ) , # Cloudflare says host error 522, apparently changed to wolfstream.tv
# The trailing "/." lets the host group take in one extra path character
# (compare the filemoon.sx/e/... URL in _TESTS).
( r ' filemoon \ .sx/. ' , ' FileMoon ' ) ,
( r ' gounlimited \ .to ' , ' GoUnlimited ' ) , # no media pages listed
( r ' govid \ .me ' , ' GoVid ' ) , # no media pages listed
( r ' highstream \ .tv ' , ' HighStream ' ) , # clipwatching.com redirects here
( r ' holavid \ .com ' , ' HolaVid ' ) , # Cloudflare says host error 522
# (r'streamty\.com', 'Streamty'), # no media pages listed, connection timeout
# (r'thevideobee\.to', 'TheVideoBee'), # no pages listed, refuses connection
( r ' uqload \ .to ' , ' Uqload ' ) , # .com, .co redirect here
# NOTE(review): the dot in "vadbam.net" is not escaped, unlike the other
# host patterns, so it matches any character - confirm this is intended.
( r ' (?:vedbam \ .xyz|vadbam.net) ' , ' V?dB?m ' ) , # vidbom.com redirects here, but no valid media pages listed
( r ' vidlo \ .us ' , ' vidlo ' ) , # no valid media pages listed
( r ' vidlocker \ .xyz ' , ' VidLocker ' ) , # no media pages listed
( r ' (?:w \ d \ .)?viidshar \ .com ' , ' VidShare ' ) , # vidshare.tv redirects here
# (r'vup\.to', 'VUp'), # domain not found
( r ' wolfstream \ .tv ' , ' WolfStream ' ) ,
( r ' xvideosharing \ .com ' , ' XVideoSharing ' ) ,
( r ' xvideosharing \ .com ' , ' XVideoSharing ' ) , # just started showing 'maintenance mode'
)
# Description text listing the display names (second column of _SITES).
IE_DESC = ' XFileShare based sites: %s ' % ' , ' . join ( list ( zip ( * _SITES ) ) [ 1 ] )
IE_DESC = ' XFileShare - based sites: %s ' % ' , ' . join ( list ( zip ( * _SITES ) ) [ 1 ] )
# Page URL pattern: optional "www.", one of the host regexes (group "host"),
# an optional "embed-" prefix and an alphanumeric id (group "id").
_VALID_URL = ( r ' https?://(?:www \ .)?(?P<host> %s )/(?:embed-)?(?P<id>[0-9a-zA-Z]+) '
% ' | ' . join ( site for site in list ( zip ( * _SITES ) ) [ 0 ] ) )
# Pattern for <iframe> tags whose src points at an embed-<id> page on one of
# the listed hosts; the URL is captured as group "url".
_EMBED_REGEX = [ r ' <iframe \ b[^>]+ \ bsrc=([ " \' ])(?P<url>(?:https?:)?//(?: %s )/embed-[0-9a-zA-Z]+.*?) \ 1 ' % ' | ' . join ( site for site in list ( zip ( * _SITES ) ) [ 0 ] ) ]
# Page fragments that indicate the requested video is gone; _real_extract
# searches for them and raises "Video ... does not exist" on a hit.
_FILE_NOT_FOUND_REGEXES = (
r ' >(?:404 - )?File Not Found< ' ,
r ' >The file was removed by administrator< ' ,
)
# Candidate title patterns, handed together to _search_regex by
# _real_extract; each one captures the title text in group 1.
_TITLE_REGEXES = (
r ' style= " z-index: [0-9]+; " >([^<]+)</span> ' ,
r ' <td nowrap>([^<]+)</td> ' ,
r ' h4-fine[^>]*>([^<]+)< ' ,
r ' >Watch (.+)[ <] ' ,
r ' <h2 class= " video-page-head " >([^<]+)</h2> ' ,
r ' <h2 style= " [^ " ]*color:#403f3d[^ " ]* " [^>]*>([^<]+)< ' , # streamin.to (dead)
r ' title \ s*: \ s* " ([^ " ]+) " ' , # govid.me
)
# Fallback patterns for media URLs (named group "url"); _real_extract scans
# the page with re.finditer for each of them when the JWPlayer result has no
# formats.
_SOURCE_URL_REGEXES = (
r ' (?:file|src) \ s*: \ s*([ " \' ])(?P<url>http(?:(?! \ 1).)+ \ .(?:m3u8|mp4|flv)(?:(?! \ 1).)*) \ 1 ' ,
r ' file_link \ s*= \ s*([ " \' ])(?P<url>http(?:(?! \ 1).)+) \ 1 ' ,
r ' addVariable \ (( \\ ?[ " \' ])file \ 1 \ s*, \ s*( \\ ?[ " \' ])(?P<url>http(?:(?! \ 2).)+) \ 2 \ ) ' ,
r ' <embed[^>]+src=([ " \' ])(?P<url>http(?:(?! \ 1).)+ \ .(?:m3u8|mp4|flv)(?:(?! \ 1).)*) \ 1 ' ,
)
# Patterns for the poster / preview image URL, captured in group 1.
_THUMBNAIL_REGEXES = (
r ' <video[^>]+poster= " ([^ " ]+) " ' ,
r ' (?:image|poster) \ s*: \ s*[ " \' ](http[^ " \' ]+)[ " \' ], ' ,
)
# Test fixtures. Entries with 'info_dict' list the fields expected from
# extraction; entries with 'only_matching' presumably only check that the URL
# is accepted by _VALID_URL - confirm against the test harness.
# NOTE(review): the first entry shows two values for 'url', 'md5', 'id',
# 'title' and 'thumbnail' because superseded and replacement lines are
# interleaved here.
_TESTS = [ {
' url ' : ' http://xvideosharing.com/fq65f94nd2ve ' ,
' md5 ' : ' 4181f63957e8fe90ac836fa58dc3c8a6 ' ,
' note ' : ' link in `sources` ' ,
' url ' : ' https://uqload.to/dcsu06gdb45o ' ,
' md5 ' : ' 7f8db187b254379440bf4fcad094ae86 ' ,
' info_dict ' : {
' id ' : ' fq65f94nd2ve ' ,
' id ' : ' dcsu06gdb45o ' ,
' ext ' : ' mp4 ' ,
' title ' : ' sample ' ,
' thumbnail ' : r ' re:http://.* \ .jpg ' ,
' title ' : ' f2e31015957e74c8c8427982e161c3fc mp4 ' ,
' thumbnail ' : r ' re:https://.* \ .jpg '
} ,
' params ' : {
' nocheckcertificate ' : True ,
} ,
' expected_warnings ' : [ ' Unable to extract JWPlayer data ' ] ,
} , {
# Kept for reference but skipped: the site reports maintenance mode.
' note ' : ' link in decoded `sources` ' ,
' url ' : ' https://xvideosharing.com/1tlg6agrrdgc ' ,
' md5 ' : ' 2608ce41932c1657ae56258a64e647d9 ' ,
' info_dict ' : {
' id ' : ' 1tlg6agrrdgc ' ,
' ext ' : ' mp4 ' ,
' title ' : ' 0121 ' ,
' thumbnail ' : r ' re:https?://.* \ .jpg ' ,
} ,
' skip ' : ' This server is in maintenance mode. ' ,
} , {
' note ' : ' JWPlayer link in un-p,a,c,k,e,d JS ' ,
' url ' : ' https://filemoon.sx/e/dw40rxrzruqz ' ,
' md5 ' : ' 5a713742f57ac4aef29b74733e8dda01 ' ,
' info_dict ' : {
' id ' : ' dw40rxrzruqz ' ,
' title ' : ' dw40rxrzruqz ' ,
' ext ' : ' mp4 '
} ,
} , {
' note ' : ' JWPlayer link in un-p,a,c,k,e,d JS ' ,
' url ' : ' https://vadbam.net/6lnbkci96wly.html ' ,
' md5 ' : ' a1616800076177e2ac769203957c54bc ' ,
' info_dict ' : {
' id ' : ' 6lnbkci96wly ' ,
' title ' : ' Heart Crime S01 E03 weciima autos ' ,
' ext ' : ' mp4 '
} ,
} , {
' note ' : ' JWPlayer link in clear ' ,
' url ' : ' https://w1.viidshar.com/nnibe0xf0h79.html ' ,
' md5 ' : ' f0a580ce9df06cc61b4a5c979d672367 ' ,
' info_dict ' : {
' id ' : ' nnibe0xf0h79 ' ,
' title ' : ' JaGa 68ar ' ,
' ext ' : ' mp4 '
} ,
' params ' : {
' skip_download ' : ' ffmpeg ' ,
} ,
' expected_warnings ' : [ ' hlsnative has detected features it does not support ' ] ,
} , {
' note ' : ' JWPlayer link in clear ' ,
' url ' : ' https://wolfstream.tv/a3drtehyrg52.html ' ,
' md5 ' : ' 1901d86a79c5e0c6a51bdc9a4cfd3769 ' ,
' info_dict ' : {
' id ' : ' a3drtehyrg52 ' ,
' title ' : ' NFL 2023 W04 DET@GB ' ,
' ext ' : ' mp4 '
} ,
} , {
# URL-matching checks for additional hosts and URL shapes.
' url ' : ' https://aparat.cam/n4d6dh0wvlpr ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://wolfstream.tv/nthme29v9u2x ' ,
' url ' : ' https://uqload.to/ug5somm0ctnk.html ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://highstream.tv/2owiyz3sjoux ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://vedbam.xyz/6lnbkci96wly.html ' ,
' only_matching ' : True ,
} ]
@staticmethod
def _extract_urls ( webpage ) :
# Static variant: return the "url" group of every <iframe> whose src points
# at an embed-<id> page on one of the hosts in XFileShareIE._SITES.
# The host alternation is rebuilt from the first column of _SITES on each call.
return [
mobj . group ( ' url ' )
for mobj in re . finditer (
r ' <iframe \ b[^>]+ \ bsrc=([ " \' ])(?P<url>(?:https?:)?//(?: %s )/embed-[0-9a-zA-Z]+.*?) \ 1 '
% ' | ' . join ( site for site in list ( zip ( * XFileShareIE . _SITES ) ) [ 0 ] ) ,
webpage ) ]
@classmethod
def _extract_urls(cls, webpage):
    """Return the embedded player URLs found in ``webpage``.

    Every pattern in ``cls._EMBED_REGEX`` is applied in turn with
    ``re.finditer``; the ``url`` group of each match is collected, pattern
    by pattern, in the order the matches occur. Duplicates are kept.
    """
    return [
        match.group('url')
        for pattern in cls._EMBED_REGEX
        for match in re.finditer(pattern, webpage)
    ]
def _real_extract ( self , url ) :
# Extract media info for one page: take host and id from the URL, fetch the
# page, raise on maintenance / missing-file notices, undo obfuscated scripts,
# then collect formats, title and thumbnail.
# NOTE(review): superseded and replacement lines are interleaved below and
# two hunk markers hide parts of the body, so these comments only describe
# what is visible.
host , video_id = re . match ( self . _VALID_URL , url ) . groups ( )
host , video_id = self . _match_valid_url ( url ) . group ( ' host ' , ' id ' )
# govid.me and vidlo.us are fetched through their embed-<id>.html page;
# every other host uses the bare id.
url = ' https:// %s / ' % host + ( ' embed- %s .html ' % video_id if host in ( ' govid.me ' , ' vidlo.us ' ) else video_id )
url = ' https:// %s / %s ' % (
host ,
' embed- %s .html ' % video_id if host in ( ' govid.me ' , ' vidlo.us ' ) else video_id )
webpage = self . _download_webpage ( url , video_id )
if any ( re . search ( p , webpage ) for p in self . _FILE_NOT_FOUND_REGEXES ) :
# Run the error checks against the element with id "container" when it
# exists, otherwise against the whole page.
container_div = get_element_by_id ( ' container ' , webpage ) or webpage
# Maintenance notice: report the cleaned container text as an expected error.
if self . _search_regex (
r ' >This server is in maintenance mode \ . ' , container_div ,
' maint error ' , group = 0 , default = None ) :
raise ExtractorError ( clean_html ( container_div ) , expected = True )
# Missing-file notice: any _FILE_NOT_FOUND_REGEXES hit ends extraction.
if self . _search_regex (
self . _FILE_NOT_FOUND_REGEXES , container_div ,
' missing video error ' , group = 0 , default = None ) :
raise ExtractorError ( ' Video %s does not exist ' % video_id , expected = True )
fields = self . _hidden_inputs ( webpage )
@ -122,59 +226,43 @@ class XFileShareIE(InfoExtractor):
' Content-type ' : ' application/x-www-form-urlencoded ' ,
} )
# Title: first title-regex hit, else the _og_search_title result, else the
# video id; the chosen value is stripped of surrounding whitespace.
title = ( self . _search_regex (
( r ' style= " z-index: [0-9]+; " >([^<]+)</span> ' ,
r ' <td nowrap>([^<]+)</td> ' ,
r ' h4-fine[^>]*>([^<]+)< ' ,
r ' >Watch (.+)[ <] ' ,
r ' <h2 class= " video-page-head " >([^<]+)</h2> ' ,
r ' <h2 style= " [^ " ]*color:#403f3d[^ " ]* " [^>]*>([^<]+)< ' , # streamin.to
r ' title \ s*: \ s* " ([^ " ]+) " ' ) , # govid.me
webpage , ' title ' , default = None ) or self . _og_search_title (
webpage , default = None ) or video_id ) . strip ( )
for regex , func in (
( r ' (eval \ (function \ (p,a,c,k,e,d \ ) { .+) ' , decode_packed_codes ) ,
( r ' (゚.+) ' , aa_decode ) ) :
obf_code = self . _search_regex ( regex , webpage , ' obfuscated code ' , default = None )
if obf_code :
webpage = webpage . replace ( obf_code , func ( obf_code ) )
formats = [ ]
jwplayer_data = self . _search_regex (
[
r ' jwplayer \ ( " [^ " ]+ " \ ) \ .load \ ( \ [( { .+?}) \ ] \ ); ' ,
r ' jwplayer \ ( " [^ " ]+ " \ ) \ .setup \ (( { .+?}) \ ); ' ,
] , webpage ,
' jwplayer data ' , default = None )
if jwplayer_data :
jwplayer_data = self . _parse_json (
jwplayer_data . replace ( r " \ ' " , " ' " ) , video_id , js_to_json )
title = (
self . _search_regex ( self . _TITLE_REGEXES , webpage , ' title ' , default = None )
or self . _og_search_title ( webpage , default = None )
or video_id ) . strip ( )
# Keep decoding until neither obfuscation pattern matches; the break
# restarts the scan after each replacement so nested layers are unwrapped.
# NOTE(review): if a decoder's output still matches its own pattern this
# loop would not terminate - confirm the decoders always consume the marker.
obf_code = True
while obf_code :
for regex , func in (
( r ' (?s)(?<!-) \ b(eval \ (function \ (p,a,c,k,e,d \ ) \ { (?:(?!</script>).)+ \ ) \ )) ' ,
decode_packed_codes ) ,
( r ' (゚.+) ' , aa_decode ) ) :
obf_code = self . _search_regex ( regex , webpage , ' obfuscated code ' , default = None )
if obf_code :
webpage = webpage . replace ( obf_code , func ( obf_code ) )
break
# Backslash-escaped single quotes are unescaped before the page is handed
# to the JWPlayer helpers.
jwplayer_data = self . _find_jwplayer_data (
webpage . replace ( r ' \' ' , ' \' ' ) , video_id )
result = self . _parse_jwplayer_data (
jwplayer_data , video_id , require_title = False ,
m3u8_id = ' hls ' , mpd_id = ' dash ' )
# Fall back to regex-scanned source URLs when the JWPlayer result carries
# no formats.
if not traverse_obj ( result , ' formats ' ) :
if jwplayer_data :
formats = self . _parse_jwplayer_data (
jwplayer_data , video_id , False ,
m3u8_id = ' hls ' , mpd_id = ' dash ' ) [ ' formats ' ]
if not formats :
urls = [ ]
for regex in (
r ' (?:file|src) \ s*: \ s*([ " \' ])(?P<url>http(?:(?! \ 1).)+ \ .(?:m3u8|mp4|flv)(?:(?! \ 1).)*) \ 1 ' ,
r ' file_link \ s*= \ s*([ " \' ])(?P<url>http(?:(?! \ 1).)+) \ 1 ' ,
r ' addVariable \ (( \\ ?[ " \' ])file \ 1 \ s*, \ s*( \\ ?[ " \' ])(?P<url>http(?:(?! \ 2).)+) \ 2 \ ) ' ,
r ' <embed[^>]+src=([ " \' ])(?P<url>http(?:(?! \ 1).)+ \ .(?:m3u8|mp4|flv)(?:(?! \ 1).)*) \ 1 ' ) :
self . report_warning (
' Failed to extract JWPlayer formats ' , video_id = video_id )
# A set drops duplicate URLs (the list variant used a membership test);
# unlike the list it does not keep match order.
urls = set ( )
for regex in self . _SOURCE_URL_REGEXES :
for mobj in re . finditer ( regex , webpage ) :
video_url = mobj . group ( ' url ' )
if video_url not in urls :
urls . append ( video_url )
urls . add ( mobj . group ( ' url ' ) )
# Also accept a "sources: [...]" array whose first element is not an object.
sources = self . _search_regex (
r ' sources \ s*: \ s*( \ [(?! { )[^ \ ]]+ \ ]) ' , webpage , ' sources ' , default = None )
if sources :
urls . extend ( self . _parse_json ( sources , video_id ) )
urls . update ( traverse_obj ( sources , ( T ( lambda s : self . _parse_json ( s , video_id ) ) , Ellipsis ) ) )
formats = [ ]
for video_url in urls:
# url_or_none presumably filters out entries that are not usable URLs -
# verify against utils.
for video_url in traverse_obj( urls, ( Ellipsis , T ( url_or_none ) ) ) :
# URLs with an m3u8 extension are expanded through _extract_m3u8_formats;
# the visible tail of the other branch adds a format with format_id "sd".
if determine_ext ( video_url ) == ' m3u8 ' :
formats . extend ( self . _extract_m3u8_formats (
video_url , video_id , ' mp4 ' ,
@ -185,17 +273,19 @@ class XFileShareIE(InfoExtractor):
' url ' : video_url ,
' format_id ' : ' sd ' ,
} )
self . _sort_formats ( formats )
result = { ' formats ' : formats }
self . _sort_formats ( result [ ' formats ' ] )
thumbnail = self . _search_regex (
[
r ' <video[^>]+poster= " ([^ " ]+) " ' ,
r ' (?:image|poster) \ s*: \ s*[ " \' ](http[^ " \' ]+)[ " \' ], ' ,
] , webpage , ' thumbnail ' , default = None )
self . _THUMBNAIL_REGEXES , webpage , ' thumbnail ' , default = None )
# Last-resort title when neither the page nor the JWPlayer result gave one.
if not ( title or result . get ( ' title ' ) ) :
title = self . _generic_title ( url ) or video_id
return {
# merge_dicts presumably lets values already present in `result` take
# precedence over the ones listed here - verify against utils.
return merge_dicts ( result , {
' id ' : video_id ,
' title ' : title ,
' title ' : title or None ,
' thumbnail ' : thumbnail ,
' formats' : formats ,
}
# The page URL is attached as the Referer header for the returned formats.
' http_headers' : { ' Referer ' : url }
} )