@ -11,60 +11,100 @@ from ..utils import (
determine_ext ,
determine_ext ,
)
)
import re
import re
import random
class NitterIE ( InfoExtractor ) :
class NitterIE ( InfoExtractor ) :
# Taken from https://github.com/zedeus/nitter/wiki/Instances
# Taken from https://github.com/zedeus/nitter/wiki/Instances
INSTANCES = ( ' nitter.net ' ,
' nitter.snopyta.org ' ,
NON_HTTP_INSTANCES = (
' 3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion ' ,
' nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion ' ,
' nitter7bryz3jv7e3uekphigvmoyoem4al3fynerxkj22dmoxoq553qd.onion ' ,
' npf37k3mtzwxreiw52ccs5ay4e6qt2fkcs2ndieurdyn2cuzzsfyfvid.onion ' ,
' nitter.v6vgyqpa7yefkorazmg5d5fimstmvm2vtbirt6676mt7qmllrcnwycqd.onion ' ,
' i23nv6w3juvzlw32xzoxcqzktegd4i4fu3nmnc2ewv4ggiu4ledwklad.onion ' ,
' 26oq3gioiwcmfojub37nz5gzbkdiqp7fue5kvye7d4txv4ny6fb4wwid.onion ' ,
' nitter.i2p ' ,
' u6ikd6zndl3c4dsdq4mmujpntgeevdk5qzkfb57r4tnfeccrn2qa.b32.i2p ' ,
' nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion ' ,
)
HTTP_INSTANCES = (
' nitter.42l.fr ' ,
' nitter.42l.fr ' ,
' nitter.nixnet.services ' ,
' nitter.13ad.de ' ,
' nitter.pussthecat.org ' ,
' nitter.pussthecat.org ' ,
' nitter.nixnet.services ' ,
' nitter.mastodont.cat ' ,
' nitter.mastodont.cat ' ,
' nitter.dark.fail ' ,
' nitter.tedomum.net ' ,
' nitter.tedomum.net ' ,
' nitter.cattube.org ' ,
' nitter.fdn.fr ' ,
' nitter.fdn.fr ' ,
' nitter.1d4.us ' ,
' nitter.1d4.us ' ,
' nitter.kavin.rocks ' ,
' nitter.kavin.rocks ' ,
' tweet.lambda.dance ' ,
' tweet.lambda.dance ' ,
' nitter.cc ' ,
' nitter.cc ' ,
' nitter.vxempire.xyz ' ,
' nitter.unixfox.eu ' ,
' nitter.domain.glass ' ,
' nitter.himiko.cloud ' ,
' nitter.eu ' ,
' nitter.namazso.eu ' ,
' nitter.mailstation.de ' ,
' nitter.actionsack.com ' ,
' nitter.cattube.org ' ,
' nitter.dark.fail ' ,
' birdsite.xanny.family ' ,
' nitter.40two.app ' ,
' nitter.skrep.in ' ,
# not in the list anymore
' nitter.snopyta.org ' ,
)
DEAD_INSTANCES = (
# maintenance
' nitter.ethibox.fr ' ,
# official, rate limited
' nitter.net ' ,
# offline
' nitter.13ad.de ' ,
' nitter.weaponizedhumiliation.com ' ,
' nitter.weaponizedhumiliation.com ' ,
' 3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion ' ,
)
' nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion ' ,
' nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion ' )
INSTANCES = NON_HTTP_INSTANCES + HTTP_INSTANCES + DEAD_INSTANCES
_INSTANCES_RE = ' (?: ' + ' | ' . join ( [ re . escape ( instance ) for instance in INSTANCES ] ) + ' ) '
_INSTANCES_RE = ' (?: ' + ' | ' . join ( [ re . escape ( instance ) for instance in INSTANCES ] ) + ' ) '
_VALID_URL = r ' https?:// %(instance)s /(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)? ' % { ' instance ' : _INSTANCES_RE }
_VALID_URL = r ' https?:// %(instance)s /(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)? ' % { ' instance ' : _INSTANCES_RE }
current_instance = INSTANCES [ 0 ] # the test and official instance
current_instance = random . choice ( HTTP_INSTANCES )
_TESTS = [
_TESTS = [
{
{
# GIF (wrapped in mp4)
# GIF (wrapped in mp4)
' url ' : ' https:// ' + current_instance + ' /firefox/status/1314279897502629888#m ' ,
' url ' : ' https:// %s /firefox/status/1314279897502629888#m ' % current_instance ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 1314279897502629888 ' ,
' id ' : ' 1314279897502629888 ' ,
' ext ' : ' mp4 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet ' ,
' title ' : ' Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. \n \n Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg\n \n #UnfckTheInternet' ,
' description ' : ' You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet ' ,
' description ' : ' You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. \n \n Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg\n \n #UnfckTheInternet' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' uploader ' : ' Firefox 🔥 ' ,
' uploader ' : ' Firefox 🔥 ' ,
' uploader_id ' : ' firefox ' ,
' uploader_id ' : ' firefox ' ,
' uploader_url ' : ' https:// ' + current_instance + ' /firefox ' ,
' uploader_url ' : ' https:// %s /firefox ' % current_instance ,
' upload_date ' : ' 20201008 ' ,
' upload_date ' : ' 20201008 ' ,
' timestamp ' : 1602183720 ,
' timestamp ' : 1602183720 ,
} ,
} ,
} , { # normal video
} , { # normal video
' url ' : ' https:// ' + current_instance + ' /Le___Doc/status/1299715685392756737#m ' ,
' url ' : ' https:// %s /Le___Doc/status/1299715685392756737#m ' % current_instance ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 1299715685392756737 ' ,
' id ' : ' 1299715685392756737 ' ,
' ext ' : ' mp4 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Le Doc - " Je ne prédis jamais rien " D Raoult, Août 2020...' ,
' title ' : ' Le Doc - " Je ne prédis jamais rien " \n D Raoult, Août 2020...' ,
' description ' : ' " Je ne prédis jamais rien " D Raoult, Août 2020...' ,
' description ' : ' " Je ne prédis jamais rien " \n D Raoult, Août 2020...' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' uploader ' : ' Le Doc ' ,
' uploader ' : ' Le Doc ' ,
' uploader_id ' : ' Le___Doc ' ,
' uploader_id ' : ' Le___Doc ' ,
' uploader_url ' : ' https:// ' + current_instance + ' /Le___Doc ' ,
' uploader_url ' : ' https:// %s /Le___Doc ' % current_instance ,
' upload_date ' : ' 20200829 ' ,
' upload_date ' : ' 20200829 ' ,
' timestamp ' : 1598711341 ,
' timestamp ' : 1598711341 ,
' view_count ' : int ,
' view_count ' : int ,
@ -73,31 +113,51 @@ class NitterIE(InfoExtractor):
' comment_count ' : int ,
' comment_count ' : int ,
} ,
} ,
} , { # video embed in a "Streaming Political Ads" box
} , { # video embed in a "Streaming Political Ads" box
' url ' : ' https:// ' + current_instance + ' /mozilla/status/1321147074491092994#m ' ,
' url ' : ' https:// %s /mozilla/status/1321147074491092994#m ' % current_instance ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 1321147074491092994 ' ,
' id ' : ' 1321147074491092994 ' ,
' ext ' : ' mp4 ' ,
' ext ' : ' mp4 ' ,
' title ' : " Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn' t a real political ad, but if you ' re watching streaming TV in the U.S., chances are you ' ve seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds" ,
' title ' : " Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? \n \n This isn' t a real political ad, but if you ' re watching streaming TV in the U.S., chances are you ' ve seen quite a few. \n \n Learn more ➡️ https://mzl.la/StreamingAds" ,
' description ' : " Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn' t a real political ad, but if you ' re watching streaming TV in the U.S., chances are you ' ve seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds" ,
' description ' : " Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? \n \n This isn' t a real political ad, but if you ' re watching streaming TV in the U.S., chances are you ' ve seen quite a few. \n \n Learn more ➡️ https://mzl.la/StreamingAds" ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' uploader ' : ' Mozilla ' ,
' uploader ' : ' Mozilla ' ,
' uploader_id ' : ' mozilla ' ,
' uploader_id ' : ' mozilla ' ,
' uploader_url ' : ' https:// ' + current_instance + ' /mozilla ' ,
' uploader_url ' : ' https:// %s /mozilla ' % current_instance ,
' upload_date ' : ' 20201027 ' ,
' upload_date ' : ' 20201027 ' ,
' timestamp ' : 1603820982
' timestamp ' : 1603820982
} ,
} ,
} ,
} , { # not the first tweet but main-tweet
' url ' : ' https:// %s /TheNaturalNu/status/1379050895539724290#m ' % current_instance ,
' info_dict ' : {
' id ' : ' 1379050895539724290 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Dorothy Zbornak - This had me hollering!! ' ,
' description ' : ' This had me hollering!! ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' uploader ' : ' Dorothy Zbornak ' ,
' uploader_id ' : ' TheNaturalNu ' ,
' uploader_url ' : ' https:// %s /TheNaturalNu ' % current_instance ,
' timestamp ' : 1617626329 ,
' upload_date ' : ' 20210405 '
}
}
]
]
def _real_extract ( self , url ) :
def _real_extract ( self , url ) :
video_id = self . _match_id ( url )
video_id = self . _match_id ( url )
parsed_url = compat_urlparse . urlparse ( url )
parsed_url = compat_urlparse . urlparse ( url )
base_url = parsed_url . scheme + ' :// ' + parsed_url . netloc
base_url = ' %s :// %s ' % ( parsed_url . scheme , parsed_url . netloc )
self . _set_cookie ( parsed_url . netloc , ' hlsPlayback ' , ' on ' )
self . _set_cookie ( parsed_url . netloc , ' hlsPlayback ' , ' on ' )
webpage = self . _download_webpage ( url , video_id )
full_ webpage = self . _download_webpage ( url , video_id )
video_url = base_url + self . _html_search_regex ( r ' (?:<video[^>]+data-url|<source[^>]+src)= " ([^ " ]+) " ' , webpage , ' video url ' )
main_tweet_start = full_webpage . find ( ' class= " main-tweet " ' )
if main_tweet_start > 0 :
webpage = full_webpage [ main_tweet_start : ]
if not webpage :
webpage = full_webpage
video_url = ' %s %s ' % ( base_url , self . _html_search_regex ( r ' (?:<video[^>]+data-url|<source[^>]+src)= " ([^ " ]+) " ' , webpage , ' video url ' ) )
ext = determine_ext ( video_url )
ext = determine_ext ( video_url )
if ext == ' unknown_video ' :
if ext == ' unknown_video ' :
@ -108,32 +168,33 @@ class NitterIE(InfoExtractor):
' ext ' : ext
' ext ' : ext
} ]
} ]
title = (
title = self . _og_search_description ( full_webpage )
self . _og_search_description ( webpage ) . replace ( ' \n ' , ' ' )
if not title :
or self . _html_search_regex ( r ' <div class= " tweet-content[^>]+>([^<]+)</div> ' , webpage , ' title ' ) )
title = self . _html_search_regex ( r ' <div class= " tweet-content[^>]+>([^<]+)</div> ' , webpage , ' title ' )
description = title
description = title
mobj = re . match ( self . _VALID_URL , url )
mobj = re . match ( self . _VALID_URL , url )
uploader_id = (
uploader_id = (
mobj . group ( ' uploader_id ' )
mobj . group ( ' uploader_id ' )
or self . _html_search_regex ( r ' <a class= " fullname " [^>]+title= " ([^ " ]+) " ' , webpage , ' uploader name ' , fatal = False ) )
or self . _html_search_regex ( r ' <a class= " fullname " [^>]+title= " ([^ " ]+) " ' , webpage , ' uploader name ' , fatal = False )
)
if uploader_id :
if uploader_id :
uploader_url = base_url + ' / ' + uploader_id
uploader_url = ' %s / %s ' % ( base_url , uploader_id )
uploader = self . _html_search_regex ( r ' <a class= " fullname " [^>]+title= " ([^ " ]+) " ' , webpage , ' uploader name ' , fatal = False )
uploader = self . _html_search_regex ( r ' <a class= " fullname " [^>]+title= " ([^ " ]+) " ' , webpage , ' uploader name ' , fatal = False )
if uploader :
if uploader :
title = uploader + ' - ' + title
title = ' %s - %s ' % ( uploader , title )
view_count = parse_count ( self . _html_search_regex ( r ' <span[^>]+class= " icon-play[^>]*></span> \ s([^<]+)</div> ' , webpage , ' view count ' , fatal = False ) )
view_count = parse_count ( self . _html_search_regex ( r ' <span[^>]+class= " icon-play[^>]*></span> \ s([^<]+)</div> ' , webpage , ' view count ' , fatal = False ) )
like_count = parse_count ( self . _html_search_regex ( r ' <span[^>]+class= " icon-heart[^>]*></span> \ s([^<]+)</div> ' , webpage , ' like count ' , fatal = False ) )
like_count = parse_count ( self . _html_search_regex ( r ' <span[^>]+class= " icon-heart[^>]*></span> \ s([^<]+)</div> ' , webpage , ' like count ' , fatal = False ) )
repost_count = parse_count ( self . _html_search_regex ( r ' <span[^>]+class= " icon-retweet[^>]*></span> \ s([^<]+)</div> ' , webpage , ' repost count ' , fatal = False ) )
repost_count = parse_count ( self . _html_search_regex ( r ' <span[^>]+class= " icon-retweet[^>]*></span> \ s([^<]+)</div> ' , webpage , ' repost count ' , fatal = False ) )
comment_count = parse_count ( self . _html_search_regex ( r ' <span[^>]+class= " icon-comment[^>]*></span> \ s([^<]+)</div> ' , webpage , ' repost count ' , fatal = False ) )
comment_count = parse_count ( self . _html_search_regex ( r ' <span[^>]+class= " icon-comment[^>]*></span> \ s([^<]+)</div> ' , webpage , ' repost count ' , fatal = False ) )
thumbnail = self . _html_search_meta ( ' og:image ' , webpage, ' thumbnail url ' )
thumbnail = self . _html_search_meta ( ' og:image ' , full_ webpage, ' thumbnail url ' )
if not thumbnail :
if not thumbnail :
thumbnail = base_url + self . _html_search_regex ( r ' <video[^>]+poster= " ([^ " ]+) " ' , webpage , ' thumbnail url ' , fatal = False )
thumbnail = ' %s %s ' % ( base_url , self . _html_search_regex ( r ' <video[^>]+poster= " ([^ " ]+) " ' , webpage , ' thumbnail url ' , fatal = False ) )
thumbnail = remove_end ( thumbnail , ' % 3Asmall ' )
thumbnail = remove_end ( thumbnail , ' % 3Asmall ' )
thumbnails = [ ]
thumbnails = [ ]