From 71aff18809a70b7fa32d8fd07f4fb2f64641aea5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 23 Apr 2016 21:30:06 +0800 Subject: [PATCH] [socks] Support SOCKS proxies --- youtube_dl/socks.py | 198 ++++++++++++-------------------------------- youtube_dl/utils.py | 63 +++++++++++++- 2 files changed, 116 insertions(+), 145 deletions(-) diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index b0c36a189..95795b5a9 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -1,77 +1,30 @@ -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# For more information, please refer to -# -# Example: -# import socks -# import ftplib -# import socket -# -# socks.patch_socket() -# -# f = ftplib.FTP('ftp.kernel.org') -# f.login() -# print f.retrlines('LIST') -# f.quit() -# -# s = socket.create_connection(('www.google.com', 80)) -# s.sendall('HEAD / HTTP/1.0\r\n\r\n') -# print s.recv(1024) -# s.close() +# Public Domain SOCKS proxy protocol implementation +# Adapted from https://gist.github.com/bluec0re/cafd3764412967417fd3 + from __future__ import unicode_literals -import os -import struct -import socket -import time -__author__ = 'Timo Schmid ' +import collections +import socket -_orig_socket = socket.socket +from .compat import ( + struct_pack, + struct_unpack, +) -try: - from collections import namedtuple -except ImportError: - from Collections import namedtuple +__author__ = 'Timo Schmid ' -try: - from urllib.parse import urlparse -except: - from urlparse import urlparse -try: - from enum import Enum -except ImportError: - Enum = object +class ProxyError(IOError): + pass -class ProxyError(IOError): pass class Socks4Error(ProxyError): CODES = { 0x5B: 'request rejected or failed', 0x5C: 'request rejected becasue SOCKS server cannot connect to identd on the client', 0x5D: 'request rejected because the client program and identd report different user-ids' } + def __init__(self, code=None, msg=None): if code is not None and msg is None: msg = self.CODES.get(code) @@ -79,6 +32,7 @@ class Socks4Error(ProxyError): msg = 'unknown error' super(Socks4Error, self).__init__(code, msg) + class Socks5Error(Socks4Error): CODES = { 0x01: 'general SOCKS server failure', @@ -93,68 +47,19 @@ class Socks5Error(Socks4Error): 0xFF: 'all offered authentication methods were rejected' } -class ProxyType(Enum): - SOCKS4 = 0 + +class ProxyType(object): + SOCKS4 = 0 SOCKS4A = 1 - SOCKS5 = 2 - -Proxy = namedtuple('Proxy', ('type', 'host', 'port', 'username', 'password', 'remote_dns')) - -_default_proxy = None - -def setdefaultproxy(proxytype=None, addr=None, port=None, rdns=True, username=None, password=None, allow_env_override=True): - global _default_proxy - if allow_env_override: - all_proxy = os.environ.get('ALL_PROXY', os.environ.get('all_proxy')) - if all_proxy: - all_proxy = urlparse(all_proxy) - if all_proxy.scheme.startswith('socks'): - if all_proxy.scheme == 'socks' or all_proxy.scheme == 'socks4': - proxytype = ProxyType.SOCKS4 - elif all_proxy.scheme == 'socks4a': - proxytype = ProxyType.SOCKS4A - elif all_proxy.scheme == 'socks5': - proxytype = ProxyType.SOCKS5 - addr = all_proxy.hostname - port = all_proxy.port - username = all_proxy.username - password = all_proxy.password - - if proxytype is not None: - _default_proxy = Proxy(proxytype, addr, port, username, password, rdns) - - -def wrap_socket(sock): - return socksocket(_sock=sock._sock) - -def wrap_module(module): - if hasattr(module, 'socket'): - sock = module.socket - if isinstance(sock, socket.socket): - module.socket = sockssocket - elif hasattr(socket, 'socket'): - socket.socket = sockssocket - -def patch_socket(): - import sys - if 'socket' not in sys.modules: - import socket - sys.modules['socket'].socket = sockssocket + SOCKS5 = 2 +Proxy = collections.namedtuple('Proxy', ('type', 'host', 'port', 'username', 'password', 'remote_dns')) -class sockssocket(socket.socket): - def __init__(self, *args, **kwargs): - self.__proxy = None - if 'proxy' in kwargs: - self.__proxy = kwargs['proxy'] - del kwargs['proxy'] - super(sockssocket, self).__init__(*args, **kwargs) +class sockssocket(socket.socket): @property def _proxy(self): - if self.__proxy: - return self.__proxy - return _default_proxy + return self.__proxy @property def _proxy_port(self): @@ -175,7 +80,7 @@ class sockssocket(socket.socket): while len(data) < cnt: cur = self.recv(cnt - len(data)) if not cur: - raise IOError("{0} bytes missing".format(cnt-len(data))) + raise IOError('{0} bytes missing'.format(cnt - len(data))) data += cur return data @@ -186,39 +91,42 @@ class sockssocket(socket.socket): ipaddr = socket.inet_aton(destaddr) except socket.error: if is_4a and self._proxy.remote_dns: - ipaddr = struct.pack('!BBBB', 0, 0, 0, 0xFF) + ipaddr = struct_pack('!BBBB', 0, 0, 0, 0xFF) else: ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) - packet = struct.pack('!BBH', 0x4, 0x1, port) + ipaddr + packet = struct_pack('!BBH', 0x4, 0x1, port) + ipaddr if self._proxy.username: username = self._proxy.username if hasattr(username, 'encode'): username = username.encode() - packet += struct.pack('!{0}s'.format(len(username)+1), username) + packet += struct_pack('!{0}s'.format(len(username) + 1), username) else: packet += b'\x00' if is_4a and self._proxy.remote_dns: if hasattr(destaddr, 'encode'): destaddr = destaddr.encode() - packet += struct.pack('!{0}s'.format(len(destaddr)+1), destaddr) + packet += struct_pack('!{0}s'.format(len(destaddr) + 1), destaddr) self.sendall(packet) packet = self.recvall(8) - nbyte, resp_code, dstport, dsthost = struct.unpack('!BBHI', packet) + nbyte, resp_code, dstport, dsthost = struct_unpack('!BBHI', packet) # check valid response if nbyte != 0x00: self.close() - raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(0, nbyte)) + raise ProxyError( + 0, 'Invalid response from server. Expected {0:02x} got {1:02x}'.format(0, nbyte)) # access granted if resp_code != 0x5a: self.close() raise Socks4Error(resp_code) + return (dsthost, dstport) + def _setup_socks5(self, address): destaddr, port = address @@ -234,19 +142,20 @@ class sockssocket(socket.socket): if self._proxy.username and self._proxy.password: # two auth methods available auth_methods = 2 - packet = struct.pack('!BBB', 0x5, auth_methods, 0x00) # no auth + packet = struct_pack('!BBB', 0x5, auth_methods, 0x00) # no auth if self._proxy.username and self._proxy.password: - packet += struct.pack('!B', 0x02) # user/pass auth + packet += struct_pack('!B', 0x02) # user/pass auth self.sendall(packet) packet = self.recvall(2) - version, method = struct.unpack('!BB', packet) + version, method = struct_unpack('!BB', packet) # check valid response if version != 0x05: self.close() - raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(5, version)) + raise ProxyError( + 0, 'Invalid response from server. Expected {0:02x} got {1:02x}'.format(5, version)) # no auth methods if method == 0xFF: @@ -261,41 +170,42 @@ class sockssocket(socket.socket): password = self._proxy.password if hasattr(password, 'encode'): password = password.encode() - packet = struct.pack('!BB', 1, len(username)) + username - packet += struct.pack('!B', len(password)) + password + packet = struct_pack('!BB', 1, len(username)) + username + packet += struct_pack('!B', len(password)) + password self.sendall(packet) packet = self.recvall(2) - version, status = struct.unpack('!BB', packet) + version, status = struct_unpack('!BB', packet) if version != 0x01: self.close() - raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(1, version)) + raise ProxyError( + 0, 'Invalid response from server. Expected {0:02x} got {1:02x}'.format(1, version)) if status != 0x00: self.close() raise Socks5Error(1) - elif method == 0x00: # no auth + elif method == 0x00: # no auth pass - - packet = struct.pack('!BBB', 5, 1, 0) + packet = struct_pack('!BBB', 5, 1, 0) if ipaddr is None: if hasattr(destaddr, 'encode'): destaddr = destaddr.encode() - packet += struct.pack('!BB', 3, len(destaddr)) + destaddr + packet += struct_pack('!BB', 3, len(destaddr)) + destaddr else: - packet += struct.pack('!B', 1) + ipaddr - packet += struct.pack('!H', port) + packet += struct_pack('!B', 1) + ipaddr + packet += struct_pack('!H', port) self.sendall(packet) packet = self.recvall(4) - version, status, _, atype = struct.unpack('!BBBB', packet) + version, status, _, atype = struct_unpack('!BBBB', packet) if version != 0x05: self.close() - raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(5, version)) + raise ProxyError( + 0, 'Invalid response from server. Expected {0:02x} got {1:02x}'.format(5, version)) if status != 0x00: self.close() @@ -304,11 +214,13 @@ class sockssocket(socket.socket): if atype == 0x01: destaddr = self.recvall(4) elif atype == 0x03: - alen = struct.unpack('!B', self.recv(1))[0] + alen = struct_unpack('!B', self.recv(1))[0] destaddr = self.recvall(alen) elif atype == 0x04: destaddr = self.recvall(16) - destport = struct.unpack('!H', self.recvall(2))[0] + destport = struct_unpack('!H', self.recvall(2))[0] + + return (destaddr, destport) def _make_proxy(self, connect_func, address): if self._proxy.type == ProxyType.SOCKS4: @@ -330,7 +242,7 @@ class sockssocket(socket.socket): return connect_func(self, address) def connect(self, address): - self._make_proxy(_orig_socket.connect, address) + self._make_proxy(socket.socket.connect, address) def connect_ex(self, address): - return self._make_proxy(_orig_socket.connect_ex, address) + return self._make_proxy(socket.socket.connect_ex, address) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index fa16a42ad..b2e4a2dfb 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -55,6 +55,11 @@ from .compat import ( struct_pack, ) +from .socks import ( + ProxyType, + sockssocket, +) + # This is not clearly defined otherwise compiled_regex_type = type(re.compile('')) @@ -752,8 +757,15 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): self._params = params def http_open(self, req): + conn_class = compat_http_client.HTTPConnection + + socks_proxy = req.headers.get('Ytdl-socks-proxy') + if socks_proxy: + conn_class = make_socks_conn_class(conn_class, socks_proxy) + del req.headers['Ytdl-socks-proxy'] + return self.do_open(functools.partial( - _create_http_connection, self, compat_http_client.HTTPConnection, False), + _create_http_connection, self, conn_class, False), req) @staticmethod @@ -849,6 +861,41 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): https_response = http_response +def make_socks_conn_class(base_class, socks_proxy): + assert issubclass(base_class, ( + compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection)) + + url_components = compat_urlparse.urlparse(socks_proxy) + if url_components.scheme.lower() == 'socks5': + socks_type = ProxyType.SOCKS5 + elif url_components.scheme.lower() in ('socks', 'socks4'): + socks_type = ProxyType.SOCKS4 + + proxy_args = ( + socks_type, + url_components.hostname, url_components.port or 1080, + True, # Remote DNS + url_components.username, url_components.password + ) + + class SocksConnection(base_class): + def connect(self): + self.sock = sockssocket() + self.sock.setproxy(*proxy_args) + if type(self.timeout) in (int, float): + self.sock.settimeout(self.timeout) + self.sock.connect((self.host, self.port)) + + if isinstance(self, compat_http_client.HTTPSConnection): + if hasattr(self, '_context'): # Python > 2.6 + self.sock = self._context.wrap_socket( + self.sock, server_hostname=self.host) + else: + self.sock = ssl.wrap_socket(self.sock) + + return SocksConnection + + class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): def __init__(self, params, https_conn_class=None, *args, **kwargs): compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs) @@ -857,12 +904,20 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): def https_open(self, req): kwargs = {} + conn_class = self._https_conn_class + if hasattr(self, '_context'): # python > 2.6 kwargs['context'] = self._context if hasattr(self, '_check_hostname'): # python 3.x kwargs['check_hostname'] = self._check_hostname + + socks_proxy = req.headers.get('Ytdl-socks-proxy') + if socks_proxy: + conn_class = make_socks_conn_class(conn_class, socks_proxy) + del req.headers['Ytdl-socks-proxy'] + return self.do_open(functools.partial( - _create_http_connection, self, self._https_conn_class, True), + _create_http_connection, self, conn_class, True), req, **kwargs) @@ -2683,6 +2738,10 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): if proxy == '__noproxy__': return None # No Proxy + if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks5'): + req.add_header('Ytdl-socks-proxy', proxy) + # youtube-dl's http/https handlers do wrapping the socket with socks + return None return compat_urllib_request.ProxyHandler.proxy_open( self, req, proxy, type)