yt-dlp/youtube_dl/utils.py

#!/usr/bin/env python
# coding: utf-8

from __future__ import unicode_literals

import base64
import binascii
import calendar
import codecs
import contextlib
import ctypes
import datetime
import email.utils
import email.header
import errno
import functools
import gzip
import io
import itertools
import json
import locale
import math
import operator
import os
import platform
import random
import re
import socket
import ssl
import subprocess
import sys
import tempfile
import traceback
import xml.etree.ElementTree
import zlib

from .compat import (
    compat_HTMLParseError,
    compat_HTMLParser,
    compat_basestring,
    compat_chr,
    compat_cookiejar,
    compat_ctypes_WINFUNCTYPE,
    compat_etree_fromstring,
    compat_expanduser,
    compat_html_entities,
    compat_html_entities_html5,
    compat_http_client,
    compat_kwargs,
    compat_os_name,
    compat_parse_qs,
    compat_shlex_quote,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_parse_unquote_plus,
    compat_urllib_request,
    compat_urlparse,
    compat_xpath,
)

from .socks import (
    ProxyType,
    sockssocket,
)


def register_socks_protocols():
    """Add the SOCKS proxy schemes to the URL parser's list of netloc-aware schemes."""
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904:
    # URLs whose scheme is not listed in urlparse.uses_netloc are not split
    # correctly, so every SOCKS scheme that is still missing gets appended.
    netloc_schemes = compat_urlparse.uses_netloc
    missing = [
        scheme for scheme in ('socks', 'socks4', 'socks4a', 'socks5')
        if scheme not in netloc_schemes]
    netloc_schemes.extend(missing)


# Type object of a compiled regular expression. The standard library does not
# expose it under a stable public name on all supported Python versions, so it
# is obtained by compiling an empty pattern.
compiled_regex_type = type(re.compile(''))

# Default HTTP header set (browser-like User-Agent and Accept* values).
# NOTE(review): presumably these are the "standard headers" that the
# YoutubeDLHandler docstring says are added to every request - confirm.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Alternative User-Agent strings, keyed by browser name
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel used as the default of `default` parameters (see e.g.
# xpath_element), so that None can still be passed as a real default value
NO_DEFAULT = object()

# Full English month names, January first
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Full month names per language code, each list starting with January
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# File extensions (without the leading dot) of known media container, audio
# and manifest formats
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# Maps each accented character of the first string to its ASCII replacement.
# zip() pairs the two sequences position by position; replacements longer than
# one character ('AE', 'OE', 'ss', ...) are wrapped in a list so that chain()
# yields them as a single item.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))

# strptime() formats that are unambiguous regarding day/month order.
# English ordinal suffixes are matched literally, so every suffix
# (st, nd, rd, th) needs its own entry.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# Common formats plus the purely numeric ones read as day before month
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Common formats plus the purely numeric ones read as month before day
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the tail of a call of the form }('<payload>',<int>,<int>,'<a|b|c>'.split('|')
# and captures the payload, the two integers and the '|'-separated word list.
# NOTE(review): looks like the argument list of "packed" (p,a,c,k,e,d) JavaScript - confirm
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element (case-insensitive,
# dot matches newlines) and captures its body as the named group "json_ld"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'


def preferredencoding():
    """Return the encoding to use for this system.

    The value of locale.getpreferredencoding() is used if a test string
    can actually be encoded with it; otherwise 'UTF-8' is returned.
    """
    try:
        candidate = locale.getpreferredencoding()
        # Make sure the reported encoding is known and usable
        'TEST'.encode(candidate)
    except Exception:
        return 'UTF-8'
    return candidate


def write_json_file(obj, fn):
    """ Dump obj as JSON into a temporary file next to fn, then rename it to fn """

    fn = encodeFilename(fn)
    base_name = os.path.basename(fn)
    dir_name = os.path.dirname(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        # os.path.basename/dirname return bytes objects here, but
        # NamedTemporaryFile will fail if the filename contains non ascii
        # characters unless it is given unicode objects
        fs_encoding = get_filesystem_encoding()
        base_name = base_name.decode(fs_encoding)
        dir_name = dir_name.decode(fs_encoding)

    tmp_options = {
        'suffix': '.tmp',
        'prefix': base_name + '.',
        'dir': dir_name,
        'delete': False,
    }

    # json.dump expects a bytestream in Python 2.x
    # and writes to a character stream in Python 3.x
    if sys.version_info < (3, 0):
        tmp_options['mode'] = 'wb'
    else:
        tmp_options['mode'] = 'w'
        tmp_options['encoding'] = 'utf-8'

    tmp_file = tempfile.NamedTemporaryFile(**compat_kwargs(tmp_options))

    try:
        with tmp_file:
            json.dump(obj, tmp_file)
        if sys.platform == 'win32':
            # os.rename raises WindowsError or FileExistsError on Windows
            # if the target exists, so remove any existing file first
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tmp_file.name, fn)
    except Exception:
        # Do not leave the temporary file behind; then report the failure
        try:
            os.remove(tmp_file.name)
        except OSError:
            pass
        raise


if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Return the first element matching xpath[@key] or xpath[@key='val'] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Return the first element matching xpath that has attribute key (equal to val, if given) """
        for candidate in node.findall(compat_xpath(xpath)):
            if key in candidate.attrib and (val is None or candidate.attrib.get(key) == val):
                return candidate
        return None

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    """Expand every 'prefix:tag' step of path to '{namespace}tag' using ns_map.

    Steps without a prefix are kept unchanged; an unknown prefix raises
    KeyError.
    """
    def _expand(step):
        parts = step.split(':')
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(_expand(step) for step in path.split('/'))


def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element of node matching xpath.

    @param xpath    a single xpath string, or an iterable of xpaths that are
                    tried in order until one matches
    @param name     description used in the error message (defaults to xpath)
    @param fatal    raise ExtractorError if nothing matches and no default
                    was given
    @param default  value returned if nothing matches; takes precedence
                    over fatal
    Returns the element, default, or None.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    # Start from "not found" so that an empty iterable of xpaths is handled
    # like a failed lookup instead of raising UnboundLocalError below
    n = None
    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    A missing element or a missing text yields default if one was given,
    raises ExtractorError if fatal is set, and returns None otherwise.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if element is None or element == default:
        return element
    text = element.text
    if text is not None:
        return text
    if default is not NO_DEFAULT:
        return default
    if not fatal:
        return None
    name = xpath if name is None else name
    raise ExtractorError('Could not find XML element\'s text %s' % name)


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    If there is no such element, default is returned when given,
    ExtractorError is raised when fatal is set, and None is returned otherwise.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if not fatal:
        return None
    name = '%s[@%s]' % (xpath, key) if name is None else name
    raise ExtractorError('Could not find XML attribute %s' % name)


def get_element_by_id(id, html):
    """Return the content of the first tag in html whose id attribute equals id, or None"""
    content = get_element_by_attribute('id', id, html)
    return content


def get_element_by_class(class_name, html):
    """Return the content of the first tag in html carrying the given class, or None"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]


def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag in html whose attribute matches value, or None"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]


def get_elements_by_class(class_name, html):
    """Return a list with the content of every tag in html carrying the given class"""
    # The class may be one of several in the attribute value, so match it as
    # a whole word anywhere inside the (quote-free) value
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_value_re, html, escape_value=False)


def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return a list with the content of every tag in html whose attribute matches value

    value is matched literally unless escape_value is False, in which case it
    is inserted into the search pattern as a regular expression.
    """

    value = re.escape(value) if escape_value else value

    element_re = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    contents = []
    for match in re.finditer(element_re, html):
        content = match.group('content')

        # Content starting with a quote loses its first and last character
        if content.startswith(('"', "'")):
            content = content[1:-1]

        contents.append(unescapeHTML(content))

    return contents


class HTMLAttributeParser(compat_HTMLParser):
    """Minimal HTML parser that remembers the attributes of the last start tag it saw"""
    def __init__(self):
        compat_HTMLParser.__init__(self)
        # Attribute name -> value of the most recently handled start tag
        self.attrs = dict()

    def handle_starttag(self, tag, attrs):
        # attrs arrives as (name, value) pairs
        self.attrs = dict(attrs)


def extract_attributes(html_element):
    """Decode the attributes of a single HTML element into a dictionary.

    For an element string such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    the result is
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # whatever has been parsed up to that point is still returned
        pass
    return attr_parser.attrs


def clean_html(html):
    """Turn an HTML snippet into plain readable text (None is passed through)"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Literal newlines carry no meaning in HTML; line breaks come from tags
    text = html.replace('\n', ' ')
    substitutions = (
        # <br> and paragraph boundaries become newlines
        (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),
        (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # All remaining tags are dropped
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Finally decode HTML entities
    return unescapeHTML(text).strip()


def sanitize_open(filename, open_mode):
    """Open filename, retrying once with a sanitized name if that fails.

    The name '-' stands for standard output. If opening the file fails
    for a reason other than missing permissions, the name is passed through
    sanitize_path() and opened again; a failure of that second attempt
    propagates to the caller, like with the standard open() function.

    Returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename != '-':
            return (open(encodeFilename(filename), open_mode), filename)
        if sys.platform == 'win32':
            # Switch stdout to binary mode on Windows
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        out_stream = sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout
        return (out_stream, filename)
    except (IOError, OSError) as err:
        if err.errno == errno.EACCES:
            raise

        # Otherwise try again with win32 forbidden chars removed
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)


def timeconvert(timestr):
    """Convert an RFC 2822 time string into a system timestamp (None if unparsable)"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)


def sanitize_filename(s, restricted=False, is_id=False):
    """Make a string usable as part of a filename.

    With restricted set, a stricter subset of characters is allowed
    (accented characters are transliterated via ACCENT_CHARS, other
    non-ASCII characters, whitespace and shell-special characters become '_').
    Set is_id if s is not an arbitrary string but an ID that should be kept
    if possible; the cleanup of underscores, dashes and dots is skipped then.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    # Timestamps like 12:34:56 keep their digits together (12_34_56)
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if is_id:
        return result

    # Collapse runs of underscores and drop them at both ends
    result = re.sub(r'__+', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    # Avoid a leading dash and leading dots
    if result.startswith('-'):
        result = '_' + result[len('-'):]
    result = result.lstrip('.')
    return result or '_'


def sanitize_path(s):
    """Sanitize and normalize a path on Windows; other platforms get s back unchanged"""
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        parts.pop(0)

    def _sanitize_part(part):
        # '.' and '..' keep their meaning; elsewhere forbidden characters
        # and a trailing space or dot are replaced with '#'
        if part in ['.', '..']:
            return part
        return re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part)

    sanitized_parts = [_sanitize_part(part) for part in parts]
    if drive_or_unc:
        sanitized_parts.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_parts)


def sanitize_url(url):
    """Repair a few common URL defects: a missing scheme and known scheme typos"""
    # Protocol-less URLs get the `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Common typos seen so far, as (pattern, replacement) pairs
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for typo_re, correction in COMMON_TYPOS:
        # Patterns are anchored at the start, so at most one replacement happens
        fixed_url, replaced = re.subn(typo_re, correction, url)
        if replaced:
            return fixed_url
    return url


def sanitized_Request(url, *args, **kwargs):
    """Create a urllib Request for url after passing it through sanitize_url()"""
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)


def expand_path(s):
    """Expand ~ first, then shell variables, in the path s"""
    with_home = compat_expanduser(s)
    return os.path.expandvars(with_home)


def orderedSet(iterable):
    """ Return a list of the items of iterable without duplicates, keeping first occurrences in order """
    unique = []
    for item in iterable:
        # Membership is tested on the list, so unhashable items work as well
        if item in unique:
            continue
        unique.append(item)
    return unique


def _htmlentity_transform(entity_with_semicolon):
    """Return the character for an HTML entity given without '&' but with ';'.

    Entities that cannot be decoded are returned in their literal form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    codepoint = compat_html_entities.name2codepoint.get(entity)
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(codepoint)

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    # Numeric entity, decimal (#123) or hexadecimal (#x7b)
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        is_hex = numstr.startswith('x')
        base = 16 if is_hex else 10
        if is_hex:
            # int() wants the '0x' prefix
            numstr = '0%s' % numstr
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    """Replace the HTML entities in s with their characters (None is passed through)"""
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode_entity(match):
        # Group 1 is the entity without the '&' but including the ';'
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _decode_entity, s)


def get_subprocess_encoding():
    """Return the encoding used for subprocess arguments, falling back to 'utf-8'"""
    on_modern_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if on_modern_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding


def encodeFilename(s, for_subprocess=False):
    """
    Return s in the form expected by the filesystem (or subprocess) API

    @param s               The name of the file
    @param for_subprocess  The name is going to be passed to a subprocess
    """

    assert type(s) == compat_str

    keep_unicode = (
        # Python 3 has a Unicode API
        sys.version_info >= (3, 0)
        # Unicode APIs are used directly on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        or (not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)
        # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
        or sys.platform.startswith('java'))
    if keep_unicode:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')


def decodeFilename(b, for_subprocess=False):
    """Decode a byte string filename on Python 2; anything else is returned unchanged"""
    needs_decoding = sys.version_info < (3, 0) and isinstance(b, bytes)
    if not needs_decoding:
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')


def encodeArgument(s):
    """Encode a command line argument via encodeFilename(s, True)"""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)


def decodeArgument(b):
    """Decode a command line argument via decodeFilename() in subprocess mode"""
    return decodeFilename(b, for_subprocess=True)


def decodeOption(optval):
    """Return an option value as text; bytes are decoded with the preferred encoding"""
    if optval is None:
        return optval
    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(decoded, compat_str)
    return decoded


def formatSeconds(secs):
    """Format a number of seconds as 'S', 'M:SS' or 'H:MM:SS'.

    The shortest form that can hold the value is chosen: values from one
    minute on get a minutes field, values from one hour on an hours field.
    """
    # The boundaries are inclusive: exactly 60 seconds is '1:00' (not '60')
    # and exactly 3600 seconds is '1:00:00' (not '60:00')
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs


def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler with the SSL setup the running Python supports.

    @param params  options mapping; certificate verification is disabled
                   if its 'nocheckcertificate' entry is true (default False)
    @param kwargs  passed on to YoutubeDLHTTPSHandler
    """
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # check_hostname has to be switched off before verify_mode
            # can be set to CERT_NONE
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            # Fall through to the version-specific construction below
            pass

    if sys.version_info < (3, 2):
        # No SSL context is passed to the handler here
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        # Build the context by hand where create_default_context() is missing
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


def bug_reports_message():
    """Return the text appended to error messages that asks for a bug report"""
    if ytdl_is_updateable():
        update_cmd = 'type  youtube-dl -U  to update'
    else:
        update_cmd = 'see  https://yt-dl.org/update  on how to update'
    return ''.join((
        '; please report this issue on https://yt-dl.org/bug .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
    ))


class YoutubeDLError(Exception):
    """Common base class of the exceptions defined in this module."""


class ExtractorError(YoutubeDLError):
    """Error raised while extracting information."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """Build the final message and remember the error context.

        tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl;
        otherwise the bug report instructions are appended to the message.
        cause, if given, is appended to the message; video_id, if given, is prepended.
        """

        # Errors raised while one of these exceptions is being handled are
        # always treated as expected
        expected_exc_types = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        if sys.exc_info()[0] in expected_exc_types:
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None if there is none"""
        if self.traceback is not None:
            return ''.join(traceback.format_tb(self.traceback))
        return None


class UnsupportedError(ExtractorError):
    """Expected extraction error for a URL that is not supported; the URL is kept in .url"""
    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Extraction error for a regular expression that did not match"""


class GeoRestrictedError(ExtractorError):
    """Expected extraction error caused by a geographic restriction.

    May be raised when a website does not make a video available in the
    geographic location of the user. The message is kept in .msg and the
    optional countries argument in .countries.
    """
    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.countries = countries
        self.msg = msg


class DownloadError(YoutubeDLError):
    """Error during a download.

    FileDownloader objects may throw this exception if they are not
    configured to continue on errors. It carries the appropriate error
    message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, describes the exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Kept for callers that want to inspect the original exception
        self.exc_info = exc_info


class SameFileError(YoutubeDLError):
    """Several downloads would end up in the same file.

    FileDownloader objects throw this exception if they detect that
    multiple files would have to be downloaded to the same file on disk.
    """


class PostProcessingError(YoutubeDLError):
    """Error in a postprocessing task.

    A PostProcessor's .run() method may raise this exception to report
    a failure; the message is also available as .msg.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # Keep the plain message accessible as an attribute
        self.msg = msg


class MaxDownloadsReached(YoutubeDLError):
    """ The --max-downloads limit has been reached. """


class UnavailableVideoError(YoutubeDLError):
    """The requested format is unavailable.

    This exception is thrown when a video is requested in a format
    that is not available for that video.
    """


class ContentTooShortError(YoutubeDLError):
    """The downloaded content is shorter than announced.

    FileDownloader objects may raise this exception when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both sizes are in bytes
        self.downloaded = downloaded
        self.expected = expected


class XAttrMetadataError(YoutubeDLError):
    """Error while writing extended attribute metadata.

    The error code and message are classified into .reason, which is one of
    'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'.
    """
    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        # The message checks cover callers that only have an error text;
        # the quota text must be spelled like the OS message
        # ("Disk quota exceeded") or it can never match
        if (self.code in (errno.ENOSPC, errno.EDQUOT) or
                'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'


class XAttrUnavailableError(YoutubeDLError):
    # NOTE(review): judging by the name, raised when extended attributes
    # cannot be used at all - confirm against the code that raises it
    pass


def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Create an http_class connection, bound to the configured source address if any.

    @param ydl_handler  handler whose _params mapping is read for 'source_address'
    @param http_class   connection class, instantiated with *args and **kwargs
    @param is_https     only used by the Python 2.6 fallback, to decide whether
                        the socket gets wrapped with SSL
    Returns the connection object.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            # NB: the source_address parameter shadows the outer variable; it is
            # a (host, port) tuple here
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source host selects IPv4, anything else IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            # Keep only the remote addresses of the same family as the source address
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try the remaining addresses in order; the first successful
            # connection is returned
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the error of the latest failed attempt
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        # Replace the connection factory where the connection object has one
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        # Port 0 in the (host, port) pair passed to bind()
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address attribute: replace connect() with a version
            # that uses the filtering _create_connection() above
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc


def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' header.

    If that header is present, a copy of headers without it and without
    any Accept-Encoding header (compared case-insensitively) is returned;
    otherwise headers itself is returned unchanged.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    without_encoding = dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding')
    del without_encoding['Youtubedl-no-compression']
    return without_encoding


class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Store `params` and pass the remaining arguments on to HTTPHandler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open `req`, using a SOCKS connection class if the request asks for one."""
        conn_class = compat_http_client.HTTPConnection

        # The internal 'Ytdl-socks-proxy' header carries the proxy URL; it is
        # deleted from the request headers before the connection is opened
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate `data`, trying a raw stream first and a zlib-wrapped one second."""
        try:
            # Negative wbits: raw deflate stream without zlib header/trailer
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Pre-process an outgoing request.

        Percent-encodes the URL if needed, adds every header from
        `std_headers` that the request does not already carry and applies
        the internal pseudo-headers via handle_youtubedl_headers().
        Returns the (possibly replaced) request object.
        """
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Post-process a response.

        Replaces gzip- or deflate-encoded responses with a decompressed
        copy (dropping the Content-encoding header) and percent-encodes the
        Location header of 3xx responses. Returns the response to use.
        """
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1 to 1023 trailing bytes cut off; if no attempt
                # succeeds, re-raise the error from the first attempt
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    # Python 2 headers hold byte strings, so encode back
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic gets the same request/response processing
    https_request = http_request
    https_response = http_response


def make_socks_conn_class(base_class, socks_proxy):
    """Build a connection class that connects through a SOCKS proxy.

    `base_class` must be HTTPConnection or HTTPSConnection (or a subclass).
    `socks_proxy` is the proxy URL: its scheme (socks, socks4, socks4a or
    socks5) selects the protocol, its port defaults to 1080 and its
    username/password, if any, are percent-decoded before use.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    parsed = compat_urlparse.urlparse(socks_proxy)
    scheme = parsed.scheme.lower()
    # NOTE: no fallback branch; any other scheme leaves socks_type unbound
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def decode_credential(value):
        # Missing or empty credentials are passed through untouched
        return compat_urllib_parse_unquote_plus(value) if value else value

    proxy_args = (
        socks_type,
        parsed.hostname,
        parsed.port or 1080,
        True,  # Remote DNS
        decode_credential(parsed.username),
        decode_credential(parsed.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if not isinstance(self, compat_http_client.HTTPSConnection):
                return
            # HTTPS: wrap the proxied socket in TLS
            if hasattr(self, '_context'):  # Python > 2.6
                self.sock = self._context.wrap_socket(
                    self.sock, server_hostname=self.host)
            else:
                self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection


class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler with a configurable connection class and SOCKS proxy support."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open `req` over HTTPS, using a SOCKS connection class if the request asks for one."""
        # Forward only the TLS settings this Python version's HTTPSHandler has
        open_kwargs = {}
        for kwarg_name, attr_name in (
                ('context', '_context'),  # python > 2.6
                ('check_hostname', '_check_hostname')):  # python 3.x
            if hasattr(self, attr_name):
                open_kwargs[kwarg_name] = getattr(self, attr_name)

        conn_class = self._https_conn_class
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)


class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """MozillaCookieJar that accepts `#HttpOnly_` lines and `expires=0` session cookies."""

    _HTTPONLY_PREFIX = '#HttpOnly_'

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file, writing session cookies with `expires` 0.

        The arguments are passed unchanged to MozillaCookieJar.save().
        """
        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        session_cookies = [cookie for cookie in self if cookie.expires is None]
        for cookie in session_cookies:
            cookie.expires = 0
        try:
            compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
        finally:
            # Undo the temporary marker: a cookie left with expires == 0 is
            # reported as expired by Cookie.is_expired(), so it would no
            # longer be usable from this in-memory jar after saving
            for cookie in session_cookies:
                cookie.expires = None

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Lines starting with `#HttpOnly_` are loaded as regular cookie lines
        (the prefix is stripped). Raises ValueError when neither `filename`
        nor `self.filename` is set.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        cf = io.StringIO()
        # Decode explicitly as UTF-8: the builtin open() yields byte strings
        # on Python 2 and uses the locale encoding on Python 3, so non-ASCII
        # cookie files failed or were misread depending on the environment
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                if line.startswith(self._HTTPONLY_PREFIX):
                    line = line[len(self._HTTPONLY_PREFIX):]
                cf.write(line)
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True


class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """HTTPCookieProcessor that is also applied to HTTPS requests and responses."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        """Delegate to HTTPCookieProcessor.http_response() unchanged."""
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the workaround described above is currently disabled; the
        # commented-out code below is kept for reference only.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # Reuse the HTTP hooks for HTTPS
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response


def extract_timezone(date_str):
    """Split a trailing UTC offset off a date string.

    Returns a `(timezone, date_str)` tuple: the offset as a
    datetime.timedelta (zero when the string ends in "Z" or has no
    recognized offset) and the date string with the offset removed.
    """
    mobj = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if mobj is None:
        return datetime.timedelta(), date_str

    remainder = date_str[:-len(mobj.group('tz'))]
    sign_char = mobj.group('sign')
    if not sign_char:
        # A bare "Z" suffix: no offset to apply
        return datetime.timedelta(), remainder

    multiplier = -1 if sign_char == '-' else 1
    offset = datetime.timedelta(
        hours=multiplier * int(mobj.group('hours')),
        minutes=multiplier * int(mobj.group('minutes')))
    return offset, remainder


def parse_iso8601(date_str, delimiter='T', timezone=None):
    """Return a UNIX timestamp from the given date, or None if it cannot be parsed.

    `delimiter` separates the date and time parts. `timezone` is the UTC
    offset as a datetime.timedelta; when it is None the offset is taken
    (and removed) from the end of `date_str` via extract_timezone().
    """
    if date_str is None:
        return None

    # Fractional seconds are dropped before parsing
    without_fraction = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, without_fraction = extract_timezone(without_fraction)

    try:
        parsed = datetime.datetime.strptime(
            without_fraction, '%Y-%m-%d{0}%H:%M:%S'.format(delimiter))
        return calendar.timegm((parsed - timezone).timetuple())
    except ValueError:
        return None


def date_formats(day_first=True):
    """Return the day-first or the month-first list of date formats."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST


def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None when `date_str` is None or matches no known format.
    """
    if date_str is None:
        return None

    # Commas become spaces, then AM/PM markers (with an optional timezone
    # abbreviation) and a trailing numeric UTC offset are stripped
    cleaned = re.sub(
        r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str.replace(',', ' '))
    cleaned = extract_timezone(cleaned)[1]

    upload_date = None
    # No early exit: when several formats match, the last one tried wins
    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        # Last resort: RFC 2822 style dates
        rfc2822_fields = email.utils.parsedate_tz(cleaned)
        if rfc2822_fields:
            try:
                upload_date = datetime.datetime(*rfc2822_fields[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    return None if upload_date is None else compat_str(upload_date)


def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string.

    The string is normalized, tried against every format from
    date_formats(day_first) and finally against
    email.utils.parsedate_tz(). Returns None when `date_str` is None or
    nothing matches.
    """
    if date_str is None:
        return None

    # Drop commas and pipe characters
    date_str = re.sub(r'[,|]', '', date_str)

    # 12 hours are added whenever "PM" (case-insensitive) occurs anywhere in
    # the string
    # NOTE(review): this also applies to "12:xx PM" times - confirm intended
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    # The first format that parses wins
    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Fallback: RFC 2822 style dates
    # NOTE(review): `timezone` extracted above is not applied on this path
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600


def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from `url`.

    Looks at the text after the last '.' in the part of the URL before
    any '?'. Returns `default_ext` when the URL is None, has no '.', or
    the candidate is neither purely alphanumeric nor a known extension.
    """
    if url is None or '.' not in url:
        return default_ext

    before_query = url.partition('?')[0]
    candidate = before_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext


def subtitles_filename(filename, sub_lang, sub_format):
    """Return `filename` with its last extension replaced by `<sub_lang>.<sub_format>`."""
    stem = filename.rsplit('.', 1)[0]
    return '.'.join((stem, sub_lang, sub_format))


def date_from_str(date_str):
    """
    Return a datetime.date from a string in the format YYYYMMDD, one of
    now, today or yesterday, or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    mobj = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if mobj is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(mobj.group('time'))
    if mobj.group('sign') == '-':
        amount = -amount
    # Months and years are approximated as 30 and 365 days
    days_per_unit = {'day': 1, 'week': 7, 'month': 30, 'year': 365}
    offset_days = amount * days_per_unit[mobj.group('unit')]
    return today + datetime.timedelta(days=offset_days)


def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings in any other format are returned unchanged."""
    mobj = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if mobj is None:
        return date_str
    year, month, day = mobj.groups()
    return '%s-%s-%s' % (year, month, day)


class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by
        date_from_str; None selects the earliest/latest representable date"""
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.end < self.start:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date (a datetime.date or a date string) is in the range"""
        if isinstance(date, datetime.date):
            candidate = date
        else:
            candidate = date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return ' - '.join((self.start.isoformat(), self.end.isoformat()))


def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Byte strings are decoded with the preferred encoding
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())
    assert isinstance(name, compat_str)
    return name


def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    The string is written with WriteConsoleW when `out` is file descriptor
    1 or 2 and the corresponding standard handle is a console. Raises
    OSError if WriteConsoleW reports failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for invalid handles, non-character files, and handles for
        # which GetConsoleMode fails
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters that stop before the next
    # non-BMP character; a leading non-BMP character is written on its own
    # with a length of 2
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True


def write_string(s, out=None, encoding=None):
    """Write the text `s` to `out` (default: sys.stderr) and flush it.

    On Windows, when no `encoding` is given, the string is first offered
    to _windows_write_string(); if that handles it, nothing else is done.
    Otherwise the string is encoded (ignoring unencodable characters) for
    binary streams and streams with a `buffer`, or written as text.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if (sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno')
            and _windows_write_string(s, out)):
        return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        target_encoding = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(target_encoding, 'ignore'))
    else:
        out.write(s)
    out.flush()


def bytes_to_intlist(bs):
    """Convert a byte string into a list of integer byte values."""
    if not bs:
        return []
    # Indexing bytes yields ints on Python 3 but 1-char strings on Python 2
    if not isinstance(bs[0], int):
        return [ord(ch) for ch in bs]
    return list(bs)


def intlist_to_bytes(xs):
    """Pack a sequence of integers into a byte string, one unsigned byte each."""
    if xs:
        byte_format = '%dB' % len(xs)
        return compat_struct_pack(byte_format, *xs)
    return b''


# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform: LockFileEx/UnlockFileEx on Windows, fcntl.flock elsewhere, and
# stubs that raise IOError where fcntl is unavailable.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high DWORDs of the byte count passed to (Un)LockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock `f` via LockFileEx; raises OSError on failure."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so that _unlock_file() can pass the same
        # structure
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 is LOCKFILE_EXCLUSIVE_LOCK; 0x0 requests a shared lock
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file(); raises OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            """Take an exclusive or shared flock() lock on `f`."""
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            """Release the flock() lock on `f`."""
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)


class locked_file(object):
    """File wrapper that locks the file for the duration of a `with` block.

    The file is opened on construction; entering the context takes a shared
    lock for mode 'r' and an exclusive one for 'a' and 'w'; leaving it
    unlocks and closes the file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ('r', 'a', 'w')
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        needs_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, needs_exclusive)
        except IOError:
            # Do not leak the open file when locking fails
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)


def get_filesystem_encoding():
    """Return the filesystem encoding, or 'utf-8' if Python reports none."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding


def shell_quote(args):
    """Return `args` as one space-separated string with every item shell-quoted."""
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(encoding) if isinstance(arg, bytes) else arg

    return ' '.join(compat_shlex_quote(as_text(arg)) for arg in args)


def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    `data` is serialized as JSON into the URL fragment. If `url` already
    carries smuggled data, both are merged, with the values already in the
    URL taking precedence. `data` itself is not modified.
    """

    url, idata = unsmuggle_url(url, {})
    # Merge into a copy so that the caller's dict is left untouched
    merged = dict(data)
    merged.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata


def unsmuggle_url(smug_url, default=None):
    """Split a URL made by smuggle_url() into `(url, data)`.

    URLs without smuggled data are returned unchanged together with
    `default`.
    """
    if '#__youtubedl_smuggle' in smug_url:
        url, _, fragment = smug_url.rpartition('#')
        payload = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
        return url, json.loads(payload)
    return smug_url, default


def format_bytes(bytes):
    """Format a byte count with two decimals and a binary suffix (B, KiB, MiB, ...).

    Returns 'N/A' for None; `str` input is converted with float() first.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    # log() is undefined for 0, which belongs to the plain "B" bucket anyway
    exponent = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    suffixes = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
    suffix = suffixes[exponent]
    scaled = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (scaled, suffix)


def lookup_unit_table(unit_table, s):
    """Parse a leading "<number> <unit>" from `s` and scale it by the unit's multiplier.

    `unit_table` maps unit names to multipliers. The number may use ',' or
    '.' as decimal separator. Returns the result truncated to int, or None
    if `s` does not start with a number followed by a known unit.
    """
    alternatives = '|'.join(map(re.escape, unit_table))
    mobj = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives, s)
    if mobj is None:
        return None
    number_text = mobj.group('num').replace(',', '.')
    multiplier = unit_table[mobj.group('unit')]
    return int(float(number_text) * multiplier)


def parse_filesize(s):
    """Parse a file size such as "1.5 MiB" or "10 kilobytes" into a number of bytes.

    Returns None for None input and whatever lookup_unit_table() returns
    otherwise (None when the string is not recognized).
    """
    if s is None:
        return None

    # (symbol letter, decimal name stem, binary name stem), by increasing power
    prefixes = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    unit_table = {'B': 1, 'b': 1, 'bytes': 1}
    for power, (letter, decimal_stem, binary_stem) in enumerate(prefixes, 1):
        decimal = 1000 ** power
        binary = 1024 ** power
        lower = letter.lower()
        unit_table[letter + 'iB'] = binary
        unit_table[letter + 'B'] = decimal
        unit_table[lower + 'B'] = binary
        unit_table[letter + 'b'] = decimal
        unit_table[lower + 'b'] = decimal
        unit_table[decimal_stem + 'bytes'] = decimal
        unit_table[binary_stem + 'bytes'] = binary

    return lookup_unit_table(unit_table, s)


def parse_count(s):
    """Parse a count such as "1,234" or "1.5M" into an int.

    Returns None for None input or when the string is not recognized.
    """
    if s is None:
        return None

    text = s.strip()

    # Only digits and separators: no unit handling needed
    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    multipliers = {}
    for unit in ('k', 'K'):
        multipliers[unit] = 1000
    for unit in ('m', 'M', 'kk', 'KK'):
        multipliers[unit] = 1000 ** 2

    return lookup_unit_table(multipliers, text)


def parse_resolution(s):
    """Extract video dimensions from a string.

    Recognizes "<width>x<height>" (both keys set), "<height>p"/"<height>i"
    and "4k"/"8k" (height only, as 540 times the number). Returns a dict
    with the keys found, or an empty dict.
    """
    if s is None:
        return {}

    dimensions = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dimensions:
        width, height = dimensions.group('w', 'h')
        return {
            'width': int(width),
            'height': int(height),
        }

    scan_lines = re.search(r'\b(\d+)[pPiI]\b', s)
    if scan_lines:
        return {'height': int(scan_lines.group(1))}

    k_label = re.search(r'\b([48])[kK]\b', s)
    if k_label:
        return {'height': int(k_label.group(1)) * 540}

    return {}


def parse_bitrate(s):
    """Return the integer that precedes "kbps" in `s`, or None if there is none."""
    if isinstance(s, compat_str):
        mobj = re.search(r'\b(\d+)\s*kbps', s)
        if mobj:
            return int(mobj.group(1))
    return None


def month_by_name(name, lang='en'):
    """ Return the number of a month by its name in language `lang`

    Languages missing from MONTH_NAMES fall back to English; unknown month
    names give None. """

    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name not in month_names:
        return None
    return month_names.index(name) + 1


def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations, or None if the abbreviation is unknown """

    for number, month in enumerate(ENGLISH_MONTH_NAMES, 1):
        # The abbreviation is the first three letters of the full name
        if month[:3] == abbrev:
            return number
    return None


def fix_xml_ampersands(xml_str):
    """Replace every '&' that does not start an entity by '&amp;' in XML

    The five predefined entities and numeric character references of up to
    four digits are left alone."""
    bare_ampersand = re.compile(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return bare_ampersand.sub('&amp;', xml_str)


def setproctitle(title):
    """Set the process name to `title` through glibc's prctl(), where available.

    Does nothing on Jython or when libc.so.6 / prctl cannot be used.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    # Initializing the buffer from the bytes makes ctypes allocate one extra
    # byte for the terminating NUL that prctl() expects; a buffer of exactly
    # len(title_bytes) would hand over an unterminated string
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this


def remove_start(s, start):
    """Return `s` without the prefix `start` if it has one; None is passed through."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s


def remove_end(s, end):
    """Return `s` without the suffix `end` if it has one; None is passed through."""
    # An empty suffix must be skipped explicitly: every string "ends with"
    # '' and s[:-0] would evaluate to the empty string instead of s
    if s is not None and end and s.endswith(end):
        return s[:-len(end)]
    return s


def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding `s`, if any."""
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s


def url_basename(url):
    """Return the last segment of the path of `url`, ignoring leading/trailing slashes."""
    trimmed_path = compat_urlparse.urlparse(url).path.strip('/')
    return trimmed_path.rpartition('/')[2]


def base_url(url):
    """Return the http(s) `url` up to and including the last '/' before any '?', '#' or '&'.

    Raises AttributeError if `url` does not match that shape.
    """
    mobj = re.match(r'https?://[^?#&]+/', url)
    return mobj.group()


def urljoin(base, path):
    """Join `path` onto `base`, returning None for unusable input.

    Byte strings are decoded as UTF-8. A `path` that already has a scheme
    and/or starts with '//' is returned as is. Otherwise `base` must be an
    http(s) or protocol-relative URL, else None is returned.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not (isinstance(path, compat_str) and path):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path

    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if isinstance(base, compat_str) and re.match(r'^(?:https?:)?//', base):
        return compat_urlparse.urljoin(base, path)
    return None


class HEADRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always HEAD."""

    def get_method(self):
        return 'HEAD'


class PUTRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always PUT."""

    def get_method(self):
        return 'PUT'


def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to an int, returning default when that is not possible.

    When get_attr is given, the attribute of that name is read from v first
    (a missing attribute counts as None). None and the empty string yield
    default. The converted value is multiplied by invscale and
    floor-divided by scale.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        # TypeError: objects int() cannot handle at all (lists, dicts, ...)
        # OverflowError: float infinities
        return default


def str_or_none(v, default=None):
    """Return v converted with compat_str, or default when v is None."""
    if v is None:
        return default
    return compat_str(v)


def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Commas, dots and plus signs are dropped before the int() conversion.
    None is passed through; anything int() rejects still raises.
    """
    if int_str is None:
        return None
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int(digits)


def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to a float, returning default when that is not possible.

    None yields default. The converted value is multiplied by invscale and
    divided by scale.
    """
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        # TypeError covers objects float() cannot handle at all (lists, ...)
        return default


def bool_or_none(v, default=None):
    """Return v when it is a real bool, default for anything else."""
    if isinstance(v, bool):
        return v
    return default


def strip_or_none(v):
    """Return v.strip(), passing None through unchanged."""
    if v is None:
        return None
    return v.strip()


def url_or_none(url):
    """Return the stripped url when it looks like a URL, otherwise None.

    A value qualifies when it is a non-empty compat_str that, once stripped,
    starts with an optional scheme followed by '//'.
    """
    if not (url and isinstance(url, compat_str)):
        return None
    candidate = url.strip()
    if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', candidate):
        return candidate
    return None


def parse_duration(s):
    """Parse a textual duration into a number of seconds.

    Three notations are tried in turn: colon-separated clock values
    ([[[days:]hours:]mins:]secs[.fraction]), unit-suffixed values with
    optional P/T markers (years, months and weeks are accepted but do not
    contribute to the result), and a lone "N hours" / "N minutes" value
    that may be fractional. Returns None for non-strings and for text that
    matches none of the notations.
    """
    if not isinstance(s, compat_basestring):
        return None

    text = s.strip()

    # Try the notations from most to least specific; the first hit wins.
    match = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', text)
    if not match:
        match = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', text)
    if not match:
        match = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', text)
    if not match:
        return None

    # The last notation only defines hours and mins; everything it does not
    # define stays None.
    parts = dict.fromkeys(('days', 'hours', 'mins', 'secs', 'ms'))
    parts.update(match.groupdict())

    duration = 0
    if parts['secs']:
        duration += float(parts['secs'])
    if parts['mins']:
        duration += float(parts['mins']) * 60
    if parts['hours']:
        duration += float(parts['hours']) * 60 * 60
    if parts['days']:
        duration += float(parts['days']) * 24 * 60 * 60
    if parts['ms']:
        duration += float(parts['ms'])
    return duration


def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext in front of the filename's current extension.

    When expected_real_ext is given and the current extension differs from
    it, ext is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)


def replace_extension(filename, ext, expected_real_ext=None):
    """Swap the filename's extension for ext.

    When expected_real_ext is given and the current extension differs from
    it, ext is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    else:
        stem = name
    return '{0}.{1}'.format(stem, ext)


def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version)

    Returns False when launching the binary raises OSError. """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        return False
    return exe


def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present

    The combined stdout/stderr of running exe with args is handed to
    detect_exe_version() together with version_re and unrecognized. """
    command = [encodeArgument(exe)] + args
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dl is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = process.communicate()
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)


def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from a program's output.

    version_re must capture the version in its first group; it defaults to
    matching the word "version" followed by the version token. When nothing
    matches, unrecognized is returned.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    mobj = re.search(pattern, output)
    return mobj.group(1) if mobj else unrecognized


class PagedList(object):
    """Base class for paged lists; subclasses are expected to provide getslice()."""

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)


class OnDemandPagedList(PagedList):
    """Paged list that asks pagefunc for pages one at a time, as needed."""

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        # The cache attribute only exists when caching is enabled
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Return the items in [start, end), fetching only the pages involved."""
        collected = []
        pagesize = self._pagesize
        for page_index in itertools.count(start // pagesize):
            page_start = page_index * pagesize
            page_stop = page_start + pagesize
            if start >= page_stop:
                continue

            if self._use_cache:
                entries = self._cache.get(page_index)
                if entries is None:
                    entries = list(self._pagefunc(page_index))
                self._cache[page_index] = entries
            else:
                entries = list(self._pagefunc(page_index))

            # Offset into the page; only non-zero on the page holding start
            offset = start % pagesize if page_start <= start < page_stop else 0

            # Cut-off inside the page; only set on the page holding end
            if end is not None and page_start <= end <= page_stop:
                cutoff = ((end - 1) % pagesize) + 1
            else:
                cutoff = None

            if offset != 0 or cutoff is not None:
                entries = entries[offset:cutoff]
            collected.extend(entries)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(entries) + offset < pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == page_stop:
                break
        return collected


class InAdvancePagedList(PagedList):
    """Paged list for which the number of pages is known up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the items in [start, end), fetching only the pages involved."""
        res = []
        start_page = start // self._pagesize
        # Clamp to the known page count: an end beyond the last page must not
        # make us call pagefunc for pages that do not exist.
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            # Only the first fetched page needs its leading items dropped
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page completes the requested range
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res


def uppercase_escape(s):
    """Replace every literal \\UXXXXXXXX escape in s with the character it names."""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(mobj):
        text, _ = decode(mobj.group(0))
        return text

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)


def lowercase_escape(s):
    """Replace every literal \\uXXXX escape in s with the character it names."""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(mobj):
        text, _ = decode(mobj.group(0))
        return text

    return re.sub(r'\\u[0-9a-fA-F]{4}', _expand, s)


def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2 a text string is encoded to UTF-8 before being quoted
    needs_encoding = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if needs_encoding:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")


def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host is IDNA-encoded; every other component is percent-escaped
    escaped = parts._replace(
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment))
    return escaped.geturl()


def read_batch_urls(batch_fd):
    """Read URLs from a batch file object, one per line, and close it.

    Lines are decoded as UTF-8 when they are not text already, a leading
    byte order mark is dropped and surrounding whitespace is stripped.
    Empty lines and lines starting with '#', ';' or ']' are skipped.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # A UTF-8 BOM decodes to '\ufeff', which strip() does not remove.
        # '\xef\xbb\xbf' is what the BOM bytes look like when the line was
        # decoded one character per byte.
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]


def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments and return the result as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')


def update_url_query(url, query):
    """Return url with the entries of query merged into its query string.

    Entries in query replace parameters of the same name already present.
    A falsy query leaves the url untouched.
    """
    if not query:
        return url
    parsed_url = compat_urlparse.urlparse(url)
    merged = compat_parse_qs(parsed_url.query)
    merged.update(query)
    new_query = compat_urllib_parse_urlencode(merged, True)
    return compat_urlparse.urlunparse(parsed_url._replace(query=new_query))


def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new request from req with some of its parts replaced.

    headers are merged over the original headers, data and url replace the
    original ones when truthy, and query is merged into the URL's query
    string. HEAD and PUT requests keep their method; a timeout attribute on
    req is carried over.
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    new_data = data or req.data
    new_url = update_url_query(url or req.get_full_url(), query)
    # Pick the request class that reports the same method as the original
    request_classes = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }
    req_type = request_classes.get(
        req.get_method(), compat_urllib_request.Request)
    new_req = req_type(
        new_url, data=new_data, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req


def _multipart_encode_impl(data, boundary):
    """Encode data as multipart/form-data using the given boundary.

    Returns a (body bytes, Content-Type value) pair. Raises ValueError when
    the boundary occurs inside one of the encoded fields.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')
    delimiter = b'--' + boundary_bytes + b'\r\n'

    chunks = []
    for name, value in data.items():
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if boundary_bytes in part:
            raise ValueError('Boundary overlaps with data')
        chunks.append(delimiter)
        chunks.append(part)

    chunks.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(chunks), content_type


def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns the (body, content type) pair produced by
    _multipart_encode_impl. A caller-supplied boundary that clashes with the
    data raises ValueError; random boundaries are regenerated until one fits.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        return _multipart_encode_impl(data, boundary)

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            # The random boundary occurs in the data; draw another one
            continue


def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable key out of a list/tuple of keys.

    With several keys, missing keys and None values are always skipped;
    other falsy values are skipped as well unless skip_false_values is
    False. default is returned when nothing usable is found.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default


def try_get(src, getter, expected_type=None):
    """Apply getter (or each of a list/tuple of getters) to src.

    The first result obtained without AttributeError, KeyError, TypeError
    or IndexError that also satisfies expected_type (when given) is
    returned; None is returned when no getter delivers one.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for func in getters:
        try:
            value = func(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None


def merge_dicts(*dicts):
    """Merge dicts, letting earlier dicts take precedence.

    None values are ignored. A key that is already present is only
    overwritten when it holds an empty string and the new value is a
    non-empty string.
    """
    merged = {}
    for source in dicts:
        for key, value in source.items():
            if value is None:
                continue
            if key in merged:
                current = merged[key]
                fills_blank = (
                    isinstance(value, compat_str) and value
                    and isinstance(current, compat_str) and not current)
                if not fills_blank:
                    continue
            merged[key] = value
    return merged


def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as compat_str, decoding it with encoding/errors if needed."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)


# Age limit that parse_age_limit() returns for each US rating label
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# Age limit that parse_age_limit() returns for each TV rating label; the
# part after the 'TV-' prefix is also used there to build the matching regex
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}


def parse_age_limit(s):
    """Turn an age rating into a numeric age limit, or None if unrecognised.

    Accepted are plain ints between 0 and 21, strings of one or two digits
    with an optional trailing '+', the labels in US_RATINGS, and the labels
    in TV_PARENTAL_GUIDELINES (with '-', '_' or nothing after 'TV').
    """
    # Exact type check: bool is a subclass of int but is not accepted here
    if type(s) is int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None

    age_match = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if age_match:
        return int(age_match.group('age'))

    if s in US_RATINGS:
        return US_RATINGS[s]

    tv_labels = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_match = re.match(r'^TV[_-]?(%s)$' % tv_labels, s)
    if tv_match:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_match.group(1)]
    return None


def strip_jsonp(code):
    """Unwrap a JSONP response, returning just the callback's argument text.

    Input that does not look like a callback invocation is returned
    unchanged.
    """
    jsonp_re = re.compile(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''')
    return jsonp_re.sub(r'\g<callback_data>', code)


def js_to_json(code):
    """Convert a JavaScript object/array literal into JSON text.

    Single-quoted strings are re-quoted with double quotes, bare identifiers
    are quoted, comments and trailing commas before a closing bracket are
    removed, and hexadecimal/octal integer literals (including those used as
    keys) are rewritten in decimal. The contents of quoted strings are kept
    as strings even when they look like integer literals.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v == ',':
            # Comments and trailing commas are dropped
            return ""

        if v[0] in ("'", '"'):
            # Quoted string: normalise the escapes for a double-quoted JSON
            # string and keep the content as a string
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            # Only unquoted tokens may be integer literals; running this on
            # string contents would turn e.g. "0x10" into the number 16
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    return '"%d":' % i if v.endswith(':') else '%d' % i

        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)


def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    The returned function maps a quality id to its index in quality_ids,
    or to -1 when the id is not listed. """
    def q(qid):
        try:
            rank = quality_ids.index(qid)
        except ValueError:
            rank = -1
        return rank
    return q


# Default output template: a %-style format string with the named fields
# title, id and ext
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'


def limit_length(s, length):
    """ Add ellipses to overly long strings

    Strings longer than length are cut so that the result, ellipses
    included, is length characters long. None is passed through. """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    keep = length - len(ELLIPSES)
    return s[:keep] + ELLIPSES


def version_tuple(v):
    """Split a version string on '.' and '-' into a tuple of ints.

    Raises ValueError when a component is not an integer.
    """
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))


def is_outdated_version(version, limit, assume_new=True):
    """Tell whether version is older than limit.

    When version is empty or either value cannot be parsed, the answer is
    the opposite of assume_new.
    """
    if not version:
        return not assume_new
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        outdated = not assume_new
    return outdated


def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U

    That is the case when this module was loaded by a zipimporter or when
    sys has a 'frozen' attribute. """
    from zipimport import zipimporter

    loader = globals().get('__loader__')
    if isinstance(loader, zipimporter):
        return True
    return hasattr(sys, 'frozen')


def args_to_str(args):
    """Join args into one string, shell-quoting each argument."""
    # Get a short string representation for a subprocess command
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)


def error_to_compat_str(err):
    """Return the message of err as a text string."""
    if sys.version_info[0] >= 3:
        return str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return str(err).decode(preferredencoding())


def mimetype2ext(mt):
    """Map a MIME type to a file extension.

    A few complete MIME types are special-cased; otherwise the subtype
    (lower-cased, with any ';' parameters removed) is translated through a
    table and returned unchanged when the table has no entry for it.
    None is passed through.
    """
    if mt is None:
        return None

    FULL_TYPE_EXTS = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }
    SUBTYPE_EXTS = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
    }

    if mt in FULL_TYPE_EXTS:
        return FULL_TYPE_EXTS[mt]

    subtype = mt.rpartition('/')[2]
    subtype = subtype.split(';')[0].strip().lower()
    return SUBTYPE_EXTS.get(subtype, subtype)


def parse_codecs(codecs_str):
    """Split a comma-separated codecs string into video and audio codec.

    Returns a dict with 'vcodec' and 'acodec' keys ('none' marks an absent
    stream), or an empty dict when nothing can be derived. Codecs outside
    the known lists trigger a warning on stderr. When no codec is
    recognised, two entries are taken as video followed by audio, and a
    single entry is reported as the audio codec.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    splited_codecs = [
        codec.strip()
        for codec in codecs_str.strip().strip(',').split(',')
        if codec.strip()]
    vcodec, acodec = None, None
    for full_codec in splited_codecs:
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    # Nothing was recognised, so vcodec and acodec are both None here: the
    # fallback has to use the raw entries to report anything at all.
    if len(splited_codecs) == 2:
        return {
            'vcodec': splited_codecs[0],
            'acodec': splited_codecs[1],
        }
    elif len(splited_codecs) == 1:
        # NOTE(review): a lone unknown codec is reported as audio, following
        # the keys the original code populated here; confirm that intent
        return {
            'vcodec': 'none',
            'acodec': splited_codecs[0],
        }
    return {}


def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a response's headers.

    The filename of an 'attachment' Content-Disposition is preferred; the
    Content-Type is used when that yields nothing.
    """
    header = url_handle.headers.get

    disposition = header('Content-Disposition')
    if disposition:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if mobj:
            ext = determine_ext(mobj.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(header('Content-Type'))


def encode_data_uri(data, mime_type):
    """Build a base64 data: URI carrying data with the given MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)


def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # Nothing is blocked when no limit is set on either side
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit


def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    The bytes are decoded according to a leading byte order mark (UTF-8
    otherwise); the result is the match object for optional whitespace
    followed by '<', or None. """

    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    payload, encoding = first_bytes, 'utf-8'
    for bom, bom_encoding in BOMS:
        if first_bytes.startswith(bom):
            payload, encoding = first_bytes[len(bom):], bom_encoding
            break

    return re.match(r'^\s*<', payload.decode(encoding, 'replace'))


def determine_protocol(info_dict):
    """Work out the download protocol for an info dict.

    An explicit 'protocol' entry wins. Otherwise the URL decides: an rtmp,
    mms or rtsp prefix, then an m3u8 or f4m extension, and finally the
    URL's scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme


def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    rows = [header_row] + data
    widths = []
    for column in zip(*rows):
        widths.append(max(len(compat_str(cell)) for cell in column))
    # Every column but the last is left-aligned and padded one past its
    # widest cell; the last column is written as is
    padded_specs = ['%-' + compat_str(width + 1) + 's' for width in widths[:-1]]
    row_format = ' '.join(padded_specs) + '%s'
    return '\n'.join(row_format % tuple(row) for row in rows)


def _match_one(filter_part, dct):
    """Evaluate one filter expression against dct.

    Two forms are understood: a comparison "key OP value" (OP being one of
    <, <=, >, >=, =, !=, optionally followed by '?') and a unary test "key"
    or "!key". For comparisons, a None field yields the matched '?' text
    (None when no '?' was given). Raises ValueError for an expression of
    neither form, for an ordering operator used on a string value, and for
    a numeric value that cannot be parsed.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        # String comparison applies to quoted and bare string values, and to
        # numeric-looking values when the field itself holds a string
        if (m.group('quotedstrval') is not None or
            m.group('strval') is not None or
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082).
            actual_value is not None and m.group('intval') is not None and
                isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape the quote character inside a quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            # Numeric comparison: plain integer first, then a file size, then
            # a file size with a 'B' suffix appended
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field: the result is the matched '?' text, which is
            # None (falsy) when the operator had no '?' suffix
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary form: "key" tests for presence (True for bool fields), "!key"
    # for absence (False for bool fields)
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)


def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false

    filter_str is split on '&'; every part has to match. """

    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True


def match_filter_func(filter_str):
    """Build a function that checks an info dict against filter_str.

    The returned function gives None when the dict passes the filter and a
    message explaining the skip otherwise.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func


def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP time expression into seconds.

    Understood are a plain offset in seconds with an optional 's' suffix,
    and clock values H:MM:SS whose fraction is separated by '.' or ':'.
    Returns None for empty or unrecognised input.
    """
    if not time_expr:
        return None

    offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_match:
        return float(offset_match.group('time_offset'))

    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock_match:
        hours, minutes, seconds = clock_match.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))

    return None


def srt_subtitles_timecode(seconds):
    """Format a number of seconds as an SRT timecode (HH:MM:SS,mmm).

    The value is rounded to the nearest millisecond first. Formatting the
    float parts with %d truncates them instead, which turns e.g. 5.84 into
    "00:00:05,839".
    """
    total_ms = int(round(seconds * 1000))
    total_secs, ms = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, ms)


def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data

    Raises ValueError when the document contains no <p> elements.
    Paragraphs without a begin time, or with neither an end time nor a
    duration, are left out of the result.
    '''
    # Legacy namespace URIs are rewritten to the current ones in the raw
    # bytes before parsing
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # The only styling attributes that are read from the document
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> resolved styling properties (parent properties included)
    styles = {}
    # Properties taken from the style referenced by <body> / <div>
    default_style = {}

    class TTMLPElementParser(object):
        _out = ''
        # NOTE(review): these two lists are class attributes that start() and
        # end() mutate in place, so every parser instance created by
        # parse_node() shares them. Left as is on purpose: changing it would
        # alter the markup produced for later paragraphs.
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                # Later updates win: defaults, then the referenced style,
                # then the element's own tts:* attributes
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties the most recently applied style
                        # already set to the same value
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close, innermost first, the tags this element opened
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialise the paragraph and replay it through the target parser
        # to produce its SRT markup
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # A style may reference a parent that is declared later in the document,
    # so keep making passes until no style is left waiting for its parent.
    # Also stop when a pass defers exactly as many styles as the previous
    # one: nothing new was resolved, so the remaining parents will never
    # show up (missing, or without any supported property) and looping on
    # would never end.
    previously_deferred = None
    while True:
        deferred = 0
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    deferred += 1
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if not deferred or deferred == previously_deferred:
            break
        previously_deferred = deferred

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)


def cli_option(params, command_option, param):
    """Build the argument list for an option that takes a value.

    Returns [command_option, value] with the value converted to text, or an
    empty list when params has no value (or None) for param.
    """
    value = params.get(param)
    if value is None:
        return []
    # Convert every present value, falsy ones such as 0 included, so that
    # the argument list only ever holds strings
    return [command_option, compat_str(value)]


def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the argument list for a boolean option.

    Returns an empty list when params has no value (or None) for param.
    Otherwise the value must be a bool; it is rendered as true_value or
    false_value, either as a separate argument or joined to the option with
    separator.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]


def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return ``[command_option]`` when the params entry equals ``expected_value``.

    Any other value (including a missing entry) yields an empty list.
    """
    if params.get(param) == expected_value:
        return [command_option]
    return []


def cli_configuration_args(params, param, default=[]):
    """Return the list of extra arguments stored under ``param``.

    When the entry is missing or ``None`` the ``default`` is returned
    instead. A list default is handed out as a shallow copy so that callers
    mutating the result can never alter the shared default list (or the
    list object they passed in).
    """
    ex_args = params.get(param)
    if ex_args is None:
        # Copy list defaults: the signature's ``[]`` is a single object shared
        # by every call, so returning it directly would leak mutations.
        return list(default) if isinstance(default, list) else default
    assert isinstance(ex_args, list)
    return ex_args


class ISO639Utils(object):
    """Conversions between two-letter and three-letter language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    # Withdrawn two-letter codes kept in _lang_map for lookups only; each one
    # shares its three-letter code with the current replacement.
    _obsolete_short_codes = ('iw', 'in', 'ji')

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of ``code`` are used for the lookup.
        Returns None when they are not a known code.
        """
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Returns None when ``code`` is unknown. When both a current and a
        withdrawn two-letter code map to ``code``, the current one is
        returned regardless of dict iteration order.
        """
        fallback = None
        for short_name, long_name in cls._lang_map.items():
            if long_name != code:
                continue
            if short_name not in cls._obsolete_short_codes:
                return short_name
            # Remember the withdrawn code, but keep looking for a current one
            fallback = short_name
        return fallback


class ISO3166Utils(object):
    """Lookup of country names by two-letter country code."""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter (ISO 3166-1 alpha-2) country code to its full name

        The lookup is case-insensitive; None is returned for unknown codes.
        """
        normalized = code.upper()
        return cls._country_map.get(normalized)


class GeoUtils(object):
    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '85.94.160.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '159.117.192.0/21',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '84.112.0.0/13',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AZ': '5.191.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '129.45.128.0/17',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '192.131.134.0/24',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '152.240.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '196.32.200.0/21',
        'CG': '197.214.128.0/17',
        'CH': '85.0.0.0/13',
        'CI': '154.232.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '165.210.0.0/15',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '46.198.0.0/15',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '45.208.0.0/14',
        'GI': '85.115.128.0/19',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '126.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.32.32.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '192.147.231.0/24',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '41.86.0.0/19',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '105.234.0.0/16',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '139.26.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '178.220.0.0/14',
        'RU': '5.136.0.0/13',
        'RW': '105.178.0.0/15',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '154.96.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '152.56.0.0/14',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '197.215.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '197.220.64.0/19',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '93.72.0.0/13',
        'UG': '154.224.0.0/13',
        'US': '3.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '82.215.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '24.92.144.0/20',
        'VE': '186.88.0.0/13',
        'VG': '172.103.64.0/18',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '165.56.0.0/13',
        'ZW': '41.85.192.0/19',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        addr, preflen = block.split('/')
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))


class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler that lets an individual request choose its own proxy.

    A request may carry a ``Ytdl-request-proxy`` header naming the proxy to
    use for that request only; the header is removed before the request is
    passed on.
    """

    def __init__(self, proxies=None):
        # Install catch-all handlers for both schemes first, so that
        # proxy_open() runs even when no proxy is configured for a scheme.
        # The base initializer is called afterwards and may replace them.
        for scheme in ('http', 'https'):
            handler = (
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                    meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, handler)
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        """Route ``req`` through ``proxy`` unless the request overrides it.

        Returns None (leaving the request untouched for other handlers) when
        no proxy applies or when the proxy is a SOCKS one; in the SOCKS case
        the proxy URL is recorded in the ``Ytdl-socks-proxy`` header.
        """
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            del req.headers['Ytdl-request-proxy']
            proxy = override

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers do wrapping the socket with socks
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)


# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
# released into Public Domain
# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387

def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    The result has no leading zero bytes, except that values which produce
    no bytes at all (zero and below) are returned as a single zero byte.

    If optional blocksize is given and greater than zero, zero bytes are
    prepended so that the length becomes a multiple of blocksize.
    """
    n = int(n)
    packed = b''
    # Emit the number 32 bits at a time, least significant word first
    while n > 0:
        packed = compat_struct_pack('>I', n & 0xffffffff) + packed
        n >>= 32
    # Drop the zero bytes introduced by word-sized packing
    result = packed.lstrip(b'\000') or b'\000'
    if blocksize > 0:
        remainder = len(result) % blocksize
        if remainder:
            result = (blocksize - remainder) * b'\000' + result
    return result


def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes to a whole number of 32-bit words
    padding = -len(s) % 4
    if padding:
        s = b'\000' * padding + s
    value = 0
    for offset in range(0, len(s), 4):
        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
        value = (value << 32) + word
    return value


def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    The bytes of data are reversed before being read as one big number,
    which is then raised to exponent modulo modulus.

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''

    reversed_hex = binascii.hexlify(data[::-1])
    plaintext_number = int(reversed_hex, 16)
    ciphertext_number = pow(plaintext_number, exponent, modulus)
    return '%x' % ciphertext_number


def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result is laid out as ``[0, 2] + padding + [0] + data``. The padding
    bytes are random and never zero, so that the first zero byte after the
    two-byte header always marks the start of the data.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data leaves less than 8 bytes for padding
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Padding bytes must be nonzero: a zero would be mistaken for the
    # separator in front of the data when the block is unpadded.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data


def encode_base_n(num, n, table=None):
    """Encode the non-negative integer ``num`` as a base-``n`` string.

    ``table`` supplies the digit characters; when omitted, the first ``n``
    characters of ``0-9a-zA-Z`` are used.

    Raises ValueError when ``n`` exceeds the table length, when ``num`` is
    negative, or when a nonzero ``num`` is requested in a base below 2
    (those inputs could never be reduced to zero).
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Guard the inputs for which the division loop below would never end
    if num < 0:
        raise ValueError('cannot encode negative number %d' % num)
    if n < 2:
        raise ValueError('base %d is too small to encode %d' % (n, num))

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    # Digits were produced least significant first
    return ''.join(reversed(digits))


def decode_packed_codes(code):
    """Expand packed code found in ``code`` back into its original words.

    PACKED_CODES_RE is expected to capture four groups: the obfuscated
    payload, the numeric base, the symbol count and the ``|``-separated
    symbol list. Each word of the payload is the base-n encoding of an index
    into that list and is replaced by the corresponding symbol. An empty
    symbol stands for the encoded word itself, and words that have no table
    entry are left unchanged.

    Raises AttributeError when PACKED_CODES_RE does not match ``code``.
    """
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')
    symbol_table = {}

    while count:
        count -= 1
        base_n_count = encode_base_n(count, base)
        symbol_table[base_n_count] = symbols[count] or base_n_count

    def _restore(word_match):
        # Words outside the table are not packed symbols; keep them verbatim
        word = word_match.group(0)
        return symbol_table.get(word, word)

    return re.sub(r'\b(\w+)\b', _restore, obfuscated_code)


def parse_m3u8_attributes(attrib):
    """Parse a ``KEY=value,KEY="quoted value",...`` attribute list into a dict.

    Keys consist of upper-case letters, digits and dashes. Quoted values may
    contain commas and are returned without their surrounding quotes.
    """
    attributes = {}
    pattern = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
    for match in re.finditer(pattern, attrib):
        name, raw_value = match.group('key', 'val')
        attributes[name] = raw_value[1:-1] if raw_value.startswith('"') else raw_value
    return attributes


def urshift(val, n):
    """Shift ``val`` right by ``n`` bits, treating negatives as unsigned 32-bit.

    Negative values are offset by 2**32 before shifting; non-negative values
    are shifted as they are.
    """
    if val < 0:
        val += 0x100000000
    return val >> n


# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode the pixel data of a PNG image held in ``png_data`` (bytes).

    Returns a ``(width, height, pixels)`` tuple where ``pixels`` is a list of
    ``height`` rows, each a flat list of ``width * 3`` integer byte values.

    Only width and height are read from the IHDR chunk; the decoder always
    works with 3 bytes per pixel and one filter byte per row, so other
    colour types, bit depths or interlacing are not handled. Chunk CRCs are
    skipped, not verified.

    Raises IOError if the signature or IHDR chunk is missing, or if the file
    contains no IDAT data.
    """
    # Reference: https://www.w3.org/TR/PNG/
    # Everything after the 8-byte signature is a sequence of chunks
    header = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned integer of 1, 2 or 4 bytes, selected by input length
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Each chunk is: 4-byte length, 4-byte type, <length> data bytes, 4-byte CRC
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # The first chunk was checked above to be IHDR
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # The compressed image stream may be split across several IDAT chunks
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Bytes per row, assuming 3 bytes per pixel
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # idx is a flat byte index into the rows decoded so far
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Every row in the decompressed stream starts with one filter-type byte
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        # Appended before being filled so _get_pixel can read the current row
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Neighbours are the same channel of the pixel to the left (3 bytes
            # back) and of the pixel above; both count as 0 outside the image
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Filter type 0 (and any unrecognised type) leaves the byte as is
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Use whichever of left, up, upper-left is closest to p,
                # preferring them in that order on ties
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels


def write_xattr(path, key, value):
    """Set the extended attribute ``key`` of the file at ``path`` to ``value``.

    The first available mechanism is used: the Python ``xattr`` module
    (either the pyxattr or the xattr flavour), an NTFS alternate data stream
    when compat_os_name is 'nt', or the ``setfattr`` / ``xattr`` command
    line tools.

    ``value`` is written as-is in binary mode and decoded as UTF-8 for the
    command line tools, so it is presumably expected to be bytes - confirm
    against callers.

    Raises XAttrMetadataError when setting the attribute fails and
    XAttrUnavailableError when no usable mechanism is found.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                # NOTE(review): only ImportError is caught below, so despite
                # the message this error presumably propagates to the caller
                # instead of falling back - confirm.
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        # No Python xattr module available: use platform-specific fallbacks
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            # An alternate data stream is addressed as "<file>:<stream name>"
            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                value = value.decode('utf-8')
                # setfattr is preferred when both tools are present
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)] +
                       [encodeArgument(o) for o in opts] +
                       [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate()
                stderr = stderr.decode('utf-8', 'replace')
                # Report the tool's exit status together with its stderr output
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")


def random_birthday(year_field, month_field, day_field):
    """Return a randomly chosen date of birth keyed by the given field names.

    One day is drawn with random.randint from the inclusive range
    1950-01-01 .. 1995-12-31. The result is a dict mapping year_field,
    month_field and day_field to the year, month and day of that date,
    each converted with str() (so no zero padding is applied).
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    # Number of days separating the two bounds; randint includes both ends.
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_days))
    field_names = (year_field, month_field, day_field)
    components = (birthday.year, birthday.month, birthday.day)
    # dict() over pairs keeps the same "later key wins" behaviour as a literal.
    return dict(
        (name, str(component))
        for name, component in zip(field_names, components))
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
+								#!/usr/bin/env python
-												Unify coding cookie

											
										
										
											8 years ago
+								# coding: utf-8
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
-												[utils] Modernize

											
										
										
											10 years ago
+								from __future__ import unicode_literals
-												[letv] Fix extraction

Using data URIs for passing the decrypted M3U8 manifest, which is
supported by ffmpeg only.

											
										
										
											10 years ago
+								import base64
-												[utils] Add OHDave's RSA encryption function

											
										
										
											9 years ago
+								import binascii
-												[instagram] Fix info_dict key name

											
										
										
											11 years ago
+								import calendar
-												Fix unicode_escape (Fixes #2695)

											
										
										
											11 years ago
+								import codecs
-												Ignore BOM in batch files (Fixes #2450)

											
										
										
											11 years ago
+								import contextlib
-												Set process title to youtube-dl

This allows killing all youtube-dl processes with killall youtube-dl, and shows up nicer in some programs.

											
										
										
											11 years ago
+								import ctypes
-												Fix platform name in Python 2 with --verbose (Closes #1228)

											
										
										
											11 years ago
+								import datetime
 								import email.utils
-												[cda] Implement birthday verification (closes #12789)

											
										
										
											8 years ago
+								import email.header
-												Do not re-encode / to # if / is a platform separator, and correctly handle permission errors (Fixes #831)

											
										
										
											12 years ago
+								import errno
-												Add new option --source-address

Closes #3618, fixes #721, fixes #2481, fixes #4551, closes #1020.

											
										
										
											10 years ago
+								import functools
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
+								import gzip
-												Use io.BytesIO instead of StringIO

											
										
										
											12 years ago
+								import io
-												Instead of replacing accented characters with an underscore when sanitizing file names in restricted mode, replace them with their non-accented equivalents fixes #9347

											
										
										
											9 years ago
+								import itertools
-												Correct JSON writing (Closes #596)

											
										
										
											12 years ago
+								import json
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
+								import locale
-												[zdf/common] Use API in ZDF extractor.

This also comes with a lot of extra format fields
Fixes #1518

											
										
										
											11 years ago
+								import math
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											10 years ago
+								import operator
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
+								import os
-												Fix platform name in Python 2 with --verbose (Closes #1228)

											
										
										
											11 years ago
+								import platform
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											8 years ago
+								import random
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
+								import re
-												Fix platform name in Python 2 with --verbose (Closes #1228)

											
										
										
											11 years ago
+								import socket
-												Instead of replacing accented characters with an underscore when sanitizing file names in restricted mode, replace them with their non-accented equivalents fixes #9347

											
										
										
											9 years ago
+								import ssl
-												Improve --bidi-workaround support

											
										
										
											11 years ago
+								import subprocess
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
+								import sys
-												[utils] Make JSON file writes atomic (Fixes #3549)

											
										
										
											11 years ago
+								import tempfile
-												Make ExtractorError usable for other causes

											
										
										
											12 years ago
+								import traceback
-												[generic] Suppress warning about doctypes in RSS parser

											
										
										
											11 years ago
+								import xml.etree.ElementTree
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
+								import zlib
-												[util] Move compatibility functions out of util

utils is large enough without these compatibility functions.

Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py .
Everything else (i.e. youtube-dl-specific helpers) goes into utils.py .

											
										
										
											10 years ago
+								from .compat import (
-												[utils] Handle HTMLParseError in extract_attributes (closes #13349)

											
										
										
											8 years ago
+								    compat_HTMLParseError,
-												[utils] Add extract_attributes for extracting html tag attributes

This is much more robust than just using regexps, and handles all
the common scenarios, such as empty/no values, repeated attributes,
entity decoding, mixed case names, and the different possible value
quoting schemes.

											
										
										
											9 years ago
+								    compat_HTMLParser,
-												Appease pyflakes8-3

											
										
										
											10 years ago
+								    compat_basestring,
-												[util] Move compatibility functions out of util

utils is large enough without these compatibility functions.

Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py .
Everything else (i.e. youtube-dl-specific helpers) goes into utils.py .

											
										
										
											10 years ago
+								    compat_chr,
-												[YoutubeDL] Introduce YoutubeDLCookieJar and clarify the rationale behind session cookies (closes #12929)

											
										
										
											6 years ago
+								    compat_cookiejar,
-												[utils] Fix youtube-dl under PyPy3 on Windows

											
										
										
											10 years ago
+								    compat_ctypes_WINFUNCTYPE,
-												Use a wrapper around xml.etree.ElementTree.fromstring in python 2.x (#7178)

Attributes aren't unicode objects, so they couldn't be directly used in info_dict fields (for example '--write-description' doesn't work with bytes).

											
										
										
											9 years ago
+								    compat_etree_fromstring,
-												[utils] Introduce expand_path

											
										
										
											8 years ago
+								    compat_expanduser,
-												[util] Move compatibility functions out of util

utils is large enough without these compatibility functions.

Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py .
Everything else (i.e. youtube-dl-specific helpers) goes into utils.py .

											
										
										
											10 years ago
+								    compat_html_entities,
-												[utils] Decode HTML5 entities

Used in test_Vporn_1. Also related to #9270

											
										
										
											9 years ago
+								    compat_html_entities_html5,
-												Add new option --source-address

Closes #3618, fixes #721, fixes #2481, fixes #4551, closes #1020.

											
										
										
											10 years ago
+								    compat_http_client,
-												[utils] Fix another old python 2.6 kwargs issue (Closes #5539)

											
										
										
											10 years ago
+								    compat_kwargs,
-												Move write_xattr to utils.py

There are some other places that use xattr functions. It's better to
move it to a common place so that others can use it.

											
										
										
											8 years ago
+								    compat_os_name,
-												[util] Move compatibility functions out of util

utils is large enough without these compatibility functions.

Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py .
Everything else (i.e. youtube-dl-specific helpers) goes into utils.py .

											
										
										
											10 years ago
+								    compat_parse_qs,
-												[compat] Rename shlex_quote and remove unused subprocess_check_output

											
										
										
											9 years ago
+								    compat_shlex_quote,
-												[util] Move compatibility functions out of util

utils is large enough without these compatibility functions.

Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py .
Everything else (i.e. youtube-dl-specific helpers) goes into utils.py .

											
										
										
											10 years ago
+								    compat_str,
-												[compat] Rename struct_(un)pack to compat_struct_(un)pack

											
										
										
											9 years ago
+								    compat_struct_pack,
-												[utils] Add decode_png for openload (#9706)

											
										
										
											9 years ago
+								    compat_struct_unpack,
-												[util] Move compatibility functions out of util

utils is large enough without these compatibility functions.

Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py .
Everything else (i.e. youtube-dl-specific helpers) goes into utils.py .

											
										
										
											10 years ago
+								    compat_urllib_error,
 								    compat_urllib_parse,
-												[compat] Add compat_urllib_parse_urlencode and eliminate encode_dict

encode_dict functionality has been improved and moved directly into compat_urllib_parse_urlencode
All occurrences of compat_urllib_parse.urlencode throughout the codebase have been replaced by compat_urllib_parse_urlencode

Closes #8974

											
										
										
											9 years ago
+								    compat_urllib_parse_urlencode,
-												[util] Move compatibility functions out of util

utils is large enough without these compatibility functions.

Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py .
Everything else (i.e. youtube-dl-specific helpers) goes into utils.py .

											
										
										
											10 years ago
+								    compat_urllib_parse_urlparse,
-												[utils] Unquote crendentials passed to SOCKS proxies

Fixes #9450

											
										
										
											9 years ago
+								    compat_urllib_parse_unquote_plus,
-												[util] Move compatibility functions out of util

utils is large enough without these compatibility functions.

Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py .
Everything else (i.e. youtube-dl-specific helpers) goes into utils.py .

											
										
										
											10 years ago
+								    compat_urllib_request,
 								    compat_urlparse,
-												[utils] Use compat_xpath

											
										
										
											9 years ago
+								    compat_xpath,
-												[util] Move compatibility functions out of util

utils is large enough without these compatibility functions.

Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py .
Everything else (i.e. youtube-dl-specific helpers) goes into utils.py .

											
										
										
											10 years ago
+								)
-												[core] Decode environment variables with filesystem encoding (Fixes #3854, Fixes #3217, Fixes #2918)

Introduces compat versions of os.getenv and os.path.expanduser

											
										
										
											10 years ago
-												[socks] Support SOCKS proxies

											
										
										
											9 years ago
+								from .socks import (
 								    ProxyType,
 								    sockssocket,
 								)
-												[core] Decode environment variables with filesystem encoding (Fixes #3854, Fixes #3217, Fixes #2918)

Introduces compat versions of os.getenv and os.path.expanduser

											
										
										
											10 years ago
-												[utils] Register SOCKS protocols in urllib and support SOCKS4A

											
										
										
											9 years ago
+								def register_socks_protocols():
 								    # "Register" SOCKS protocols
-												[utils] Add rationale for register_socks_protocols

											
										
										
											9 years ago
+								    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
 								    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
-												[utils] Register SOCKS protocols in urllib and support SOCKS4A

											
										
										
											9 years ago
+								    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
 								        if scheme not in compat_urlparse.uses_netloc:
 								            compat_urlparse.uses_netloc.append(scheme)
-												implement fallbacks and defaults in _search_regex

											
										
										
											12 years ago
+								# This is not clearly defined otherwise
 								compiled_regex_type = type(re.compile(''))
-												Py3 compat for unichr and htmlentitydefs

											
										
										
											12 years ago
+								std_headers = {
-												[utils] Use pure browser header for User-Agent (closes #17236)

											
										
										
											6 years ago
+								    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0',
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											12 years ago
+								    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
 								    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
 								    'Accept-Encoding': 'gzip, deflate',
 								    'Accept-Language': 'en-us,en;q=0.5',
-												Py3 compat for unichr and htmlentitydefs

											
										
										
											12 years ago
+								}
-												some fixes, pulled the codename from the code

											
										
										
											12 years ago
-												PEP8 applied

											
										
										
											10 years ago
-												[utils] Add common user agents map

											
										
										
											8 years ago
+								USER_AGENTS = {
 								    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
 								}
-												[utils] Add default value for xpath_text

											
										
										
											10 years ago
+								NO_DEFAULT = object()
-												[Yam] Add new extractor

											
										
										
											10 years ago
+								ENGLISH_MONTH_NAMES = [
 								    'January', 'February', 'March', 'April', 'May', 'June',
 								    'July', 'August', 'September', 'October', 'November', 'December']
-												[utils] Improve month_by_name and add tests

											
										
										
											8 years ago
+								MONTH_NAMES = {
 								    'en': ENGLISH_MONTH_NAMES,
 								    'fr': [
-												[utils] Use native french month names

											
										
										
											8 years ago
+								        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
 								        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
-												[utils] Improve month_by_name and add tests

											
										
										
											8 years ago
+								}
-												[utils,franceinter] Add french months' names and fix extraction

Update of the "FranceInter" radio extractor : webpages HTML structure
had changed, the extractor didn't work. So I updated this extractor to
get the mp3 URL and all details.

											
										
										
											8 years ago
-												[utils] Extract known extensions for reuse

											
										
										
											9 years ago
+								KNOWN_EXTENSIONS = (
 								    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
 								    'flv', 'f4v', 'f4a', 'f4b',
 								    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
 								    'mkv', 'mka', 'mk3d',
 								    'avi', 'divx',
 								    'mov',
 								    'asf', 'wmv', 'wma',
 								    '3gp', '3g2',
 								    'mp3',
 								    'flac',
 								    'ape',
 								    'wav',
 								    'f4f', 'f4m', 'm3u8', 'smil')
-												improved performance by extracting accented chars to top level

											
										
										
											9 years ago
+								# needed for sanitizing filenames in restricted mode
-												Added sanitization support for Hungarian letters Ő and Ű
											
										
										
											9 years ago
+								ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
 								                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
 								                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
-												improved performance by extracting accented chars to top level

											
										
										
											9 years ago
-												[utils] Add unified_timestamp

											
										
										
											9 years ago
+								DATE_FORMATS = (
 								    '%d %B %Y',
 								    '%d %b %Y',
 								    '%B %d %Y',
-												[utils] Add more date formats

											
										
										
											8 years ago
+								    '%B %dst %Y',
 								    '%B %dnd %Y',
 								    '%B %dth %Y',
-												[utils] Add unified_timestamp

											
										
										
											9 years ago
+								    '%b %d %Y',
-												[utils] Add more date formats

											
										
										
											8 years ago
+								    '%b %dst %Y',
 								    '%b %dnd %Y',
 								    '%b %dth %Y',
-												[utils] Add unified_timestamp

											
										
										
											9 years ago
+								    '%b %dst %Y %I:%M',
 								    '%b %dnd %Y %I:%M',
 								    '%b %dth %Y %I:%M',
 								    '%Y %m %d',
 								    '%Y-%m-%d',
 								    '%Y/%m/%d',
-												[utils] Recognize more formats in unified_timestamp

Used in CtsNews

											
										
										
											9 years ago
+								    '%Y/%m/%d %H:%M',
-												[utils] Add unified_timestamp

											
										
										
											9 years ago
+								    '%Y/%m/%d %H:%M:%S',
-												[utils] Add another date format seen in NextTV

											
										
										
											8 years ago
+								    '%Y-%m-%d %H:%M',
-												[utils] Add unified_timestamp

											
										
										
											9 years ago
+								    '%Y-%m-%d %H:%M:%S',
 								    '%Y-%m-%d %H:%M:%S.%f',
 								    '%d.%m.%Y %H:%M',
 								    '%d.%m.%Y %H.%M',
 								    '%Y-%m-%dT%H:%M:%SZ',
 								    '%Y-%m-%dT%H:%M:%S.%fZ',
 								    '%Y-%m-%dT%H:%M:%S.%f0Z',
 								    '%Y-%m-%dT%H:%M:%S',
 								    '%Y-%m-%dT%H:%M:%S.%f',
 								    '%Y-%m-%dT%H:%M',
-												[utils] Lower priority for rare date formats and add tests

											
										
										
											8 years ago
+								    '%b %d %Y at %H:%M',
 								    '%b %d %Y at %H:%M:%S',
-												[utils] Add another date format pattern (#14999)

											
										
										
											7 years ago
+								    '%B %d %Y at %H:%M',
 								    '%B %d %Y at %H:%M:%S',
-												[utils] Add unified_timestamp

											
										
										
											9 years ago
+								)
 								DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
 								DATE_FORMATS_DAY_FIRST.extend([
 								    '%d-%m-%Y',
 								    '%d.%m.%Y',
 								    '%d.%m.%y',
 								    '%d/%m/%Y',
 								    '%d/%m/%y',
 								    '%d/%m/%Y %H:%M:%S',
 								])
 								DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
 								DATE_FORMATS_MONTH_FIRST.extend([
 								    '%m-%d-%Y',
 								    '%m.%d.%Y',
 								    '%m/%d/%Y',
 								    '%m/%d/%y',
 								    '%m/%d/%Y %H:%M:%S',
 								])
-												[utils] Expose PACKED_CODES_RE

											
										
										
											8 years ago
+								PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
-												[malltv] Add extractor (closes #18058)

											
										
										
											6 years ago
+								JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
-												[utils] Expose PACKED_CODES_RE

											
										
										
											8 years ago
-												[Yam] Add new extractor

											
										
										
											10 years ago
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
+								def preferredencoding():
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											12 years ago
+								    """Get preferred encoding.
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											12 years ago
+								    Returns the best encoding scheme for the system, based on
 								    locale.getpreferredencoding() and some further tweaks.
 								    """
 								    try:
 								        pref = locale.getpreferredencoding()
-												[utils] Remove stray u'

											
										
										
											10 years ago
+								        'TEST'.encode(pref)
-												Don't use bare 'except:'

They catch any exception, including KeyboardInterrupt, we don't want to catch it.

											
										
										
											10 years ago
+								    except Exception:
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											12 years ago
+								        pref = 'UTF-8'
-												Simplified preferredencoding()

Not sure what is the point to use yield to return encoding, thus
it will simplify the whole function.

Signed-off-by: Arvydas Sidorenko <asido4@gmail.com>

											
										
										
											13 years ago
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											12 years ago
+								    return pref
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
-												Correct JSON writing (Closes #596)

											
										
										
											12 years ago
-												[utils] Make JSON file writes atomic (Fixes #3549)

											
										
										
											11 years ago
+								def write_json_file(obj, fn):
-												Fix "ERROR: Cannot write metadata to JSON file" on Windows

Fixes #4246

											
										
										
											10 years ago
+								    """ Encode obj as JSON and write it to fn, atomically if possible """
-												[utils] Make JSON file writes atomic (Fixes #3549)

											
										
										
											11 years ago
-												[cache] Fix writing to paths with unicode characters

* Use "compat_getenv"
* "write_json_file" now expects the filename to be a string

											
										
										
											10 years ago
+								    fn = encodeFilename(fn)
-												Fix UnicodeEncodeError with --write-info-json on Python 2.7 + Windows

Fixes #4244

											
										
										
											10 years ago
+								    if sys.version_info < (3, 0) and sys.platform != 'win32':
-												[utils] Fix "write_json_file" for unicode names in python 2.x (fixes #4125)

											
										
										
											10 years ago
+								        encoding = get_filesystem_encoding()
 								        # os.path.basename returns a bytes object, but NamedTemporaryFile
 								        # will fail if the filename contains non ascii characters unless we
 								        # use a unicode object
 								        path_basename = lambda f: os.path.basename(fn).decode(encoding)
 								        # the same for os.path.dirname
 								        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
 								    else:
 								        path_basename = os.path.basename
 								        path_dirname = os.path.dirname
-												[utils] Add missing mode and encoding arguments

											
										
										
											11 years ago
+								    args = {
 								        'suffix': '.tmp',
-												[utils] Fix "write_json_file" for unicode names in python 2.x (fixes #4125)

											
										
										
											10 years ago
+								        'prefix': path_basename(fn) + '.',
 								        'dir': path_dirname(fn),
-												[utils] Add missing mode and encoding arguments

											
										
										
											11 years ago
+								        'delete': False,
 								    }
-												[utils] Make JSON file writes atomic (Fixes #3549)

											
										
										
											11 years ago
+								    # In Python 2.x, json.dump expects a bytestream.
 								    # In Python 3.x, it writes to a character stream
 								    if sys.version_info < (3, 0):
-												[utils] Add missing mode and encoding arguments

											
										
										
											11 years ago
+								        args['mode'] = 'wb'
-												[utils] Make JSON file writes atomic (Fixes #3549)

											
										
										
											11 years ago
+								    else:
-												[utils] Add missing mode and encoding arguments

											
										
										
											11 years ago
+								        args.update({
 								            'mode': 'w',
 								            'encoding': 'utf-8',
 								        })
-												[utils] Fix another old python 2.6 kwargs issue (Closes #5539)

											
										
										
											10 years ago
+								    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
-												[utils] Make JSON file writes atomic (Fixes #3549)

											
										
										
											11 years ago
 								    try:
 								        with tf:
 								            json.dump(obj, tf)
-												Fix "ERROR: Cannot write metadata to JSON file" on Windows

Fixes #4246

											
										
										
											10 years ago
+								        if sys.platform == 'win32':
 								            # Need to remove existing file on Windows, else os.rename raises
 								            # WindowsError or FileExistsError.
 								            try:
 								                os.unlink(fn)
 								            except OSError:
 								                pass
-												[utils] Make JSON file writes atomic (Fixes #3549)

											
										
										
											11 years ago
+								        os.rename(tf.name, fn)
-												Don't use bare 'except:'

They catch any exception, including KeyboardInterrupt, we don't want to catch it.

											
										
										
											10 years ago
+								    except Exception:
-												[utils] Make JSON file writes atomic (Fixes #3549)

											
										
										
											11 years ago
+								        try:
 								            os.remove(tf.name)
 								        except OSError:
 								            pass
 								        raise
 								if sys.version_info >= (2, 7):
-												[utils] Make value optional for find_xpath_attr

This allows selecting particular attributes by name but without specifying the value and similar to xpath syntax `[@attrib]`

											
										
										
											10 years ago
+								    def find_xpath_attr(node, xpath, key, val=None):
-												Add helper function find_path_attr

											
										
										
											12 years ago
+								        """ Find the xpath xpath[@key=val] """
-												[utils] Relax attribute key assert

											
										
										
											9 years ago
+								        assert re.match(r'^[a-zA-Z_-]+$', key)
-												[utils] Make value optional for find_xpath_attr

This allows selecting particular attributes by name but without specifying the value and similar to xpath syntax `[@attrib]`

											
										
										
											10 years ago
+								        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
-												Add helper function find_path_attr

											
										
										
											12 years ago
+								        return node.find(expr)
 								else:
-												[utils] Make value optional for find_xpath_attr

This allows selecting particular attributes by name but without specifying the value and similar to xpath syntax `[@attrib]`

											
										
										
											10 years ago
+								    def find_xpath_attr(node, xpath, key, val=None):
-												[utils] Use compat_xpath

											
										
										
											9 years ago
+								        for f in node.findall(compat_xpath(xpath)):
-												[utils] Make value optional for find_xpath_attr

This allows selecting particular attributes by name but without specifying the value and similar to xpath syntax `[@attrib]`

											
										
										
											10 years ago
+								            if key not in f.attrib:
 								                continue
 								            if val is None or f.attrib.get(key) == val:
-												Add helper function find_path_attr

											
										
										
											12 years ago
+								                return f
 								        return None
-												Add an extractor for internetvideoarchive.com videos

It's used by videodetective.com

											
										
										
											11 years ago
+								# On python2.6 the xml.etree.ElementTree.Element methods don't support
 								# the namespace parameter
-												PEP8 applied

											
										
										
											10 years ago
-												Add an extractor for internetvideoarchive.com videos

It's used by videodetective.com

											
										
										
											11 years ago
def xpath_with_ns(path, ns_map):
    """Expand `prefix:tag` components of path into `{namespace-uri}tag` form.

    path is split on '/'; every component of the form `prefix:tag` has its
    prefix looked up in ns_map (a KeyError is raised for unknown prefixes).
    Components without a colon are kept unchanged.
    """
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
-												[utils] Add xpath_element and xpath_attr

											
										
										
											9 years ago
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element under node matching xpath.

    xpath is either a single xpath string or an iterable of xpath strings
    that are tried in order until one matches.

    When nothing matches: return default if one was given, otherwise raise
    ExtractorError if fatal is set (name, or the xpath itself, is used in
    the message), otherwise return None.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    # Initialised up front so that an empty iterable of xpaths is treated as
    # "not found" instead of leaving n unbound
    n = None
    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
 								def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
-												[utils] Improve xpath_text

											
										
										
											9 years ago
+								    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
 								    if n is None or n == default:
 								        return n
 								    if n.text is None:
 								        if default is not NO_DEFAULT:
 								            return default
 								        elif fatal:
 								            name = xpath if name is None else name
 								            raise ExtractorError('Could not find XML element\'s text %s' % name)
 								        else:
 								            return None
 								    return n.text
-												[utils] Add xpath_element and xpath_attr

											
										
										
											9 years ago
 								def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
 								    n = find_xpath_attr(node, xpath, key)
 								    if n is None:
 								        if default is not NO_DEFAULT:
 								            return default
 								        elif fatal:
 								            name = '%s[@%s]' % (xpath, key) if name is None else name
 								            raise ExtractorError('Could not find XML attribute %s' % name)
 								        else:
 								            return None
 								    return n.attrib[key]
-												[ard] Make more robust against missing thumbnails

I cannot reproduce this error, it's from travis.

											
										
										
											10 years ago
-												merged unescapeHTML branch; removed lxml dependency

											
										
										
											13 years ago
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html)
-												Refactor IDParser to search for elements by any attribute not just ID

											
										
										
											12 years ago
-												[utils] Remove unused get_meta_content function

											
										
										
											10 years ago
-												[utils] Add get_element_by_class

For #9950

											
										
										
											9 years ago
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None
 								def get_element_by_attribute(attribute, value, html, escape_value=True):
 								    retval = get_elements_by_attribute(attribute, value, html, escape_value)
 								    return retval[0] if retval else None
 								def get_elements_by_class(class_name, html):
 								    """Return the content of all tags with the specified class in the passed HTML document as a list"""
 								    return get_elements_by_attribute(
-												[utils] Add get_element_by_class

For #9950

											
										
										
											9 years ago
+								        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
 								        html, escape_value=False)
-												Introduce get_elements_by_class and get_elements_by_attribute utility functions


											
										
										
											8 years ago
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document

    value is matched literally unless escape_value is False, in which case
    it is inserted into the search pattern as a regular expression. The
    result is a list of the HTML-unescaped inner contents of the matching
    tags, in document order.
    """

    value = re.escape(value) if escape_value else value

    retlist = []
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')

        # Drop one leading and one trailing character when the content
        # starts with a quote
        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]

        retlist.append(unescapeHTML(res))

    return retlist
-												[ustream] Simplify channel extraction

the ChannelParser has been moved to a new function in utils get_meta_content
Instead of the SocialStreamParser now it uses a regex

											
										
										
											11 years ago
-												[utils] PEP 8

											
										
										
											9 years ago
-												[utils] Add extract_attributes for extracting html tag attributes

This is much more robust than just using regexps, and handles all
the common scenarios, such as empty/no values, repeated attributes,
entity decoding, mixed case names, and the different possible value
quoting schemes.

											
										
										
											9 years ago
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attributes of the most recently seen start tag
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # attrs is a sequence of (name, value) pairs; later duplicates win
        self.attrs = dict(attrs)
-												[utils] PEP 8

											
										
										
											9 years ago
-												[utils] Add extract_attributes for extracting html tag attributes

This is much more robust than just using regexps, and handles all
the common scenarios, such as empty/no values, repeated attributes,
entity decoding, mixed case names, and the different possible value
quoting schemes.

											
										
										
											9 years ago
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        # Best effort: keep whatever attributes were gathered before the error
        pass
    return parser.attrs
-												merged unescapeHTML branch; removed lxml dependency

											
										
										
											13 years ago
-												[utils] PEP 8

											
										
										
											9 years ago
-												merged unescapeHTML branch; removed lxml dependency

											
										
										
											13 years ago
def clean_html(html):
    """Clean an HTML snippet into a readable string

    Literal newlines become spaces, <br> tags and </p><p> boundaries become
    newlines, all remaining tags are removed, HTML entities are decoded and
    surrounding whitespace is stripped. None is passed through unchanged.
    """

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    # (?u) makes \s match unicode whitespace on Python 2 as well
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
-												merged unescapeHTML branch; removed lxml dependency

											
										
										
											13 years ago
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    The filename '-' selects standard output (its binary buffer when
    available).

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                # Switch stdout to binary mode so data is written untranslated
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # Permission errors cannot be fixed by renaming; propagate them
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
 								def timeconvert(timestr):
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											12 years ago
+								    """Convert RFC 2822 defined time string into system timestamp"""
 								    timestamp = None
 								    timetuple = email.utils.parsedate_tz(timestr)
 								    if timetuple is not None:
 								        timestamp = email.utils.mktime_tz(timetuple)
 								    return timestamp
-												New option --restrict-filenames

											
										
										
											12 years ago
-												PEP8 applied

											
										
										
											10 years ago
-												Keep video IDs verbatim if possible (Closes #571)

											
										
										
											12 years ago
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        # Map one character to its filename-safe replacement (possibly '')
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        # Collapse runs of underscores and trim them from both ends
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        # A leading hyphen could be mistaken for a command line option
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
-												PEP8 applied

											
										
										
											10 years ago
-												[utils] Add sanitize_path

											
										
										
											10 years ago
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On any other platform s is returned unchanged. On Windows the path is
    normalized and, in every component except '.' and '..', characters from
    the set /<>:"|\\?* and a trailing whitespace or dot are replaced with
    '#'. A drive or UNC prefix is preserved.
    """
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    # splitdrive does not recognise UNC paths before Python 2.7
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # Drop the empty component left by the separator after the prefix
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
-												[utils] Extract sanitize_url routine

											
										
										
											9 years ago
def sanitize_url(url):
    """Return url with a missing scheme or a known scheme typo fixed.

    Protocol-relative URLs (starting with '//') get the `http:` scheme; the
    misspelled schemes `httpss` and `rmtp`/`rmtpe`/`rmtps` are corrected.
    Any other URL is returned unchanged.
    """
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url
-												[utils] Extract sanitize_url routine

											
										
										
											9 years ago
-												Rename compat_urllib_request_Request to sanitized_Request and move to utils

											
										
										
											9 years ago
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request for url after passing it through sanitize_url.

    All further positional and keyword arguments are forwarded unchanged.
    """
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
-												Rename compat_urllib_request_Request to sanitized_Request and move to utils

											
										
										
											9 years ago
-												[utils] Introduce expand_path

											
										
										
											8 years ago
def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list that keeps the first occurrence of every element in its
    original order. Elements are compared by equality, so unhashable items
    are supported as well.
    """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
-												[instagram] Fix info_dict key name

											
										
										
											11 years ago
-												[utils] Decode HTML5 entities

Used in test_Vporn_1. Also related to #9270

											
										
										
											9 years ago
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    entity_with_semicolon is the entity without the leading '&' but with
    the trailing ';' (e.g. 'amp;' or '#x2F;'). Unknown or invalid entities
    are returned in their literal '&...;' form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            # Turn 'x..' into '0x..' so that int() accepts it with base 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
-												[utils] Modernize tests

											
										
										
											10 years ago
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
def unescapeHTML(s):
    """Replace the HTML entities in s with the characters they stand for.

    None is passed through unchanged; any other input must be a
    compat_str. Entities that cannot be decoded are left as they are.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    # [^&;]+ keeps a stray '&' from swallowing the entity that follows it,
    # e.g. in a malformed string like '&a&quot;'
    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
-												Fix/work around Windows encoding issues (Fixes #2095)

											
										
										
											11 years ago
-												[utils] Add `get_subprocess_encoding` and filename/argument decode counterparts

											
										
										
											10 years ago
def get_subprocess_encoding():
    """Return the name of the encoding to use for subprocess arguments and filenames.

    Falls back to 'utf-8' when no encoding could be determined.
    """
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding
-												Fix/work around Windows encoding issues (Fixes #2095)

											
										
										
											11 years ago
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file

    Returns s unchanged wherever a Unicode API is available (Python 3,
    Windows 2000 and up unless for_subprocess is set, Jython); otherwise s
    encoded with the subprocess encoding, ignoring unencodable characters.
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
 								def decodeFilename(b, for_subprocess=False):
 								    if sys.version_info >= (3, 0):
 								        return b
 								    if not isinstance(b, bytes):
 								        return b
 								    return b.decode(get_subprocess_encoding(), 'ignore')
-												Fix/work around Windows encoding issues (Fixes #2095)

											
										
										
											11 years ago
-												[ffmpeg] Correct argument encoding on Windows with Python 2.x

Fixes #2924

											
										
										
											11 years ago
 								def encodeArgument(s):
 								    if not isinstance(s, compat_str):
 								        # Legacy code that uses byte strings
 								        # Uncomment the following line after fixing all post processors
-												Improve code style

											
										
										
											10 years ago
+								        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
-												[ffmpeg] Correct argument encoding on Windows with Python 2.x

Fixes #2924

											
										
										
											11 years ago
+								        s = s.decode('ascii')
 								    return encodeFilename(s, True)
-												[utils] Add `get_subprocess_encoding` and filename/argument decode counterparts

											
										
										
											10 years ago
def decodeArgument(b):
    """Decode a subprocess argument; counterpart of encodeArgument()."""
    return decodeFilename(b, True)
-												Fix --match-title and --reject-title decoding (Closes #690)

											
										
										
											12 years ago
def decodeOption(optval):
    """Return a command-line option value as text.

    None is passed through. Byte strings are decoded with the preferred
    encoding; the result must be a text string.
    """
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
-												ExtractorError for errors during extraction

											
										
										
											12 years ago
-												PEP8 applied

											
										
										
											10 years ago
-												twitch.tv chapters (#810): print out start and end time

											
										
										
											12 years ago
def formatSeconds(secs):
    """Format a duration in seconds as 'S', 'M:SS' or 'H:MM:SS'.

    The shortest form that fits is used: hours appear from 3600 seconds
    on, minutes from 60 seconds on. The thresholds are inclusive so that
    exactly one minute is rendered as '1:00' (not '60') and exactly one
    hour as '1:00:00' (not '60:00').
    """
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    if secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    return '%d' % secs
-												Add new --print-traffic option

											
										
										
											11 years ago
-												Add new option --source-address

Closes #3618, fixes #721, fixes #2481, fixes #4551, closes #1020.

											
										
										
											10 years ago
def make_HTTPS_handler(params, **kwargs):
    """Build the YoutubeDLHTTPSHandler matching the running Python's ssl support.

    params is the options dictionary; its 'nocheckcertificate' entry
    (default False) turns off certificate verification. Extra keyword
    arguments are forwarded to YoutubeDLHTTPSHandler.
    """
    opts_no_check_certificate = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # Hostname checking has to be switched off before verify_mode
            # may be set to CERT_NONE
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if opts_no_check_certificate:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
-												Add --no-check-certificate (#814)

											
										
										
											12 years ago
-												[utils] Improve update on error message somewhat

We still may want to implement a bulletproof check for the current version, and a better place to add this message so that it works for all kind of other errors too.

											
										
										
											10 years ago
-												InfoExtractor._search_regex: Suggest updating when the regex is not found (suggested in #5442)

Reuse the same message from ExtractorError

											
										
										
											10 years ago
def bug_reports_message():
    """Return the text appended to unexpected errors asking for a bug report.

    The update hint depends on whether ytdl_is_updateable() reports that
    this installation can update itself.
    """
    if ytdl_is_updateable():
        update_cmd = 'type  youtube-dl -U  to update'
    else:
        update_cmd = 'see  https://yt-dl.org/update  on how to update'
    return (
        '; please report this issue on https://yt-dl.org/bug .'
        ' Make sure you are using the latest version; %s.' % update_cmd
        + ' Be sure to call youtube-dl with the --verbose flag and include its complete output.')
-												[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

											
										
										
											8 years ago
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
 								class ExtractorError(YoutubeDLError):
-												ExtractorError for errors during extraction

											
										
										
											12 years ago
+								    """Error during info extraction."""
-												PEP8 applied

											
										
										
											10 years ago
-												[youtube] Include video Id in common error message (Fixes #2786)

											
										
										
											11 years ago
+								    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
-												Do not show bug report for errors that are to be expected (Closes #973)

											
										
										
											12 years ago
+								        """ tb, if given, is the original traceback (so that it can be printed out).
 								        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
 								        """
 								        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
 								            expected = True
-												[youtube] Include video Id in common error message (Fixes #2786)

											
										
										
											11 years ago
+								        if video_id is not None:
 								            msg = video_id + ': ' + msg
-												[utils] Fix js_to_json

											
										
										
											10 years ago
+								        if cause:
-												[utils] Remove stray u'

											
										
										
											10 years ago
+								            msg += ' (caused by %r)' % cause
-												Do not show bug report for errors that are to be expected (Closes #973)

											
										
										
											12 years ago
+								        if not expected:
-												InfoExtractor._search_regex: Suggest updating when the regex is not found (suggested in #5442)

Reuse the same message from ExtractorError

											
										
										
											10 years ago
+								            msg += bug_reports_message()
-												ExtractorError for errors during extraction

											
										
										
											12 years ago
+								        super(ExtractorError, self).__init__(msg)
-												do not ask the user to report network errors

											
										
										
											12 years ago
-												ExtractorError for errors during extraction

											
										
										
											12 years ago
+								        self.traceback = tb
-												Bubble up all the stack of exceptions and retry download tests on timeout errors

											
										
										
											12 years ago
+								        self.exc_info = sys.exc_info()  # preserve original exception
-												[addanime] improve

											
										
										
											11 years ago
+								        self.cause = cause
-												[youtube] Include video Id in common error message (Fixes #2786)

											
										
										
											11 years ago
+								        self.video_id = video_id
-												ExtractorError for errors during extraction

											
										
										
											12 years ago
-												Make ExtractorError usable for other causes

											
										
										
											12 years ago
+								    def format_traceback(self):
 								        if self.traceback is None:
 								            return None
-												[utils] Remove stray u'

											
										
										
											10 years ago
+								        return ''.join(traceback.format_tb(self.traceback))
-												Make ExtractorError usable for other causes

											
										
										
											12 years ago
-												ExtractorError for errors during extraction

											
										
										
											12 years ago
-												Add documentation about supported sites (Fixes #4503)

											
										
										
											10 years ago
class UnsupportedError(ExtractorError):
    """Raised for a URL that is not supported; always an expected error."""

    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url
-												[vimeo] Fix pro videos and player.vimeo.com urls

The old process can still be used for those videos.
Added RegexNotFoundError, which is raised by _search_regex if it can't extract the info.

											
										
										
											11 years ago
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											8 years ago
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        """msg is the error text; countries is stored as given on self.countries."""
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg = msg
        self.countries = countries
-												[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

											
										
										
											8 years ago
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
-												[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

											
										
										
											8 years ago
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
-												[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

											
										
										
											8 years ago
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        """msg is the error text; it is also kept on self.msg."""
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
-												PEP8 applied

											
										
										
											10 years ago
-												[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

											
										
										
											8 years ago
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
-												[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

											
										
										
											8 years ago
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
-												[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

											
										
										
											8 years ago
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        """downloaded and expected are both sizes in bytes."""
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
-												Split code as a package, compiled into an executable zip

											
										
										
											13 years ago
-												PEP8 applied

											
										
										
											10 years ago
-												[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

											
										
										
											8 years ago
class XAttrMetadataError(YoutubeDLError):
    """Error while writing extended-attribute metadata.

    The error is classified into self.reason from the errno code and/or the
    message text:
      'NO_SPACE'        - no space left on device or disk quota exceeded
      'VALUE_TOO_LONG'  - the value is too large
      'NOT_SUPPORTED'   - anything else
    """

    def __init__(self, code=None, msg='Unknown error'):
        """code is an errno value (or None); msg is the error text."""
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
-												[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

											
										
										
											8 years ago
class XAttrUnavailableError(YoutubeDLError):
    """Raised when extended attributes cannot be used."""
-												[utils] Fix call to _create_http_connection

Avoid confusion over args/kwargs.

											
										
										
											10 years ago
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and, if requested, bind it to a source address.

    ydl_handler  handler whose _params dictionary is consulted for the
                 'source_address' option
    http_class   connection class to instantiate with *args / **kwargs
    is_https     whether the connection must be wrapped in TLS (only used
                 by the Python 2.6 fallback below)

    Returns the (possibly patched) connection object.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Only keep remote addresses of the same IP version as the source address
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as e:
                    # Remember the failure and try the next candidate address
                    err = e
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')

        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
-												[utils] Separate codes for handling Youtubedl-* headers

											
										
										
											9 years ago
def handle_youtubedl_headers(headers):
    """Apply and strip youtube-dl's internal pseudo-headers.

    If 'Youtubedl-no-compression' is present, a new dictionary is returned
    without that marker and without any Accept-Encoding header (matched
    case-insensitively). Otherwise the very same headers object is
    returned. The passed dictionary is never modified.
    """
    filtered_headers = headers

    if 'Youtubedl-no-compression' in filtered_headers:
        filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
        del filtered_headers['Youtubedl-no-compression']

    return filtered_headers
-												[utils] Separate codes for handling Youtubedl-* headers

											
										
										
											9 years ago
-												Revert "Install our own HTTPS handler as well (#1309)"

This reverts commit 36399e85765a6a04fd84126264af75382fcfd1f6 and fixes #1322.

											
										
										
											11 years ago
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Store *params*; remaining arguments go to ``HTTPHandler.__init__``."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open *req*, using a SOCKS connection class when requested.

        The internal ``Ytdl-socks-proxy`` request header selects the proxy
        and is removed before the request is sent.
        """
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        # The trailing False is presumably the is_https flag of
        # _create_http_connection (defined elsewhere in this file).
        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress *data*, trying a raw deflate stream before a zlib one."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Percent-encode the URL, add standard headers, drop control headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Transparently decompress *resp* and percent-encode redirect targets."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            # Drop the header so a second pass through this handler does not
            # try to decompress the already-decoded body again.
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    # Header values are byte strings on Python 2
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic goes through the same request/response processing
    https_request = http_request
    https_response = http_response
-												Fix some IEs that didn't return the uploade_date in the YYYYMMDD format

Create a function unified_strdate in utils.py to fix these problems

											
										
										
											12 years ago
-												[elpais] Add extractor

											
										
										
											11 years ago
-												[socks] Support SOCKS proxies

											
										
										
											9 years ago
def make_socks_conn_class(base_class, socks_proxy):
    """Build a subclass of *base_class* that connects through a SOCKS proxy.

    *base_class* must derive from HTTPConnection or HTTPSConnection.
    *socks_proxy* is a proxy URL whose scheme selects the protocol
    (``socks5``; ``socks``/``socks4``; ``socks4a``); the port defaults to
    1080 and any user name / password in the URL is unquoted.

    Raises ValueError for any other scheme.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Previously this fell through and failed below with an
        # UnboundLocalError on socks_type.
        raise ValueError(
            'Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        # username/password are None when absent from the URL
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            # Exact type check on purpose: only plain numeric timeouts are
            # forwarded to the socket.
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
-												Add new option --source-address

Closes #3618, fixes #721, fixes #2481, fixes #4551, closes #1020.

											
										
										
											10 years ago
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that opens connections via ``_create_http_connection``.

    An alternative connection class may be supplied through
    *https_conn_class*; a SOCKS proxy is used when the request carries the
    internal ``Ytdl-socks-proxy`` header.
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open *req*, forwarding the SSL settings this handler holds."""
        kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            # Internal control header: must not be sent to the server
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **kwargs)
-												Add new option --source-address

Closes #3618, fixes #721, fixes #2481, fixes #4551, closes #1020.

											
										
										
											10 years ago
-												[YoutubeDL] Introduce YoutubeDLCookieJar and clarify the rationale behind session cookies (closes #12929)

											
										
										
											6 years ago
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """MozillaCookieJar that copes with ``#HttpOnly_`` lines and ``expires=0``."""

    # Prefix stripped from lines of the cookies file before parsing
    _HTTPONLY_PREFIX = '#HttpOnly_'

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file, writing session cookies with ``expires`` 0."""
        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0
        compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Copy the file into memory, dropping the #HttpOnly_ prefix so the
        # affected lines are parsed as regular cookie lines.
        cf = io.StringIO()
        with open(filename) as f:
            for line in f:
                if line.startswith(self._HTTPONLY_PREFIX):
                    line = line[len(self._HTTPONLY_PREFIX):]
                cf.write(compat_str(line))
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
-												[utils] Add cookie processor for cookie correction (Closes #6769)

											
										
										
											9 years ago
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also handles HTTPS requests and responses.

    The Set-Cookie escaping workaround described in ``http_response`` is
    currently disabled, so responses are passed straight to the base class.
    """

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
-												[utils] Add unified_timestamp

											
										
										
											9 years ago
def extract_timezone(date_str):
    """Split a trailing UTC-offset designator off *date_str*.

    A suffix is recognised only after at least eight leading characters and
    only at the very end of the string: either ``Z`` or an optional space
    followed by ``+``/``-``, two hour digits, an optional colon and two
    minute digits.

    Returns a ``(timezone, date_str)`` tuple where *timezone* is a
    ``datetime.timedelta`` (zero for ``Z`` or when nothing matched) and
    *date_str* is the input with the recognised suffix removed.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not m:
        return datetime.timedelta(), date_str

    # The designator is anchored at the end, so slice it off by length
    date_str = date_str[:-len(m.group('tz'))]
    if not m.group('sign'):
        # 'Z' designator: UTC
        return datetime.timedelta(), date_str

    sign = 1 if m.group('sign') == '+' else -1
    timezone = datetime.timedelta(
        hours=sign * int(m.group('hours')),
        minutes=sign * int(m.group('minutes')))
    return timezone, date_str
-												[camdemy] Simplify and make more robust (#4938)

Do not throw errors if view count or upload date extraction fails.
Dispose of re.MULTILINE, which had absolutely no effect without any ^ or $ in sight.
Follow PEP8 naming conventions.

											
										
										
											10 years ago
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    *date_str* is expected as ``YYYY-MM-DD<delimiter>HH:MM:SS``; fractional
    seconds are discarded. *timezone* is a ``datetime.timedelta`` UTC offset
    that is subtracted from the parsed time; when it is None the offset is
    taken from the string itself via ``extract_timezone``.

    Returns None if *date_str* is None or does not match the format.
    """
    if date_str is None:
        return None

    # Drop fractional seconds (and any other ".digits" run)
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        dt = datetime.datetime.strptime(date_str, date_format) - timezone
    except ValueError:
        # Unparseable dates are reported as None rather than raised
        return None
    return calendar.timegm(dt.timetuple())
-												[instagram] Fix info_dict key name

											
										
										
											11 years ago
-												[utils] Add unified_timestamp

											
										
										
											9 years ago
def date_formats(day_first=True):
    """Return DATE_FORMATS_DAY_FIRST if *day_first* is true, else DATE_FORMATS_MONTH_FIRST."""
    return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
-												[cinchcast] Add new extractor (Fixes #4428)

											
										
										
											10 years ago
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    date_str is matched against every format from date_formats(day_first);
    when several formats match, the last matching one wins. If none
    matches, email.utils.parsedate_tz() is tried as a fallback.
    Returns None when date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    # Replace commas
    cleaned = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    # Only the date matters here, so the extracted offset is discarded
    _, cleaned = extract_timezone(cleaned)

    result = None
    for fmt in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if result is None:
        parsed = email.utils.parsedate_tz(cleaned)
        if parsed:
            try:
                result = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if result is None:
        return None
    return compat_str(result)
-												Fix some IEs that didn't return the uploade_date in the YYYYMMDD format

Create a function unified_strdate in utils.py to fix these problems

											
										
										
											12 years ago
-												PEP8 applied

											
										
										
											10 years ago
-												[utils] Add unified_timestamp

											
										
										
											9 years ago
def unified_timestamp(date_str, day_first=True):
    """Parse a free-form date/time string into a UNIX timestamp.

    date_str is normalised (commas, pipes, AM/PM markers and unrecognised
    trailing timezone names are removed, nanoseconds are truncated) and then
    matched against every format from date_formats(day_first). If no format
    matches, email.utils.parsedate_tz() is used as a fallback.

    The UTC offset returned by extract_timezone() is subtracted in both code
    paths so the result is expressed in UTC.

    Returns an integer timestamp, or None when date_str is None or cannot be
    parsed.

    NOTE: a "PM" marker always adds 12 hours, so 12-hour clock values such as
    "12:30 PM" are not special-cased.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    # timezone is used below as a datetime.timedelta offset
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass

    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        # The offset was already stripped from date_str by extract_timezone(),
        # so it has to be applied here exactly as in the strptime() branch.
        # Computed by hand (instead of total_seconds()) to stay an integer.
        tz_seconds = timezone.days * 86400 + timezone.seconds
        return calendar.timegm(timetuple) + pm_delta * 3600 - tz_seconds
-												[utils] Add unified_timestamp

											
										
										
											9 years ago
-												[utils] Remove stray u'

											
										
										
											10 years ago
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL.

    Only the part of the URL before the first '?' is inspected, so dots that
    occur solely in the query string never produce an extension.

    Returns the text after the last '.' when it is purely alphanumeric, or
    that text with trailing slashes removed when it is one of
    KNOWN_EXTENSIONS. Returns default_ext when url is None or no extension
    can be determined.
    """
    if url is None:
        return default_ext

    # Drop the query string before looking for the extension
    path = url.partition('?')[0]
    if '.' not in path:
        # Without this check rpartition() would hand back the whole path
        # and e.g. 'video?file=a.b' would yield 'video' as extension
        return default_ext

    guess = path.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = guess.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
-												[3sat] Add support (Fixes #1001)

											
										
										
											12 years ago
-												PEP8 applied

											
										
										
											10 years ago
-												Add a post processor for embedding subtitles in mp4 videos (closes #1052)

											
										
										
											12 years ago
def subtitles_filename(filename, sub_lang, sub_format):
    """Build the subtitle file name that belongs to a media file name.

    Everything after the last '.' of filename is replaced by
    '<sub_lang>.<sub_format>'; a filename without any '.' simply gets the
    suffix appended.
    """
    stem = filename.rsplit('.', 1)[0]
    return '.'.join((stem, sub_lang, sub_format))
-												Add a post processor for embedding subtitles in mp4 videos (closes #1052)

											
										
										
											12 years ago
-												PEP8 applied

											
										
										
											10 years ago
-												Allow to select videos to download by their upload dates (related #137)

Only absolute dates.

											
										
										
											12 years ago
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    The keywords 'now', 'today' and 'yesterday' are accepted on their own.
    Months and years are approximated as 30 and 365 days respectively.
    Raises ValueError if the string matches none of the accepted forms."""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    relative = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if relative is None:
        # Not a relative expression: must be an absolute YYYYMMDD date
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount

    unit = relative.group('unit')
    # A bad approximation?
    if unit == 'month':
        delta = datetime.timedelta(days=amount * 30)
    elif unit == 'year':
        delta = datetime.timedelta(days=amount * 365)
    elif unit == 'week':
        delta = datetime.timedelta(weeks=amount)
    else:
        delta = datetime.timedelta(days=amount)
    return today + delta
-												PEP8 applied

											
										
										
											10 years ago
-												Added '--xattrs' option which writes metadata to the file's extended attributes using a youtube-dl postprocessor.
Works on Linux, OSX, and Windows.

											
										
										
											11 years ago
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that are not exactly eight digits are returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
-												PEP8 applied

											
										
										
											10 years ago
-												Allow to select videos to download by their upload dates (related #137)

Only absolute dates.

											
										
										
											12 years ago
class DateRange(object):
    """Represents a time interval between two dates (both ends inclusive)"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by
        date_from_str; None leaves that side of the range unbounded.

        Raises ValueError if start lies after end."""
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range

        date may be a datetime.date or a string accepted by date_from_str."""
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        bounds = (self.start.isoformat(), self.end.isoformat())
        return '%s - %s' % bounds
-												Fix platform name in Python 2 with --verbose (Closes #1228)

											
										
										
											11 years ago
 								def platform_name():
 								    """ Returns the platform name as a compat_str """
 								    res = platform.platform()
 								    if isinstance(res, bytes):
 								        res = res.decode(preferredencoding())
 								    assert isinstance(res, compat_str)
 								    return res
-												Merge remote-tracking branch 'rzhxeo/youporn-hd'

Conflicts:
	youtube_dl/utils.py

											
										
										
											11 years ago
-												[utils] Completely rewrite Windows output (Fixes #2672)

											
										
										
											11 years ago
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    s is written with WriteConsoleW when out is the standard output or
    standard error stream and that stream is attached to a console.
    Raises OSError if WriteConsoleW reports a failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor of each standard stream -> nStdHandle for GetStdHandle
    WIN_OUTPUT_IDS = {
        sys.stdout.fileno(): -11,  # STD_OUTPUT_HANDLE
        sys.stderr.fileno(): -12,  # STD_ERROR_HANDLE
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False

    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A console is a character device for which GetConsoleMode succeeds
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the BMP, or len(s) if none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters and stop before any non-BMP character
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written on its own as two UTF-16 code units
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
-												Use --encoding when outputting

											
										
										
											11 years ago
def write_string(s, out=None, encoding=None):
    """Write the text s to out (sys.stderr by default) and flush it.

    On Windows, when no explicit encoding is requested and out has a
    fileno, _windows_write_string() gets the first chance to write the
    text. Otherwise s is encoded (unencodable characters are ignored) for
    binary streams and for streams exposing a .buffer, and written as-is
    to any other stream.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    try_console = sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno')
    if try_console and _windows_write_string(s, out):
        return

    # Python 2 lies about mode of sys.stderr
    needs_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if needs_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        codec = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(codec, 'ignore'))
    else:
        out.write(s)
    out.flush()
-												Allow changes to run under Python 3

											
										
										
											11 years ago
def bytes_to_intlist(bs):
    """Convert a byte string into a list of integer byte values.

    An empty or otherwise falsy input yields an empty list.
    """
    if not bs:
        return []
    # Indexing already yields ints on Python 3; otherwise each item is a
    # one-character string that has to go through ord()
    if not isinstance(bs[0], int):
        return [ord(ch) for ch in bs]
    return list(bs)
-												Merge remote-tracking branch 'rzhxeo/youporn-hd'

Conflicts:
	youtube_dl/utils.py

											
										
										
											11 years ago
-												Add intlist_to_bytes to utils.py
											
										
										
											11 years ago
def intlist_to_bytes(xs):
    """Pack a sequence of integers into a byte string, one unsigned byte each.

    An empty or otherwise falsy input yields b''.
    """
    if xs:
        # 'B' is the struct code for an unsigned char, repeated len(xs) times
        return compat_struct_pack('%dB' % len(xs), *xs)
    return b''
-												[youtube] Simplify cache_dir code (#1529)

											
										
										
											11 years ago
-												Add basic --download-archive option

Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time.
When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped.

											
										
										
											11 years ago
# Cross-platform file locking
#
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform: LockFileEx/UnlockFileEx on Windows, fcntl.flock() where the fcntl
# module is available, and stubs raising IOError everywhere else.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # ctypes layout of the structure passed as the last argument of
        # LockFileEx/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high parts of the byte count passed to (Un)LockFileEx; the names
    # suggest they are meant to cover the whole file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the open file f via LockFileEx.

        The flag 0x2 is passed when exclusive is true, 0x0 otherwise.
        Raises OSError if LockFileEx fails."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object so that _unlock_file can reuse it
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file on f.

        Raises OSError if UnlockFileEx fails."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            """Lock f with flock(): LOCK_EX if exclusive, else LOCK_SH."""
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            """Release the flock() lock held on f."""
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            """Always raises IOError: locking is unavailable here."""
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            """Always raises IOError: locking is unavailable here."""
            raise IOError(UNSUPPORTED_MSG)
-												Add basic --download-archive option

Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time.
When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped.

											
										
										
											11 years ago
 								class locked_file(object):
 								    def __init__(self, filename, mode, encoding=None):
 								        assert mode in ['r', 'a', 'w']
 								        self.f = io.open(filename, mode, encoding=encoding)
 								        self.mode = mode
 								    def __enter__(self):
 								        exclusive = self.mode != 'r'
 								        try:
 								            _lock_file(self.f, exclusive)
 								        except IOError:
 								            self.f.close()
 								            raise
 								        return self
 								    def __exit__(self, etype, value, traceback):
 								        try:
 								            _unlock_file(self.f)
 								        finally:
 								            self.f.close()
 								    def __iter__(self):
 								        return iter(self.f)
 								    def write(self, *args):
 								        return self.f.write(*args)
 								    def read(self, *args):
 								        return self.f.read(*args)
-												FFmpegPostProcessor: print the command line used if the --verbose option is given

											
										
										
											11 years ago
-												[core] Decode environment variables with filesystem encoding (Fixes #3854, Fixes #3217, Fixes #2918)

Introduces compat versions of os.getenv and os.path.expanduser

											
										
										
											10 years ago
def get_filesystem_encoding():
    """Return the file system encoding, falling back to 'utf-8'.

    The fallback is used when sys.getfilesystemencoding() returns None.
    """
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
-												FFmpegPostProcessor: print the command line used if the --verbose option is given

											
										
										
											11 years ago
+								def shell_quote(args):
-												utils.shell_quote: Convert the args to unicode strings

The youtube test video failed with `UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 34: ordinal not in range(128)`, the problem was with the filenames being encoded.

											
										
										
											11 years ago
+								    quoted_args = []
-												[core] Decode environment variables with filesystem encoding (Fixes #3854, Fixes #3217, Fixes #2918)

Introduces compat versions of os.getenv and os.path.expanduser

											
										
										
											10 years ago
+								    encoding = get_filesystem_encoding()
-												utils.shell_quote: Convert the args to unicode strings

The youtube test video failed with `UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 34: ordinal not in range(128)`, the problem was with the filenames being encoded.

											
										
										
											11 years ago
+								    for a in args:
 								        if isinstance(a, bytes):
 								            # We may get a filename encoded with 'encodeFilename'
 								            a = a.decode(encoding)
-												[utils] Use compat_shlex_quote in shell_quote

											
										
										
											8 years ago
+								        quoted_args.append(compat_shlex_quote(a))
-												[utils] Remove stray u'

											
										
										
											10 years ago
+								    return ' '.join(quoted_args)
-												[generic] Support embedded vimeo videos (#1602)

											
										
										
											11 years ago
 								def smuggle_url(url, data):
 								    """ Pass additional data in a URL for internal use.

 								    The data is JSON-encoded and appended to the URL as a
 								    '#__youtubedl_smuggle=...' fragment. If the URL already carries
 								    smuggled data, it is merged in; the already-smuggled values win on
 								    key conflicts. The ``data`` dict passed by the caller is not modified.
 								    """
 								    url, idata = unsmuggle_url(url, {})
 								    # Merge into a copy so that the caller's dict is left untouched
 								    merged = dict(data)
 								    merged.update(idata)
 								    sdata = compat_urllib_parse_urlencode(
 								        {'__youtubedl_smuggle': json.dumps(merged)})
 								    return url + '#' + sdata
-												[generic] Support embedded vimeo videos (#1602)

											
										
										
											11 years ago
-												Use original Referer URL in Brightcove requests (Fixes #2110)

											
										
										
											11 years ago
+								def unsmuggle_url(smug_url, default=None):
-												Fix PEP8 issue E713

											
										
										
											10 years ago
+								    if '#__youtubedl_smuggle' not in smug_url:
-												Use original Referer URL in Brightcove requests (Fixes #2110)

											
										
										
											11 years ago
+								        return smug_url, default
-												[utils] Remove stray u'

											
										
										
											10 years ago
+								    url, _, sdata = smug_url.rpartition('#')
 								    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
-												[generic] Support embedded vimeo videos (#1602)

											
										
										
											11 years ago
+								    data = json.loads(jsond)
 								    return url, data
-												[zdf/common] Use API in ZDF extractor.

This also comes with a lot of extra format fields
Fixes #1518

											
										
										
											11 years ago
 								def format_bytes(bytes):
 								    """Format a byte count with a binary (1024-based) unit suffix.

 								    ``bytes`` may be a number, a numeric str, or None. None yields
 								    'N/A'; anything else yields e.g. '1.50KiB'. Values too large for
 								    the biggest unit are expressed in YiB, values below one byte in B.
 								    """
 								    if bytes is None:
 								        return 'N/A'
 								    if isinstance(bytes, str):
 								        bytes = float(bytes)
 								    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
 								    if bytes == 0.0:
 								        exponent = 0
 								    else:
 								        # Clamp to the table: a negative exponent would index from the
 								        # end of the list, a too large one would raise IndexError
 								        exponent = int(math.log(bytes, 1024.0))
 								        exponent = max(0, min(exponent, len(suffixes) - 1))
 								    converted = float(bytes) / float(1024 ** exponent)
 								    return '%.2f%s' % (converted, suffixes[exponent])
-												[dailymotion] Extract view count (#1895)

											
										
										
											11 years ago
-												Improve --bidi-workaround support

											
										
										
											11 years ago
-												[bbc] Generalize unit table lookup and add parse_count

											
										
										
											9 years ago
+								def lookup_unit_table(unit_table, s):
 								    units_re = '|'.join(re.escape(u) for u in unit_table)
 								    m = re.match(
-												[utils] lookup_unit_table: Match word boundary instead of end of string

											
										
										
											9 years ago
+								        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
-												[bbc] Generalize unit table lookup and add parse_count

											
										
										
											9 years ago
+								    if not m:
 								        return None
 								    num_str = m.group('num').replace(',', '.')
 								    mult = unit_table[m.group('unit')]
 								    return int(float(num_str) * mult)
-												[xminus] Simplify and extend (#4302)

											
										
										
											10 years ago
+								def parse_filesize(s):
 								    if s is None:
 								        return None
-												Fix typos

Closes #8200.

											
										
										
											9 years ago
+								    # The lower-case forms are of course incorrect and unofficial,
-												[xminus] Simplify and extend (#4302)

											
										
										
											10 years ago
+								    # but we support those too
 								    _UNIT_TABLE = {
 								        'B': 1,
 								        'b': 1,
-												[utils] Recognize units with full names in parse_filename

Reference: https://en.wikipedia.org/wiki/Template:Quantities_of_bytes

											
										
										
											8 years ago
+								        'bytes': 1,
-												[xminus] Simplify and extend (#4302)

											
										
										
											10 years ago
+								        'KiB': 1024,
 								        'KB': 1000,
 								        'kB': 1024,
 								        'Kb': 1000,
-												[utils] Recognize lowercase units in parse_filesize

											
										
										
											8 years ago
+								        'kb': 1000,
-												[utils] Recognize units with full names in parse_filename

Reference: https://en.wikipedia.org/wiki/Template:Quantities_of_bytes

											
										
										
											8 years ago
+								        'kilobytes': 1000,
 								        'kibibytes': 1024,
-												[xminus] Simplify and extend (#4302)

											
										
										
											10 years ago
+								        'MiB': 1024 ** 2,
 								        'MB': 1000 ** 2,
 								        'mB': 1024 ** 2,
 								        'Mb': 1000 ** 2,
-												[utils] Recognize lowercase units in parse_filesize

											
										
										
											8 years ago
+								        'mb': 1000 ** 2,
-												[utils] Recognize units with full names in parse_filename

Reference: https://en.wikipedia.org/wiki/Template:Quantities_of_bytes

											
										
										
											8 years ago
+								        'megabytes': 1000 ** 2,
 								        'mebibytes': 1024 ** 2,
-												[xminus] Simplify and extend (#4302)

											
										
										
											10 years ago
+								        'GiB': 1024 ** 3,
 								        'GB': 1000 ** 3,
 								        'gB': 1024 ** 3,
 								        'Gb': 1000 ** 3,
-												[utils] Recognize lowercase units in parse_filesize

											
										
										
											8 years ago
+								        'gb': 1000 ** 3,
-												[utils] Recognize units with full names in parse_filename

Reference: https://en.wikipedia.org/wiki/Template:Quantities_of_bytes

											
										
										
											8 years ago
+								        'gigabytes': 1000 ** 3,
 								        'gibibytes': 1024 ** 3,
-												[xminus] Simplify and extend (#4302)

											
										
										
											10 years ago
+								        'TiB': 1024 ** 4,
 								        'TB': 1000 ** 4,
 								        'tB': 1024 ** 4,
 								        'Tb': 1000 ** 4,
-												[utils] Recognize lowercase units in parse_filesize

											
										
										
											8 years ago
+								        'tb': 1000 ** 4,
-												[utils] Recognize units with full names in parse_filename

Reference: https://en.wikipedia.org/wiki/Template:Quantities_of_bytes

											
										
										
											8 years ago
+								        'terabytes': 1000 ** 4,
 								        'tebibytes': 1024 ** 4,
-												[xminus] Simplify and extend (#4302)

											
										
										
											10 years ago
+								        'PiB': 1024 ** 5,
 								        'PB': 1000 ** 5,
 								        'pB': 1024 ** 5,
 								        'Pb': 1000 ** 5,
-												[utils] Recognize lowercase units in parse_filesize

											
										
										
											8 years ago
+								        'pb': 1000 ** 5,
-												[utils] Recognize units with full names in parse_filename

Reference: https://en.wikipedia.org/wiki/Template:Quantities_of_bytes

											
										
										
											8 years ago
+								        'petabytes': 1000 ** 5,
 								        'pebibytes': 1024 ** 5,
-												[xminus] Simplify and extend (#4302)

											
										
										
											10 years ago
+								        'EiB': 1024 ** 6,
 								        'EB': 1000 ** 6,
 								        'eB': 1024 ** 6,
 								        'Eb': 1000 ** 6,
-												[utils] Recognize lowercase units in parse_filesize

											
										
										
											8 years ago
+								        'eb': 1000 ** 6,
-												[utils] Recognize units with full names in parse_filename

Reference: https://en.wikipedia.org/wiki/Template:Quantities_of_bytes

											
										
										
											8 years ago
+								        'exabytes': 1000 ** 6,
 								        'exbibytes': 1024 ** 6,
-												[xminus] Simplify and extend (#4302)

											
										
										
											10 years ago
+								        'ZiB': 1024 ** 7,
 								        'ZB': 1000 ** 7,
 								        'zB': 1024 ** 7,
 								        'Zb': 1000 ** 7,
-												[utils] Recognize lowercase units in parse_filesize

											
										
										
											8 years ago
+								        'zb': 1000 ** 7,
-												[utils] Recognize units with full names in parse_filename

Reference: https://en.wikipedia.org/wiki/Template:Quantities_of_bytes

											
										
										
											8 years ago
+								        'zettabytes': 1000 ** 7,
 								        'zebibytes': 1024 ** 7,
-												[xminus] Simplify and extend (#4302)

											
										
										
											10 years ago
+								        'YiB': 1024 ** 8,
 								        'YB': 1000 ** 8,
 								        'yB': 1024 ** 8,
 								        'Yb': 1000 ** 8,
-												[utils] Recognize lowercase units in parse_filesize

											
										
										
											8 years ago
+								        'yb': 1000 ** 8,
-												[utils] Recognize units with full names in parse_filename

Reference: https://en.wikipedia.org/wiki/Template:Quantities_of_bytes

											
										
										
											8 years ago
+								        'yottabytes': 1000 ** 8,
 								        'yobibytes': 1024 ** 8,
-												[xminus] Simplify and extend (#4302)

											
										
										
											10 years ago
+								    }
-												[bbc] Generalize unit table lookup and add parse_count

											
										
										
											9 years ago
+								    return lookup_unit_table(_UNIT_TABLE, s)
 								def parse_count(s):
 								    """Parse a count such as '1,234' or '1.5M' into an int.

 								    Returns None for None. Strings made only of digits, commas and
 								    dots go through str_to_int; anything else is looked up with the
 								    k/K (thousand) and m/M/kk/KK (million) suffixes.
 								    """
 								    if s is None:
 								        return None
 								    s = s.strip()
 								    if re.match(r'^[\d,.]+$', s):
 								        return str_to_int(s)
 								    thousand = 1000
 								    million = 1000 ** 2
 								    _UNIT_TABLE = {
 								        'k': thousand, 'K': thousand,
 								        'm': million, 'M': million,
 								        'kk': million, 'KK': million,
 								    }
 								    return lookup_unit_table(_UNIT_TABLE, s)
-												[xminus] Simplify and extend (#4302)

											
										
										
											10 years ago
-												[utils] PEP 8

											
										
										
											9 years ago
-												[utils] Add parse_resolution

											
										
										
											7 years ago
+								def parse_resolution(s):
 								    if s is None:
 								        return {}
 								    mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
 								    if mobj:
 								        return {
 								            'width': int(mobj.group('w')),
 								            'height': int(mobj.group('h')),
 								        }
 								    mobj = re.search(r'\b(\d+)[pPiI]\b', s)
 								    if mobj:
 								        return {'height': int(mobj.group(1))}
 								    mobj = re.search(r'\b([48])[kK]\b', s)
 								    if mobj:
 								        return {'height': int(mobj.group(1)) * 540}
 								    return {}
-												[utils] Introduce parse_bitrate

											
										
										
											6 years ago
+								def parse_bitrate(s):
 								    if not isinstance(s, compat_str):
 								        return
 								    mobj = re.search(r'\b(\d+)\s*kbps', s)
 								    if mobj:
 								        return int(mobj.group(1))
-												[utils,franceinter] Add french months' names and fix extraction

Update of the "FranceInter" radio extractor: the webpages' HTML structure
had changed and the extractor no longer worked, so I updated it to
get the mp3 URL and all details.

											
										
										
											8 years ago
+								def month_by_name(name, lang='en'):
-												[ndtv] Add extractor (Fixes #1924)

											
										
										
											11 years ago
+								    """ Return the number of a month by (locale-independently) English name """
-												[utils] Improve month_by_name and add tests

											
										
										
											8 years ago
+								    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
-												[utils,franceinter] Add french months' names and fix extraction

Update of the "FranceInter" radio extractor : webpages HTML structure
had changed, the extractor didn't work. So I updated this extractor to
get the mp3 URL and all details.

											
										
										
											8 years ago
-												[ndtv] Add extractor (Fixes #1924)

											
										
										
											11 years ago
+								    try:
-												[utils] Improve month_by_name and add tests

											
										
										
											8 years ago
+								        return month_names.index(name) + 1
-												[Yam] Add new extractor

											
										
										
											10 years ago
+								    except ValueError:
 								        return None
 								def month_by_abbreviation(abbrev):
 								    """ Return the 1-based number of a month given the first three
 								        letters of its name in ENGLISH_MONTH_NAMES, or None if unknown """
 								    abbreviations = [month[:3] for month in ENGLISH_MONTH_NAMES]
 								    if abbrev not in abbreviations:
 								        return None
 								    return abbreviations.index(abbrev) + 1
-												Use `_download_xml` in more extractors

											
										
										
											11 years ago
-												Correct XML ampersand fixup

											
										
										
											11 years ago
+								def fix_xml_ampersands(xml_str):
-												Use `_download_xml` in more extractors

											
										
										
											11 years ago
+								    """Replace all the '&' by '&amp;' in XML"""
-												Correct XML ampersand fixup

											
										
										
											11 years ago
+								    return re.sub(
 								        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
-												[utils] Remove stray u'

											
										
										
											10 years ago
+								        '&amp;',
-												Correct XML ampersand fixup

											
										
										
											11 years ago
+								        xml_str)
-												Set process title to youtube-dl

This allows killing all youtube-dl processes with killall youtube-dl, and shows up nicer in some programs.

											
										
										
											11 years ago
 								def setproctitle(title):
 								    """Best-effort attempt to set the process name through libc's prctl.

 								    ``title`` must be a compat_str. The function returns silently when
 								    running on Jython, when 'libc.so.6' cannot be loaded, or when the
 								    loaded libc has no prctl.
 								    """
 								    assert isinstance(title, compat_str)
 								    # ctypes in Jython is not complete
 								    # http://bugs.jython.org/issue2148
 								    if sys.platform.startswith('java'):
 								        return
 								    try:
 								        libc = ctypes.cdll.LoadLibrary('libc.so.6')
 								    except OSError:
 								        return
 								    except TypeError:
 								        # LoadLibrary in Windows Python 2.7.13 only expects
 								        # a bytestring, but since unicode_literals turns
 								        # every string into a unicode string, it fails.
 								        return
 								    # Initialising the buffer from the bytes makes ctypes allocate one
 								    # extra byte for the terminating NUL; a buffer of exactly
 								    # len(title_bytes) would hand prctl an unterminated string
 								    buf = ctypes.create_string_buffer(title.encode('utf-8'))
 								    try:
 								        # 15 is PR_SET_NAME
 								        libc.prctl(15, buf, 0, 0, 0)
 								    except AttributeError:
 								        return  # Strange libc, just skip this
-												[blinkx] Add extractor (Fixes #1972)

											
										
										
											11 years ago
 								def remove_start(s, start):
 								    """Return s without the leading ``start``; s (which may be None) is
 								    returned unchanged when it does not begin with ``start``."""
 								    if s is not None and s.startswith(start):
 								        return s[len(start):]
 								    return s
-												Add webpage_url_basename info_dict field (Fixes #1938)

											
										
										
											11 years ago
-												[rtve] Add support for live stream

At the moment, only RTVE-1 seems to work flawlessly.
-2 seems geoblocked right now.
-TDP doesn't seem to be available outside of Spain.

											
										
										
											10 years ago
+								def remove_end(s, end):
-												[utils] Allow None in remove_{start,end}

											
										
										
											9 years ago
+								    return s[:-len(end)] if s is not None and s.endswith(end) else s
-												[rtve] Add support for live stream

At the moment, only RTVE-1 seems to work flawlessly.
-2 seems geoblocked right now.
-TDP doesn't seem to be available outside of Spain.

											
										
										
											10 years ago
-												[utils] Add remove_quotes

											
										
										
											9 years ago
+								def remove_quotes(s):
 								    if s is None or len(s) < 2:
 								        return s
 								    for quote in ('"', "'", ):
 								        if s[0] == quote and s[-1] == quote:
 								            return s[1:-1]
 								    return s
-												Add webpage_url_basename info_dict field (Fixes #1938)

											
										
										
											11 years ago
+								def url_basename(url):
-												Simplify url_basename

Use urlparse from the standard library.

											
										
										
											11 years ago
+								    path = compat_urlparse.urlparse(url).path
-												[utils] Remove stray u'

											
										
										
											10 years ago
+								    return path.strip('/').split('/')[-1]
-												[aparat] Add support (Fixes #2012)

											
										
										
											11 years ago
-												[utils] Introduce base_url

											
										
										
											8 years ago
+								def base_url(url):
 								    return re.match(r'https?://[^?#&]+/', url).group()
-												[utils] Add convenience urljoin

											
										
										
											8 years ago
+								def urljoin(base, path):
-												[utils] Process bytestrings in urljoin (closes #12369)

											
										
										
											8 years ago
+								    if isinstance(path, bytes):
 								        path = path.decode('utf-8')
-												[utils] Add convenience urljoin

											
										
										
											8 years ago
+								    if not isinstance(path, compat_str) or not path:
 								        return None
-												[utils] Fix urljoin for paths with non-http(s) schemes

											
										
										
											6 years ago
+								    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
-												[utils] Add convenience urljoin

											
										
										
											8 years ago
+								        return path
-												[utils] Process bytestrings in urljoin (closes #12369)

											
										
										
											8 years ago
+								    if isinstance(base, bytes):
 								        base = base.decode('utf-8')
 								    if not isinstance(base, compat_str) or not re.match(
 								            r'^(?:https?:)?//', base):
-												[utils] Add convenience urljoin

											
										
										
											8 years ago
+								        return None
 								    return compat_urlparse.urljoin(base, path)
-												[aparat] Add support (Fixes #2012)

											
										
										
											11 years ago
+								class HEADRequest(compat_urllib_request.Request):
 								    def get_method(self):
-												[refactor] Single quotes consistency

											
										
										
											9 years ago
+								        return 'HEAD'
-												[yahoo] Use centralized sorting, and add tbr field

											
										
										
											11 years ago
-												[utils] Add PUTRequest

											
										
										
											9 years ago
+								class PUTRequest(compat_urllib_request.Request):
 								    def get_method(self):
 								        return 'PUT'
-												[snotr] PEP8 and minor fixes (#3296)

											
										
										
											11 years ago
+								def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
-												[bilibili] Add preliminary support (#2174)

The URL http://www.bilibili.tv/video/av636603/index_2.html does not work yet.

											
										
										
											11 years ago
+								    if get_attr:
 								        if v is not None:
 								            v = getattr(v, get_attr, None)
-												[appletrailers] Support height-less videos

											
										
										
											11 years ago
+								    if v == '':
 								        v = None
-												[utils] Do not fail in int_or_none on non-numeric data (Closes #7175)

											
										
										
											9 years ago
+								    if v is None:
 								        return default
 								    try:
 								        return int(v) * invscale // scale
 								    except ValueError:
-												[utils] Return default on fail in int_or_none

											
										
										
											9 years ago
+								        return default
-												[snotr] PEP8 and minor fixes (#3296)

											
										
										
											11 years ago
-												[appletrailers] Support height-less videos

											
										
										
											11 years ago
-												[reverbnation] The 'uploader_id' field must be a string

											
										
										
											11 years ago
+								def str_or_none(v, default=None):
 								    return default if v is None else compat_str(v)
-												[snotr] PEP8 and minor fixes (#3296)

											
										
										
											11 years ago
 								def str_to_int(int_str):
 								    """ A more relaxed version of int_or_none: commas, dots and plus
 								    signs are dropped before the conversion. None is passed through. """
 								    if int_str is None:
 								        return None
 								    digits = re.sub(r'[,\.\+]', '', int_str)
 								    return int(digits)
-												[cnn] Add multiple formats, duration, and upload_date

											
										
										
											11 years ago
-												[snotr] PEP8 and minor fixes (#3296)

											
										
										
											11 years ago
+								def float_or_none(v, scale=1, invscale=1, default=None):
-												[utils] Do not fail in float_or_none on non-numeric data

											
										
										
											9 years ago
+								    if v is None:
 								        return default
 								    try:
 								        return float(v) * invscale / scale
 								    except ValueError:
 								        return default
-												[comedycentral] Duration can now be a float (Fixes #2647)

											
										
										
											11 years ago
-												[utils] Introduce bool_or_none

											
										
										
											7 years ago
+								def bool_or_none(v, default=None):
 								    return v if isinstance(v, bool) else default
-												[utils] Add strip_or_none

											
										
										
											9 years ago
+								def strip_or_none(v):
 								    return None if v is None else v.strip()
-												[utils] Introduce url_or_none

											
										
										
											7 years ago
+								def url_or_none(url):
 								    if not url or not isinstance(url, compat_str):
 								        return None
 								    url = url.strip()
 								    return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
-												[cnn] Add multiple formats, duration, and upload_date

											
										
										
											11 years ago
+								def parse_duration(s):
-												Appease pyflakes8-3

											
										
										
											10 years ago
+								    if not isinstance(s, compat_basestring):
-												[cnn] Add multiple formats, duration, and upload_date

											
										
										
											11 years ago
+								        return None
-												[utils] Improve parse_duration

											
										
										
											10 years ago
+								    s = s.strip()
-												[utils] imporove parse_duration to handle more formats

											
										
										
											9 years ago
+								    days, hours, mins, secs, ms = [None] * 5
-												[utils] Improve parse_duration

											
										
										
											8 years ago
+								    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
-												[utils] imporove parse_duration to handle more formats

											
										
										
											9 years ago
+								    if m:
 								        days, hours, mins, secs, ms = m.groups()
 								    else:
 								        m = re.match(
-												[utils] Add support for zero years and months in parse_duration

											
										
										
											7 years ago
+								            r'''(?ix)(?:P?
 								                (?:
 								                    [0-9]+\s*y(?:ears?)?\s*
 								                )?
 								                (?:
 								                    [0-9]+\s*m(?:onths?)?\s*
 								                )?
 								                (?:
 								                    [0-9]+\s*w(?:eeks?)?\s*
 								                )?
-												[ntvde] Add new extractor (Fixes #4850)

											
										
										
											10 years ago
+								                (?:
-												[utils] imporove parse_duration to handle more formats

											
										
										
											9 years ago
+								                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
-												[ntvde] Add new extractor (Fixes #4850)

											
										
										
											10 years ago
+								                )?
-												[utils] Add support for zero years and months in parse_duration

											
										
										
											7 years ago
+								                T)?
-												[utils] imporove parse_duration to handle more formats

											
										
										
											9 years ago
+								                (?:
 								                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
 								                )?
 								                (?:
 								                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
 								                )?
 								                (?:
 								                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
-												[utils] Improve parse_duration

											
										
										
											8 years ago
+								                )?Z?$''', s)
-												[utils] imporove parse_duration to handle more formats

											
										
										
											9 years ago
+								        if m:
 								            days, hours, mins, secs, ms = m.groups()
 								        else:
-												[utils] Improve parse_duration

											
										
										
											8 years ago
+								            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
-												[utils] imporove parse_duration to handle more formats

											
										
										
											9 years ago
+								            if m:
 								                hours, mins = m.groups()
 								            else:
 								                return None
 								    duration = 0
 								    if secs:
 								        duration += float(secs)
 								    if mins:
 								        duration += float(mins) * 60
 								    if hours:
 								        duration += float(hours) * 60 * 60
 								    if days:
 								        duration += float(days) * 24 * 60 * 60
 								    if ms:
 								        duration += float(ms)
 								    return duration
-												FFmpegMetadataPP; Write temporary file to `something.temp.{ext}` (fixes #2079)

ffmpeg correctly recognizes the formats of extensions like m4a, but it doesn’t work if it’s passed with the `—format` option.

											
										
										
											11 years ago
-												[utils] Improve prepend_extension

Now `ext` is appended to filename if real extension != expected extension.

											
										
										
											10 years ago
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert `ext` in front of the current extension of `filename`.

    filename:          path or file name to transform
    ext:               extension to insert, without a leading dot
    expected_real_ext: if given and the current extension (without its dot)
                       differs from it, `ext` is appended to the untouched
                       `filename` instead of being inserted

    Returns the new file name as a string.
    """
    stem, suffix = os.path.splitext(filename)
    if expected_real_ext and suffix[1:] != expected_real_ext:
        # The real extension is not the one the caller expected, so do not
        # split the name at it; just tack the new extension on the end.
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(stem, ext, suffix)
-												Move check_executable into a helper function

											
										
										
											11 years ago
-												[utils] Add replace_extension

											
										
										
											10 years ago
def replace_extension(filename, ext, expected_real_ext=None):
    """Swap the extension of `filename` for `ext`.

    filename:          path or file name to transform
    ext:               new extension, without a leading dot
    expected_real_ext: if given and the current extension (without its dot)
                       differs from it, nothing is removed and `ext` is
                       appended to the full `filename`

    Returns the new file name as a string.
    """
    stem, suffix = os.path.splitext(filename)
    if expected_real_ext and suffix[1:] != expected_real_ext:
        # Unexpected real extension: keep the whole name as the base.
        base = filename
    else:
        base = stem
    return '{0}.{1}'.format(base, ext)
-												Move check_executable into a helper function

											
										
										
											11 years ago
def check_executable(exe, args=[]):
    """Probe whether the binary `exe` can be launched.

    exe:  name or path of the executable
    args: list of arguments producing a short output (like -version)

    Returns `exe` when the process could be started, False when starting
    it raised OSError. The output of the process is read and discarded.
    """
    command = [exe] + args
    try:
        probe = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Drain both pipes and wait for the process to finish.
        probe.communicate()
    except OSError:
        return False
    return exe
-												Add infrastructure for paged lists

This commit allows to download pages in playlists as needed instead of all at once.
Before this commit,
    youtube-dl http://www.youtube.com/user/ANNnewsCH/videos --playlist-end 2 --skip-download
took quite some time - now it's almost instantaneous.
As an example, the youtube:user extractor has been converted.
Fixes #2175

											
										
										
											11 years ago
-												[ffmpeg] Move version detection to utils

											
										
										
											10 years ago
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """Run `exe` with `args` and extract a version string from its output.

    exe:          name or path of the executable
    args:         list of arguments that make the executable print its version
    version_re:   regex handed to detect_exe_version()
    unrecognized: value handed to detect_exe_version() for output in which
                  no version is found

    Returns the result of detect_exe_version(), or False when starting the
    executable raised OSError.
    """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dl is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        probe = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT)
        # stderr is merged into stdout, so only the first element matters.
        output = probe.communicate()[0]
    except OSError:
        return False
    if isinstance(output, bytes):
        # detect_exe_version() wants text; drop anything that is not ASCII.
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
 								def detect_exe_version(output, version_re=None, unrecognized='present'):
 								    assert isinstance(output, compat_str)
 								    if version_re is None:
 								        version_re = r'version\s+([-0-9._a-zA-Z]+)'
 								    m = re.search(version_re, output)
-												[ffmpeg] Move version detection to utils

											
										
										
											10 years ago
+								    if m:
 								        return m.group(1)
 								    else:
 								        return unrecognized
-												Add infrastructure for paged lists

This commit allows to download pages in playlists as needed instead of all at once.
Before this commit,
    youtube-dl http://www.youtube.com/user/ANNnewsCH/videos --playlist-end 2 --skip-download
took quite some time - now it's almost instantaneous.
As an example, the youtube:user extractor has been converted.
Fixes #2175

											
										
										
											11 years ago
class PagedList(object):
    """Common base of the paged list helpers.

    Subclasses provide getslice(start=0, end=None), returning the entries
    with indices in [start, end) as a list.
    """

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())


class OnDemandPagedList(PagedList):
    """Paged list that fetches pages one by one until a short page shows up.

    pagefunc:  callable taking a zero-based page number and returning an
               iterable with the entries of that page
    pagesize:  number of entries on a full page
    use_cache: keep every fetched page in memory so that it is fetched
               only once for the lifetime of this object
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Return the entries with indices in [start, end) as a list."""
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                # The complete page is cached, before any trimming below.
                self._cache[pagenum] = page_results

            # Offset of the first wanted entry inside this page.
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset just past the last wanted entry, when the slice ends
            # inside this page.
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res


class InAdvancePagedList(PagedList):
    """Paged list whose number of pages is known up front.

    pagefunc:  callable taking a zero-based page number and returning an
               iterable with the entries of that page
    pagecount: total number of pages; pagefunc is never called with a page
               number greater than or equal to this
    pagesize:  number of entries on a full page
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indices in [start, end) as a list."""
        res = []
        start_page = start // self._pagesize
        # Clamp to the known page count: an `end` beyond the last entry
        # must not trigger fetches of pages that do not exist.
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                # Only the first fetched page has leading entries to drop.
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page completes the requested slice.
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
-												[youtube] Correct invalid JSON (Fixes #2353)

											
										
										
											11 years ago
def uppercase_escape(s):
    """Expand every \\UXXXXXXXX escape (eight hex digits) found in `s`.

    All other text, including other backslash sequences, is left as is.
    Returns the resulting string.
    """
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        # The decoder returns (decoded_text, consumed_length).
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
-												[NBC] Enhance embedURL extraction (closes #2549)

											
										
										
											10 years ago
 								def lowercase_escape(s):
 								    unicode_escape = codecs.getdecoder('unicode_escape')
 								    return re.sub(
 								        r'\\u[0-9a-fA-F]{4}',
 								        lambda m: unicode_escape(m.group(0))[0],
 								        s)
-												Fix f4m downloading on Python 2.6

											
										
										
											11 years ago
-												[YoutubeDL/utils] Clarify rationale for URL escaping in comment, move escape routines to utils and add some tests

											
										
										
											10 years ago
 								def escape_rfc3986(s):
 								    """Escape non-ASCII characters as suggested by RFC 3986"""
-												Appease pyflakes8-3

											
										
										
											10 years ago
+								    if sys.version_info < (3, 0) and isinstance(s, compat_str):
-												[YoutubeDL/utils] Clarify rationale for URL escaping in comment, move escape routines to utils and add some tests

											
										
										
											10 years ago
+								        s = s.encode('utf-8')
-												[utils] Modernize

											
										
										
											10 years ago
+								    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
-												[YoutubeDL/utils] Clarify rationale for URL escaping in comment, move escape routines to utils and add some tests

											
										
										
											10 years ago
 								def escape_url(url):
 								    """Escape URL as suggested by RFC 3986"""
 								    url_parsed = compat_urllib_parse_urlparse(url)
 								    return url_parsed._replace(
-												[utils] Encode hostnames before passing to urllib

With IDN (Internationalized Domain Name) and a proxy, non-ascii URLs
are passed down to urllib/urllib2, causing UnicodeEncodeError

Fixes #8890

											
										
										
											9 years ago
+								        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
-												[YoutubeDL/utils] Clarify rationale for URL escaping in comment, move escape routines to utils and add some tests

											
										
										
											10 years ago
+								        path=escape_rfc3986(url_parsed.path),
 								        params=escape_rfc3986(url_parsed.params),
 								        query=escape_rfc3986(url_parsed.query),
 								        fragment=escape_rfc3986(url_parsed.fragment)
 								    ).geturl()
-												Ignore BOM in batch files (Fixes #2450)

											
										
										
											11 years ago
 								def read_batch_urls(batch_fd):
 								    def fixup(url):
 								        if not isinstance(url, compat_str):
 								            url = url.decode('utf-8', 'replace')
-												[utils] Remove stray u'

											
										
										
											10 years ago
+								        BOM_UTF8 = '\xef\xbb\xbf'
-												Ignore BOM in batch files (Fixes #2450)

											
										
										
											11 years ago
+								        if url.startswith(BOM_UTF8):
 								            url = url[len(BOM_UTF8):]
 								        url = url.strip()
 								        if url.startswith(('#', ';', ']')):
 								            return False
 								        return url
 								    with contextlib.closing(batch_fd) as fd:
 								        return [url for url in map(fixup, fd) if url]
-												[facebook] Fix login process

It was broken and didn't work in python 3.
And use `_download_webpage` instead of `compat_urllib_request.urlopen`.

											
										
										
											11 years ago
 								def urlencode_postdata(*args, **kargs):
-												[compat] Add compat_urllib_parse_urlencode and eliminate encode_dict

encode_dict functionality has been improved and moved directly into compat_urllib_parse_urlencode
All occurrences of compat_urllib_parse.urlencode throughout the codebase have been replaced by compat_urllib_parse_urlencode

Closes #8974

											
										
										
											9 years ago
+								    return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
-												[generic] Suppress warning about doctypes in RSS parser

											
										
										
											11 years ago
-												[utils] add update_url_query function

											
										
										
											9 years ago
def update_url_query(url, query):
    """Return `url` with the parameters of `query` merged into its query.

    url:   URL string
    query: mapping of parameters to set; existing parameters of the same
           name are overwritten. When it is empty the URL is returned
           untouched.
    """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    # True: expand sequence values into one key=value pair per element.
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
-												[utils] Add encode_dict

											
										
										
											9 years ago
-												[utils] Add encode_compat_str

											
										
										
											9 years ago
-												[utils] Add update_Request

											
										
										
											9 years ago
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new request from `req` with some parts replaced.

    req:     the request to copy from
    url:     replacement URL; the full URL of `req` is used when falsy
    data:    replacement body; the data of `req` is used when falsy
    headers: headers added on top of a copy of the headers of `req`
    query:   parameters merged into the query string of the URL

    Returns a new request object; a HEAD or PUT request is rebuilt with
    the matching request class. A timeout attribute on `req` is copied.
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    body = data or req.data
    target_url = update_url_query(url or req.get_full_url(), query)

    # Keep the HTTP method by picking the request class that implements it.
    method = req.get_method()
    if method == 'HEAD':
        request_class = HEADRequest
    elif method == 'PUT':
        request_class = PUTRequest
    else:
        request_class = compat_urllib_request.Request

    rebuilt = request_class(
        target_url, data=body, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        rebuilt.timeout = req.timeout
    return rebuilt
-												[utils] Rename try_multipart_encode to _multipart_encode_impl

To state that this is an internal function and people should be careful
when using it outside youtube-dl.

											
										
										
											8 years ago
def _multipart_encode_impl(data, boundary):
    """Encode `data` as multipart/form-data using the given boundary.

    data:     dict whose keys and values are text or bytes
    boundary: boundary as an ASCII-only text string

    Returns a tuple (body_bytes, content_type_header_value).
    Raises ValueError if the boundary occurs inside one of the parts.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    delimiter = boundary.encode('ascii')

    chunks = []
    for name, value in data.items():
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if delimiter in part:
            raise ValueError('Boundary overlaps with data')
        chunks.append(b'--' + delimiter + b'\r\n' + part)

    # Closing delimiter
    chunks.append(b'--' + delimiter + b'--\r\n')
    return b''.join(chunks), content_type
 								def multipart_encode(data, boundary=None):
 								    '''
 								    Encode a dict to RFC 7578-compliant form-data
 								    data:
 								        A dict where keys and values can be either Unicode or bytes-like
 								        objects.
 								    boundary:
 								        If specified a Unicode object, it's used as the boundary. Otherwise
 								        a random boundary is generated.
 								    Reference: https://tools.ietf.org/html/rfc7578
 								    '''
 								    has_specified_boundary = boundary is not None
 								    while True:
 								        if boundary is None:
 								            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
 								        try:
-												[utils] Rename try_multipart_encode to _multipart_encode_impl

To state that this is an internal function and people should be careful
when using it outside youtube-dl.

											
										
										
											8 years ago
+								            out, content_type = _multipart_encode_impl(data, boundary)
-												[cda] Implement birthday verification (closes #12789)

											
										
										
											8 years ago
+								            break
 								        except ValueError:
 								            if has_specified_boundary:
 								                raise
 								            boundary = None
 								    return out, content_type
-												[utils] Add ability to control skipping false values in dict_get

											
										
										
											9 years ago
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable of several keys, in `d`.

    d:                 the dict to read from
    key_or_keys:       a single key, or a list/tuple of keys tried in order
    default:           returned when nothing usable is found
    skip_false_values: with a list/tuple of keys, also skip values that are
                       falsy (None values are always skipped)

    A single key is a plain d.get(key, default) without any skipping.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)

    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
-												[utils] Add try_get

To reduce boilerplate when accessing JSON

											
										
										
											9 years ago
def try_get(src, getter, expected_type=None):
    """Apply getter(s) to `src` and return the first acceptable result.

    src:           object handed to every getter
    getter:        a callable, or a list/tuple of callables tried in order
    expected_type: if given, a result is only accepted when it is an
                   instance of this type (or tuple of types)

    A getter raising AttributeError, KeyError, TypeError or IndexError is
    skipped. Returns None when no getter yields an acceptable result.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for extract in getters:
        try:
            value = extract(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
-												[utils] Add try_get

To reduce boilerplate when accessing JSON

											
										
										
											9 years ago
-												[utils] Introduce merge_dicts

											
										
										
											7 years ago
def merge_dicts(*dicts):
    """Merge dicts, giving precedence to the ones listed first.

    None values are ignored. A key keeps the first value seen for it, with
    one exception: an empty string already stored is replaced by a later
    non-empty string.

    Returns a new dict.
    """
    merged = {}
    for source in dicts:
        for key, value in source.items():
            if value is None:
                continue
            if key in merged:
                current = merged[key]
                fills_blank = (
                    isinstance(value, compat_str) and value
                    and isinstance(current, compat_str) and not current)
                if not fills_blank:
                    # An earlier dict already supplied a usable value.
                    continue
            merged[key] = value
    return merged
-												[utils] Add encode_compat_str

											
										
										
											9 years ago
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return `string` as compat_str, decoding it if necessary.

    string:   a compat_str (returned unchanged) or an object accepted by
              compat_str(string, encoding, errors)
    encoding: encoding used for the conversion; the default is computed
              once, when this function is defined
    errors:   error handling scheme used for the conversion
    """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
-												[utils] Add encode_dict

											
										
										
											9 years ago
-												[pbs] Add support for video ratings

											
										
										
											11 years ago
# US rating labels mapped to the integer age limit that parse_age_limit()
# returns for them.
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
-												[washingtonpost] Add extractor (Fixes #2622)

											
										
										
											11 years ago
-												[utils] Add support TV Parental Guidelines ratings in parse_age_limit

											
										
										
											9 years ago
# TV Parental Guidelines labels mapped to the integer age limit that
# parse_age_limit() returns for them. parse_age_limit() derives its matching
# regex from the part of each key after the 'TV-' prefix, so every key must
# keep that prefix.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
-												[utils] Add parse_age_limit

											
										
										
											10 years ago
def parse_age_limit(s):
    """Turn an age rating into an integer age limit.

    s: an int, or a string holding a number with an optional trailing '+'
       (such as '18+'), a key of US_RATINGS, or a TV Parental Guidelines
       label written as 'TV-14', 'TV_14' or 'TV14'

    Returns the age limit as an int, or None when `s` cannot be
    interpreted (including ints outside 0..21 and non-string objects).
    """
    # Exact type check on purpose: isinstance() would also accept bool.
    if type(s) == int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None

    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))

    if s in US_RATINGS:
        return US_RATINGS[s]

    # Build the alternation from the known labels minus their 'TV-' prefix.
    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_label = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, s)
    if tv_label:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_label.group(1)]

    return None
-												[utils] Add parse_age_limit

											
										
										
											10 years ago
-												[washingtonpost] Add extractor (Fixes #2622)

											
										
										
											11 years ago
def strip_jsonp(code):
    """Strip a JSONP wrapper and return the payload passed to the callback.

    Recognized forms are `func(...)`, `window.func(...)`,
    `func && func(...)` and a bare `(...)`, each optionally followed by a
    semicolon and by `//` comments. Input that does not look like this is
    returned unchanged.
    """
    jsonp_re = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''

    def payload(match):
        return match.group('callback_data')

    return re.sub(jsonp_re, payload, code)
-												[clubic] Add extractor (Fixes #2773)

											
										
										
											11 years ago
-												[patreon] Simplify (#3390)

											
										
										
											11 years ago
def js_to_json(code):
    """Convert JavaScript object/array literal source into JSON text.

    code: JavaScript source as a string

    The conversion is regex based: quoted strings are re-quoted with
    double quotes, comments and trailing commas are removed, bare
    identifiers are quoted, hexadecimal and octal integer literals are
    rewritten in decimal, and decimal integers used as object keys are
    quoted. The literals true, false and null are kept.

    Returns the converted text; it is not validated as JSON.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v == ',':
            # Comments and trailing commas have no JSON counterpart.
            return ""

        if v[0] in ("'", '"'):
            # Quoted string: drop the quotes and translate the escapes that
            # JSON spells differently; everything else is kept verbatim.
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            # Only unquoted tokens can be numeric literals. A string such
            # as "0x40" or "040" has to stay a string.
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # A trailing colon marks an object key, which JSON
                    # requires to be a string.
                    return '"%d":' % i if v.endswith(':') else '%d' % i

        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
-												[patreon] Simplify (#3390)

											
										
										
											11 years ago
-												[clubic] Add extractor (Fixes #2773)

											
										
										
											11 years ago
def qualities(quality_ids):
    """ Build a function ranking a quality id by its position in quality_ids """
    def rank(candidate):
        # Ids missing from quality_ids rank below every listed one
        try:
            position = quality_ids.index(candidate)
        except ValueError:
            position = -1
        return position
    return rank
-												[YoutubeDL] Do not require default output template to be set

											
										
										
											11 years ago
# Default output template: "<title>-<id>.<ext>", written with named
# %-style placeholders (title, id, ext).
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
-												Provide compatibility  check_output for 2.6 (Fixes #2926)

											
										
										
											11 years ago
-												[facebook] Fix support for untitled videos (Fixes #3757)

											
										
										
											10 years ago
 								def limit_length(s, length):
 								    """ Add ellipses to overly long strings """
 								    if s is None:
 								        return None
 								    ELLIPSES = '...'
 								    if len(s) > length:
 								        return s[:length - len(ELLIPSES)] + ELLIPSES
 								    return s
-												[ffmpeg] Warn if ffmpeg/avconv version is too old (Fixes #4026)

											
										
										
											10 years ago
 								def version_tuple(v):
-												[ffmpeg] Improve version check and call it from hls (Fixes #4377)

											
										
										
											10 years ago
+								    return tuple(int(e) for e in re.split(r'[-.]', v))
-												[ffmpeg] Warn if ffmpeg/avconv version is too old (Fixes #4026)

											
										
										
											10 years ago
 								def is_outdated_version(version, limit, assume_new=True):
 								    if not version:
 								        return not assume_new
 								    try:
 								        return version_tuple(version) < version_tuple(limit)
 								    except ValueError:
 								        return not assume_new
-												[utils] Improve update on error message somewhat

We still may want to implement a bulletproof check for the current version, and a better place to add this message so that it works for all kinds of other errors too.

											
										
										
											10 years ago
 								def ytdl_is_updateable():
 								    """ Returns if youtube-dl can be updated with -U """
 								    from zipimport import zipimporter
 								    return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
-												Provide guidance when called with a YouTube ID starting with a dash.

Reported at https://news.ycombinator.com/item?id=8648121

											
										
										
											10 years ago
 								def args_to_str(args):
 								    # Get a short string representation for a subprocess command
-												[compat] Rename shlex_quote and remove unused subprocess_check_output

											
										
										
											9 years ago
+								    return ' '.join(compat_shlex_quote(a) for a in args)
-												[soulanime] Fix under Python 3

											
										
										
											10 years ago
-												Rename error_to_str to error_to_compat_str

											
										
										
											9 years ago
def error_to_compat_str(err):
    """ Return the message of err as a text string on Python 2 and 3 """
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
-												[sandia] Add new extractor (#4974)

											
										
										
											10 years ago
def mimetype2ext(mt):
    """ Map a MIME type to a file extension

    Parameters (everything after the first ';'), surrounding whitespace
    and letter case are ignored.  Full types listed in FULL_MAP take
    precedence; otherwise the subtype (the part after the last '/') is
    looked up in SUBTYPE_MAP and returned unchanged when it is unknown.

    @param mt  MIME type string or None
    @returns   The extension, or None when mt is None
    """
    if mt is None:
        return None

    FULL_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }
    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
    }

    # Normalise before the full-type lookup as well, so that values such as
    # 'audio/mp4; codecs="mp4a.40.2"' or 'Audio/MP4' still resolve to 'm4a'
    # instead of degrading to the bare subtype
    mime = mt.split(';')[0].strip().lower()

    ext = FULL_MAP.get(mime)
    if ext is not None:
        return ext

    subtype = mime.rpartition('/')[2].strip()
    return SUBTYPE_MAP.get(subtype, subtype)
-												[utils] add helper function for parsing codecs

											
										
										
											9 years ago
def parse_codecs(codecs_str):
    """ Split a codecs string into video and audio codec

    See http://tools.ietf.org/html/rfc6381

    @param codecs_str  Comma-separated codec list, e.g. 'avc1.64001f, mp4a.40.2'
    @returns           A dict with 'vcodec' and 'acodec' keys ('none' marks
                       an absent stream), or {} when nothing can be derived
    """
    if not codecs_str:
        return {}

    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = (part.strip() for part in codecs_str.strip().strip(',').split(','))
    split_codecs = [part for part in stripped if part]

    vcodec, acodec = None, None
    for full_codec in split_codecs:
        # Only the part before the first dot identifies the codec family
        codec = full_codec.split('.')[0]
        if codec in VIDEO_CODECS:
            if not vcodec:
                vcodec = full_codec
        elif codec in AUDIO_CODECS:
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }

    # Nothing was recognised: vcodec and acodec are both None at this point,
    # so fall back on the position of the entries in the list
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    if len(split_codecs) == 1:
        # A lone unknown codec is reported as audio-only
        return {
            'vcodec': 'none',
            'acodec': split_codecs[0],
        }
    return {}
-												[soulanime] Fix under Python 3

											
										
										
											10 years ago
def urlhandle_detect_ext(url_handle):
    """ Guess a file extension from the headers of an opened URL

    The filename of a Content-Disposition attachment is tried first;
    otherwise the Content-Type header is mapped with mimetype2ext.
    """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        attachment = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if attachment is not None:
            ext = determine_ext(attachment.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
-												Respect age_limit when listing extractors (Fixes #4653)

											
										
										
											10 years ago
-												[letv] Fix extraction

Using data URIs for passing the decrypted M3U8 manifest, which is
supported by ffmpeg only.

											
										
										
											10 years ago
def encode_data_uri(data, mime_type):
    """ Wrap data (a bytes-like object) in a base64 data: URI of type mime_type """
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
-												Respect age_limit when listing extractors (Fixes #4653)

											
										
										
											10 years ago
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No limit set by the user, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
-												[generic] Add support for BOMs (Fixes #4753)

											
										
										
											10 years ago
 								def is_html(first_bytes):
 								    """ Detect whether a file contains HTML by examining its first bytes. """
 								    BOMS = [
 								        (b'\xef\xbb\xbf', 'utf-8'),
 								        (b'\x00\x00\xfe\xff', 'utf-32-be'),
 								        (b'\xff\xfe\x00\x00', 'utf-32-le'),
 								        (b'\xff\xfe', 'utf-16-le'),
 								        (b'\xfe\xff', 'utf-16-be'),
 								    ]
 								    for bom, enc in BOMS:
 								        if first_bytes.startswith(bom):
 								            s = first_bytes[len(bom):].decode(enc, 'replace')
 								            break
 								    else:
 								        s = first_bytes.decode('utf-8', 'replace')
 								    return re.match(r'^\s*<', s)
-												[downloader] Improve downloader selection

											
										
										
											10 years ago
 								def determine_protocol(info_dict):
 								    protocol = info_dict.get('protocol')
 								    if protocol is not None:
 								        return protocol
 								    url = info_dict['url']
 								    if url.startswith('rtmp'):
 								        return 'rtmp'
 								    elif url.startswith('mms'):
 								        return 'mms'
 								    elif url.startswith('rtsp'):
 								        return 'rtsp'
 								    ext = determine_ext(url)
 								    if ext == 'm3u8':
 								        return 'm3u8'
 								    elif ext == 'f4m':
 								        return 'f4m'
 								    return compat_urllib_parse_urlparse(url).scheme
-												Add --list-thumbnails

											
										
										
											10 years ago
 								def render_table(header_row, data):
 								    """ Render a list of rows, each as a list of values """
 								    table = [header_row] + data
 								    max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
 								    format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
 								    return '\n'.join(format_str % tuple(row) for row in table)
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											10 years ago
 								def _match_one(filter_part, dct):
 								    COMPARISON_OPERATORS = {
 								        '<': operator.lt,
 								        '<=': operator.le,
 								        '>': operator.gt,
 								        '>=': operator.ge,
 								        '=': operator.eq,
 								        '!=': operator.ne,
 								    }
 								    operator_rex = re.compile(r'''(?x)\s*
 								        (?P<key>[a-z_]+)
 								        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 								        (?:
 								            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
-												[utils] Add support for quoted string literals in --match-filter (closes #8050, closes #12142, closes #12144)

											
										
										
											8 years ago
+								            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											10 years ago
+								            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
 								        )
 								        \s*$
 								        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
 								    m = operator_rex.search(filter_part)
 								    if m:
 								        op = COMPARISON_OPERATORS[m.group('op')]
-												[utils] Fix --match-filter for int-like strings (closes #11082)

											
										
										
											8 years ago
+								        actual_value = dct.get(m.group('key'))
-												[utils] Add support for quoted string literals in --match-filter (closes #8050, closes #12142, closes #12144)

											
										
										
											8 years ago
+								        if (m.group('quotedstrval') is not None or
 								            m.group('strval') is not None or
-												[utils] Fix --match-filter for int-like strings (closes #11082)

											
										
										
											8 years ago
+								            # If the original field is a string and matching comparisonvalue is
 								            # a number we should respect the origin of the original field
 								            # and process comparison value as a string (see
-												Start moving to ytdl-org

											
										
										
											6 years ago
+								            # https://github.com/ytdl-org/youtube-dl/issues/11082).
-												[utils] Fix --match-filter for int-like strings (closes #11082)

											
										
										
											8 years ago
+								            actual_value is not None and m.group('intval') is not None and
 								                isinstance(actual_value, compat_str)):
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											10 years ago
+								            if m.group('op') not in ('=', '!='):
 								                raise ValueError(
 								                    'Operator %s does not support string values!' % m.group('op'))
-												[utils] Add support for quoted string literals in --match-filter (closes #8050, closes #12142, closes #12144)

											
										
										
											8 years ago
+								            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
 								            quote = m.group('quote')
 								            if quote is not None:
 								                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											10 years ago
+								        else:
 								            try:
 								                comparison_value = int(m.group('intval'))
 								            except ValueError:
 								                comparison_value = parse_filesize(m.group('intval'))
 								                if comparison_value is None:
 								                    comparison_value = parse_filesize(m.group('intval') + 'B')
 								                if comparison_value is None:
 								                    raise ValueError(
 								                        'Invalid integer value %r in filter part %r' % (
 								                            m.group('intval'), filter_part))
 								        if actual_value is None:
 								            return m.group('none_inclusive')
 								        return op(actual_value, comparison_value)
 								    UNARY_OPERATORS = {
-												[utils] Fix match_str for boolean meta fields

											
										
										
											7 years ago
+								        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
 								        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											10 years ago
+								    }
 								    operator_rex = re.compile(r'''(?x)\s*
 								        (?P<op>%s)\s*(?P<key>[a-z_]+)
 								        \s*$
 								        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
 								    m = operator_rex.search(filter_part)
 								    if m:
 								        op = UNARY_OPERATORS[m.group('op')]
 								        actual_value = dct.get(m.group('key'))
 								        return op(actual_value)
 								    raise ValueError('Invalid filter part %r' % filter_part)
 								def match_str(filter_str, dct):
 								    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
 								    return all(
 								        _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
 								def match_filter_func(filter_str):
 								    def _match_func(info_dict):
 								        if match_str(filter_str, info_dict):
 								            return None
 								        else:
 								            video_title = info_dict.get('title', info_dict.get('id', 'video'))
 								            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
 								    return _match_func
-												[letv] Add --cn-verification-proxy (Closes #5077)

											
										
										
											10 years ago
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											10 years ago
def parse_dfxp_time_expr(time_expr):
    """ Convert a DFXP/TTML time expression to seconds

    Handles offsets ("12", "12.5", "12.5s") and clock times
    ("HH:MM:SS", "HH:MM:SS.fff", "HH:MM:SS:fff").  Returns a number of
    seconds, or None for empty or unrecognised input.
    """
    if not time_expr:
        return

    offset = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset is not None:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock is None:
        return None
    hours, minutes, seconds = clock.groups()
    # The fraction may be separated by ':' instead of '.'
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											10 years ago
-												[utils,common] Merge format_srt_time and _subtitles_timecode

format_srt_time uses a comma as the delimiter between seconds and
milliseconds while _subtitles_timecode uses a dot. All .srt examples I
found on the Internet use a comma, so I use a comma in the merged
version. See http://matroska.org/technical/specs/subtitles/srt.html and
http://devel.aegisub.org/wiki/SubtitleFormats/SRT

											
										
										
											10 years ago
def srt_subtitles_timecode(seconds):
    """ Format a time in seconds as an SRT timecode (HH:MM:SS,mmm)

    @param seconds  Non-negative number of seconds (int or float)
    @returns        The timecode, rounded to the nearest millisecond
    """
    # Round to whole milliseconds first: truncating the fractional part
    # loses a millisecond to float error (5.84 would give ',839')
    total_ms = int(round(seconds * 1000))
    total_secs, millis = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, millis)
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											10 years ago
 								def dfxp2srt(dfxp_data):
-												[utils] Use bytes-like objects in dfxp2srt

This fixes handling of non-UTF8 TTML subtitles

Closes #14191

											
										
										
											7 years ago
+								    '''
 								    @param dfxp_data A bytes-like object containing DFXP data
 								    @returns A unicode object containing converted SRT data
 								    '''
-												[utils] add support for ttml styles

											
										
										
											8 years ago
+								    LEGACY_NAMESPACES = (
-												[utils] Use bytes-like objects in dfxp2srt

This fixes handling of non-UTF8 TTML subtitles

Closes #14191

											
										
										
											7 years ago
+								        (b'http://www.w3.org/ns/ttml', [
 								            b'http://www.w3.org/2004/11/ttaf1',
 								            b'http://www.w3.org/2006/04/ttaf1',
 								            b'http://www.w3.org/2006/10/ttaf1',
-												[utils] add support for ttml styles

											
										
										
											8 years ago
+								        ]),
-												[utils] Use bytes-like objects in dfxp2srt

This fixes handling of non-UTF8 TTML subtitles

Closes #14191

											
										
										
											7 years ago
+								        (b'http://www.w3.org/ns/ttml#styling', [
 								            b'http://www.w3.org/ns/ttml#style',
-												[utils] add support for ttml styles

											
										
										
											8 years ago
+								        ]),
 								    )
 								    SUPPORTED_STYLING = [
 								        'color',
 								        'fontFamily',
 								        'fontSize',
 								        'fontStyle',
 								        'fontWeight',
 								        'textDecoration'
 								    ]
-												[utils] Support ttaf1 namespace in TTML

It's found in bbc.co.uk. See #6038

											
										
										
											10 years ago
+								    _x = functools.partial(xpath_with_ns, ns_map={
-												[utils] fix style id extraction for namespaced id attribute(closes #16551)

											
										
										
											7 years ago
+								        'xml': 'http://www.w3.org/XML/1998/namespace',
-												[utils] Support ttaf1 namespace in TTML

It's found in bbc.co.uk. See #6038

											
										
										
											10 years ago
+								        'ttml': 'http://www.w3.org/ns/ttml',
-												[utils] add support for ttml styles

											
										
										
											8 years ago
+								        'tts': 'http://www.w3.org/ns/ttml#styling',
-												[utils] Support ttaf1 namespace in TTML

It's found in bbc.co.uk. See #6038

											
										
										
											10 years ago
+								    })
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											10 years ago
-												[utils] add support for ttml styles

											
										
										
											8 years ago
+								    styles = {}
 								    default_style = {}
-												[utils] dfxp2srt: make TTMLPElementParser inherit from object

For consistency between python 2 and 3.

											
										
										
											9 years ago
+								    class TTMLPElementParser(object):
-												[utils] add support for ttml styles

											
										
										
											8 years ago
+								        _out = ''
 								        _unclosed_elements = []
 								        _applied_styles = []
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											10 years ago
-												[utils] fix dfxp2srt text extraction(fixes #8055)

											
										
										
											9 years ago
+								        def start(self, tag, attrib):
-												[utils] add support for ttml styles

											
										
										
											8 years ago
+								            if tag in (_x('ttml:br'), 'br'):
 								                self._out += '\n'
 								            else:
 								                unclosed_elements = []
 								                style = {}
 								                element_style_id = attrib.get('style')
 								                if default_style:
 								                    style.update(default_style)
 								                if element_style_id:
 								                    style.update(styles.get(element_style_id, {}))
 								                for prop in SUPPORTED_STYLING:
 								                    prop_val = attrib.get(_x('tts:' + prop))
 								                    if prop_val:
 								                        style[prop] = prop_val
 								                if style:
 								                    font = ''
 								                    for k, v in sorted(style.items()):
 								                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
 								                            continue
 								                        if k == 'color':
 								                            font += ' color="%s"' % v
 								                        elif k == 'fontSize':
 								                            font += ' size="%s"' % v
 								                        elif k == 'fontFamily':
 								                            font += ' face="%s"' % v
 								                        elif k == 'fontWeight' and v == 'bold':
 								                            self._out += '<b>'
 								                            unclosed_elements.append('b')
 								                        elif k == 'fontStyle' and v == 'italic':
 								                            self._out += '<i>'
 								                            unclosed_elements.append('i')
 								                        elif k == 'textDecoration' and v == 'underline':
 								                            self._out += '<u>'
 								                            unclosed_elements.append('u')
 								                    if font:
 								                        self._out += '<font' + font + '>'
 								                        unclosed_elements.append('font')
 								                    applied_style = {}
 								                    if self._applied_styles:
 								                        applied_style.update(self._applied_styles[-1])
 								                    applied_style.update(style)
 								                    self._applied_styles.append(applied_style)
 								                self._unclosed_elements.append(unclosed_elements)
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											10 years ago
-												[utils] fix dfxp2srt text extraction(fixes #8055)

											
										
										
											9 years ago
+								        def end(self, tag):
-												[utils] add support for ttml styles

											
										
										
											8 years ago
+								            if tag not in (_x('ttml:br'), 'br'):
 								                unclosed_elements = self._unclosed_elements.pop()
 								                for element in reversed(unclosed_elements):
 								                    self._out += '</%s>' % element
 								                if unclosed_elements and self._applied_styles:
 								                    self._applied_styles.pop()
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											10 years ago
-												[utils] fix dfxp2srt text extraction(fixes #8055)

											
										
										
											9 years ago
+								        def data(self, data):
-												[utils] add support for ttml styles

											
										
										
											8 years ago
+								            self._out += data
-												[utils] fix dfxp2srt text extraction(fixes #8055)

											
										
										
											9 years ago
 								        def close(self):
 								            # Return the accumulated output without leading/trailing whitespace.
 								            rendered = self._out
 								            return rendered.strip()
-												[utils] fix dfxp2srt text extraction(fixes #8055)

											
										
										
											9 years ago
 								    def parse_node(node):
 								        # Serialise the element and stream it through a fresh
 								        # TTMLPElementParser; the result is whatever that target's
 								        # close() returns.
 								        builder = TTMLPElementParser()
 								        serialized = xml.etree.ElementTree.tostring(node)
 								        parser = xml.etree.ElementTree.XMLParser(target=builder)
 								        parser.feed(serialized)
 								        return parser.close()
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											10 years ago
-												[utils] add support for ttml styles

											
										
										
											8 years ago
+								    for k, v in LEGACY_NAMESPACES:
 								        for ns in v:
 								            dfxp_data = dfxp_data.replace(ns, k)
-												[utils] Use bytes-like objects in dfxp2srt

This fixes handling of non-UTF8 TTML subtitles

Closes #14191

											
										
										
											7 years ago
+								    dfxp = compat_etree_fromstring(dfxp_data)
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											10 years ago
+								    out = []
-												[utils] add support for ttml styles

											
										
										
											8 years ago
+								    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
-												[utils] Support TTML without default namespace

In a strict sense such TTML is invalid, but Yahoo uses it.

											
										
										
											10 years ago
 								    if not paras:
 								        raise ValueError('Invalid dfxp/TTML subtitle')
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											10 years ago
-												[utils] add support for ttml styles

											
										
										
											8 years ago
+								    repeat = False
 								    while True:
 								        for style in dfxp.findall(_x('.//ttml:style')):
-												[utils] fix style id extraction for namespaced id attribute(closes #16551)

											
										
										
											7 years ago
+								            style_id = style.get('id') or style.get(_x('xml:id'))
 								            if not style_id:
 								                continue
-												[utils] add support for ttml styles

											
										
										
											8 years ago
+								            parent_style_id = style.get('style')
 								            if parent_style_id:
 								                if parent_style_id not in styles:
 								                    repeat = True
 								                    continue
 								                styles[style_id] = styles[parent_style_id].copy()
 								            for prop in SUPPORTED_STYLING:
 								                prop_val = style.get(_x('tts:' + prop))
 								                if prop_val:
 								                    styles.setdefault(style_id, {})[prop] = prop_val
 								        if repeat:
 								            repeat = False
 								        else:
 								            break
 								    for p in ('body', 'div'):
 								        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
 								        if ele is None:
 								            continue
 								        style = styles.get(ele.get('style'))
 								        if not style:
 								            continue
 								        default_style.update(style)
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											10 years ago
+								    for para, index in zip(paras, itertools.count(1)):
-												[utils] Fix TTML conversion

Tolerate invalid timestamps (closes #7909)

											
										
										
											9 years ago
+								        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
-												[utils] Support 'dur' field in TTML

											
										
										
											10 years ago
+								        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
-												[utils] Fix TTML conversion

Tolerate invalid timestamps (closes #7909)

											
										
										
											9 years ago
+								        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
 								        if begin_time is None:
 								            continue
-												[utils] Support 'dur' field in TTML

											
										
										
											10 years ago
+								        if not end_time:
-												[utils] Fix TTML conversion

Tolerate invalid timestamps (closes #7909)

											
										
										
											9 years ago
+								            if not dur:
 								                continue
 								            end_time = begin_time + dur
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											10 years ago
+								        out.append('%d\n%s --> %s\n%s\n\n' % (
 								            index,
-												[utils,common] Merge format_srt_time and _subtitles_timecode

format_srt_time uses a comma as the delimiter between seconds and
milliseconds while _subtitles_timecode uses a dot. All .srt examples I
found on the Internet uses a comma, so I use a comma in the merged
version. See http://matroska.org/technical/specs/subtitles/srt.html and
http://devel.aegisub.org/wiki/SubtitleFormats/SRT

											
										
										
											10 years ago
+								            srt_subtitles_timecode(begin_time),
 								            srt_subtitles_timecode(end_time),
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											10 years ago
+								            parse_node(para)))
 								    return ''.join(out)
-												[utils] Generalize cli option converters

											
										
										
											9 years ago
+								def cli_option(params, command_option, param):
 								    param = params.get(param)
-												[external/curl] respect more downloader options and display progress

											
										
										
											8 years ago
+								    if param:
 								        param = compat_str(param)
-												[utils] Generalize cli option converters

											
										
										
											9 years ago
+								    return [command_option, param] if param is not None else []
 								def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
 								    """Build the command-line arguments for a boolean option.

 								    Looks up the key ``param`` in ``params``. A missing or None value
 								    yields an empty list. Otherwise the value must be a bool (checked
 								    with an assertion) and is rendered as ``true_value`` or
 								    ``false_value``. With a non-empty ``separator`` the result is the
 								    single argument ``command_option + separator + rendered``; otherwise
 								    it is the pair ``[command_option, rendered]``.
 								    """
 								    flag = params.get(param)
 								    if flag is None:
 								        return []
 								    assert isinstance(flag, bool)
 								    rendered = true_value if flag else false_value
 								    if not separator:
 								        return [command_option, rendered]
 								    return [command_option + separator + rendered]
 								def cli_valueless_option(params, command_option, param, expected_value=True):
 								    """Return ``[command_option]`` when ``params[param]`` equals ``expected_value``.

 								    Any other value, including a missing key, yields an empty list.
 								    """
 								    if params.get(param) == expected_value:
 								        return [command_option]
 								    return []
 								def cli_configuration_args(params, param, default=[]):
 								    """Return the list of extra arguments stored under ``param`` in ``params``.

 								    When the key is missing or None, ``default`` is returned instead.
 								    Otherwise the stored value must be a list (checked with an
 								    assertion) and is returned as is.
 								    """
 								    ex_args = params.get(param)
 								    if ex_args is None:
 								        # The default list object is shared between calls. Return a copy
 								        # so that a caller extending the result in place cannot alter
 								        # what later calls receive.
 								        return list(default) if isinstance(default, list) else default
 								    assert isinstance(ex_args, list)
 								    return ex_args
-												[utils/ffmpeg] Move ISO 639 related codes to utils

											
										
										
											10 years ago
+								class ISO639Utils(object):
 								    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
 								    _lang_map = {
 								        'aa': 'aar',
 								        'ab': 'abk',
 								        'ae': 'ave',
 								        'af': 'afr',
 								        'ak': 'aka',
 								        'am': 'amh',
 								        'an': 'arg',
 								        'ar': 'ara',
 								        'as': 'asm',
 								        'av': 'ava',
 								        'ay': 'aym',
 								        'az': 'aze',
 								        'ba': 'bak',
 								        'be': 'bel',
 								        'bg': 'bul',
 								        'bh': 'bih',
 								        'bi': 'bis',
 								        'bm': 'bam',
 								        'bn': 'ben',
 								        'bo': 'bod',
 								        'br': 'bre',
 								        'bs': 'bos',
 								        'ca': 'cat',
 								        'ce': 'che',
 								        'ch': 'cha',
 								        'co': 'cos',
 								        'cr': 'cre',
 								        'cs': 'ces',
 								        'cu': 'chu',
 								        'cv': 'chv',
 								        'cy': 'cym',
 								        'da': 'dan',
 								        'de': 'deu',
 								        'dv': 'div',
 								        'dz': 'dzo',
 								        'ee': 'ewe',
 								        'el': 'ell',
 								        'en': 'eng',
 								        'eo': 'epo',
 								        'es': 'spa',
 								        'et': 'est',
 								        'eu': 'eus',
 								        'fa': 'fas',
 								        'ff': 'ful',
 								        'fi': 'fin',
 								        'fj': 'fij',
 								        'fo': 'fao',
 								        'fr': 'fra',
 								        'fy': 'fry',
 								        'ga': 'gle',
 								        'gd': 'gla',
 								        'gl': 'glg',
 								        'gn': 'grn',
 								        'gu': 'guj',
 								        'gv': 'glv',
 								        'ha': 'hau',
 								        'he': 'heb',
-												[utils] Add language codes replaced in 1989 revision of ISO 639 to ISO639Utils (closes #18765)

											
										
										
											6 years ago
+								        'iw': 'heb',  # Replaced by he in 1989 revision
-												[utils/ffmpeg] Move ISO 639 related codes to utils

											
										
										
											10 years ago
+								        'hi': 'hin',
 								        'ho': 'hmo',
 								        'hr': 'hrv',
 								        'ht': 'hat',
 								        'hu': 'hun',
 								        'hy': 'hye',
 								        'hz': 'her',
 								        'ia': 'ina',
 								        'id': 'ind',
-												[utils] Add language codes replaced in 1989 revision of ISO 639 to ISO639Utils (closes #18765)

											
										
										
											6 years ago
+								        'in': 'ind',  # Replaced by id in 1989 revision
-												[utils/ffmpeg] Move ISO 639 related codes to utils

											
										
										
											10 years ago
+								        'ie': 'ile',
 								        'ig': 'ibo',
 								        'ii': 'iii',
 								        'ik': 'ipk',
 								        'io': 'ido',
 								        'is': 'isl',
 								        'it': 'ita',
 								        'iu': 'iku',
 								        'ja': 'jpn',
 								        'jv': 'jav',
 								        'ka': 'kat',
 								        'kg': 'kon',
 								        'ki': 'kik',
 								        'kj': 'kua',
 								        'kk': 'kaz',
 								        'kl': 'kal',
 								        'km': 'khm',
 								        'kn': 'kan',
 								        'ko': 'kor',
 								        'kr': 'kau',
 								        'ks': 'kas',
 								        'ku': 'kur',
 								        'kv': 'kom',
 								        'kw': 'cor',
 								        'ky': 'kir',
 								        'la': 'lat',
 								        'lb': 'ltz',
 								        'lg': 'lug',
 								        'li': 'lim',
 								        'ln': 'lin',
 								        'lo': 'lao',
 								        'lt': 'lit',
 								        'lu': 'lub',
 								        'lv': 'lav',
 								        'mg': 'mlg',
 								        'mh': 'mah',
 								        'mi': 'mri',
 								        'mk': 'mkd',
 								        'ml': 'mal',
 								        'mn': 'mon',
 								        'mr': 'mar',
 								        'ms': 'msa',
 								        'mt': 'mlt',
 								        'my': 'mya',
 								        'na': 'nau',
 								        'nb': 'nob',
 								        'nd': 'nde',
 								        'ne': 'nep',
 								        'ng': 'ndo',
 								        'nl': 'nld',
 								        'nn': 'nno',
 								        'no': 'nor',
 								        'nr': 'nbl',
 								        'nv': 'nav',
 								        'ny': 'nya',
 								        'oc': 'oci',
 								        'oj': 'oji',
 								        'om': 'orm',
 								        'or': 'ori',
 								        'os': 'oss',
 								        'pa': 'pan',
 								        'pi': 'pli',
 								        'pl': 'pol',
 								        'ps': 'pus',
 								        'pt': 'por',
 								        'qu': 'que',
 								        'rm': 'roh',
 								        'rn': 'run',
 								        'ro': 'ron',
 								        'ru': 'rus',
 								        'rw': 'kin',
 								        'sa': 'san',
 								        'sc': 'srd',
 								        'sd': 'snd',
 								        'se': 'sme',
 								        'sg': 'sag',
 								        'si': 'sin',
 								        'sk': 'slk',
 								        'sl': 'slv',
 								        'sm': 'smo',
 								        'sn': 'sna',
 								        'so': 'som',
 								        'sq': 'sqi',
 								        'sr': 'srp',
 								        'ss': 'ssw',
 								        'st': 'sot',
 								        'su': 'sun',
 								        'sv': 'swe',
 								        'sw': 'swa',
 								        'ta': 'tam',
 								        'te': 'tel',
 								        'tg': 'tgk',
 								        'th': 'tha',
 								        'ti': 'tir',
 								        'tk': 'tuk',
 								        'tl': 'tgl',
 								        'tn': 'tsn',
 								        'to': 'ton',
 								        'tr': 'tur',
 								        'ts': 'tso',
 								        'tt': 'tat',
 								        'tw': 'twi',
 								        'ty': 'tah',
 								        'ug': 'uig',
 								        'uk': 'ukr',
 								        'ur': 'urd',
 								        'uz': 'uzb',
 								        've': 'ven',
 								        'vi': 'vie',
 								        'vo': 'vol',
 								        'wa': 'wln',
 								        'wo': 'wol',
 								        'xh': 'xho',
 								        'yi': 'yid',
-												[utils] Fix typo

											
										
										
											6 years ago
+								        'ji': 'yid',  # Replaced by yi in 1989 revision
-												[utils/ffmpeg] Move ISO 639 related codes to utils

											
										
										
											10 years ago
+								        'yo': 'yor',
 								        'za': 'zha',
 								        'zh': 'zho',
 								        'zu': 'zul',
 								    }
 								    @classmethod
 								    def short2long(cls, code):
 								        """Convert language code from ISO 639-1 to ISO 639-2/T"""
 								        return cls._lang_map.get(code[:2])
 								    @classmethod
 								    def long2short(cls, code):
 								        """Convert language code from ISO 639-2/T to ISO 639-1"""
 								        for short_name, long_name in cls._lang_map.items():
 								            if long_name == code:
 								                return short_name
-												[utils] Add ISO3166Utils

											
										
										
											10 years ago
+								class ISO3166Utils(object):
 								    # From http://data.okfn.org/data/core/country-list
 								    _country_map = {
 								        'AF': 'Afghanistan',
 								        'AX': 'Åland Islands',
 								        'AL': 'Albania',
 								        'DZ': 'Algeria',
 								        'AS': 'American Samoa',
 								        'AD': 'Andorra',
 								        'AO': 'Angola',
 								        'AI': 'Anguilla',
 								        'AQ': 'Antarctica',
 								        'AG': 'Antigua and Barbuda',
 								        'AR': 'Argentina',
 								        'AM': 'Armenia',
 								        'AW': 'Aruba',
 								        'AU': 'Australia',
 								        'AT': 'Austria',
 								        'AZ': 'Azerbaijan',
 								        'BS': 'Bahamas',
 								        'BH': 'Bahrain',
 								        'BD': 'Bangladesh',
 								        'BB': 'Barbados',
 								        'BY': 'Belarus',
 								        'BE': 'Belgium',
 								        'BZ': 'Belize',
 								        'BJ': 'Benin',
 								        'BM': 'Bermuda',
 								        'BT': 'Bhutan',
 								        'BO': 'Bolivia, Plurinational State of',
 								        'BQ': 'Bonaire, Sint Eustatius and Saba',
 								        'BA': 'Bosnia and Herzegovina',
 								        'BW': 'Botswana',
 								        'BV': 'Bouvet Island',
 								        'BR': 'Brazil',
 								        'IO': 'British Indian Ocean Territory',
 								        'BN': 'Brunei Darussalam',
 								        'BG': 'Bulgaria',
 								        'BF': 'Burkina Faso',
 								        'BI': 'Burundi',
 								        'KH': 'Cambodia',
 								        'CM': 'Cameroon',
 								        'CA': 'Canada',
 								        'CV': 'Cape Verde',
 								        'KY': 'Cayman Islands',
 								        'CF': 'Central African Republic',
 								        'TD': 'Chad',
 								        'CL': 'Chile',
 								        'CN': 'China',
 								        'CX': 'Christmas Island',
 								        'CC': 'Cocos (Keeling) Islands',
 								        'CO': 'Colombia',
 								        'KM': 'Comoros',
 								        'CG': 'Congo',
 								        'CD': 'Congo, the Democratic Republic of the',
 								        'CK': 'Cook Islands',
 								        'CR': 'Costa Rica',
 								        'CI': 'Côte d\'Ivoire',
 								        'HR': 'Croatia',
 								        'CU': 'Cuba',
 								        'CW': 'Curaçao',
 								        'CY': 'Cyprus',
 								        'CZ': 'Czech Republic',
 								        'DK': 'Denmark',
 								        'DJ': 'Djibouti',
 								        'DM': 'Dominica',
 								        'DO': 'Dominican Republic',
 								        'EC': 'Ecuador',
 								        'EG': 'Egypt',
 								        'SV': 'El Salvador',
 								        'GQ': 'Equatorial Guinea',
 								        'ER': 'Eritrea',
 								        'EE': 'Estonia',
 								        'ET': 'Ethiopia',
 								        'FK': 'Falkland Islands (Malvinas)',
 								        'FO': 'Faroe Islands',
 								        'FJ': 'Fiji',
 								        'FI': 'Finland',
 								        'FR': 'France',
 								        'GF': 'French Guiana',
 								        'PF': 'French Polynesia',
 								        'TF': 'French Southern Territories',
 								        'GA': 'Gabon',
 								        'GM': 'Gambia',
 								        'GE': 'Georgia',
 								        'DE': 'Germany',
 								        'GH': 'Ghana',
 								        'GI': 'Gibraltar',
 								        'GR': 'Greece',
 								        'GL': 'Greenland',
 								        'GD': 'Grenada',
 								        'GP': 'Guadeloupe',
 								        'GU': 'Guam',
 								        'GT': 'Guatemala',
 								        'GG': 'Guernsey',
 								        'GN': 'Guinea',
 								        'GW': 'Guinea-Bissau',
 								        'GY': 'Guyana',
 								        'HT': 'Haiti',
 								        'HM': 'Heard Island and McDonald Islands',
 								        'VA': 'Holy See (Vatican City State)',
 								        'HN': 'Honduras',
 								        'HK': 'Hong Kong',
 								        'HU': 'Hungary',
 								        'IS': 'Iceland',
 								        'IN': 'India',
 								        'ID': 'Indonesia',
 								        'IR': 'Iran, Islamic Republic of',
 								        'IQ': 'Iraq',
 								        'IE': 'Ireland',
 								        'IM': 'Isle of Man',
 								        'IL': 'Israel',
 								        'IT': 'Italy',
 								        'JM': 'Jamaica',
 								        'JP': 'Japan',
 								        'JE': 'Jersey',
 								        'JO': 'Jordan',
 								        'KZ': 'Kazakhstan',
 								        'KE': 'Kenya',
 								        'KI': 'Kiribati',
 								        'KP': 'Korea, Democratic People\'s Republic of',
 								        'KR': 'Korea, Republic of',
 								        'KW': 'Kuwait',
 								        'KG': 'Kyrgyzstan',
 								        'LA': 'Lao People\'s Democratic Republic',
 								        'LV': 'Latvia',
 								        'LB': 'Lebanon',
 								        'LS': 'Lesotho',
 								        'LR': 'Liberia',
 								        'LY': 'Libya',
 								        'LI': 'Liechtenstein',
 								        'LT': 'Lithuania',
 								        'LU': 'Luxembourg',
 								        'MO': 'Macao',
 								        'MK': 'Macedonia, the Former Yugoslav Republic of',
 								        'MG': 'Madagascar',
 								        'MW': 'Malawi',
 								        'MY': 'Malaysia',
 								        'MV': 'Maldives',
 								        'ML': 'Mali',
 								        'MT': 'Malta',
 								        'MH': 'Marshall Islands',
 								        'MQ': 'Martinique',
 								        'MR': 'Mauritania',
 								        'MU': 'Mauritius',
 								        'YT': 'Mayotte',
 								        'MX': 'Mexico',
 								        'FM': 'Micronesia, Federated States of',
 								        'MD': 'Moldova, Republic of',
 								        'MC': 'Monaco',
 								        'MN': 'Mongolia',
 								        'ME': 'Montenegro',
 								        'MS': 'Montserrat',
 								        'MA': 'Morocco',
 								        'MZ': 'Mozambique',
 								        'MM': 'Myanmar',
 								        'NA': 'Namibia',
 								        'NR': 'Nauru',
 								        'NP': 'Nepal',
 								        'NL': 'Netherlands',
 								        'NC': 'New Caledonia',
 								        'NZ': 'New Zealand',
 								        'NI': 'Nicaragua',
 								        'NE': 'Niger',
 								        'NG': 'Nigeria',
 								        'NU': 'Niue',
 								        'NF': 'Norfolk Island',
 								        'MP': 'Northern Mariana Islands',
 								        'NO': 'Norway',
 								        'OM': 'Oman',
 								        'PK': 'Pakistan',
 								        'PW': 'Palau',
 								        'PS': 'Palestine, State of',
 								        'PA': 'Panama',
 								        'PG': 'Papua New Guinea',
 								        'PY': 'Paraguay',
 								        'PE': 'Peru',
 								        'PH': 'Philippines',
 								        'PN': 'Pitcairn',
 								        'PL': 'Poland',
 								        'PT': 'Portugal',
 								        'PR': 'Puerto Rico',
 								        'QA': 'Qatar',
 								        'RE': 'Réunion',
 								        'RO': 'Romania',
 								        'RU': 'Russian Federation',
 								        'RW': 'Rwanda',
 								        'BL': 'Saint Barthélemy',
 								        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
 								        'KN': 'Saint Kitts and Nevis',
 								        'LC': 'Saint Lucia',
 								        'MF': 'Saint Martin (French part)',
 								        'PM': 'Saint Pierre and Miquelon',
 								        'VC': 'Saint Vincent and the Grenadines',
 								        'WS': 'Samoa',
 								        'SM': 'San Marino',
 								        'ST': 'Sao Tome and Principe',
 								        'SA': 'Saudi Arabia',
 								        'SN': 'Senegal',
 								        'RS': 'Serbia',
 								        'SC': 'Seychelles',
 								        'SL': 'Sierra Leone',
 								        'SG': 'Singapore',
 								        'SX': 'Sint Maarten (Dutch part)',
 								        'SK': 'Slovakia',
 								        'SI': 'Slovenia',
 								        'SB': 'Solomon Islands',
 								        'SO': 'Somalia',
 								        'ZA': 'South Africa',
 								        'GS': 'South Georgia and the South Sandwich Islands',
 								        'SS': 'South Sudan',
 								        'ES': 'Spain',
 								        'LK': 'Sri Lanka',
 								        'SD': 'Sudan',
 								        'SR': 'Suriname',
 								        'SJ': 'Svalbard and Jan Mayen',
 								        'SZ': 'Swaziland',
 								        'SE': 'Sweden',
 								        'CH': 'Switzerland',
 								        'SY': 'Syrian Arab Republic',
 								        'TW': 'Taiwan, Province of China',
 								        'TJ': 'Tajikistan',
 								        'TZ': 'Tanzania, United Republic of',
 								        'TH': 'Thailand',
 								        'TL': 'Timor-Leste',
 								        'TG': 'Togo',
 								        'TK': 'Tokelau',
 								        'TO': 'Tonga',
 								        'TT': 'Trinidad and Tobago',
 								        'TN': 'Tunisia',
 								        'TR': 'Turkey',
 								        'TM': 'Turkmenistan',
 								        'TC': 'Turks and Caicos Islands',
 								        'TV': 'Tuvalu',
 								        'UG': 'Uganda',
 								        'UA': 'Ukraine',
 								        'AE': 'United Arab Emirates',
 								        'GB': 'United Kingdom',
 								        'US': 'United States',
 								        'UM': 'United States Minor Outlying Islands',
 								        'UY': 'Uruguay',
 								        'UZ': 'Uzbekistan',
 								        'VU': 'Vanuatu',
 								        'VE': 'Venezuela, Bolivarian Republic of',
 								        'VN': 'Viet Nam',
 								        'VG': 'Virgin Islands, British',
 								        'VI': 'Virgin Islands, U.S.',
 								        'WF': 'Wallis and Futuna',
 								        'EH': 'Western Sahara',
 								        'YE': 'Yemen',
 								        'ZM': 'Zambia',
 								        'ZW': 'Zimbabwe',
 								    }
 								    @classmethod
 								    def short2full(cls, code):
 								        """Convert an ISO 3166-2 country code to the corresponding full name"""
 								        return cls._country_map.get(code.upper())
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											8 years ago
+								class GeoUtils(object):
 								    # Major IPv4 address blocks per country
 								    _country_ip_map = {
 								        'AD': '85.94.160.0/19',
 								        'AE': '94.200.0.0/13',
 								        'AF': '149.54.0.0/17',
 								        'AG': '209.59.64.0/18',
 								        'AI': '204.14.248.0/21',
 								        'AL': '46.99.0.0/16',
 								        'AM': '46.70.0.0/15',
 								        'AO': '105.168.0.0/13',
 								        'AP': '159.117.192.0/21',
 								        'AR': '181.0.0.0/12',
 								        'AS': '202.70.112.0/20',
 								        'AT': '84.112.0.0/13',
 								        'AU': '1.128.0.0/11',
 								        'AW': '181.41.0.0/18',
 								        'AZ': '5.191.0.0/16',
 								        'BA': '31.176.128.0/17',
 								        'BB': '65.48.128.0/17',
 								        'BD': '114.130.0.0/16',
 								        'BE': '57.0.0.0/8',
 								        'BF': '129.45.128.0/17',
 								        'BG': '95.42.0.0/15',
 								        'BH': '37.131.0.0/17',
 								        'BI': '154.117.192.0/18',
 								        'BJ': '137.255.0.0/16',
 								        'BL': '192.131.134.0/24',
 								        'BM': '196.12.64.0/18',
 								        'BN': '156.31.0.0/16',
 								        'BO': '161.56.0.0/16',
 								        'BQ': '161.0.80.0/20',
 								        'BR': '152.240.0.0/12',
 								        'BS': '24.51.64.0/18',
 								        'BT': '119.2.96.0/19',
 								        'BW': '168.167.0.0/16',
 								        'BY': '178.120.0.0/13',
 								        'BZ': '179.42.192.0/18',
 								        'CA': '99.224.0.0/11',
 								        'CD': '41.243.0.0/16',
 								        'CF': '196.32.200.0/21',
 								        'CG': '197.214.128.0/17',
 								        'CH': '85.0.0.0/13',
 								        'CI': '154.232.0.0/14',
 								        'CK': '202.65.32.0/19',
 								        'CL': '152.172.0.0/14',
 								        'CM': '165.210.0.0/15',
 								        'CN': '36.128.0.0/10',
 								        'CO': '181.240.0.0/12',
 								        'CR': '201.192.0.0/12',
 								        'CU': '152.206.0.0/15',
 								        'CV': '165.90.96.0/19',
 								        'CW': '190.88.128.0/17',
 								        'CY': '46.198.0.0/15',
 								        'CZ': '88.100.0.0/14',
 								        'DE': '53.0.0.0/8',
 								        'DJ': '197.241.0.0/17',
 								        'DK': '87.48.0.0/12',
 								        'DM': '192.243.48.0/20',
 								        'DO': '152.166.0.0/15',
 								        'DZ': '41.96.0.0/12',
 								        'EC': '186.68.0.0/15',
 								        'EE': '90.190.0.0/15',
 								        'EG': '156.160.0.0/11',
 								        'ER': '196.200.96.0/20',
 								        'ES': '88.0.0.0/11',
 								        'ET': '196.188.0.0/14',
 								        'EU': '2.16.0.0/13',
 								        'FI': '91.152.0.0/13',
 								        'FJ': '144.120.0.0/16',
 								        'FM': '119.252.112.0/20',
 								        'FO': '88.85.32.0/19',
 								        'FR': '90.0.0.0/9',
 								        'GA': '41.158.0.0/15',
 								        'GB': '25.0.0.0/8',
 								        'GD': '74.122.88.0/21',
 								        'GE': '31.146.0.0/16',
 								        'GF': '161.22.64.0/18',
 								        'GG': '62.68.160.0/19',
 								        'GH': '45.208.0.0/14',
 								        'GI': '85.115.128.0/19',
 								        'GL': '88.83.0.0/19',
 								        'GM': '160.182.0.0/15',
 								        'GN': '197.149.192.0/18',
 								        'GP': '104.250.0.0/19',
 								        'GQ': '105.235.224.0/20',
 								        'GR': '94.64.0.0/13',
 								        'GT': '168.234.0.0/16',
 								        'GU': '168.123.0.0/16',
 								        'GW': '197.214.80.0/20',
 								        'GY': '181.41.64.0/18',
 								        'HK': '113.252.0.0/14',
 								        'HN': '181.210.0.0/16',
 								        'HR': '93.136.0.0/13',
 								        'HT': '148.102.128.0/17',
 								        'HU': '84.0.0.0/14',
 								        'ID': '39.192.0.0/10',
 								        'IE': '87.32.0.0/12',
 								        'IL': '79.176.0.0/13',
 								        'IM': '5.62.80.0/20',
 								        'IN': '117.192.0.0/10',
 								        'IO': '203.83.48.0/21',
 								        'IQ': '37.236.0.0/14',
 								        'IR': '2.176.0.0/12',
 								        'IS': '82.221.0.0/16',
 								        'IT': '79.0.0.0/10',
 								        'JE': '87.244.64.0/18',
 								        'JM': '72.27.0.0/17',
 								        'JO': '176.29.0.0/16',
 								        'JP': '126.0.0.0/8',
 								        'KE': '105.48.0.0/12',
 								        'KG': '158.181.128.0/17',
 								        'KH': '36.37.128.0/17',
 								        'KI': '103.25.140.0/22',
 								        'KM': '197.255.224.0/20',
 								        'KN': '198.32.32.0/19',
 								        'KP': '175.45.176.0/22',
 								        'KR': '175.192.0.0/10',
 								        'KW': '37.36.0.0/14',
 								        'KY': '64.96.0.0/15',
 								        'KZ': '2.72.0.0/13',
 								        'LA': '115.84.64.0/18',
 								        'LB': '178.135.0.0/16',
 								        'LC': '192.147.231.0/24',
 								        'LI': '82.117.0.0/19',
 								        'LK': '112.134.0.0/15',
 								        'LR': '41.86.0.0/19',
 								        'LS': '129.232.0.0/17',
 								        'LT': '78.56.0.0/13',
 								        'LU': '188.42.0.0/16',
 								        'LV': '46.109.0.0/16',
 								        'LY': '41.252.0.0/14',
 								        'MA': '105.128.0.0/11',
 								        'MC': '88.209.64.0/18',
 								        'MD': '37.246.0.0/16',
 								        'ME': '178.175.0.0/17',
 								        'MF': '74.112.232.0/21',
 								        'MG': '154.126.0.0/17',
 								        'MH': '117.103.88.0/21',
 								        'MK': '77.28.0.0/15',
 								        'ML': '154.118.128.0/18',
 								        'MM': '37.111.0.0/17',
 								        'MN': '49.0.128.0/17',
 								        'MO': '60.246.0.0/16',
 								        'MP': '202.88.64.0/20',
 								        'MQ': '109.203.224.0/19',
 								        'MR': '41.188.64.0/18',
 								        'MS': '208.90.112.0/22',
 								        'MT': '46.11.0.0/16',
 								        'MU': '105.16.0.0/12',
 								        'MV': '27.114.128.0/18',
 								        'MW': '105.234.0.0/16',
 								        'MX': '187.192.0.0/11',
 								        'MY': '175.136.0.0/13',
 								        'MZ': '197.218.0.0/15',
 								        'NA': '41.182.0.0/16',
 								        'NC': '101.101.0.0/18',
 								        'NE': '197.214.0.0/18',
 								        'NF': '203.17.240.0/22',
 								        'NG': '105.112.0.0/12',
 								        'NI': '186.76.0.0/15',
 								        'NL': '145.96.0.0/11',
 								        'NO': '84.208.0.0/13',
 								        'NP': '36.252.0.0/15',
 								        'NR': '203.98.224.0/19',
 								        'NU': '49.156.48.0/22',
 								        'NZ': '49.224.0.0/14',
 								        'OM': '5.36.0.0/15',
 								        'PA': '186.72.0.0/15',
 								        'PE': '186.160.0.0/14',
 								        'PF': '123.50.64.0/18',
 								        'PG': '124.240.192.0/19',
 								        'PH': '49.144.0.0/13',
 								        'PK': '39.32.0.0/11',
 								        'PL': '83.0.0.0/11',
 								        'PM': '70.36.0.0/20',
 								        'PR': '66.50.0.0/16',
 								        'PS': '188.161.0.0/16',
 								        'PT': '85.240.0.0/13',
 								        'PW': '202.124.224.0/20',
 								        'PY': '181.120.0.0/14',
 								        'QA': '37.210.0.0/15',
 								        'RE': '139.26.0.0/16',
 								        'RO': '79.112.0.0/13',
 								        'RS': '178.220.0.0/14',
 								        'RU': '5.136.0.0/13',
 								        'RW': '105.178.0.0/15',
 								        'SA': '188.48.0.0/13',
 								        'SB': '202.1.160.0/19',
 								        'SC': '154.192.0.0/11',
 								        'SD': '154.96.0.0/13',
 								        'SE': '78.64.0.0/12',
 								        'SG': '152.56.0.0/14',
 								        'SI': '188.196.0.0/14',
 								        'SK': '78.98.0.0/15',
 								        'SL': '197.215.0.0/17',
 								        'SM': '89.186.32.0/19',
 								        'SN': '41.82.0.0/15',
 								        'SO': '197.220.64.0/19',
 								        'SR': '186.179.128.0/17',
 								        'SS': '105.235.208.0/21',
 								        'ST': '197.159.160.0/19',
 								        'SV': '168.243.0.0/16',
 								        'SX': '190.102.0.0/20',
 								        'SY': '5.0.0.0/16',
 								        'SZ': '41.84.224.0/19',
 								        'TC': '65.255.48.0/20',
 								        'TD': '154.68.128.0/19',
 								        'TG': '196.168.0.0/14',
 								        'TH': '171.96.0.0/13',
 								        'TJ': '85.9.128.0/18',
 								        'TK': '27.96.24.0/21',
 								        'TL': '180.189.160.0/20',
 								        'TM': '95.85.96.0/19',
 								        'TN': '197.0.0.0/11',
 								        'TO': '175.176.144.0/21',
 								        'TR': '78.160.0.0/11',
 								        'TT': '186.44.0.0/15',
 								        'TV': '202.2.96.0/19',
 								        'TW': '120.96.0.0/11',
 								        'TZ': '156.156.0.0/14',
 								        'UA': '93.72.0.0/13',
 								        'UG': '154.224.0.0/13',
 								        'US': '3.0.0.0/8',
 								        'UY': '167.56.0.0/13',
 								        'UZ': '82.215.64.0/18',
 								        'VA': '212.77.0.0/19',
 								        'VC': '24.92.144.0/20',
 								        'VE': '186.88.0.0/13',
 								        'VG': '172.103.64.0/18',
 								        'VI': '146.226.0.0/16',
 								        'VN': '14.160.0.0/11',
 								        'VU': '202.80.32.0/20',
 								        'WF': '117.20.32.0/21',
 								        'WS': '202.4.32.0/19',
 								        'YE': '134.35.0.0/16',
 								        'YT': '41.242.116.0/22',
 								        'ZA': '41.0.0.0/11',
 								        'ZM': '165.56.0.0/13',
 								        'ZW': '41.85.192.0/19',
 								    }
 								    @classmethod
 								    def random_ipv4(cls, code_or_block):
 								        """Return a random IPv4 address, as a text string, from an IP block.
 								        code_or_block is either a two-character country code, looked up
 								        case-insensitively in cls._country_ip_map, or an IP block in CIDR
 								        notation (address/prefix length). None is returned when a country
 								        code has no entry in the map.
 								        """
 								        block = code_or_block
 								        if len(code_or_block) == 2:
 								            block = cls._country_ip_map.get(code_or_block.upper())
 								            if not block:
 								                return None
 								        network, prefix_length = block.split('/')
 								        lowest = compat_struct_unpack('!L', socket.inet_aton(network))[0]
 								        # Setting every host bit gives the highest address of the block
 								        highest = lowest | (0xffffffff >> int(prefix_length))
 								        packed = compat_struct_pack('!L', random.randint(lowest, highest))
 								        return compat_str(socket.inet_ntoa(packed))
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											8 years ago
-												[letv] Add --cn-verification-proxy (Closes #5077)

											
										
										
											10 years ago
+								class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
-												[utils] Correct per-request proxy handling

											
										
										
											10 years ago
+								    def __init__(self, proxies=None):
 								        # Set default handlers
 								        for type in ('http', 'https'):
 								            setattr(self, '%s_open' % type,
 								                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
 								                        meth(r, proxy, type))
-												[utils] Remove return from __init__


											
										
										
											7 years ago
+								        compat_urllib_request.ProxyHandler.__init__(self, proxies)
-												[utils] Correct per-request proxy handling

											
										
										
											10 years ago
-												[letv] Add --cn-verification-proxy (Closes #5077)

											
										
										
											10 years ago
+								    def proxy_open(self, req, proxy, type):
-												[utils] Correct per-request proxy handling

											
										
										
											10 years ago
+								        req_proxy = req.headers.get('Ytdl-request-proxy')
-												[letv] Add --cn-verification-proxy (Closes #5077)

											
										
										
											10 years ago
+								        if req_proxy is not None:
 								            proxy = req_proxy
-												[utils] Correct per-request proxy handling

											
										
										
											10 years ago
+								            del req.headers['Ytdl-request-proxy']
 								        if proxy == '__noproxy__':
 								            return None  # No Proxy
-												[utils] Register SOCKS protocols in urllib and support SOCKS4A

											
										
										
											9 years ago
+								        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
-												[socks] Support SOCKS proxies

											
										
										
											9 years ago
+								            req.add_header('Ytdl-socks-proxy', proxy)
 								            # youtube-dl's http/https handlers do wrapping the socket with socks
 								            return None
-												[letv] Add --cn-verification-proxy (Closes #5077)

											
										
										
											10 years ago
+								        return compat_urllib_request.ProxyHandler.proxy_open(
 								            self, req, proxy, type)
-												[utils] Add OHDave's RSA encryption function

											
										
										
											9 years ago
-												[utils] Add bytes_to_long() and long_to_bytes()

Used in daisuki.net (#4738)

Both are adapted from public domain PyCrypto:
https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py

											
										
										
											8 years ago
+								# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
 								# released into Public Domain
 								# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
 								def long_to_bytes(n, blocksize=0):
 								    """long_to_bytes(n:long, blocksize:int) : string
 								    Convert a long integer to a big-endian byte string.
 								    When blocksize is greater than zero the result is left-padded with
 								    zero bytes until its length is a multiple of blocksize. Values that
 								    are not positive yield a single zero byte (before padding).
 								    """
 								    n = int(n)
 								    words = []
 								    while n > 0:
 								        # least significant 32-bit word first; order is fixed up below
 								        words.append(compat_struct_pack('>I', n & 0xffffffff))
 								        n >>= 32
 								    words.reverse()
 								    # drop the leading zero bytes of the most significant word, keeping
 								    # at least one byte when nothing else is left
 								    s = b''.join(words).lstrip(b'\000') or b'\000'
 								    remainder = len(s) % blocksize if blocksize > 0 else 0
 								    if remainder:
 								        s = b'\000' * (blocksize - remainder) + s
 								    return s
 								def bytes_to_long(s):
 								    """bytes_to_long(string) : long
 								    Convert a big-endian byte string to a long integer.
 								    This is (essentially) the inverse of long_to_bytes().
 								    """
 								    remainder = len(s) % 4
 								    if remainder:
 								        # left-pad with zero bytes so the data splits into 32-bit words
 								        s = b'\000' * (4 - remainder) + s
 								    result = 0
 								    for offset in range(0, len(s), 4):
 								        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
 								        result = (result << 32) + word
 								    return result
-												[utils] Add OHDave's RSA encryption function

											
										
										
											9 years ago
+								def ohdave_rsa_encrypt(data, exponent, modulus):
 								    '''
 								    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
 								    Input:
 								        data: data to encrypt, bytes-like object
 								        exponent, modulus: parameter e and N of RSA algorithm, both integer
 								    Output: hex string of encrypted data
 								    Limitation: supports one block encryption only
 								    '''
 								    payload = int(binascii.hexlify(data[::-1]), 16)
 								    encrypted = pow(payload, exponent, modulus)
 								    return '%x' % encrypted
-												[utils] Move base62 to utils

											
										
										
											9 years ago
-												[utils] Add pkcs1pad

Used in daisuki.net (#4738)

											
										
										
											8 years ago
+								def pkcs1pad(data, length):
 								    """
 								    Padding input data with PKCS#1 scheme
 								    @param {int[]} data        input data
 								    @param {int}   length      target length
 								    @returns {int[]}           padded data
 								    """
 								    if len(data) > length - 11:
 								        raise ValueError('Input data too long for PKCS#1 padding')
 								    pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
 								    return [0, 2] + pseudo_random + [0] + data
-												[utils] Multiple changes to base_n()

1. Renamed to encode_base_n()
2. Allow tables longer than 62 characters
3. Raise ValueError instead of AssertionError for invalid input data
4. Return the first character in the table instead of '0' for number 0
5. Add tests

											
										
										
											9 years ago
+								def encode_base_n(num, n, table=None):
-												[utils] Merge base_n functions

											
										
										
											9 years ago
+								    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
 								    if not table:
 								        table = FULL_TABLE[:n]
-												[utils] Multiple changes to base_n()

1. Renamed to encode_base_n()
2. Allow tables longer than 62 characters
3. Raise ValueError instead of AssertionError for invalid input data
4. Return the first character in the table instead of '0' for number 0
5. Add tests

											
										
										
											9 years ago
+								    if n > len(table):
 								        raise ValueError('base %d exceeds table length %d' % (n, len(table)))
 								    if num == 0:
 								        return table[0]
-												[utils] Move base62 to utils

											
										
										
											9 years ago
+								    ret = ''
 								    while num:
 								        ret = table[num % n] + ret
 								        num = num // n
 								    return ret
-												[utils] Move codes for handling eval() from iqiyi.py

											
										
										
											9 years ago
 								def decode_packed_codes(code):
 								    """Expand packed code: swap every word for its symbol table entry.
 								    The packed payload, the base, the symbol count and the '|'-separated
 								    symbol list are taken from the PACKED_CODES_RE match in code. An
 								    empty symbol means the word stands for itself.
 								    """
 								    match = re.search(PACKED_CODES_RE, code)
 								    packed_source, radix, remaining, words = match.groups()
 								    radix = int(radix)
 								    remaining = int(remaining)
 								    words = words.split('|')
 								    lookup = {}
 								    while remaining:
 								        remaining -= 1
 								        # each symbol is keyed by its index written in the given base
 								        token = encode_base_n(remaining, radix)
 								        lookup[token] = words[remaining] or token
 								    return re.sub(
 								        r'\b(\w+)\b', lambda m: lookup[m.group(0)], packed_source)
-												[downloader/hls] Add support for AES-128 encrypted segments in hlsnative downloader

											
										
										
											9 years ago
 								def parse_m3u8_attributes(attrib):
 								    """Parse a comma-separated KEY=value attribute list into a dict.
 								    Values may be bare or double-quoted; the surrounding quotes are
 								    removed from quoted values. All values are returned as strings.
 								    """
 								    attribute_re = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
 								    parsed = {}
 								    for name, raw in re.findall(attribute_re, attrib):
 								        parsed[name] = raw[1:-1] if raw.startswith('"') else raw
 								    return parsed
-												[utils] Add urshift()

Used in IqiyiIE and LeIE

											
										
										
											9 years ago
 								def urshift(val, n):
 								    """Shift val right by n bits, treating a negative val as unsigned 32-bit."""
 								    if val < 0:
 								        # reinterpret the negative number as its 32-bit unsigned value
 								        val += 0x100000000
 								    return val >> n
-												[utils] Add decode_png for openload (#9706)

											
										
										
											9 years ago
 								# Based on png2str() written by @gdkchan and improved by @yokrysty
-												Start moving to ytdl-org

											
										
										
											6 years ago
+								# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
-												[utils] Add decode_png for openload (#9706)

											
										
										
											9 years ago
+								def decode_png(png_data):
 								    # Reference: https://www.w3.org/TR/PNG/
 								    header = png_data[8:]
 								    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
 								        raise IOError('Not a valid PNG file.')
 								    int_map = {1: '>B', 2: '>H', 4: '>I'}
 								    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
 								    chunks = []
 								    while header:
 								        length = unpack_integer(header[:4])
 								        header = header[4:]
 								        chunk_type = header[:4]
 								        header = header[4:]
 								        chunk_data = header[:length]
 								        header = header[length:]
 								        header = header[4:]  # Skip CRC
 								        chunks.append({
 								            'type': chunk_type,
 								            'length': length,
 								            'data': chunk_data
 								        })
 								    ihdr = chunks[0]['data']
 								    width = unpack_integer(ihdr[:4])
 								    height = unpack_integer(ihdr[4:8])
 								    idat = b''
 								    for chunk in chunks:
 								        if chunk['type'] == b'IDAT':
 								            idat += chunk['data']
 								    if not idat:
 								        raise IOError('Unable to read PNG data.')
 								    decompressed_data = bytearray(zlib.decompress(idat))
 								    stride = width * 3
 								    pixels = []
 								    def _get_pixel(idx):
 								        x = idx % stride
 								        y = idx // stride
 								        return pixels[y][x]
 								    for y in range(height):
 								        basePos = y * (1 + stride)
 								        filter_type = decompressed_data[basePos]
 								        current_row = []
 								        pixels.append(current_row)
 								        for x in range(stride):
 								            color = decompressed_data[1 + basePos + x]
 								            basex = y * stride + x
 								            left = 0
 								            up = 0
 								            if x > 2:
 								                left = _get_pixel(basex - 3)
 								            if y > 0:
 								                up = _get_pixel(basex - stride)
 								            if filter_type == 1:  # Sub
 								                color = (color + left) & 0xff
 								            elif filter_type == 2:  # Up
 								                color = (color + up) & 0xff
 								            elif filter_type == 3:  # Average
 								                color = (color + ((left + up) >> 1)) & 0xff
 								            elif filter_type == 4:  # Paeth
 								                a = left
 								                b = up
 								                c = 0
 								                if x > 2 and y > 0:
 								                    c = _get_pixel(basex - stride - 3)
 								                p = a + b - c
 								                pa = abs(p - a)
 								                pb = abs(p - b)
 								                pc = abs(p - c)
 								                if pa <= pb and pa <= pc:
 								                    color = (color + a) & 0xff
 								                elif pb <= pc:
 								                    color = (color + b) & 0xff
 								                else:
 								                    color = (color + c) & 0xff
 								            current_row.append(color)
 								    return width, height, pixels
-												Move write_xattr to utils.py

There are some other places that use xattr functions. It's better to
move it to a common place so that others can use it.

											
										
										
											8 years ago
 								def write_xattr(path, key, value):
 								    """Set the extended attribute key of the file at path to value.
 								    The python xattr module (either the pyxattr or the xattr flavour) is
 								    used when it can be imported. Otherwise the value is written to an
 								    NTFS Alternate Data Stream on Windows, or passed to the setfattr or
 								    xattr command line tool elsewhere.
 								    Raises XAttrMetadataError when writing fails and
 								    XAttrUnavailableError when no usable implementation is found.
 								    """
 								    try:
 								        # importable as either pyxattr or xattr; their APIs differ
 								        import xattr
 								        if not hasattr(xattr, 'set'):  # xattr
 								            setxattr = xattr.setxattr
 								        else:  # pyxattr
 								            # Unicode arguments are not supported in python-pyxattr until
 								            # version 0.5.0
 								            # See https://github.com/ytdl-org/youtube-dl/issues/5498
 								            pyxattr_required_version = '0.5.0'
 								            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
 								                # TODO: fallback to CLI tools
 								                raise XAttrUnavailableError(
 								                    'python-pyxattr is detected but is too old. '
 								                    'youtube-dl requires %s or above while your version is %s. '
 								                    'Falling back to other xattr implementations' % (
 								                        pyxattr_required_version, xattr.__version__))
 								            setxattr = xattr.set
 								        try:
 								            setxattr(path, key, value)
 								        except EnvironmentError as e:
 								            raise XAttrMetadataError(e.errno, e.strerror)
 								    except ImportError:
 								        if compat_os_name == 'nt':
 								            # Write xattrs to NTFS Alternate Data Streams:
 								            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
 								            assert ':' not in key
 								            assert os.path.exists(path)
 								            ads_fn = path + ':' + key
 								            try:
 								                with open(ads_fn, 'wb') as f:
 								                    f.write(value)
 								            except EnvironmentError as e:
 								                raise XAttrMetadataError(e.errno, e.strerror)
 								            return
 								        user_has_setfattr = check_executable('setfattr', ['--version'])
 								        user_has_xattr = check_executable('xattr', ['-h'])
 								        if not (user_has_setfattr or user_has_xattr):
 								            # On Unix, and can't find pyxattr, setfattr, or xattr.
 								            if sys.platform.startswith('linux'):
 								                raise XAttrUnavailableError(
 								                    "Couldn't find a tool to set the xattrs. "
 								                    "Install either the python 'pyxattr' or 'xattr' "
 								                    "modules, or the GNU 'attr' package "
 								                    "(which contains the 'setfattr' tool).")
 								            raise XAttrUnavailableError(
 								                "Couldn't find a tool to set the xattrs. "
 								                "Install either the python 'xattr' module, "
 								                "or the 'xattr' binary.")
 								        # the command line tools receive the value as text
 								        value = value.decode('utf-8')
 								        if user_has_setfattr:
 								            executable = 'setfattr'
 								            opts = ['-n', key, '-v', value]
 								        else:
 								            executable = 'xattr'
 								            opts = ['-w', key, value]
 								        cmd = [encodeFilename(executable, True)]
 								        cmd.extend(encodeArgument(o) for o in opts)
 								        cmd.append(encodeFilename(path, True))
 								        try:
 								            p = subprocess.Popen(
 								                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
 								        except EnvironmentError as e:
 								            raise XAttrMetadataError(e.errno, e.strerror)
 								        stdout, stderr = p.communicate()
 								        stderr = stderr.decode('utf-8', 'replace')
 								        if p.returncode != 0:
 								            raise XAttrMetadataError(p.returncode, stderr)
-												[cda] Implement birthday verification (closes #12789)

											
										
										
											8 years ago
 								def random_birthday(year_field, month_field, day_field):
 								    """Return a random existing date between 1950-01-01 and 1995-12-31.
 								    The result is a dict that maps the three given field names to the
 								    year, month and day of that date, each converted with str().
 								    """
 								    earliest = datetime.date(1950, 1, 1).toordinal()
 								    latest = datetime.date(1995, 12, 31).toordinal()
 								    # drawing a day ordinal guarantees the date exists on the calendar
 								    birthday = datetime.date.fromordinal(random.randint(earliest, latest))
 								    fields = {}
 								    fields[year_field] = str(birthday.year)
 								    fields[month_field] = str(birthday.month)
 								    fields[day_field] = str(birthday.day)
 								    return fields