From ec2e44fc5752ca15fb7ce2e31994453226507f8b Mon Sep 17 00:00:00 2001 From: chris <6024426+iw0nderhow@users.noreply.github.com> Date: Fri, 17 Dec 2021 02:23:04 +0100 Subject: [PATCH] [docs] Improve manpage format (#2003) Closes #1448 Authored by: iw0nderhow, pukkandan --- README.md | 31 +++++++++++- devscripts/prepare_manpage.py | 89 +++++++++++++++++++++-------------- 2 files changed, 82 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index 2041019c7..da0d9be9f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ +
[![YT-DLP](https://raw.githubusercontent.com/yt-dlp/yt-dlp/master/.github/banner.svg)](#readme) @@ -15,9 +16,13 @@ [![PyPi Downloads](https://img.shields.io/pypi/dm/yt-dlp?label=PyPi&style=for-the-badge)](https://pypi.org/project/yt-dlp)
+ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc). The main focus of this project is adding new features and patches while also keeping up to date with the original project + + + * [NEW FEATURES](#new-features) * [Differences in default behavior](#differences-in-default-behavior) * [INSTALLATION](#installation) @@ -61,6 +66,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * [Opening an Issue](CONTRIBUTING.md#opening-an-issue) * [Developer Instructions](CONTRIBUTING.md#developer-instructions) * [MORE](#more) + # NEW FEATURES @@ -205,6 +211,7 @@ If you [installed with pip](#with-pip), simply re-run the same command that was If you [installed using Homebrew](#with-homebrew), run `brew upgrade yt-dlp/taps/yt-dlp` + ## RELEASE FILES #### Recommended @@ -231,6 +238,7 @@ File|Description [yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)|Source tarball. Also contains manpages, completions, etc [SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums [SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums + ## DEPENDENCIES Python versions 3.6+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly. @@ -241,6 +249,7 @@ On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https: --> While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly recommended + * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging seperate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. 
Licence [depends on the build](https://www.ffmpeg.org/legal.html) * [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) * [**pycryptodomex**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) @@ -281,11 +290,13 @@ You can also fork the project on github and run your fork's [build workflow](.gi # USAGE AND OPTIONS + yt-dlp [OPTIONS] [--] URL [URL...] `Ctrl+F` is your friend :D - + + ## General Options: -h, --help Print this help text and exit --version Print program version and exit @@ -1000,7 +1011,7 @@ You can configure yt-dlp by placing any supported command line option to a confi * `~/yt-dlp.conf` * `~/yt-dlp.conf.txt` - `%XDG_CONFIG_HOME%` defaults to `~/.config` if undefined. On windows, `%APPDATA%` generally points to (`C:\Users\<user name>\AppData\Roaming`) and `~` points to `%HOME%` if present, `%USERPROFILE%` (generally `C:\Users\<user name>`), or `%HOMEDRIVE%%HOMEPATH%` + `%XDG_CONFIG_HOME%` defaults to `~/.config` if undefined. On windows, `%APPDATA%` generally points to `C:\Users\<user name>\AppData\Roaming` and `~` points to `%HOME%` if present, `%USERPROFILE%` (generally `C:\Users\<user name>`), or `%HOMEDRIVE%%HOMEPATH%` 1. **System Configuration**: `/etc/yt-dlp.conf` For example, with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory: @@ -1048,7 +1059,9 @@ The default location of the .netrc file is `$HOME` (`~`) in UNIX. On Windows, it The `-o` option is used to indicate a template for the output file names while `-P` option is used to specify the path each type of file should be saved to. + **tl;dr:** [navigate me to examples](#output-template-examples). 
+ The simplest usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding file extension like this is _not_ recommended and could break some post-processing). @@ -1056,11 +1069,17 @@ It may however also contain special sequences that will be replaced when downloa The field names themselves (the part inside the parenthesis) can also have some special formatting: 1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields + 1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d` + 1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s` + 1. **Alternatives**: Alternate fields can be specified seperated with a `,`. Eg: `%(release_date>%Y,upload_date>%Y|Unknown)s` + 1. **Default**: A literal default value can be specified for when the field is empty using a `|` seperator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s` + 1. 
**More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `B`, `j`, `l`, `q` can be used for converting to **B**ytes, **j**son (flag `#` for pretty-printing), a comma seperated **l**ist (flag `#` for `\n` newline-seperated) and a string **q**uoted for the terminal (flag `#` to split a list into different arguments), respectively + 1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. Eg: `%(title)+.100U` is NFKC To summarize, the general syntax for a field is: @@ -1207,9 +1226,11 @@ The current default template is `%(title)s [%(id)s].%(ext)s`. In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title. + #### Output template and Windows batch files If you are using an output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`. + #### Output template examples @@ -1253,7 +1274,9 @@ This is generally equivalent to using `-f bestvideo*+bestaudio/best`. However, i The general syntax for format selection is `-f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download. + **tl;dr:** [navigate me to examples](#format-selection-examples). 
+ The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific. @@ -1589,6 +1612,8 @@ The following extractors use this feature: NOTE: These options may be changed/removed in the future without concern for backward compatibility + + # PLUGINS @@ -1720,6 +1745,8 @@ with yt_dlp.YoutubeDL(ydl_opts) as ydl: **Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the example above + + # DEPRECATED OPTIONS These are all the deprecated options and the current alternative to achieve the same effect diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index 485b39e9f..b4446a368 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -13,12 +13,14 @@ PREFIX = r'''%yt-dlp(1) # NAME -youtube\-dl \- download videos from youtube.com or other video platforms +yt\-dlp \- A youtube-dl fork with additional features and patches # SYNOPSIS **yt-dlp** \[OPTIONS\] URL [URL...] 
+# DESCRIPTION + ''' @@ -33,47 +35,62 @@ def main(): with io.open(README_FILE, encoding='utf-8') as f: readme = f.read() - readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme) - readme = re.sub(r'\s+yt-dlp \[OPTIONS\] URL \[URL\.\.\.\]', '', readme) - readme = PREFIX + readme - + readme = filter_excluded_sections(readme) + readme = move_sections(readme) readme = filter_options(readme) with io.open(outfile, 'w', encoding='utf-8') as outf: - outf.write(readme) + outf.write(PREFIX + readme) + + +def filter_excluded_sections(readme): + EXCLUDED_SECTION_BEGIN_STRING = re.escape('<!-- MANPAGE: BEGIN EXCLUDED SECTION -->') + EXCLUDED_SECTION_END_STRING = re.escape('<!-- MANPAGE: END EXCLUDED SECTION -->') + return re.sub( + rf'(?s){EXCLUDED_SECTION_BEGIN_STRING}.+?{EXCLUDED_SECTION_END_STRING}\n', + '', readme) + + +def move_sections(readme): + MOVE_TAG_TEMPLATE = '<!-- MANPAGE: MOVE "%s" SECTION HERE -->' + sections = re.findall(rf'(?m)^{re.escape(MOVE_TAG_TEMPLATE) % "(.+)"}$', readme) + + for section_name in sections: + move_tag = MOVE_TAG_TEMPLATE % section_name + if readme.count(move_tag) > 1: + raise Exception(f'There is more than one occurrence of "{move_tag}". This is unexpected') + + sections = re.findall(rf'(?sm)(^# {re.escape(section_name)}.+?)(?=^# )', readme) + if len(sections) < 1: + raise Exception(f'The section {section_name} does not exist') + elif len(sections) > 1: + raise Exception(f'There are multiple occurrences of section {section_name}, this is unhandled') + + readme = readme.replace(sections[0], '', 1).replace(move_tag, sections[0], 1) + return readme def filter_options(readme): - ret = '' - in_options = False - for line in readme.split('\n'): - if line.startswith('# '): - if line[2:].startswith('OPTIONS'): - in_options = True - else: - in_options = False - - if in_options: - if line.lstrip().startswith('-'): - split = re.split(r'\s{2,}', line.lstrip()) - # Description string may start with `-` as well. If there is - # only one piece then it's a description bit not an option. 
- if len(split) > 1: - option, description = split - split_option = option.split(' ') - - if not split_option[-1].startswith('-'): # metavar - option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]]) - - # Pandoc's definition_lists. See http://pandoc.org/README.html - # for more information. - ret += '\n%s\n: %s\n' % (option, description) - continue - ret += line.lstrip() + '\n' - else: - ret += line + '\n' - - return ret + section = re.search(r'(?sm)^# USAGE AND OPTIONS\n.+?(?=^# )', readme).group(0) + options = '# OPTIONS\n' + for line in section.split('\n')[1:]: + if line.lstrip().startswith('-'): + split = re.split(r'\s{2,}', line.lstrip()) + # Description string may start with `-` as well. If there is + # only one piece then it's a description bit not an option. + if len(split) > 1: + option, description = split + split_option = option.split(' ') + + if not split_option[-1].startswith('-'): # metavar + option = ' '.join(split_option[:-1] + [f'*{split_option[-1]}*']) + + # Pandoc's definition_lists. See http://pandoc.org/README.html + options += f'\n{option}\n: {description}\n' + continue + options += line.lstrip() + '\n' + + return readme.replace(section, options, 1) if __name__ == '__main__':