From ec2e44fc5752ca15fb7ce2e31994453226507f8b Mon Sep 17 00:00:00 2001
From: chris <6024426+iw0nderhow@users.noreply.github.com>
Date: Fri, 17 Dec 2021 02:23:04 +0100
Subject: [PATCH] [docs] Improve manpage format (#2003)
Closes #1448
Authored by: iw0nderhow, pukkandan
---
README.md | 31 +++++++++++-
devscripts/prepare_manpage.py | 89 +++++++++++++++++++++--------------
2 files changed, 82 insertions(+), 38 deletions(-)
diff --git a/README.md b/README.md
index 2041019c7..da0d9be9f 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,4 @@
+
[![YT-DLP](https://raw.githubusercontent.com/yt-dlp/yt-dlp/master/.github/banner.svg)](#readme)
@@ -15,9 +16,13 @@
[![PyPi Downloads](https://img.shields.io/pypi/dm/yt-dlp?label=PyPi&style=for-the-badge)](https://pypi.org/project/yt-dlp)
+
yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc). The main focus of this project is adding new features and patches while also keeping up to date with the original project
+
+
+
* [NEW FEATURES](#new-features)
* [Differences in default behavior](#differences-in-default-behavior)
* [INSTALLATION](#installation)
@@ -61,6 +66,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
* [Opening an Issue](CONTRIBUTING.md#opening-an-issue)
* [Developer Instructions](CONTRIBUTING.md#developer-instructions)
* [MORE](#more)
+
# NEW FEATURES
@@ -205,6 +211,7 @@ If you [installed with pip](#with-pip), simply re-run the same command that was
If you [installed using Homebrew](#with-homebrew), run `brew upgrade yt-dlp/taps/yt-dlp`
+
## RELEASE FILES
#### Recommended
@@ -231,6 +238,7 @@ File|Description
[yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)|Source tarball. Also contains manpages, completions, etc
[SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums
[SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums
+
## DEPENDENCIES
Python versions 3.6+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly.
@@ -241,6 +249,7 @@ On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https:
-->
While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly recommended
+
* [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging seperate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. Licence [depends on the build](https://www.ffmpeg.org/legal.html)
* [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING)
* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
@@ -281,11 +290,13 @@ You can also fork the project on github and run your fork's [build workflow](.gi
# USAGE AND OPTIONS
+
yt-dlp [OPTIONS] [--] URL [URL...]
`Ctrl+F` is your friend :D
-
+
+
## General Options:
-h, --help Print this help text and exit
--version Print program version and exit
@@ -1000,7 +1011,7 @@ You can configure yt-dlp by placing any supported command line option to a confi
* `~/yt-dlp.conf`
* `~/yt-dlp.conf.txt`
- `%XDG_CONFIG_HOME%` defaults to `~/.config` if undefined. On windows, `%APPDATA%` generally points to (`C:\Users\<user name>\AppData\Roaming`) and `~` points to `%HOME%` if present, `%USERPROFILE%` (generally `C:\Users\<user name>`), or `%HOMEDRIVE%%HOMEPATH%`
+ `%XDG_CONFIG_HOME%` defaults to `~/.config` if undefined. On windows, `%APPDATA%` generally points to `C:\Users\<user name>\AppData\Roaming` and `~` points to `%HOME%` if present, `%USERPROFILE%` (generally `C:\Users\<user name>`), or `%HOMEDRIVE%%HOMEPATH%`
1. **System Configuration**: `/etc/yt-dlp.conf`
For example, with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory:
@@ -1048,7 +1059,9 @@ The default location of the .netrc file is `$HOME` (`~`) in UNIX. On Windows, it
The `-o` option is used to indicate a template for the output file names while `-P` option is used to specify the path each type of file should be saved to.
+
**tl;dr:** [navigate me to examples](#output-template-examples).
+
The simplest usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding file extension like this is _not_ recommended and could break some post-processing).
@@ -1056,11 +1069,17 @@ It may however also contain special sequences that will be replaced when downloa
The field names themselves (the part inside the parenthesis) can also have some special formatting:
1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields
+
1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
+
1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s`
+
1. **Alternatives**: Alternate fields can be specified seperated with a `,`. Eg: `%(release_date>%Y,upload_date>%Y|Unknown)s`
+
1. **Default**: A literal default value can be specified for when the field is empty using a `|` seperator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s`
+
1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `B`, `j`, `l`, `q` can be used for converting to **B**ytes, **j**son (flag `#` for pretty-printing), a comma seperated **l**ist (flag `#` for `\n` newline-seperated) and a string **q**uoted for the terminal (flag `#` to split a list into different arguments), respectively
+
1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. Eg: `%(title)+.100U` is NFKC
To summarize, the general syntax for a field is:
@@ -1207,9 +1226,11 @@ The current default template is `%(title)s [%(id)s].%(ext)s`.
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title.
+
#### Output template and Windows batch files
If you are using an output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
+
#### Output template examples
@@ -1253,7 +1274,9 @@ This is generally equivalent to using `-f bestvideo*+bestaudio/best`. However, i
The general syntax for format selection is `-f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
+
**tl;dr:** [navigate me to examples](#format-selection-examples).
+
The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
@@ -1589,6 +1612,8 @@ The following extractors use this feature:
NOTE: These options may be changed/removed in the future without concern for backward compatibility
+
+
# PLUGINS
@@ -1720,6 +1745,8 @@ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the example above
+
+
# DEPRECATED OPTIONS
These are all the deprecated options and the current alternative to achieve the same effect
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
index 485b39e9f..b4446a368 100644
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -13,12 +13,14 @@ PREFIX = r'''%yt-dlp(1)
# NAME
-youtube\-dl \- download videos from youtube.com or other video platforms
+yt\-dlp \- A youtube-dl fork with additional features and patches
# SYNOPSIS
**yt-dlp** \[OPTIONS\] URL [URL...]
+# DESCRIPTION
+
'''
@@ -33,47 +35,62 @@ def main():
with io.open(README_FILE, encoding='utf-8') as f:
readme = f.read()
- readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
- readme = re.sub(r'\s+yt-dlp \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
- readme = PREFIX + readme
-
+ readme = filter_excluded_sections(readme)
+ readme = move_sections(readme)
readme = filter_options(readme)
with io.open(outfile, 'w', encoding='utf-8') as outf:
- outf.write(readme)
+ outf.write(PREFIX + readme)
+
+
+def filter_excluded_sections(readme):
+ EXCLUDED_SECTION_BEGIN_STRING = re.escape('<!-- MANPAGE: BEGIN EXCLUDED SECTION -->')
+ EXCLUDED_SECTION_END_STRING = re.escape('<!-- MANPAGE: END EXCLUDED SECTION -->')
+ return re.sub(
+ rf'(?s){EXCLUDED_SECTION_BEGIN_STRING}.+?{EXCLUDED_SECTION_END_STRING}\n',
+ '', readme)
+
+
+def move_sections(readme):
+ MOVE_TAG_TEMPLATE = '<!-- MANPAGE: MOVE "%s" SECTION HERE -->'
+ sections = re.findall(rf'(?m)^{re.escape(MOVE_TAG_TEMPLATE) % "(.+)"}$', readme)
+
+ for section_name in sections:
+ move_tag = MOVE_TAG_TEMPLATE % section_name
+ if readme.count(move_tag) > 1:
+ raise Exception(f'There is more than one occurrence of "{move_tag}". This is unexpected')
+
+ sections = re.findall(rf'(?sm)(^# {re.escape(section_name)}.+?)(?=^# )', readme)
+ if len(sections) < 1:
+ raise Exception(f'The section {section_name} does not exist')
+ elif len(sections) > 1:
+ raise Exception(f'There are multiple occurrences of section {section_name}, this is unhandled')
+
+ readme = readme.replace(sections[0], '', 1).replace(move_tag, sections[0], 1)
+ return readme
def filter_options(readme):
- ret = ''
- in_options = False
- for line in readme.split('\n'):
- if line.startswith('# '):
- if line[2:].startswith('OPTIONS'):
- in_options = True
- else:
- in_options = False
-
- if in_options:
- if line.lstrip().startswith('-'):
- split = re.split(r'\s{2,}', line.lstrip())
- # Description string may start with `-` as well. If there is
- # only one piece then it's a description bit not an option.
- if len(split) > 1:
- option, description = split
- split_option = option.split(' ')
-
- if not split_option[-1].startswith('-'): # metavar
- option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]])
-
- # Pandoc's definition_lists. See http://pandoc.org/README.html
- # for more information.
- ret += '\n%s\n: %s\n' % (option, description)
- continue
- ret += line.lstrip() + '\n'
- else:
- ret += line + '\n'
-
- return ret
+ section = re.search(r'(?sm)^# USAGE AND OPTIONS\n.+?(?=^# )', readme).group(0)
+ options = '# OPTIONS\n'
+ for line in section.split('\n')[1:]:
+ if line.lstrip().startswith('-'):
+ split = re.split(r'\s{2,}', line.lstrip())
+ # Description string may start with `-` as well. If there is
+ # only one piece then it's a description bit not an option.
+ if len(split) > 1:
+ option, description = split
+ split_option = option.split(' ')
+
+ if not split_option[-1].startswith('-'): # metavar
+ option = ' '.join(split_option[:-1] + [f'*{split_option[-1]}*'])
+
+ # Pandoc's definition_lists. See http://pandoc.org/README.html
+ options += f'\n{option}\n: {description}\n'
+ continue
+ options += line.lstrip() + '\n'
+
+ return readme.replace(section, options, 1)
if __name__ == '__main__':