From ec2e44fc5752ca15fb7ce2e31994453226507f8b Mon Sep 17 00:00:00 2001
From: chris <6024426+iw0nderhow@users.noreply.github.com>
Date: Fri, 17 Dec 2021 02:23:04 +0100
Subject: [PATCH] [docs] Improve manpage format (#2003)

Closes #1448
Authored by: iw0nderhow, pukkandan
---
 README.md                     | 31 +++++++++++-
 devscripts/prepare_manpage.py | 89 +++++++++++++++++++++--------------
 2 files changed, 82 insertions(+), 38 deletions(-)
diff --git a/README.md b/README.md
index 2041019c72..da0d9be9f0 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,4 @@
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
 <div align="center">
 
 [![YT-DLP](https://raw.githubusercontent.com/yt-dlp/yt-dlp/master/.github/banner.svg)](#readme)
@@ -15,9 +16,13 @@
 [![PyPi Downloads](https://img.shields.io/pypi/dm/yt-dlp?label=PyPi&style=for-the-badge)](https://pypi.org/project/yt-dlp)
 
 </div>
+<!-- MANPAGE: END EXCLUDED SECTION -->
 
 yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc). The main focus of this project is adding new features and patches while also keeping up to date with the original project
 
+<!-- MANPAGE: MOVE "USAGE AND OPTIONS" SECTION HERE -->
+
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
 * [NEW FEATURES](#new-features)
     * [Differences in default behavior](#differences-in-default-behavior)
 * [INSTALLATION](#installation)
@@ -61,6 +66,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
     * [Opening an Issue](CONTRIBUTING.md#opening-an-issue)
     * [Developer Instructions](CONTRIBUTING.md#developer-instructions)
 * [MORE](#more)
+<!-- MANPAGE: END EXCLUDED SECTION -->
 
 
 # NEW FEATURES
@@ -205,6 +211,7 @@ If you [installed with pip](#with-pip), simply re-run the same command that was
 
 If you [installed using Homebrew](#with-homebrew), run `brew upgrade yt-dlp/taps/yt-dlp`
 
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
 ## RELEASE FILES
 
 #### Recommended
@@ -231,6 +238,7 @@ File|Description
 [yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)|Source tarball. Also contains manpages, completions, etc
 [SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums
 [SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums
+<!-- MANPAGE: END EXCLUDED SECTION -->
 
 ## DEPENDENCIES
 Python versions 3.6+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly.
@@ -241,6 +249,7 @@ On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https:
 -->
 
 While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly recommended
+
 * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging seperate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. Licence [depends on the build](https://www.ffmpeg.org/legal.html)
 * [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING)
 * [**pycryptodomex**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
@@ -281,11 +290,13 @@ You can also fork the project on github and run your fork's [build workflow](.gi
 
 # USAGE AND OPTIONS
 
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
     yt-dlp [OPTIONS] [--] URL [URL...]
 
 `Ctrl+F` is your friend :D
-<!-- Auto generated -->
+<!-- MANPAGE: END EXCLUDED SECTION -->
 
+<!-- Auto generated -->
 ## General Options:
     -h, --help                       Print this help text and exit
     --version                        Print program version and exit
@@ -1000,7 +1011,7 @@ You can configure yt-dlp by placing any supported command line option to a confi
     * `~/yt-dlp.conf`
     * `~/yt-dlp.conf.txt`
 
-    `%XDG_CONFIG_HOME%` defaults to `~/.config` if undefined. On windows, `%APPDATA%` generally points to (`C:\Users\<user name>\AppData\Roaming`) and `~` points to `%HOME%` if present, `%USERPROFILE%` (generally `C:\Users\<user name>`), or `%HOMEDRIVE%%HOMEPATH%`
+    `%XDG_CONFIG_HOME%` defaults to `~/.config` if undefined. On Windows, `%APPDATA%` generally points to `C:\Users\<user name>\AppData\Roaming` and `~` points to `%HOME%` if present, `%USERPROFILE%` (generally `C:\Users\<user name>`), or `%HOMEDRIVE%%HOMEPATH%`
 1. **System Configuration**: `/etc/yt-dlp.conf`
 
 For example, with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory:
@@ -1048,7 +1059,9 @@ The default location of the .netrc file is `$HOME` (`~`) in UNIX. On Windows, it
 
 The `-o` option is used to indicate a template for the output file names while `-P` option is used to specify the path each type of file should be saved to.
 
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
 **tl;dr:** [navigate me to examples](#output-template-examples).
+<!-- MANPAGE: END EXCLUDED SECTION -->
 
 The simplest usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding file extension like this is _not_ recommended and could break some post-processing).
 
@@ -1056,11 +1069,17 @@ It may however also contain special sequences that will be replaced when downloa
 
 The field names themselves (the part inside the parenthesis) can also have some special formatting:
 1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields
+
 1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
+
 1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s`
+
 1. **Alternatives**: Alternate fields can be specified seperated with a `,`. Eg: `%(release_date>%Y,upload_date>%Y|Unknown)s`
+
 1. **Default**: A literal default value can be specified for when the field is empty using a `|` seperator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s`
+
 1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `B`, `j`, `l`, `q` can be used for converting to **B**ytes, **j**son (flag `#` for pretty-printing), a comma seperated **l**ist (flag `#` for `\n` newline-seperated) and a string **q**uoted for the terminal (flag `#` to split a list into different arguments), respectively
+
 1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. Eg: `%(title)+.100U` is NFKC
 
 To summarize, the general syntax for a field is:
@@ -1207,9 +1226,11 @@ The current default template is `%(title)s [%(id)s].%(ext)s`.
 
 In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title.
 
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
 #### Output template and Windows batch files
 
 If you are using an output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
+<!-- MANPAGE: END EXCLUDED SECTION -->
 
 #### Output template examples
 
@@ -1253,7 +1274,9 @@ This is generally equivalent to using `-f bestvideo*+bestaudio/best`. However, i
 
 The general syntax for format selection is `-f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
 
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
 **tl;dr:** [navigate me to examples](#format-selection-examples).
+<!-- MANPAGE: END EXCLUDED SECTION -->
 
 The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
 
@@ -1589,6 +1612,8 @@ The following extractors use this feature:
 
 NOTE: These options may be changed/removed in the future without concern for backward compatibility
 
+<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
+
 
 # PLUGINS
 
@@ -1720,6 +1745,8 @@ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
 **Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the example above
 
 
+<!-- MANPAGE: MOVE "NEW FEATURES" SECTION HERE -->
+
 # DEPRECATED OPTIONS
 
 These are all the deprecated options and the current alternative to achieve the same effect
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
index 485b39e9f3..b4446a3688 100644
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -13,12 +13,14 @@ PREFIX = r'''%yt-dlp(1)
 
 # NAME
 
-youtube\-dl \- download videos from youtube.com or other video platforms
+yt\-dlp \- A youtube-dl fork with additional features and patches
 
 # SYNOPSIS
 
 **yt-dlp** \[OPTIONS\] URL [URL...]
 
+# DESCRIPTION
+
 '''
 
 
@@ -33,47 +35,62 @@ def main():
     with io.open(README_FILE, encoding='utf-8') as f:
         readme = f.read()
 
-    readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
-    readme = re.sub(r'\s+yt-dlp \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
-    readme = PREFIX + readme
-
+    readme = filter_excluded_sections(readme)
+    readme = move_sections(readme)
     readme = filter_options(readme)
 
     with io.open(outfile, 'w', encoding='utf-8') as outf:
-        outf.write(readme)
+        outf.write(PREFIX + readme)
+
+
+def filter_excluded_sections(readme):  # Return readme with every BEGIN/END EXCLUDED SECTION span removed
+    EXCLUDED_SECTION_BEGIN_STRING = re.escape('<!-- MANPAGE: BEGIN EXCLUDED SECTION -->')  # escaped because it is spliced into the regex below
+    EXCLUDED_SECTION_END_STRING = re.escape('<!-- MANPAGE: END EXCLUDED SECTION -->')
+    return re.sub(
+        rf'(?s){EXCLUDED_SECTION_BEGIN_STRING}.+?{EXCLUDED_SECTION_END_STRING}\n',  # (?s) lets the lazy .+? span lines, so each BEGIN pairs with the nearest END; the newline right after END is removed too
+        '', readme)
+
+
+def move_sections(readme):  # Move each top-level "# NAME" section to where its 'MOVE "NAME" SECTION HERE' tag sits; returns the new text
+    MOVE_TAG_TEMPLATE = '<!-- MANPAGE: MOVE "%s" SECTION HERE -->'
+    sections = re.findall(rf'(?m)^{re.escape(MOVE_TAG_TEMPLATE) % "(.+)"}$', readme)  # names from tags that fill a whole line; NOTE(review): needs re.escape() to leave "%s" unescaped - confirm on the oldest supported Python
+
+    for section_name in sections:
+        move_tag = MOVE_TAG_TEMPLATE % section_name
+        if readme.count(move_tag) > 1:
+            raise Exception(f'There is more than one occurrence of "{move_tag}". This is unexpected')
+
+        sections = re.findall(rf'(?sm)(^# {re.escape(section_name)}.+?)(?=^# )', readme)  # rebinds "sections" (the running loop still iterates the first list); a section spans its heading up to, not including, the next line starting with "# "
+        if len(sections) < 1:
+            raise Exception(f'The section {section_name} does not exist')
+        elif len(sections) > 1:
+            raise Exception(f'There are multiple occurrences of section {section_name}, this is unhandled')
+
+        readme = readme.replace(sections[0], '', 1).replace(move_tag, sections[0], 1)  # cut the section text out once, then put it in place of the tag
+    return readme
 
 
 def filter_options(readme):
-    ret = ''
-    in_options = False
-    for line in readme.split('\n'):
-        if line.startswith('# '):
-            if line[2:].startswith('OPTIONS'):
-                in_options = True
-            else:
-                in_options = False
-
-        if in_options:
-            if line.lstrip().startswith('-'):
-                split = re.split(r'\s{2,}', line.lstrip())
-                # Description string may start with `-` as well. If there is
-                # only one piece then it's a description bit not an option.
-                if len(split) > 1:
-                    option, description = split
-                    split_option = option.split(' ')
-
-                    if not split_option[-1].startswith('-'):  # metavar
-                        option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]])
-
-                    # Pandoc's definition_lists. See http://pandoc.org/README.html
-                    # for more information.
-                    ret += '\n%s\n:   %s\n' % (option, description)
-                    continue
-            ret += line.lstrip() + '\n'
-        else:
-            ret += line + '\n'
-
-    return ret
+    section = re.search(r'(?sm)^# USAGE AND OPTIONS\n.+?(?=^# )', readme).group(0)  # the section up to the next "# " heading; .group(0) raises AttributeError when nothing matches
+    options = '# OPTIONS\n'  # the rewritten section gets this heading instead
+    for line in section.split('\n')[1:]:  # [1:] skips the original heading line
+        if line.lstrip().startswith('-'):
+            split = re.split(r'\s{2,}', line.lstrip())  # pieces are separated by runs of two or more whitespace characters
+            # Description string may start with `-` as well. If there is
+            # only one piece then it's a description bit not an option.
+            if len(split) > 1:
+                option, description = split  # NOTE(review): raises ValueError if the line splits into more than two pieces
+                split_option = option.split(' ')
+
+                if not split_option[-1].startswith('-'):  # metavar
+                    option = ' '.join(split_option[:-1] + [f'*{split_option[-1]}*'])  # wrap the metavar in *...*
+
+                # Pandoc's definition_lists. See http://pandoc.org/README.html
+                options += f'\n{option}\n:   {description}\n'  # blank line, the option line, then ":   description"
+                continue
+        options += line.lstrip() + '\n'  # every other line is kept, minus its leading whitespace
+
+    return readme.replace(section, options, 1)  # only the first occurrence of the section text is swapped
 
 
 if __name__ == '__main__':