mirror of https://github.com/yt-dlp/yt-dlp
Add option `--parse-metadata`
* The fields extracted by this can be used in `--output` * Deprecated `--metadata-from-title` :ci skip dlpull/31/head
parent
9882064024
commit
5bfa486205
@ -0,0 +1,66 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import PostProcessor
|
||||||
|
from ..compat import compat_str
|
||||||
|
|
||||||
|
|
||||||
|
class MetadataFromFieldPP(PostProcessor):
    """Post-processor that derives info-dict fields by parsing other fields.

    It is configured with a list of 'FIELD:FORMAT' strings. For each of
    them, run() searches the value of info[FIELD] with the regex derived
    from FORMAT and stores every named group of the match back into the
    info dict under the group's name.
    """

    # Splits a 'FIELD:FORMAT' specification into its two named parts.
    regex = r'(?P<field>\w+):(?P<format>.+)$'

    def __init__(self, downloader, formats):
        """
        @param downloader  passed through unchanged to PostProcessor.__init__
        @param formats     list or tuple of 'FIELD:FORMAT' strings

        Raises AssertionError if a specification does not match
        MetadataFromFieldPP.regex.
        """
        PostProcessor.__init__(self, downloader)
        assert isinstance(formats, (list, tuple)), 'formats must be a list or tuple'
        self._data = []
        for f in formats:
            assert isinstance(f, compat_str), 'each format must be a text string'
            match = re.match(self.regex, f)
            if match is None:
                # Raised explicitly (instead of via `assert`) so the check
                # is not stripped under `python -O`, where the code below
                # would otherwise fail with an AttributeError on None.
                # The exception type is the same as the former assert's.
                raise AssertionError(
                    'invalid metadata specification "%s"; expected FIELD:FORMAT' % f)
            fmt = match.group('format')
            self._data.append({
                'field': match.group('field'),
                'format': fmt,
                'regex': self.format_to_regex(fmt),
            })

    def format_to_regex(self, fmt):
        r"""
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex like
           '(?P<title>[^\r\n]+)\ \-\ (?P<artist>[^\r\n]+)'

        A string that contains no %(name)s placeholder is returned
        unchanged, i.e. it is used as a regex as-is.
        """
        placeholder = r'%\((\w+)\)s'
        if not re.search(placeholder, fmt):
            return fmt
        parts = []
        lastpos = 0
        # replace %(..)s with regex group and escape other string parts
        for match in re.finditer(placeholder, fmt):
            parts.append(re.escape(fmt[lastpos:match.start()]))
            parts.append(r'(?P<' + match.group(1) + r'>[^\r\n]+)')
            lastpos = match.end()
        # Literal text after the last placeholder (empty string if none).
        parts.append(re.escape(fmt[lastpos:]))
        return ''.join(parts)

    def run(self, info):
        """Parse the configured fields of `info` and store the results in it.

        A field that is missing or None, holds a value that cannot be
        searched with a regex, or does not match, is reported with a
        warning and skipped; the remaining specifications are still
        processed.

        Returns a tuple of an empty list and the updated `info` dict.
        """
        for dictn in self._data:
            field, regex = dictn['field'], dictn['regex']
            value = info.get(field)
            if value is None:
                # Covers both an absent key and a key explicitly set to None.
                self.report_warning('Video does not have a %s' % field)
                continue
            self.write_debug('Searching for r"%s" in %s' % (regex, field))
            try:
                match = re.search(regex, value)
            except TypeError:
                # The value is not text (e.g. a number or a list), so it
                # cannot be searched; skip it instead of aborting.
                self.report_warning(
                    'Could not parse video %s: its value is not a string' % field)
                continue
            if match is None:
                self.report_warning(
                    'Could not interpret video %s as "%s"' % (field, dictn['format']))
                continue
            for attribute, value in match.groupdict().items():
                info[attribute] = value
                self.to_screen('parsed %s from %s: %s' % (
                    attribute, field, value if value is not None else 'NA'))
        return [], info
|
||||||
|
|
||||||
|
|
||||||
|
class MetadataFromTitlePP(MetadataFromFieldPP):  # for backward compatibility
    """MetadataFromFieldPP restricted to parsing the 'title' field."""

    def __init__(self, downloader, titleformat):
        """
        @param downloader   forwarded to MetadataFromFieldPP.__init__
        @param titleformat  format (or regex) to interpret the title with
        """
        title_spec = 'title:%s' % titleformat
        super(MetadataFromTitlePP, self).__init__(downloader, [title_spec])
        # The parent stores one entry per specification; only one was given.
        only_entry = self._data[0]
        self._titleformat = titleformat
        self._titleregex = only_entry['regex']
|
@ -1,44 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import PostProcessor
|
|
||||||
|
|
||||||
|
|
||||||
class MetadataFromTitlePP(PostProcessor):
    """Post-processor that extracts info-dict fields from the video title."""

    def __init__(self, downloader, titleformat):
        """
        @param downloader   forwarded to PostProcessor.__init__
        @param titleformat  a format containing %(name)s placeholders, or a
                            string that is used as a regex as-is
        """
        super(MetadataFromTitlePP, self).__init__(downloader)
        self._titleformat = titleformat
        # Only formats with at least one %(name)s placeholder are converted.
        if re.search(r'%\(\w+\)s', titleformat):
            self._titleregex = self.format_to_regex(titleformat)
        else:
            self._titleregex = titleformat

    def format_to_regex(self, fmt):
        r"""
        Turns a format such as
           '%(title)s - %(artist)s'
        into the regex
           '(?P<title>.+)\ \-\ (?P<artist>.+)'
        by escaping the literal text and replacing each %(name)s
        placeholder with a named group.
        """
        pieces = []
        cursor = 0
        for placeholder in re.finditer(r'%\((\w+)\)s', fmt):
            # literal text before this placeholder, then the named group
            pieces.append(re.escape(fmt[cursor:placeholder.start()]))
            pieces.append(r'(?P<' + placeholder.group(1) + '>.+)')
            cursor = placeholder.end()
        # literal text after the last placeholder, if any
        if cursor < len(fmt):
            pieces.append(re.escape(fmt[cursor:]))
        return ''.join(pieces)

    def run(self, info):
        """Match info['title'] against the title regex and store the groups.

        Returns a tuple of an empty list and the `info` dict; `info` is left
        unchanged when the title does not match.
        """
        found = re.match(self._titleregex, info['title'])
        if found is not None:
            for name, parsed in found.groupdict().items():
                info[name] = parsed
                shown = 'NA' if parsed is None else parsed
                self.to_screen('parsed %s: %s' % (name, shown))
        else:
            self.to_screen('Could not interpret title of video as "%s"' % self._titleformat)
        return [], info
|
|
Loading…
Reference in New Issue