From 7d1eb38af154db77341b28b14776b23172a234e0 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 29 Jul 2021 08:26:17 +0530 Subject: [PATCH] Add format types `j`, `l`, `q` for outtmpl Closes #345 --- README.md | 8 +++++--- test/test_YoutubeDL.py | 28 ++++++++++++++++++++++------ yt_dlp/YoutubeDL.py | 24 ++++++++++++++++++------ yt_dlp/options.py | 4 ++-- yt_dlp/utils.py | 2 ++ 5 files changed, 49 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 4a8364e57..7322c2a0a 100644 --- a/README.md +++ b/README.md @@ -789,10 +789,11 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t command. An additional field "filepath" that contains the final path of the downloaded file is also available. If no - fields are passed, "%(filepath)s" is - appended to the end of the command + fields are passed, %(filepath)q is appended + to the end of the command --exec-before-download CMD Execute a command before the actual download. The syntax is the same as --exec + but "filepath" is not available --convert-subs FORMAT Convert the subtitles to another format (currently supported: srt|vtt|ass|lrc) (Alias: --convert-subtitles) @@ -917,10 +918,11 @@ The simplest usage of `-o` is not to set any template arguments when downloading It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. The field names themselves (the part inside the parenthesis) can also have some special formatting: -1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. 
Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)`, `%(id.3:7:-1)s`. Note that the fields that become available using this method are not listed below. Use `-j` to see such fields +1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields 1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d` 1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s` 1. **Default**: A default value can be specified for when the field is empty using a `|` seperator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s` +1. 
**More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `j`, `l`, `q` can be used for converting to **j**son, a comma-separated **l**ist and a string **q**uoted for the terminal respectively To summarize, the general syntax for a field is: ``` diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index e1287f222..9a0b286e2 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -10,6 +10,7 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import copy +import json from test.helper import FakeYDL, assertRegexpMatches from yt_dlp import YoutubeDL @@ -647,6 +648,7 @@ class TestYoutubeDL(unittest.TestCase): 'title1': '$PATH', 'title2': '%PATH%', 'title3': 'foo/bar\\test', + 'title4': 'foo "bar" test', 'timestamp': 1618488000, 'duration': 100000, 'playlist_index': 1, @@ -669,10 +671,12 @@ class TestYoutubeDL(unittest.TestCase): if callable(expected): self.assertTrue(expected(out)) self.assertTrue(expected(fname)) - elif isinstance(expected, compat_str): - self.assertEqual((out, fname), (expected, expected)) + elif isinstance(expected, str): + self.assertEqual(out, expected) + self.assertEqual(fname, expected) else: - self.assertEqual((out, fname), expected) + self.assertEqual(out, expected[0]) + self.assertEqual(fname, expected[1]) # Auto-generated fields test('%(id)s.%(ext)s', '1234.mp4') @@ -741,14 +745,26 @@ class TestYoutubeDL(unittest.TestCase): test('%(width|0)04d', '0000') test('a%(width|)d', 'a', outtmpl_na_placeholder='none') - # Internal formatting FORMATS = self.outtmpl_info['formats'] + sanitize = lambda x: x.replace(':', ' -').replace('"', "'") + + # Custom type casting + test('%(formats.:.id)l', 'id1, id2, id3') + test('%(ext)l', 'mp4') + test('%(formats.:.id) 15l', ' id1, id2, id3') + test('%(formats)j', (json.dumps(FORMATS), sanitize(json.dumps(FORMATS)))) + if compat_os_name == 'nt': + test('%(title4)q', ('"foo \\"bar\\" test"', "'foo _'bar_' test'")) + else: + 
test('%(title4)q', ('\'foo "bar" test\'', "'foo 'bar' test'")) + + # Internal formatting test('%(timestamp-1000>%H-%M-%S)s', '11-43-20') test('%(title|%)s %(title|%%)s', '% %%') test('%(id+1-height+3)05d', '00158') test('%(width+100)05d', 'NA') - test('%(formats.0) 15s', ('% 15s' % FORMATS[0], '% 15s' % str(FORMATS[0]).replace(':', ' -'))) - test('%(formats.0)r', (repr(FORMATS[0]), repr(FORMATS[0]).replace(':', ' -'))) + test('%(formats.0) 15s', ('% 15s' % FORMATS[0], '% 15s' % sanitize(str(FORMATS[0])))) + test('%(formats.0)r', (repr(FORMATS[0]), sanitize(repr(FORMATS[0])))) test('%(height.0)03d', '001') test('%(-height.0)04d', '-001') test('%(formats.-1.id)s', FORMATS[-1]['id']) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 3350042c9..6ce0d19c3 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -35,6 +35,7 @@ from .compat import ( compat_kwargs, compat_numeric_types, compat_os_name, + compat_shlex_quote, compat_str, compat_tokenize_tokenize, compat_urllib_error, @@ -108,6 +109,7 @@ from .utils import ( try_get, UnavailableVideoError, url_basename, + variadic, version_tuple, write_json_file, write_string, @@ -871,9 +873,12 @@ class YoutubeDL(object): @classmethod def validate_outtmpl(cls, outtmpl): ''' @return None or Exception object ''' - outtmpl = cls.escape_outtmpl(cls._outtmpl_expandpath(outtmpl)) + outtmpl = re.sub( + STR_FORMAT_RE_TMPL.format('[^)]*', '[ljq]'), + lambda mobj: f'{mobj.group(0)[:-1]}s', + cls._outtmpl_expandpath(outtmpl)) try: - outtmpl % collections.defaultdict(int) + cls.escape_outtmpl(outtmpl) % collections.defaultdict(int) return None except ValueError as err: return err @@ -900,7 +905,7 @@ class YoutubeDL(object): } TMPL_DICT = {} - EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}]')) + EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljq]')) MATH_FUNCTIONS = { '+': float.__add__, '-': float.__sub__, @@ -977,8 +982,15 @@ class 
YoutubeDL(object): value = default if value is None else value - if fmt == 'c': - value = compat_str(value) + str_fmt = f'{fmt[:-1]}s' + if fmt[-1] == 'l': + value, fmt = ', '.join(variadic(value)), str_fmt + elif fmt[-1] == 'j': + value, fmt = json.dumps(value), str_fmt + elif fmt[-1] == 'q': + value, fmt = compat_shlex_quote(str(value)), str_fmt + elif fmt[-1] == 'c': + value = str(value) if value is None: value, fmt = default, 's' else: @@ -992,7 +1004,7 @@ class YoutubeDL(object): if fmt[-1] == 'r': # If value is an object, sanitize might convert it to a string # So we convert it to repr first - value, fmt = repr(value), '%ss' % fmt[:-1] + value, fmt = repr(value), str_fmt if fmt[-1] in 'csr': value = sanitize(mobj['fields'].split('.')[-1], value) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 5c3ac0dcd..9b71427d1 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1286,11 +1286,11 @@ def parseOpts(overrideArguments=None): 'Execute a command on the file after downloading and post-processing. ' 'Similar syntax to the output template can be used to pass any field as arguments to the command. ' 'An additional field "filepath" that contains the final path of the downloaded file is also available. ' - 'If no fields are passed, "%(filepath)s" is appended to the end of the command')) + 'If no fields are passed, %(filepath)q is appended to the end of the command')) postproc.add_option( '--exec-before-download', metavar='CMD', dest='exec_before_dl_cmd', - help='Execute a command before the actual download. The syntax is the same as --exec') + help='Execute a command before the actual download. 
The syntax is the same as --exec but "filepath" is not available') postproc.add_option( '--convert-subs', '--convert-sub', '--convert-subtitles', metavar='FORMAT', dest='convertsubtitles', default=None, diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 2bd0925b6..998689efe 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4451,8 +4451,10 @@ STR_FORMAT_RE_TMPL = r'''(?x) ) ''' + STR_FORMAT_TYPES = 'diouxXeEfFgGcrs' + def limit_length(s, length): """ Add ellipses to overly long strings """ if s is None: