From f5aa5cfbffeea9352ace141707f35c86f5e11b89 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 17 Sep 2021 23:46:17 +0530 Subject: [PATCH] Add format type `B` for outtmpl to treat the value as bytes This is useful to limit the filename to a certain number of bytes rather than characters Closes #1003 --- README.md | 4 ++-- test/test_YoutubeDL.py | 2 ++ yt_dlp/YoutubeDL.py | 7 +++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a2c1cbd82..c4f996834 100644 --- a/README.md +++ b/README.md @@ -952,14 +952,14 @@ The `-o` option is used to indicate a template for the output file names while ` The simplest usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding file extension like this is _not_ recommended and could break some post-processing). -It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. +It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. The field names themselves (the part inside the parenthesis) can also have some special formatting: 1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. 
Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields 1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d` 1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s` 1. **Default**: A default value can be specified for when the field is empty using a `|` seperator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s` -1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `j`, `l`, `q` can be used for converting to **j**son, a comma seperated **l**ist and a string **q**uoted for the terminal respectively +1. 
**More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `B`, `j`, `l`, `q` can be used for converting to **B**ytes, **j**son, a comma separated **l**ist and a string **q**uoted for the terminal respectively To summarize, the general syntax for a field is: ``` diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index e689978fd..e61492ec8 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -649,6 +649,7 @@ class TestYoutubeDL(unittest.TestCase): 'title2': '%PATH%', 'title3': 'foo/bar\\test', 'title4': 'foo "bar" test', + 'title5': 'áéí', 'timestamp': 1618488000, 'duration': 100000, 'playlist_index': 1, @@ -767,6 +768,7 @@ class TestYoutubeDL(unittest.TestCase): test('%(ext)l', 'mp4') test('%(formats.:.id) 15l', ' id1, id2, id3') test('%(formats)j', (json.dumps(FORMATS), sanitize(json.dumps(FORMATS)))) + test('%(title5).3B', 'á') if compat_os_name == 'nt': test('%(title4)q', ('"foo \\"bar\\" test"', "'foo _'bar_' test'")) else: diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 8432abf1a..c9dc50e64 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -907,7 +907,7 @@ class YoutubeDL(object): def validate_outtmpl(cls, outtmpl): ''' @return None or Exception object ''' outtmpl = re.sub( - STR_FORMAT_RE_TMPL.format('[^)]*', '[ljq]'), + STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqB]'), lambda mobj: f'{mobj.group(0)[:-1]}s', cls._outtmpl_expandpath(outtmpl)) try: @@ -939,7 +939,7 @@ class YoutubeDL(object): } TMPL_DICT = {} - EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljq]')) + EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqB]')) MATH_FUNCTIONS = { '+': float.__add__, '-': float.__sub__, @@ -1031,6 +1031,9 @@ class YoutubeDL(object): value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt elif fmt[-1] == 'q': value, fmt = compat_shlex_quote(str(value)), str_fmt + elif fmt[-1] == 'B': + value = 
f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8') + value, fmt = value.decode('utf-8', 'ignore'), 's' elif fmt[-1] == 'c': value = str(value) if value is None: