From 2b8a2973bde415fc227790275dfd3e55e43babae Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 7 Aug 2021 05:12:54 +0530 Subject: [PATCH] Allow entire infodict to be printed using `%()s` Makes `--dump-json` redundant --- README.md | 3 ++- test/test_YoutubeDL.py | 27 +++++++++++++++++---------- yt_dlp/YoutubeDL.py | 18 +++++++++++------- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index d4f436d92..40a3bb873 100644 --- a/README.md +++ b/README.md @@ -919,7 +919,7 @@ The simplest usage of `-o` is not to set any template arguments when downloading It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. The field names themselves (the part inside the parenthesis) can also have some special formatting: -1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields +1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields 1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. 
Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d` 1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s` 1. **Default**: A default value can be specified for when the field is empty using a `|` seperator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s` @@ -1417,6 +1417,7 @@ While these options are redundant, they are still expected to be used due to the --get-thumbnail --print thumbnail -e, --get-title --print title -g, --get-url --print urls + -j, --dump-json --print "%()j" #### Not recommended diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 1e0865102..7e0133027 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -668,15 +668,13 @@ class TestYoutubeDL(unittest.TestCase): out = ydl.escape_outtmpl(outtmpl) % tmpl_dict fname = ydl.prepare_filename(info or self.outtmpl_info) - if callable(expected): - self.assertTrue(expected(out)) - self.assertTrue(expected(fname)) - elif isinstance(expected, str): - self.assertEqual(out, expected) - self.assertEqual(fname, expected) - else: - self.assertEqual(out, expected[0]) - self.assertEqual(fname, expected[1]) + if not isinstance(expected, (list, tuple)): + expected = (expected, expected) + for (name, got), expect in zip((('outtmpl', out), ('filename', fname)), expected): + if callable(expect): + self.assertTrue(expect(got), f'Wrong {name} from {tmpl}') + else: + self.assertEqual(got, expect, f'Wrong {name} from {tmpl}') # Side-effects original_infodict = dict(self.outtmpl_info) @@ -721,7 +719,16 @@ class TestYoutubeDL(unittest.TestCase): # Invalid templates self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError)) test('%(invalid@tmpl|def)s', 'none', 
outtmpl_na_placeholder='none') - test('%()s', 'NA') + test('%(..)s', 'NA') + + # Entire info_dict + def expect_same_infodict(out): + got_dict = json.loads(out) + for info_field, expected in self.outtmpl_info.items(): + self.assertEqual(got_dict.get(info_field), expected, info_field) + return True + + test('%()j', (expect_same_infodict, str)) # NA placeholder NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s' diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index acd85af05..ac99dd45b 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -917,7 +917,7 @@ class YoutubeDL(object): } # Field is of the form key1.key2... # where keys (except first) can be string, int or slice - FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)') + FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)') MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?') MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys())) INTERNAL_FORMAT_RE = re.compile(r'''(?x) @@ -928,12 +928,15 @@ class YoutubeDL(object): (?:\|(?P<default>.*?))? 
$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE)) - get_key = lambda k: traverse_obj( - info_dict, k.split('.'), is_user_input=True, traverse_string=True) + def _traverse_infodict(k): + k = k.split('.') + if k[0] == '': + k.pop(0) + return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True) def get_value(mdict): # Object traversal - value = get_key(mdict['fields']) + value = _traverse_infodict(mdict['fields']) # Negative if mdict['negate']: value = float_or_none(value) @@ -955,7 +958,7 @@ class YoutubeDL(object): item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1) offset = float_or_none(item) if offset is None: - offset = float_or_none(get_key(item)) + offset = float_or_none(_traverse_infodict(item)) try: value = operator(value, multiplier * offset) except (TypeError, ZeroDivisionError): @@ -2378,6 +2381,8 @@ class YoutubeDL(object): elif 'url' in info_dict: info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '') + if self.params.get('forceprint') or self.params.get('forcejson'): + self.post_extract(info_dict) for tmpl in self.params.get('forceprint', []): if re.match(r'\w+$', tmpl): tmpl = '%({})s'.format(tmpl) @@ -2394,8 +2399,7 @@ class YoutubeDL(object): self.to_stdout(formatSeconds(info_dict['duration'])) print_mandatory('format') - if self.params.get('forcejson', False): - self.post_extract(info_dict) + if self.params.get('forcejson'): self.to_stdout(json.dumps(self.sanitize_info(info_dict))) def dl(self, name, info, subtitle=False, test=False):