Fix bugs related to `sanitize_info`

Related: 8012d892bd (r54555230)
pull/646/head
pukkandan 3 years ago
parent 575e17a1b9
commit 6e84b21559
No known key found for this signature in database
GPG Key ID: 0F00D95A001F4698

@ -18,7 +18,7 @@ from yt_dlp.compat import compat_os_name, compat_setenv, compat_str, compat_urll
from yt_dlp.extractor import YoutubeIE from yt_dlp.extractor import YoutubeIE
from yt_dlp.extractor.common import InfoExtractor from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.postprocessor.common import PostProcessor from yt_dlp.postprocessor.common import PostProcessor
from yt_dlp.utils import ExtractorError, int_or_none, match_filter_func from yt_dlp.utils import ExtractorError, int_or_none, match_filter_func, LazyList
TEST_URL = 'http://localhost/sample.mp4' TEST_URL = 'http://localhost/sample.mp4'
@ -678,10 +678,17 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(out, expected[0]) self.assertEqual(out, expected[0])
self.assertEqual(fname, expected[1]) self.assertEqual(fname, expected[1])
# Side-effects
original_infodict = dict(self.outtmpl_info)
test('foo.bar', 'foo.bar')
original_infodict['epoch'] = self.outtmpl_info.get('epoch')
self.assertTrue(isinstance(original_infodict['epoch'], int))
test('%(epoch)d', int_or_none)
self.assertEqual(original_infodict, self.outtmpl_info)
# Auto-generated fields # Auto-generated fields
test('%(id)s.%(ext)s', '1234.mp4') test('%(id)s.%(ext)s', '1234.mp4')
test('%(duration_string)s', ('27:46:40', '27-46-40')) test('%(duration_string)s', ('27:46:40', '27-46-40'))
test('%(epoch)d', int_or_none)
test('%(resolution)s', '1080p') test('%(resolution)s', '1080p')
test('%(playlist_index)s', '001') test('%(playlist_index)s', '001')
test('%(autonumber)s', '00001') test('%(autonumber)s', '00001')
@ -774,6 +781,12 @@ class TestYoutubeDL(unittest.TestCase):
test('%(formats.0.id.-1+id)f', '1235.000000') test('%(formats.0.id.-1+id)f', '1235.000000')
test('%(formats.0.id.-1+formats.1.id.-1)d', '3') test('%(formats.0.id.-1+formats.1.id.-1)d', '3')
# Laziness
def gen():
    """Yield 0 through 4, then fail the enclosing test if iterated further.

    Meant to be wrapped in a LazyList whose fifth element (index 4) is
    looked up; a consumer that stays lazy never requests a sixth item.
    """
    yield from range(5)
    # Only reached when a sixth item is requested, i.e. the list was fully
    # evaluated. self.fail() raises the test-failure exception by itself, so
    # no `raise` of an assertion helper's (None) return value is needed.
    self.fail('LazyList should not be evaluated till here')
test('%(key.4)s', '4', info={'key': LazyList(gen())})
# Empty filename # Empty filename
test('%(foo|)s-%(bar|)s.%(ext)s', '-.mp4') test('%(foo|)s-%(bar|)s.%(ext)s', '-.mp4')
# test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4')) # fixme # test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4')) # fixme

@ -887,14 +887,16 @@ class YoutubeDL(object):
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
""" Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """ """ Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """
info_dict = dict(info_dict) info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
na = self.params.get('outtmpl_na_placeholder', 'NA') na = self.params.get('outtmpl_na_placeholder', 'NA')
info_dict = dict(info_dict) # Do not sanitize so as not to consume LazyList
for key in ('__original_infodict', '__postprocessors'):
info_dict.pop(key, None)
info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
formatSeconds(info_dict['duration'], '-' if sanitize else ':') formatSeconds(info_dict['duration'], '-' if sanitize else ':')
if info_dict.get('duration', None) is not None if info_dict.get('duration', None) is not None
else None) else None)
info_dict['epoch'] = int(time.time())
info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
if info_dict.get('resolution') is None: if info_dict.get('resolution') is None:
info_dict['resolution'] = self.format_resolution(info_dict, default=None) info_dict['resolution'] = self.format_resolution(info_dict, default=None)
@ -964,6 +966,11 @@ class YoutubeDL(object):
return value return value
def _dumpjson_default(obj):
    """Fallback for ``json.dumps(..., default=...)``: turn sets and LazyLists into lists.

    Any other type is rejected with a TypeError.
    """
    convertible = isinstance(obj, (set, LazyList))
    if not convertible:
        raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
    # list() walks the whole container, so a LazyList is fully evaluated here
    return list(obj)
def create_key(outer_mobj): def create_key(outer_mobj):
if not outer_mobj.group('has_key'): if not outer_mobj.group('has_key'):
return f'%{outer_mobj.group(0)}' return f'%{outer_mobj.group(0)}'
@ -988,7 +995,7 @@ class YoutubeDL(object):
if fmt[-1] == 'l': if fmt[-1] == 'l':
value, fmt = ', '.join(variadic(value)), str_fmt value, fmt = ', '.join(variadic(value)), str_fmt
elif fmt[-1] == 'j': elif fmt[-1] == 'j':
value, fmt = json.dumps(value), str_fmt value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
elif fmt[-1] == 'q': elif fmt[-1] == 'q':
value, fmt = compat_shlex_quote(str(value)), str_fmt value, fmt = compat_shlex_quote(str(value)), str_fmt
elif fmt[-1] == 'c': elif fmt[-1] == 'c':
@ -2386,7 +2393,7 @@ class YoutubeDL(object):
if self.params.get('forcejson', False): if self.params.get('forcejson', False):
self.post_extract(info_dict) self.post_extract(info_dict)
self.to_stdout(json.dumps(self.sanitize_info(info_dict), default=repr)) self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
def dl(self, name, info, subtitle=False, test=False): def dl(self, name, info, subtitle=False, test=False):
@ -2861,7 +2868,7 @@ class YoutubeDL(object):
else: else:
if self.params.get('dump_single_json', False): if self.params.get('dump_single_json', False):
self.post_extract(res) self.post_extract(res)
self.to_stdout(json.dumps(self.filter_requested_info(res), default=repr)) self.to_stdout(json.dumps(self.sanitize_info(res)))
return self._download_retcode return self._download_retcode
@ -2885,15 +2892,18 @@ class YoutubeDL(object):
@staticmethod @staticmethod
def sanitize_info(info_dict, remove_private_keys=False): def sanitize_info(info_dict, remove_private_keys=False):
''' Sanitize the infodict for converting to json ''' ''' Sanitize the infodict for converting to json '''
remove_keys = ['__original_infodict'] # Always remove this since this may contain a copy of the entire dict info_dict.setdefault('epoch', int(time.time()))
remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
keep_keys = ['_type'], # Always keep this to facilitate load-info-json keep_keys = ['_type'], # Always keep this to facilitate load-info-json
if remove_private_keys: if remove_private_keys:
remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url') remove_keys |= {
'requested_formats', 'requested_subtitles', 'requested_entries',
'filepath', 'entries', 'original_url', 'playlist_autonumber',
}
empty_values = (None, {}, [], set(), tuple()) empty_values = (None, {}, [], set(), tuple())
reject = lambda k, v: k not in keep_keys and ( reject = lambda k, v: k not in keep_keys and (
k.startswith('_') or k in remove_keys or v in empty_values) k.startswith('_') or k in remove_keys or v in empty_values)
else: else:
info_dict['epoch'] = int(time.time())
reject = lambda k, v: k in remove_keys reject = lambda k, v: k in remove_keys
filter_fn = lambda obj: ( filter_fn = lambda obj: (
list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set)) list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))

@ -1836,7 +1836,7 @@ def write_json_file(obj, fn):
try: try:
with tf: with tf:
json.dump(obj, tf, default=repr) json.dump(obj, tf)
if sys.platform == 'win32': if sys.platform == 'win32':
# Need to remove existing file on Windows, else os.rename raises # Need to remove existing file on Windows, else os.rename raises
# WindowsError or FileExistsError. # WindowsError or FileExistsError.

Loading…
Cancel
Save