From a3976e07600247786b23df1ec9f93695b6d899ae Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 7 Jul 2022 10:51:47 +0530 Subject: [PATCH] Improve chapter sanitization --- yt_dlp/YoutubeDL.py | 9 +++++++-- yt_dlp/extractor/youtube.py | 8 +++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 50b85cbfe..38d146bfc 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2377,13 +2377,18 @@ class YoutubeDL: self.report_warning('"duration" field is negative, there is an error in extractor') chapters = info_dict.get('chapters') or [] + if chapters and chapters[0].get('start_time'): + chapters.insert(0, {'start_time': 0}) + dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')} - for prev, current, next_ in zip( - (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)): + for idx, (prev, current, next_) in enumerate(zip( + (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1): if current.get('start_time') is None: current['start_time'] = prev.get('end_time') if not current.get('end_time'): current['end_time'] = next_.get('start_time') + if not current.get('title'): + current['title'] = f'<Untitled Chapter {idx}>' if 'playlist' not in info_dict: # It isn't part of a playlist diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 3e2ac030e..90d2435de 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2764,17 +2764,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not strict: chapter_list.sort(key=lambda c: c['start_time'] or 0) - chapters = [{'start_time': 0, 'title': ''}] + chapters = [{'start_time': 0}] for idx, chapter in enumerate(chapter_list): - if chapter['start_time'] is None or not chapter['title']: + if chapter['start_time'] is None: self.report_warning(f'Incomplete chapter {idx}') elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration: - chapters[-1]['end_time'] = chapter['start_time'] 
chapters.append(chapter) else: self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"') - chapters[-1]['end_time'] = duration - return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:] + return chapters[1:] def _extract_comment(self, comment_renderer, parent=None): comment_id = comment_renderer.get('commentId')