From 7053aa3a48dbdfe8f11b12fa0f442a9bf8b136b1 Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Sun, 6 Nov 2022 12:23:16 -0500 Subject: [PATCH] [extractor/epoch] Support videos without data-trailer (#5387) Closes #5359 Authored by: gibson042, pukkandan --- yt_dlp/extractor/epoch.py | 11 ++++++++++- yt_dlp/utils.py | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/epoch.py b/yt_dlp/extractor/epoch.py index 13eeabe3e..110e78c5b 100644 --- a/yt_dlp/extractor/epoch.py +++ b/yt_dlp/extractor/epoch.py @@ -1,4 +1,5 @@ from .common import InfoExtractor +from ..utils import extract_attributes, get_element_html_by_id class EpochIE(InfoExtractor): @@ -28,13 +29,21 @@ class EpochIE(InfoExtractor): 'title': 'Kash Patel: A ‘6-Year-Saga’ of Government Corruption, From Russiagate to Mar-a-Lago', } }, + { + 'url': 'https://www.theepochtimes.com/dick-morris-discusses-his-book-the-return-trumps-big-2024-comeback_4819205.html', + 'info_dict': { + 'id': '9489f994-2a20-4812-b233-ac0e5c345632', + 'ext': 'mp4', + 'title': 'Dick Morris Discusses His Book ‘The Return: Trump’s Big 2024 Comeback’', + } + }, ] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - youmaker_video_id = self._search_regex(r'data-trailer="[\w-]+" data-id="([\w-]+)"', webpage, 'url') + youmaker_video_id = extract_attributes(get_element_html_by_id('videobox', webpage))['data-id'] formats, subtitles = self._extract_m3u8_formats_and_subtitles( f'http://vs1.youmaker.com/assets/{youmaker_video_id}/playlist.m3u8', video_id, 'mp4', m3u8_id='hls') diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index ef4cc904c..cfc7ba63a 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -524,6 +524,7 @@ class HTMLAttributeParser(html.parser.HTMLParser): def handle_starttag(self, tag, attrs): self.attrs = dict(attrs) + raise compat_HTMLParseError('done') class HTMLListAttrsParser(html.parser.HTMLParser):