diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index eb61ad386..2195472b7 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -975,6 +975,7 @@ from .motherless import (
 from .motorsport import MotorsportIE
 from .movieclips import MovieClipsIE
 from .moviepilot import MoviepilotIE
+from .moview import MoviewPlayIE
 from .moviezine import MoviezineIE
 from .movingimage import MovingImageIE
 from .msn import MSNIE
diff --git a/yt_dlp/extractor/jixie.py b/yt_dlp/extractor/jixie.py
new file mode 100644
index 000000000..3bb685e01
--- /dev/null
+++ b/yt_dlp/extractor/jixie.py
@@ -0,0 +1,58 @@
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    float_or_none,
+    traverse_obj,
+    try_call,
+)
+
+# more info about jixie:
+# [1] https://jixie.atlassian.net/servicedesk/customer/portal/2/article/1339654214?src=-1456335525,
+# [2] https://scripts.jixie.media/jxvideo.3.1.min.js
+
+
+class JixieBaseIE(InfoExtractor):
+    def _extract_data_from_jixie_id(self, display_id, video_id, webpage):
+        """Build the info dict for a jixie-hosted video.
+
+        Stream data is fetched from the jixie API by video_id; title and
+        description fall back to the meta tags of webpage when the API
+        response does not provide them.
+        """
+        json_data = self._download_json(
+            'https://apidam.jixie.io/api/public/stream', display_id,
+            query={'metadata': 'full', 'video_id': video_id})['data']
+
+        formats, subtitles = [], {}
+        for stream in json_data['streams']:
+            if stream.get('type') == 'HLS':
+                fmt, sub = self._extract_m3u8_formats_and_subtitles(stream.get('url'), display_id, ext='mp4')
+                if json_data.get('drm'):
+                    # the drm flag is reported once per video, so mark every HLS variant
+                    for f in fmt:
+                        f['has_drm'] = True
+                formats.extend(fmt)
+                self._merge_subtitles(sub, target=subtitles)
+            else:
+                formats.append({
+                    'url': stream.get('url'),
+                    'width': stream.get('width'),
+                    'height': stream.get('height'),
+                    'ext': 'mp4',
+                })
+
+        self._sort_formats(formats)
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            'title': json_data.get('title') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
+            'description': (clean_html(traverse_obj(json_data, ('metadata', 'description')))
+                            or self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage)),
+            'thumbnails': traverse_obj(json_data, ('metadata', 'thumbnails')),
+            'duration': float_or_none(traverse_obj(json_data, ('metadata', 'duration'))),
+            'tags': try_call(lambda: (json_data['metadata']['keywords'] or None).split(',')),
+            'categories': try_call(lambda: (json_data['metadata']['categories'] or None).split(',')),
+            'uploader_id': json_data.get('owner_id'),
+        }
diff --git a/yt_dlp/extractor/kompas.py b/yt_dlp/extractor/kompas.py
index d400c42f3..03f5f30bd 100644
--- a/yt_dlp/extractor/kompas.py
+++ b/yt_dlp/extractor/kompas.py
@@ -1,17 +1,9 @@
-from .common import InfoExtractor
-from ..utils import (
-    clean_html,
-    float_or_none,
-    traverse_obj,
-    try_call,
-)
+from .jixie import JixieBaseIE
 
-# Video from www.kompas.tv and video.kompas.com seems use jixie player
-# see [1] https://jixie.atlassian.net/servicedesk/customer/portal/2/article/1339654214?src=-1456335525,
-# [2] https://scripts.jixie.media/jxvideo.3.1.min.js for more info
+# Videos from video.kompas.com seem to use the jixie player
 
 
-class KompasVideoIE(InfoExtractor):
+class KompasVideoIE(JixieBaseIE):
     _VALID_URL = r'https?://video\.kompas\.com/\w+/(?P<id>\d+)/(?P<slug>[\w-]+)'
     _TESTS = [{
         'url': 'https://video.kompas.com/watch/164474/kim-jong-un-siap-kirim-nuklir-lawan-as-dan-korsel',
@@ -33,36 +25,4 @@ class KompasVideoIE(InfoExtractor):
         video_id, display_id = self._match_valid_url(url).group('id', 'slug')
         webpage = self._download_webpage(url, display_id)
 
-        json_data = self._download_json(
-            'https://apidam.jixie.io/api/public/stream', display_id,
-            query={'metadata': 'full', 'video_id': video_id})['data']
-
-        formats, subtitles = [], {}
-        for stream in json_data['streams']:
-            if stream.get('type') == 'HLS':
-                fmt, sub = self._extract_m3u8_formats_and_subtitles(stream.get('url'), display_id, ext='mp4')
-                formats.extend(fmt)
-                self._merge_subtitles(sub, target=subtitles)
-            else:
-                formats.append({
-                    'url': stream.get('url'),
-                    'width': stream.get('width'),
-                    'height': stream.get('height'),
-                    'ext': 'mp4',
-                })
-
-        self._sort_formats(formats)
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'formats': formats,
-            'subtitles': subtitles,
-            'title': json_data.get('title') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
-            'description': (clean_html(traverse_obj(json_data, ('metadata', 'description')))
-                            or self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage)),
-            'thumbnails': traverse_obj(json_data, ('metadata', 'thumbnails')),
-            'duration': float_or_none(traverse_obj(json_data, ('metadata', 'duration'))),
-            'tags': try_call(lambda: json_data['metadata']['keywords'].split(',')),
-            'categories': try_call(lambda: json_data['metadata']['categories'].split(',')),
-            'uploader_id': json_data.get('owner_id'),
-        }
+        return self._extract_data_from_jixie_id(display_id, video_id, webpage)
diff --git a/yt_dlp/extractor/moview.py b/yt_dlp/extractor/moview.py
new file mode 100644
index 000000000..678b2eb06
--- /dev/null
+++ b/yt_dlp/extractor/moview.py
@@ -0,0 +1,43 @@
+from .jixie import JixieBaseIE
+
+
+class MoviewPlayIE(JixieBaseIE):
+    _VALID_URL = r'https?://www\.moview\.id/play/\d+/(?P<id>[\w-]+)'
+    _TESTS = [
+        {
+            # drm hls, only use direct link
+            'url': 'https://www.moview.id/play/174/Candy-Monster',
+            'info_dict': {
+                'id': '146182',
+                'ext': 'mp4',
+                'display_id': 'Candy-Monster',
+                'uploader_id': 'Mo165qXUUf',
+                'duration': 528.2,
+                'title': 'Candy Monster',
+                'description': 'Mengapa Candy Monster ingin mengambil permen Chloe?',
+                'thumbnail': 'https://video.jixie.media/1034/146182/146182_1280x720.jpg',
+            }
+        }, {
+            # non-drm hls
+            'url': 'https://www.moview.id/play/75/Paris-Van-Java-Episode-16',
+            'info_dict': {
+                'id': '28210',
+                'ext': 'mp4',
+                'duration': 2595.666667,
+                'display_id': 'Paris-Van-Java-Episode-16',
+                'uploader_id': 'Mo165qXUUf',
+                'thumbnail': 'https://video.jixie.media/1003/28210/28210_1280x720.jpg',
+                'description': 'md5:2a5e18d98eef9b39d7895029cac96c63',
+                'title': 'Paris Van Java Episode 16',
+            }
+        }
+    ]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            r'video_id\s*=\s*"(?P<video_id>[^"]+)', webpage, 'video_id')
+
+        return self._extract_data_from_jixie_id(display_id, video_id, webpage)