[extractor/moview] Add extractor (#4607)

Authored by: HobbyistDev
pull/4663/head
HobbyistDev 2 years ago committed by GitHub
parent cb7cc448c0
commit 7695f5a0a7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -975,6 +975,7 @@ from .motherless import (
from .motorsport import MotorsportIE from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE from .movieclips import MovieClipsIE
from .moviepilot import MoviepilotIE from .moviepilot import MoviepilotIE
from .moview import MoviewPlayIE
from .moviezine import MoviezineIE from .moviezine import MoviezineIE
from .movingimage import MovingImageIE from .movingimage import MovingImageIE
from .msn import MSNIE from .msn import MSNIE

@ -0,0 +1,51 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
float_or_none,
traverse_obj,
try_call,
)
# More info about the Jixie player:
# [1] https://jixie.atlassian.net/servicedesk/customer/portal/2/article/1339654214?src=-1456335525
# [2] https://scripts.jixie.media/jxvideo.3.1.min.js
class JixieBaseIE(InfoExtractor):
    """Shared base for extractors of sites whose videos are served through Jixie."""

    def _extract_data_from_jixie_id(self, display_id, video_id, webpage):
        """Build the info dict for a Jixie video.

        Requests the stream description for ``video_id`` from the public
        Jixie API and converts each listed stream into formats (plus
        subtitles for HLS streams).  Title and description fall back to the
        meta tags of ``webpage`` when the API response does not provide them.

        :param display_id: returned as ``display_id`` and passed to the
            download helpers as the identifier of the item being fetched
        :param video_id: Jixie video id; sent to the API and returned as ``id``
        :param webpage: HTML of the hosting page, used only for meta-tag
            fallbacks
        :return: dict with ``id``, ``display_id``, ``formats``, ``subtitles``
            and the metadata fields listed at the end of this method
        :raises KeyError: if the API response has no ``data`` or ``streams``
            entry
        """
        json_data = self._download_json(
            'https://apidam.jixie.io/api/public/stream', display_id,
            query={'metadata': 'full', 'video_id': video_id})['data']

        # The DRM flag is per video, not per stream, so look it up only once.
        is_drm = bool(json_data.get('drm'))

        formats, subtitles = [], {}
        for stream in json_data['streams']:
            stream_url = stream.get('url')
            if not stream_url:
                # A stream without a URL can neither be fetched as a playlist
                # nor be a usable format, so skip it instead of crashing.
                continue

            if stream.get('type') == 'HLS':
                fmt, sub = self._extract_m3u8_formats_and_subtitles(stream_url, display_id, ext='mp4')
                if is_drm:
                    # Only the HLS variants are marked; direct links are left untouched.
                    for f in fmt:
                        f['has_drm'] = True
                formats.extend(fmt)
                self._merge_subtitles(sub, target=subtitles)
            else:
                formats.append({
                    'url': stream_url,
                    'width': stream.get('width'),
                    'height': stream.get('height'),
                    'ext': 'mp4',
                })

        self._sort_formats(formats)
        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'subtitles': subtitles,
            'title': json_data.get('title') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
            'description': (clean_html(traverse_obj(json_data, ('metadata', 'description')))
                            or self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage)),
            'thumbnails': traverse_obj(json_data, ('metadata', 'thumbnails')),
            'duration': float_or_none(traverse_obj(json_data, ('metadata', 'duration'))),
            # `or None` turns an empty value into None so that `.split` raises
            # rather than producing ['']; presumably try_call swallows that error.
            'tags': try_call(lambda: (json_data['metadata']['keywords'] or None).split(',')),
            'categories': try_call(lambda: (json_data['metadata']['categories'] or None).split(',')),
            'uploader_id': json_data.get('owner_id'),
        }

@ -1,17 +1,9 @@
from .common import InfoExtractor from .jixie import JixieBaseIE
from ..utils import (
clean_html,
float_or_none,
traverse_obj,
try_call,
)
# Video from www.kompas.tv and video.kompas.com seems use jixie player # Video from video.kompas.com seems use jixie player
# see [1] https://jixie.atlassian.net/servicedesk/customer/portal/2/article/1339654214?src=-1456335525,
# [2] https://scripts.jixie.media/jxvideo.3.1.min.js for more info
class KompasVideoIE(InfoExtractor): class KompasVideoIE(JixieBaseIE):
_VALID_URL = r'https?://video\.kompas\.com/\w+/(?P<id>\d+)/(?P<slug>[\w-]+)' _VALID_URL = r'https?://video\.kompas\.com/\w+/(?P<id>\d+)/(?P<slug>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://video.kompas.com/watch/164474/kim-jong-un-siap-kirim-nuklir-lawan-as-dan-korsel', 'url': 'https://video.kompas.com/watch/164474/kim-jong-un-siap-kirim-nuklir-lawan-as-dan-korsel',
@ -33,36 +25,4 @@ class KompasVideoIE(InfoExtractor):
video_id, display_id = self._match_valid_url(url).group('id', 'slug') video_id, display_id = self._match_valid_url(url).group('id', 'slug')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
json_data = self._download_json( return self._extract_data_from_jixie_id(display_id, video_id, webpage)
'https://apidam.jixie.io/api/public/stream', display_id,
query={'metadata': 'full', 'video_id': video_id})['data']
formats, subtitles = [], {}
for stream in json_data['streams']:
if stream.get('type') == 'HLS':
fmt, sub = self._extract_m3u8_formats_and_subtitles(stream.get('url'), display_id, ext='mp4')
formats.extend(fmt)
self._merge_subtitles(sub, target=subtitles)
else:
formats.append({
'url': stream.get('url'),
'width': stream.get('width'),
'height': stream.get('height'),
'ext': 'mp4',
})
self._sort_formats(formats)
return {
'id': video_id,
'display_id': display_id,
'formats': formats,
'subtitles': subtitles,
'title': json_data.get('title') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
'description': (clean_html(traverse_obj(json_data, ('metadata', 'description')))
or self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage)),
'thumbnails': traverse_obj(json_data, ('metadata', 'thumbnails')),
'duration': float_or_none(traverse_obj(json_data, ('metadata', 'duration'))),
'tags': try_call(lambda: json_data['metadata']['keywords'].split(',')),
'categories': try_call(lambda: json_data['metadata']['categories'].split(',')),
'uploader_id': json_data.get('owner_id'),
}

@ -0,0 +1,43 @@
from .jixie import JixieBaseIE
class MoviewPlayIE(JixieBaseIE):
    """Extractor for ``www.moview.id/play/<number>/<slug>`` pages.

    The page embeds a Jixie video id; once it is found, extraction is
    delegated to ``_extract_data_from_jixie_id``.
    """

    _VALID_URL = r'https?://www\.moview\.id/play/\d+/(?P<id>[\w-]+)'
    _TESTS = [{
        # drm hls, only use direct link
        'url': 'https://www.moview.id/play/174/Candy-Monster',
        'info_dict': {
            'id': '146182',
            'ext': 'mp4',
            'display_id': 'Candy-Monster',
            'uploader_id': 'Mo165qXUUf',
            'duration': 528.2,
            'title': 'Candy Monster',
            'description': 'Mengapa Candy Monster ingin mengambil permen Chloe?',
            'thumbnail': 'https://video.jixie.media/1034/146182/146182_1280x720.jpg',
        },
    }, {
        # non-drm hls
        'url': 'https://www.moview.id/play/75/Paris-Van-Java-Episode-16',
        'info_dict': {
            'id': '28210',
            'ext': 'mp4',
            'duration': 2595.666667,
            'display_id': 'Paris-Van-Java-Episode-16',
            'uploader_id': 'Mo165qXUUf',
            'thumbnail': 'https://video.jixie.media/1003/28210/28210_1280x720.jpg',
            'description': 'md5:2a5e18d98eef9b39d7895029cac96c63',
            'title': 'Paris Van Java Episode 16',
        },
    }]

    def _real_extract(self, url):
        """Locate the Jixie video id in the page and hand over to the Jixie base."""
        # The URL slug doubles as the display id.
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # The page assigns the Jixie id as `video_id = "<id>"`.
        jixie_id = self._search_regex(
            r'video_id\s*=\s*"(?P<video_id>[^"]+)', page, 'video_id')

        return self._extract_data_from_jixie_id(slug, jixie_id, page)
Loading…
Cancel
Save