From 426764371fa52dde8fb9bedad69a3e58e5c391b9 Mon Sep 17 00:00:00 2001 From: MinePlayersPE <20515340+MinePlayersPE@users.noreply.github.com> Date: Thu, 20 Jan 2022 05:23:55 +0700 Subject: [PATCH] [iq.com] Add extractors (#2354) Closes #704 Authored by: MinePlayersPE --- yt_dlp/YoutubeDL.py | 4 +- yt_dlp/extractor/common.py | 5 +- yt_dlp/extractor/extractors.py | 6 +- yt_dlp/extractor/iqiyi.py | 343 ++++++++++++++++++++++++++++++++- 4 files changed, 353 insertions(+), 5 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index dfca76bb0..21edfe339 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2751,7 +2751,9 @@ class YoutubeDL(object): if not test: for ph in self._progress_hooks: fd.add_progress_hook(ph) - urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']]) + urls = '", "'.join( + (f['url'].split(',')[0] + ',' if f['url'].startswith('data:') else f['url']) + for f in info.get('requested_formats', []) or [info]) self.write_debug('Invoking downloader on "%s"' % urls) # Note: Ideally info should be a deep-copied so that hooks cannot modify it. 
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 3a61eecc1..a23840e41 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -45,6 +45,7 @@ from ..utils import ( determine_ext, determine_protocol, dict_get, + encode_data_uri, error_to_compat_str, extract_attributes, ExtractorError, @@ -2106,7 +2107,7 @@ class InfoExtractor(object): headers=headers, query=query, video_id=video_id) def _parse_m3u8_formats_and_subtitles( - self, m3u8_doc, m3u8_url, ext=None, entry_protocol='m3u8_native', + self, m3u8_doc, m3u8_url=None, ext=None, entry_protocol='m3u8_native', preference=None, quality=None, m3u8_id=None, live=False, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, video_id=None): @@ -2156,7 +2157,7 @@ class InfoExtractor(object): formats = [{ 'format_id': join_nonempty(m3u8_id, idx), 'format_index': idx, - 'url': m3u8_url, + 'url': m3u8_url or encode_data_uri(m3u8_doc.encode('utf-8'), 'application/x-mpegurl'), 'ext': ext, 'protocol': entry_protocol, 'preference': preference, diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 12348d629..2d707a575 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -636,7 +636,11 @@ from .iprima import ( IPrimaIE, IPrimaCNNIE ) -from .iqiyi import IqiyiIE +from .iqiyi import ( + IqiyiIE, + IqIE, + IqAlbumIE +) from .ir90tv import Ir90TvIE from .itv import ( ITVIE, diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index e33e23f08..5dc653125 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -11,14 +11,26 @@ from ..compat import ( compat_str, compat_urllib_parse_urlencode, ) +from .openload import PhantomJSwrapper from ..utils import ( clean_html, decode_packed_codes, + ExtractorError, + float_or_none, get_element_by_id, get_element_by_attribute, - ExtractorError, + int_or_none, + js_to_json, ohdave_rsa_encrypt, + parse_age_limit, + parse_duration, + parse_iso8601, + 
parse_resolution, + qualities, remove_start, + str_or_none, + traverse_obj, + urljoin, ) @@ -392,3 +404,332 @@ class IqiyiIE(InfoExtractor): 'title': title, 'formats': formats, } + + +class IqIE(InfoExtractor): + IE_NAME = 'iq.com' + IE_DESC = 'International version of iQiyi' + _VALID_URL = r'https?://(?:www\.)?iq\.com/play/(?:[\w%-]*-)?(?P<id>\w+)' + _TESTS = [{ + 'url': 'https://www.iq.com/play/one-piece-episode-1000-1ma1i6ferf4', + 'md5': '2d7caf6eeca8a32b407094b33b757d39', + 'info_dict': { + 'ext': 'mp4', + 'id': '1ma1i6ferf4', + 'title': '航海王 第1000集', + 'description': 'Subtitle available on Sunday 4PM(GMT+8).', + 'duration': 1430, + 'timestamp': 1637488203, + 'upload_date': '20211121', + 'episode_number': 1000, + 'episode': 'Episode 1000', + 'series': 'One Piece', + 'age_limit': 13, + 'average_rating': float, + }, + 'params': { + 'format': '500', + }, + 'expected_warnings': ['format is restricted'] + }] + _BID_TAGS = { + '100': '240P', + '200': '360P', + '300': '480P', + '500': '720P', + '600': '1080P', + '610': '1080P50', + '700': '2K', + '800': '4K', + } + _LID_TAGS = { + '1': 'zh_CN', + '2': 'zh_TW', + '3': 'en', + '18': 'th', + '21': 'my', + '23': 'vi', + '24': 'id', + '26': 'es', + '28': 'ar', + } + + _DASH_JS = ''' + console.log(page.evaluate(function() { + var tvid = "%(tvid)s"; var vid = "%(vid)s"; var src = "%(src)s"; + var dfp = "%(dfp)s"; var mode = "%(mode)s"; var lang = "%(lang)s"; var bid_list = %(bid_list)s; + var tm = new Date().getTime(); + var cmd5x_func = %(cmd5x_func)s; var cmd5x_exporter = {}; cmd5x_func({}, cmd5x_exporter, {}); var cmd5x = cmd5x_exporter.cmd5x; + var authKey = cmd5x(cmd5x('') + tm + '' + tvid); + var k_uid = Array.apply(null, Array(32)).map(function() {return Math.floor(Math.random() * 15).toString(16)}).join(''); + var dash_paths = {}; + bid_list.forEach(function(bid) { + var query = { + 'tvid': tvid, + 'bid': bid, + 'ds': 1, + 'vid': vid, + 'src': src, + 'vt': 0, + 'rs': 1, + 'uid': 0, + 'ori': 'pcw', + 'ps': 1, + 'k_uid': 
k_uid, + 'pt': 0, + 'd': 0, + 's': '', + 'lid': '', + 'slid': 0, + 'cf': '', + 'ct': '', + 'authKey': authKey, + 'k_tag': 1, + 'ost': 0, + 'ppt': 0, + 'dfp': dfp, + 'prio': JSON.stringify({ + 'ff': 'f4v', + 'code': 2 + }), + 'k_err_retries': 0, + 'up': '', + 'su': 2, + 'applang': lang, + 'sver': 2, + 'X-USER-MODE': mode, + 'qd_v': 2, + 'tm': tm, + 'qdy': 'a', + 'qds': 0, + 'k_ft1': 141287244169348, + 'k_ft4': 34359746564, + 'k_ft5': 1, + 'bop': JSON.stringify({ + 'version': '10.0', + 'dfp': dfp + }), + 'ut': 0, // TODO: Set ut param for VIP members + }; + var enc_params = []; + for (var prop in query) { + enc_params.push(encodeURIComponent(prop) + '=' + encodeURIComponent(query[prop])); + } + var dash_path = '/dash?' + enc_params.join('&'); dash_path += '&vf=' + cmd5x(dash_path); + dash_paths[bid] = dash_path; + }); + return JSON.stringify(dash_paths); + })); + saveAndExit(); + ''' + + def _extract_vms_player_js(self, webpage, video_id): + player_js_cache = self._downloader.cache.load('iq', 'player_js') + if player_js_cache: + return player_js_cache + webpack_js_url = self._proto_relative_url(self._search_regex( + r'