\w+))?|$)' _TEMPLATE_URL = 'https://vk.com/videos' _TESTS = [{ 'url': 'https://vk.com/video/@mobidevices', 'info_dict': { 'id': '-17892518_all', }, 'playlist_mincount': 1355, }, { 'url': 'https://vk.com/video/@mobidevices?section=uploaded', 'info_dict': { 'id': '-17892518_uploaded', }, 'playlist_mincount': 182, }, { 'url': 'https://vk.com/video/playlist/-174476437_2', 'info_dict': { 'id': '-174476437_playlist_2', 'title': 'Анонсы' }, 'playlist_mincount': 108, }] _VIDEO = collections.namedtuple('Video', ['owner_id', 'id']) def _entries(self, page_id, section): video_list_json = self._download_payload('al_video', page_id, { 'act': 'load_videos_silent', 'offset': 0, 'oid': page_id, 'section': section, })[0][section] count = video_list_json['count'] total = video_list_json['total'] video_list = video_list_json['list'] while True: for video in video_list: v = self._VIDEO._make(video[:2]) video_id = '%d_%d' % (v.owner_id, v.id) yield self.url_result( 'http://vk.com/video' + video_id, VKIE.ie_key(), video_id) if count >= total: break video_list_json = self._download_payload('al_video', page_id, { 'act': 'load_videos_silent', 'offset': count, 'oid': page_id, 'section': section, })[0][section] count += video_list_json['count'] video_list = video_list_json['list'] def _real_extract(self, url): u_id, section = self._match_valid_url(url).groups() webpage = self._download_webpage(url, u_id) if u_id.startswith('@'): page_id = self._search_regex(r'data-owner-id\s?=\s?"([^"]+)"', webpage, 'page_id') elif '_' in u_id: page_id, section = u_id.split('_', 1) section = f'playlist_{section}' else: raise ExtractorError('Invalid URL', expected=True) if not section: section = 'all' playlist_title = clean_html(get_element_by_class('VideoInfoPanel__title', webpage)) return self.playlist_result(self._entries(page_id, section), '%s_%s' % (page_id, section), playlist_title) class VKWallPostIE(VKBaseIE): IE_NAME = 'vk:wallpost' _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P-?\d+_\d+)))' _TESTS = [{ # public page URL, audio playlist 'url': 'https://vk.com/bs.official?w=wall-23538238_35', 'info_dict': { 'id': '-23538238_35', 'title': 'Black Shadow - Wall post -23538238_35', 'description': 'md5:190c78f905a53e0de793d83933c6e67f', }, 'playlist': [{ 'md5': '5ba93864ec5b85f7ce19a9af4af080f6', 'info_dict': { 'id': '135220665_111806521', 'ext': 'm4a', 'title': 'Black Shadow - Слепое Верование', 'duration': 370, 'uploader': 'Black Shadow', 'artist': 'Black Shadow', 'track': 'Слепое Верование', }, }, { 'md5': '4cc7e804579122b17ea95af7834c9233', 'info_dict': { 'id': '135220665_111802303', 'ext': 'm4a', 'title': 'Black Shadow - Война - Негасимое Бездны Пламя!', 'duration': 423, 'uploader': 'Black Shadow', 'artist': 'Black Shadow', 'track': 'Война - Негасимое Бездны Пламя!', }, }], 'params': { 'skip_download': True, }, }, { # single YouTube embed with irrelevant reaction videos 'url': 'https://vk.com/wall-32370614_7173954', 'info_dict': { 'id': '-32370614_7173954', 'title': 'md5:9f93c405bbc00061d34007d78c75e3bc', 'description': 'md5:953b811f26fa9f21ee5856e2ea8e68fc', }, 'playlist_count': 1, }, { # wall page URL 'url': 'https://vk.com/wall-23538238_35', 'only_matching': True, }, { # mobile wall page URL 'url': 'https://m.vk.com/wall-23538238_35', 'only_matching': True, }] _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/=' _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads']) def _decode(self, enc): dec = '' e = n = 0 for c in enc: r = self._BASE64_CHARS.index(c) cond = n % 4 e = 64 * e + r if cond else r n += 1 if cond: dec += chr(255 & e >> (-2 * n & 6)) return dec def _unmask_url(self, mask_url, vk_id): if 'audio_api_unavailable' in mask_url: extra = mask_url.split('?extra=')[1].split('#') func, base = self._decode(extra[1]).split(chr(11)) mask_url = list(self._decode(extra[0])) url_len = len(mask_url) indexes = [None] * url_len index = int(base) ^ vk_id for n in range(url_len - 1, -1, -1): index = (url_len * (n + 1) ^ index + n) % url_len indexes[n] = index for n in range(1, url_len): c = mask_url[n] index = indexes[url_len - 1 - n] mask_url[n] = mask_url[index] mask_url[index] = c mask_url = ''.join(mask_url) return mask_url def _real_extract(self, url): post_id = self._match_id(url) webpage = self._download_payload('wkview', post_id, { 'act': 'show', 'w': 'wall' + post_id, })[1] uploader = clean_html(get_element_by_class('PostHeaderTitle__authorName', webpage)) entries = [] for audio in re.findall(r'data-audio="([^"]+)', webpage): audio = self._parse_json(unescapeHTML(audio), post_id) if not audio['url']: continue title = unescapeHTML(audio.get('title')) artist = unescapeHTML(audio.get('artist')) entries.append({ 'id': f'{audio["owner_id"]}_{audio["id"]}', 'title': join_nonempty(artist, title, delim=' - '), 'thumbnails': try_call(lambda: [{'url': u} for u in audio['coverUrl'].split(',')]), 'duration': int_or_none(audio.get('duration')), 'uploader': uploader, 'artist': artist, 'track': title, 'formats': [{ 'url': audio['url'], 'ext': 'm4a', 'vcodec': 'none', 'acodec': 'mp3', 'container': 'm4a_dash', }], }) entries.extend(self.url_result(urljoin(url, entry), VKIE) for entry in set(re.findall( r']+href=(?:["\'])(/video(?:-?[\d_]+)[^"\']*)', get_element_html_by_id('wl_post_body', webpage)))) return self.playlist_result( entries, post_id, join_nonempty(uploader, f'Wall post {post_id}', delim=' - '), clean_html(get_element_by_class('wall_post_text', webpage))) class VKPlayBaseIE(InfoExtractor): _BASE_URL_RE = r'https?://(?:vkplay\.live|live\.vkplay\.ru)/' _RESOLUTIONS = { 'tiny': '256x144', 'lowest': '426x240', 'low': '640x360', 'medium': '852x480', 'high': '1280x720', 'full_hd': '1920x1080', 'quad_hd': '2560x1440', } def _extract_from_initial_state(self, url, video_id, path): webpage = self._download_webpage(url, video_id) video_info = traverse_obj(self._search_json( r']+\bid="initial-state"[^>]*>', webpage, 'initial state', video_id), path, expected_type=dict) if not video_info: raise ExtractorError('Unable to extract video info from html inline initial state') return video_info def _extract_formats(self, stream_info, video_id): formats = [] for stream in traverse_obj(stream_info, ( 'data', 0, 'playerUrls', lambda _, v: url_or_none(v['url']) and v['type'])): url = stream['url'] format_id = str_or_none(stream['type']) if format_id in ('hls', 'live_hls', 'live_playback_hls') or '.m3u8' in url: formats.extend(self._extract_m3u8_formats(url, video_id, m3u8_id=format_id, fatal=False)) elif format_id == 'dash': formats.extend(self._extract_mpd_formats(url, video_id, mpd_id=format_id, fatal=False)) elif format_id in ('live_dash', 'live_playback_dash'): self.write_debug(f'Not extracting unsupported format "{format_id}"') else: formats.append({ 'url': url, 'ext': 'mp4', 'format_id': format_id, **parse_resolution(self._RESOLUTIONS.get(format_id)), }) return formats def _extract_common_meta(self, stream_info): return traverse_obj(stream_info, { 'id': ('id', {str_or_none}), 'title': ('title', {str}), 'release_timestamp': ('startTime', {int_or_none}), 'thumbnail': ('previewUrl', {url_or_none}), 'view_count': ('count', 'views', {int_or_none}), 'like_count': ('count', 'likes', {int_or_none}), 'categories': ('category', 'title', {str}, {lambda x: [x] if x else None}), 'uploader': (('user', ('blog', 'owner')), 'nick', {str}), 'uploader_id': (('user', ('blog', 'owner')), 'id', {str_or_none}), 'duration': ('duration', {int_or_none}), 'is_live': ('isOnline', {bool}), 'concurrent_view_count': ('count', 'viewers', {int_or_none}), }, get_all=False) class VKPlayIE(VKPlayBaseIE): _VALID_URL = rf'{VKPlayBaseIE._BASE_URL_RE}(?P[^/#?]+)/record/(?P[\da-f-]+)' _TESTS = [{ 'url': 'https://vkplay.live/zitsmann/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da', 'info_dict': { 'id': 'f5e6e3b5-dc52-4d14-965d-0680dd2882da', 'ext': 'mp4', 'title': 'Atomic Heart (пробуем!) спасибо подписчику EKZO!', 'uploader': 'ZitsmanN', 'uploader_id': '13159830', 'release_timestamp': 1683461378, 'release_date': '20230507', 'thumbnail': r're:https://[^/]+/public_video_stream/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da/preview', 'duration': 10608, 'view_count': int, 'like_count': int, 'categories': ['Atomic Heart'], }, 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://live.vkplay.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records', 'only_matching': True, }] def _real_extract(self, url): username, video_id = self._match_valid_url(url).groups() record_info = traverse_obj(self._download_json( f'https://api.vkplay.live/v1/blog/{username}/public_video_stream/record/{video_id}', video_id, fatal=False), ('data', 'record', {dict})) if not record_info: record_info = self._extract_from_initial_state(url, video_id, ('record', 'currentRecord', 'data')) return { **self._extract_common_meta(record_info), 'id': video_id, 'formats': self._extract_formats(record_info, video_id), } class VKPlayLiveIE(VKPlayBaseIE): _VALID_URL = rf'{VKPlayBaseIE._BASE_URL_RE}(?P[^/#?]+)/?(?:[#?]|$)' _TESTS = [{ 'url': 'https://vkplay.live/bayda', 'info_dict': { 'id': 'f02c321e-427b-408d-b12f-ae34e53e0ea2', 'ext': 'mp4', 'title': r're:эскапизм крута .*', 'uploader': 'Bayda', 'uploader_id': '12279401', 'release_timestamp': 1687209962, 'release_date': '20230619', 'thumbnail': r're:https://[^/]+/public_video_stream/12279401/preview', 'view_count': int, 'concurrent_view_count': int, 'like_count': int, 'categories': ['EVE Online'], 'live_status': 'is_live', }, 'skip': 'livestream', 'params': {'skip_download': True}, }, { 'url': 'https://live.vkplay.ru/lebwa', 'only_matching': True, }] def _real_extract(self, url): username = self._match_id(url) stream_info = self._download_json( f'https://api.vkplay.live/v1/blog/{username}/public_video_stream', username, fatal=False) if not stream_info: stream_info = self._extract_from_initial_state(url, username, ('stream', 'stream', 'data', 'stream')) formats = self._extract_formats(stream_info, username) if not formats and not traverse_obj(stream_info, ('isOnline', {bool})): raise UserNotLive(video_id=username) return { **self._extract_common_meta(stream_info), 'formats': formats, }