[vlive:channel] Fix extraction

Based on https://github.com/ytdl-org/youtube-dl/pull/29866 Closes #749, #927, https://github.com/ytdl-org/youtube-dl/issues/29837 Authored by kikuyan, pukkandan
3 years ago · 457f6d6866
parent ad0090d0d2
commit 457f6d6866
1 changed files with 99 additions and 120 deletions
--- a/yt_dlp/extractor/vlive.py
+++ b/yt_dlp/extractor/vlive.py
@ -17,17 +17,65 @@ from ..utils import (
    strip_or_none,
    try_get,
    urlencode_postdata,
+    url_or_none,
 )


 class VLiveBaseIE(NaverBaseIE):
-    _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
+    _NETRC_MACHINE = 'vlive'
+    _logged_in = False
+
+    def _real_initialize(self):
+        if not self._logged_in:
+            VLiveBaseIE._logged_in = self._login()
+
+    def _login(self):
+        email, password = self._get_login_info()
+        if email is None:
+            return False
+
+        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
+        self._request_webpage(
+            LOGIN_URL, None, note='Downloading login cookies')
+
+        self._download_webpage(
+            LOGIN_URL, None, note='Logging in',
+            data=urlencode_postdata({'email': email, 'pwd': password}),
+            headers={
+                'Referer': LOGIN_URL,
+                'Content-Type': 'application/x-www-form-urlencoded'
+            })
+
+        login_info = self._download_json(
+            'https://www.vlive.tv/auth/loginInfo', None,
+            note='Checking login status',
+            headers={'Referer': 'https://www.vlive.tv/home'})
+
+        if not try_get(login_info, lambda x: x['message']['login'], bool):
+            raise ExtractorError('Unable to log in', expected=True)
+        return True
+
+    def _call_api(self, path_template, video_id, fields=None, query_add={}, note=None):
+        if note is None:
+            note = 'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0]
+        query = {'appId': '8c6cc7b45d2568fb668be6e05b6e5a3b', 'gcc': 'KR', 'platformType': 'PC'}
+        if fields:
+            query['fields'] = fields
+        if query_add:
+            query.update(query_add)
+        try:
+            return self._download_json(
+                'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
+                note, headers={'Referer': 'https://www.vlive.tv/'}, query=query)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
+            raise


 class VLiveIE(VLiveBaseIE):
    IE_NAME = 'vlive'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
-    _NETRC_MACHINE = 'vlive'
    _TESTS = [{
        'url': 'http://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
@ -81,53 +129,6 @@ class VLiveIE(VLiveBaseIE):
        'playlist_mincount': 120
    }]

-    def _real_initialize(self):
-        self._login()
-
-    def _login(self):
-        email, password = self._get_login_info()
-        if None in (email, password):
-            return
-
-        def is_logged_in():
-            login_info = self._download_json(
-                'https://www.vlive.tv/auth/loginInfo', None,
-                note='Downloading login info',
-                headers={'Referer': 'https://www.vlive.tv/home'})
-            return try_get(
-                login_info, lambda x: x['message']['login'], bool) or False
-
-        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
-        self._request_webpage(
-            LOGIN_URL, None, note='Downloading login cookies')
-
-        self._download_webpage(
-            LOGIN_URL, None, note='Logging in',
-            data=urlencode_postdata({'email': email, 'pwd': password}),
-            headers={
-                'Referer': LOGIN_URL,
-                'Content-Type': 'application/x-www-form-urlencoded'
-            })
-
-        if not is_logged_in():
-            raise ExtractorError('Unable to log in', expected=True)
-
-    def _call_api(self, path_template, video_id, fields=None, limit=None):
-        query = {'appId': self._APP_ID, 'gcc': 'KR', 'platformType': 'PC'}
-        if fields:
-            query['fields'] = fields
-        if limit:
-            query['limit'] = limit
-        try:
-            return self._download_json(
-                'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
-                'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
-                headers={'Referer': 'https://www.vlive.tv/'}, query=query)
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
-                self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
-            raise
-
    def _real_extract(self, url):
        video_id = self._match_id(url)

@ -150,7 +151,7 @@ class VLiveIE(VLiveBaseIE):
            playlist_count = str_or_none(playlist.get('totalCount'))

            playlist = self._call_api(
-                'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', limit=playlist_count)
+                'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', {'limit': playlist_count})

            entries = []
            for video_data in playlist['data']:
@ -216,7 +217,7 @@ class VLiveIE(VLiveBaseIE):
                raise ExtractorError('Unknown status ' + status)


-class VLivePostIE(VLiveIE):
+class VLivePostIE(VLiveBaseIE):
    IE_NAME = 'vlive:post'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)'
    _TESTS = [{
@ -238,8 +239,6 @@ class VLivePostIE(VLiveIE):
        'playlist_count': 1,
    }]
    _FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s'
-    _SOS_TMPL = _FVIDEO_TMPL % 'sosPlayInfo'
-    _INKEY_TMPL = _FVIDEO_TMPL % 'inKey'

    def _real_extract(self, url):
        post_id = self._match_id(url)
@ -266,7 +265,7 @@ class VLivePostIE(VLiveIE):
            entry = None
            if upload_type == 'SOS':
                download = self._call_api(
-                    self._SOS_TMPL, video_id)['videoUrl']['download']
+                    self._FVIDEO_TMPL % 'sosPlayInfo', video_id)['videoUrl']['download']
                formats = []
                for f_id, f_url in download.items():
                    formats.append({
@ -284,7 +283,7 @@ class VLivePostIE(VLiveIE):
                vod_id = upload_info.get('videoId')
                if not vod_id:
                    continue
-                inkey = self._call_api(self._INKEY_TMPL, video_id)['inKey']
+                inkey = self._call_api(self._FVIDEO_TMPL % 'inKey', video_id)['inKey']
                entry = self._extract_video_info(video_id, vod_id, inkey)
            if entry:
                entry['title'] = '%s_part%s' % (title, idx)
@ -295,7 +294,7 @@ class VLivePostIE(VLiveIE):

 class VLiveChannelIE(VLiveBaseIE):
    IE_NAME = 'vlive:channel'
-    _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)'
+    _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<channel_id>[0-9A-Z]+)(?:/board/(?P<posts_id>\d+))?'
    _TESTS = [{
        'url': 'http://channels.vlive.tv/FCD4B',
        'info_dict': {
@ -306,78 +305,58 @@ class VLiveChannelIE(VLiveBaseIE):
    }, {
        'url': 'https://www.vlive.tv/channel/FCD4B',
        'only_matching': True,
+    }, {
+        'url': 'https://www.vlive.tv/channel/FCD4B/board/3546',
+        'info_dict': {
+            'id': 'FCD4B-3546',
+            'title': 'MAMAMOO - Star Board',
+        },
+        'playlist_mincount': 880
    }]

-    def _call_api(self, path, channel_key_suffix, channel_value, note, query):
-        q = {
-            'app_id': self._APP_ID,
-            'channel' + channel_key_suffix: channel_value,
-        }
-        q.update(query)
-        return self._download_json(
-            'http://api.vfan.vlive.tv/vproxy/channelplus/' + path,
-            channel_value, note='Downloading ' + note, query=q)['result']
-
-    def _real_extract(self, url):
-        channel_code = self._match_id(url)
-
-        channel_seq = self._call_api(
-            'decodeChannelCode', 'Code', channel_code,
-            'decode channel code', {})['channelSeq']
-
-        channel_name = None
-        entries = []
+    def _entries(self, posts_id, board_name):
+        if board_name:
+            posts_path = 'post/v1.0/board-%s/posts'
+            query_add = {'limit': 100, 'sortType': 'LATEST'}
+        else:
+            posts_path = 'post/v1.0/channel-%s/starPosts'
+            query_add = {'limit': 100}

        for page_num in itertools.count(1):
            video_list = self._call_api(
-                'getChannelVideoList', 'Seq', channel_seq,
-                'channel list page #%d' % page_num, {
-                    # Large values of maxNumOfRows (~300 or above) may cause
-                    # empty responses (see [1]), e.g. this happens for [2] that
-                    # has more than 300 videos.
-                    # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
-                    # 2. http://channels.vlive.tv/EDBF.
-                    'maxNumOfRows': 100,
-                    'pageNo': page_num
-                }
-            )
-
-            if not channel_name:
-                channel_name = try_get(
-                    video_list,
-                    lambda x: x['channelInfo']['channelName'],
-                    compat_str)
+                posts_path, posts_id, 'channel{channelName},contentType,postId,title,url', query_add,
+                note=f'Downloading playlist page {page_num}')
+
+            for video in try_get(video_list, lambda x: x['data'], list) or []:
+                video_id = str(video.get('postId'))
+                video_title = str_or_none(video.get('title'))
+                video_url = url_or_none(video.get('url'))
+                if not all((video_id, video_title, video_url)) or video.get('contentType') != 'VIDEO':
+                    continue
+                channel_name = try_get(video, lambda x: x['channel']['channelName'], compat_str)
+                yield self.url_result(video_url, VLivePostIE.ie_key(), video_id, video_title, channel=channel_name)

-            videos = try_get(
-                video_list, lambda x: x['videoList'], list)
-            if not videos:
+            after = try_get(video_list, lambda x: x['paging']['nextParams']['after'], compat_str)
+            if not after:
                break
+            query_add['after'] = after
+
+    def _real_extract(self, url):
+        channel_id, posts_id = self._match_valid_url(url).groups()

-            for video in videos:
-                video_id = video.get('videoSeq')
-                video_type = video.get('videoType')
+        board_name = None
+        if posts_id:
+            board = self._call_api(
+                'board/v1.0/board-%s', posts_id, 'title,boardType')
+            board_name = board.get('title') or 'Unknown'
+            if board.get('boardType') not in ('STAR', 'VLIVE_PLUS'):
+                raise ExtractorError(f'Board {board_name!r} is not supported', expected=True)

-                if not video_id or not video_type:
-                    continue
-                video_id = compat_str(video_id)
-
-                if video_type in ('PLAYLIST'):
-                    first_video_id = try_get(
-                        video,
-                        lambda x: x['videoPlaylist']['videoList'][0]['videoSeq'], int)
-
-                    if not first_video_id:
-                        continue
-
-                    entries.append(
-                        self.url_result(
-                            'http://www.vlive.tv/video/%s' % first_video_id,
-                            ie=VLiveIE.ie_key(), video_id=first_video_id))
-                else:
-                    entries.append(
-                        self.url_result(
-                            'http://www.vlive.tv/video/%s' % video_id,
-                            ie=VLiveIE.ie_key(), video_id=video_id))
+        entries = self._entries(posts_id or channel_id, board_name)
+        first_video = next(entries)
+        channel_name = first_video['channel']

        return self.playlist_result(
-            entries, channel_code, channel_name)
+            itertools.chain([first_video], entries),
+            f'{channel_id}-{posts_id}' if posts_id else channel_id,
+            f'{channel_name} - {board_name}' if channel_name and board_name else channel_name)