|
|
|
@ -1,15 +1,20 @@
|
|
|
|
|
import functools
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
from .common import InfoExtractor
|
|
|
|
|
from ..compat import compat_str
|
|
|
|
|
from ..utils import (
|
|
|
|
|
ExtractorError,
|
|
|
|
|
OnDemandPagedList,
|
|
|
|
|
determine_ext,
|
|
|
|
|
int_or_none,
|
|
|
|
|
float_or_none,
|
|
|
|
|
js_to_json,
|
|
|
|
|
clean_html,
|
|
|
|
|
get_elements_html_by_class,
|
|
|
|
|
get_element_html_by_class,
|
|
|
|
|
get_element_by_id,
|
|
|
|
|
extract_attributes,
|
|
|
|
|
orderedSet,
|
|
|
|
|
strip_jsonp,
|
|
|
|
|
strip_or_none,
|
|
|
|
@ -795,9 +800,10 @@ class PBSShowIE(InfoExtractor):
|
|
|
|
|
}]
|
|
|
|
|
|
|
|
|
|
# Regex for the opening <script id="content-strip-data" type="application/json">
# tag; _real_extract passes it as the first argument to self._search_json.
_JSON_SEARCH = r'<script[^>]+id="content-strip-data" type="application/json">'
|
|
|
|
|
# Regex for the literal text "GTMDataLayer.push(".
# NOTE(review): in the visible code this is only referenced from a
# commented-out line in _real_extract -- confirm whether it is still needed.
_SHOW_JSON_SEARCH = r'GTMDataLayer\.push\('
|
|
|
|
|
# Passed as `pagesize` to OnDemandPagedList in _real_extract.
# NOTE(review): PAGE_SIZE is assigned again further down (25); the two values
# look like the old and new side of a diff. If both assignments really are in
# the class body, the later one is the value that takes effect -- confirm.
PAGE_SIZE = 40
|
|
|
|
|
|
|
|
|
|
# NOTE(review): hard-coded token string; nothing in the visible code reads
# it -- confirm what it authenticates and whether it belongs in source.
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
|
|
|
|
|
# NOTE(review): second assignment of PAGE_SIZE (see the note on the first one).
PAGE_SIZE = 25
|
|
|
|
|
HTML_CLASS_NAMES = {
|
|
|
|
|
'channel': {
|
|
|
|
|
'container': 'channel-videos-container',
|
|
|
|
@ -816,22 +822,48 @@ class PBSShowIE(InfoExtractor):
|
|
|
|
|
def _make_url(playlist_id):
|
|
|
|
|
return f'https://watch.opb.org/show/{playlist_id}'
|
|
|
|
|
|
|
|
|
|
def _fetch_season_page(self, playlist_id, page_num):
    """Download one season listing of a show and scan it for episode summaries.

    Fetches ``<show URL>/episodes/season/<page_num>`` (reported under the
    video id ``<playlist_id>-<page_num>``) and collects the HTML of every
    element looked up with the class ``video-summary``.

    NOTE(review): the collected elements are neither returned nor yielded,
    so this method currently evaluates to ``None``. It is handed to
    ``OnDemandPagedList`` as the page function, which presumably expects an
    iterable of entries -- confirm and finish the entry building.
    """
    playlist_url = self._make_url(playlist_id)
    listing_html = self._download_webpage(
        f'{playlist_url}/episodes/season/{page_num}',
        video_id=f'{playlist_id}-{page_num}')

    # Collected but not used yet (see the note in the docstring).
    episode_summaries = get_elements_html_by_class("video-summary", listing_html)
|
|
|
|
|
|
|
|
|
|
def _real_extract(self, url):
# Extract a show playlist: take the show id from `url`, download the show
# page, and return a playlist whose entries are paged through
# `_fetch_season_page` by an OnDemandPagedList.
# NOTE(review): this listing has lost its indentation and diff markers. The
# live `for` loop below is repeated verbatim as commented-out code further
# down, so the live copy is presumably the removed side of a diff -- confirm
# against the real file before relying on the control flow shown here.
|
|
|
|
|
# The show id is the `id` group of the URL match.
playlist_id = self._match_valid_url(url).group('id')
|
|
|
|
|
# NOTE(review): only read by the season download near the end of this block;
# the page download on the next statement calls _make_url again instead.
playlist_url = self._make_url(playlist_id)
|
|
|
|
|
webpage = self._download_webpage(self._make_url(playlist_id), playlist_id)
|
|
|
|
|
# JSON located via _JSON_SEARCH; in the visible code only the per-season
# loop below reads it (episodes_data -> seasons).
show_data = self._search_json(self._JSON_SEARCH, webpage, 'seasons', playlist_id)
|
|
|
|
|
# show_metadata = self._search_json(self._SHOW_JSON_SEARCH, webpage, 'show metadata', playlist_id)
|
|
|
|
|
|
|
|
|
|
# Seasons ordered by their 'ordinal', highest first; a missing ordinal
# counts as 0.
for show_season_metadata in sorted(show_data.get('episodes_data', {}).get('seasons', []), key=lambda x: x.get('ordinal', 0), reverse=True):
|
|
|
|
|
season_ordinal = show_season_metadata.get('ordinal', 0)
|
|
|
|
|
# Seasons whose ordinal is 0 (or missing) are skipped.
if season_ordinal == 0:
|
|
|
|
|
continue
|
|
|
|
|
# HTML of the element looked up with the class string
# "show-hero__description--long is-hidden", passed through clean_html.
playlist_description = clean_html(get_element_html_by_class("show-hero__description--long is-hidden", webpage))
|
|
|
|
|
# Attributes of the element looked up with the class string
# "show-hero__my-list btn--mylist--placeholder"; the title is read from them.
show_metadata = extract_attributes(get_element_html_by_class("show-hero__my-list btn--mylist--placeholder", webpage))
|
|
|
|
|
|
|
|
|
|
season_id = f'{playlist_id}-{season_ordinal}'
|
|
|
|
|
# NOTE(review): plain indexing -- presumably show_metadata is a dict, so a
# missing 'data-gtml-label' attribute would raise KeyError; confirm.
playlist_title = show_metadata['data-gtml-label']
|
|
|
|
|
# NOTE(review): the result of this call is discarded, and it indexes the
# value already returned by clean_html above -- looks like a leftover; confirm.
clean_html(playlist_description[0])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Entries come from an OnDemandPagedList whose page function is
# _fetch_season_page with playlist_id pre-bound, using PAGE_SIZE as page size.
# NOTE(review): playlist_description is computed above but not passed here.
return self.playlist_result(
|
|
|
|
|
OnDemandPagedList(
|
|
|
|
|
pagefunc=functools.partial(self._fetch_season_page, playlist_id),
|
|
|
|
|
pagesize=self.PAGE_SIZE
|
|
|
|
|
),
|
|
|
|
|
playlist_id=playlist_id,
|
|
|
|
|
playlist_title=playlist_title,
|
|
|
|
|
# playlist_title=
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# NOTE(review): the next three statements follow the `return` above in this
# listing and match the tail of the commented-out loop below; presumably
# remnants of the old per-season loop body -- confirm.
season_page = self._download_webpage(f'{playlist_url}/episodes/season/{season_ordinal}', video_id=season_id)
|
|
|
|
|
season_data = get_elements_html_by_class("video-summary", season_page)
|
|
|
|
|
pass
|
|
|
|
|
# Commented-out copy of the per-season loop (same statements as the live
# loop lines above).
# for show_season_metadata in sorted(show_data.get('episodes_data', {}).get('seasons', []), key=lambda x: x.get('ordinal', 0), reverse=True):
|
|
|
|
|
# season_ordinal = show_season_metadata.get('ordinal', 0)
|
|
|
|
|
# if season_ordinal == 0:
|
|
|
|
|
# continue
|
|
|
|
|
#
|
|
|
|
|
# season_id = f'{playlist_id}-{season_ordinal}'
|
|
|
|
|
#
|
|
|
|
|
# season_page = self._download_webpage(f'{playlist_url}/episodes/season/{season_ordinal}', video_id=season_id)
|
|
|
|
|
# season_data = get_elements_html_by_class("video-summary", season_page)
|
|
|
|
|
# pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Bare return (evaluates to None).
# NOTE(review): comes after the `return self.playlist_result(...)` above in
# this listing; whether it is reachable depends on the lost indentation.
return
|
|
|
|
|