pull/9907/head
Jesse Bannon 3 weeks ago
parent 3d9d8592bc
commit 5da9b4ae9d

@ -1,15 +1,20 @@
import functools
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
OnDemandPagedList,
determine_ext,
int_or_none,
float_or_none,
js_to_json,
clean_html,
get_elements_html_by_class,
get_element_html_by_class,
get_element_by_id,
extract_attributes,
orderedSet,
strip_jsonp,
strip_or_none,
@ -795,9 +800,10 @@ class PBSShowIE(InfoExtractor):
}]
_JSON_SEARCH = r'<script[^>]+id="content-strip-data" type="application/json">'
_SHOW_JSON_SEARCH = r'GTMDataLayer\.push\('
PAGE_SIZE = 40
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
PAGE_SIZE = 25
HTML_CLASS_NAMES = {
'channel': {
'container': 'channel-videos-container',
@ -816,22 +822,48 @@ class PBSShowIE(InfoExtractor):
def _make_url(playlist_id):
return f'https://watch.opb.org/show/{playlist_id}'
def _fetch_season_page(self, playlist_id, page_num):
playlist_url = self._make_url(playlist_id)
season_id = f'{playlist_id}-{page_num}'
season_page = self._download_webpage(f'{playlist_url}/episodes/season/{page_num}', video_id=season_id)
season_data = get_elements_html_by_class("video-summary", season_page)
def _real_extract(self, url):
playlist_id = self._match_valid_url(url).group('id')
playlist_url = self._make_url(playlist_id)
webpage = self._download_webpage(self._make_url(playlist_id), playlist_id)
show_data = self._search_json(self._JSON_SEARCH, webpage, 'seasons', playlist_id)
# show_metadata = self._search_json(self._SHOW_JSON_SEARCH, webpage, 'show metadata', playlist_id)
for show_season_metadata in sorted(show_data.get('episodes_data', {}).get('seasons', []), key=lambda x: x.get('ordinal', 0), reverse=True):
season_ordinal = show_season_metadata.get('ordinal', 0)
if season_ordinal == 0:
continue
playlist_description = clean_html(get_element_html_by_class("show-hero__description--long is-hidden", webpage))
show_metadata = extract_attributes(get_element_html_by_class("show-hero__my-list btn--mylist--placeholder", webpage))
season_id = f'{playlist_id}-{season_ordinal}'
playlist_title = show_metadata['data-gtml-label']
clean_html(playlist_description[0])
return self.playlist_result(
OnDemandPagedList(
pagefunc=functools.partial(self._fetch_season_page, playlist_id),
pagesize=self.PAGE_SIZE
),
playlist_id=playlist_id,
playlist_title=playlist_title,
# playlist_title=
)
season_page = self._download_webpage(f'{playlist_url}/episodes/season/{season_ordinal}', video_id=season_id)
season_data = get_elements_html_by_class("video-summary", season_page)
pass
# for show_season_metadata in sorted(show_data.get('episodes_data', {}).get('seasons', []), key=lambda x: x.get('ordinal', 0), reverse=True):
# season_ordinal = show_season_metadata.get('ordinal', 0)
# if season_ordinal == 0:
# continue
#
# season_id = f'{playlist_id}-{season_ordinal}'
#
# season_page = self._download_webpage(f'{playlist_url}/episodes/season/{season_ordinal}', video_id=season_id)
# season_data = get_elements_html_by_class("video-summary", season_page)
# pass
return

Loading…
Cancel
Save