pull/9907/head
Jesse Bannon 3 weeks ago
parent 0f167c960d
commit bebcaf482e

@ -1,4 +1,3 @@
import functools
import re
from .common import InfoExtractor
@ -13,15 +12,13 @@ from ..utils import (
clean_html,
get_elements_html_by_class,
get_element_html_by_class,
get_element_by_id,
extract_attributes,
extract_attributes,
orderedSet,
strip_jsonp,
strip_or_none,
traverse_obj,
unified_strdate,
url_or_none,
urlencode_postdata,
US_RATINGS,
)
@ -764,6 +761,7 @@ class PBSKidsIE(InfoExtractor):
})
}
class PBSShowIE(InfoExtractor):
_VALID_URL = r'(?:https://)?(?:www\.)?pbs\.org\/show\/(?P<presumptive_id>[^/]+?)(?:\.html)?\/?(?:$|[?#])'
@ -788,21 +786,31 @@ class PBSShowIE(InfoExtractor):
# pbs does not show metadata, use a different station that does
return f'https://video.ksps.org/show/{playlist_id}'
def _iterate_entries(self, playlist_id, season_indices):
    """Lazily yield one url-transparent result per episode of a show.

    For every index in ``season_indices`` (in the order given), the
    season's episode listing page is downloaded from
    ``<show url>/episodes/season/<index>`` and the attributes of each
    element with class ``video-summary`` are read.

    @param playlist_id      show slug, passed to ``self._make_url``
    @param season_indices   iterable of season ordinals to fetch
    @yields                 ``url_result`` dicts delegating to ``PBSIE``

    A ``KeyError`` is raised if a ``video-summary`` element lacks one of
    the ``data-video-slug``, ``data-cid`` or ``data-title`` attributes.
    """
    playlist_url = self._make_url(playlist_id)

    for season_idx in season_indices:
        season_id = f'{playlist_id}-season-{season_idx}'
        # One request per season; nothing is fetched until the generator
        # is advanced this far.
        season_page = self._download_webpage(
            f'{playlist_url}/episodes/season/{season_idx}',
            video_id=season_id,
        )
        episodes_metadata = [
            extract_attributes(elem)
            for elem in get_elements_html_by_class("video-summary", season_page)
        ]

        # Episodes are numbered counting down from the number of entries on
        # the page, so the first entry gets the highest index.
        # NOTE(review): this presumes the page lists newest episodes first
        # and is not paginated - confirm against the site.
        num_eps = len(episodes_metadata)
        for i, episode_metadata in enumerate(episodes_metadata):
            # Diagnostics go through the extractor's debug channel instead
            # of a bare print() to stdout.
            self.write_debug(f's{season_idx}e{num_eps - i} {episode_metadata["data-title"]}')
            # NOTE(review): yt-dlp's documented numeric fields are
            # `season_number` / `episode_number`; `season` and
            # `episode_index` are kept as written - confirm intent.
            yield self.url_result(
                url=f'https://pbs.org/video/{episode_metadata["data-video-slug"]}',
                ie=PBSIE,
                video_id=episode_metadata["data-cid"],
                url_transparent=True,
                title=episode_metadata["data-title"],
                season=season_idx,
                episode_index=num_eps - i,
            )
def _real_extract(self, url):
@ -810,17 +818,27 @@ class PBSShowIE(InfoExtractor):
webpage = self._download_webpage(self._make_url(playlist_id), playlist_id)
show_data = self._search_json(self._JSON_SEARCH, webpage, 'seasons', playlist_id)
playlist_description = clean_html(get_element_html_by_class("show-hero__description--long is-hidden", webpage))
show_metadata = extract_attributes(get_element_html_by_class("show-hero__my-list btn--mylist--placeholder", webpage))
playlist_description = clean_html(get_element_html_by_class(
"show-hero__description--long is-hidden", webpage)
)
show_metadata = extract_attributes(
get_element_html_by_class("show-hero__my-list btn--mylist--placeholder", webpage)
)
playlist_title = show_metadata['data-gtm-label']
clean_html(playlist_description[0])
# iterate seasons in reverse to get newest vids first
season_indices = list(sorted([x['ordinal'] for x in show_data['episodes_data']['seasons'] if x.get('ordinal', 0) != 0], reverse=True))
season_indices = list(sorted(
[
x['ordinal'] for x in show_data['episodes_data']['seasons']
if x.get('ordinal', 0) != 0
],
reverse=True
))
return self.playlist_result(
LazyList(self._fetch_seasons(playlist_id, season_indices)),
LazyList(self._iterate_entries(playlist_id, season_indices)),
playlist_id=playlist_id,
playlist_title=playlist_title,
playlist_description=playlist_description,

Loading…
Cancel
Save