bergoid 1 month ago committed by GitHub
commit c6f6754d10
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -1,144 +1,177 @@
import functools import functools
import json import json
import time
import urllib.parse import urllib.parse
import urllib.request
from .gigya import GigyaBaseIE from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError,
clean_html, clean_html,
extract_attributes, extract_attributes,
float_or_none, float_or_none,
get_element_by_class, get_element_by_class,
get_element_html_by_class, get_element_html_by_class,
int_or_none, int_or_none,
join_nonempty,
jwt_encode_hs256,
make_archive_id, make_archive_id,
merge_dicts, merge_dicts,
parse_age_limit, parse_age_limit,
parse_iso8601, parse_iso8601,
unified_strdate,
str_or_none, str_or_none,
strip_or_none, strip_or_none,
traverse_obj, traverse_obj,
url_or_none, url_or_none,
urlencode_postdata,
) )
class VRTBaseIE(GigyaBaseIE): class VRTBaseIE(InfoExtractor):
_GEO_BYPASS = False _GEO_BYPASS = False
_PLAYER_INFO = {
'platform': 'desktop',
'app': {
'type': 'browser',
'name': 'Chrome',
},
'device': 'undefined (undefined)',
'os': {
'name': 'Windows',
'version': 'x86_64'
},
'player': {
'name': 'VRT web player',
'version': '2.7.4-prod-2023-04-19T06:05:45'
}
}
# From https://player.vrt.be/vrtnws/js/main.js & https://player.vrt.be/ketnet/js/main.8cdb11341bcb79e4cd44.js
_JWT_KEY_ID = '0-0Fp51UZykfaiCJrfTE3+oMI8zvDteYfPtR+2n1R+z8w='
_JWT_SIGNING_KEY = 'b5f500d55cb44715107249ccd8a5c0136cfb2788dbb71b90a4f142423bacaf38' # -dev
# player-stag.vrt.be key: d23987504521ae6fbf2716caca6700a24bb1579477b43c84e146b279de5ca595
# player.vrt.be key: 2a9251d782700769fb856da5725daf38661874ca6f80ae7dc2b05ec1a81a24ae
def _extract_formats_and_subtitles(self, data, video_id): def _extract_formats_and_subtitles(self, data, video_id):
if traverse_obj(data, 'drm'): if traverse_obj(data, 'drm'):
self.report_drm(video_id) self.report_drm(video_id)
formats, subtitles = [], {} formats, subtitles = [], {}
for target in traverse_obj(data, ('targetUrls', lambda _, v: url_or_none(v['url']) and v['type'])): for target in traverse_obj(
data, ('targetUrls', lambda _, v: url_or_none(v['url']) and v['type'])
):
format_type = target['type'].upper() format_type = target['type'].upper()
format_url = target['url'] format_url = target['url']
if format_type in ('HLS', 'HLS_AES'): if format_type in ('HLS', 'HLS_AES'):
fmts, subs = self._extract_m3u8_formats_and_subtitles( fmts, subs = self._extract_m3u8_formats_and_subtitles(
format_url, video_id, 'mp4', m3u8_id=format_type, fatal=False) format_url, video_id, 'mp4', m3u8_id=format_type, fatal=False
)
formats.extend(fmts) formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)
elif format_type == 'HDS': elif format_type == 'HDS':
formats.extend(self._extract_f4m_formats( formats.extend(
format_url, video_id, f4m_id=format_type, fatal=False)) self._extract_f4m_formats(
format_url, video_id, f4m_id=format_type, fatal=False
)
)
elif format_type == 'MPEG_DASH': elif format_type == 'MPEG_DASH':
fmts, subs = self._extract_mpd_formats_and_subtitles( fmts, subs = self._extract_mpd_formats_and_subtitles(
format_url, video_id, mpd_id=format_type, fatal=False) format_url, video_id, mpd_id=format_type, fatal=False
)
formats.extend(fmts) formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)
elif format_type == 'HSS': elif format_type == 'HSS':
fmts, subs = self._extract_ism_formats_and_subtitles( fmts, subs = self._extract_ism_formats_and_subtitles(
format_url, video_id, ism_id='mss', fatal=False) format_url, video_id, ism_id='mss', fatal=False
)
formats.extend(fmts) formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)
else: else:
formats.append({ formats.append(
'format_id': format_type, {
'url': format_url, 'format_id': format_type,
}) 'url': format_url,
}
for sub in traverse_obj(data, ('subtitleUrls', lambda _, v: v['url'] and v['type'] == 'CLOSED')): )
for sub in traverse_obj(
data, ('subtitleUrls', lambda _, v: v['url'] and v['type'] == 'CLOSED')
):
subtitles.setdefault('nl', []).append({'url': sub['url']}) subtitles.setdefault('nl', []).append({'url': sub['url']})
return formats, subtitles return formats, subtitles
def _call_api(self, video_id, client='null', id_token=None, version='v2'): def _call_api(self, video_id, client='null', id_token=None, version='v2'):
player_info = {'exp': (round(time.time(), 3) + 900), **self._PLAYER_INFO} json_response = self._download_json(
player_token = self._download_json( f'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/{version}/tokens',
'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2/tokens', None,
video_id, 'Downloading player token', headers={ 'Downloading player token',
**self.geo_verification_headers(), 'Failed to download player token',
'Content-Type': 'application/json', headers={'Content-Type': 'application/json'},
}, data=json.dumps({ data=json.dumps(
'identityToken': id_token or {}, {
'playerInfo': jwt_encode_hs256(player_info, self._JWT_SIGNING_KEY, headers={ 'identityToken': id_token
'kid': self._JWT_KEY_ID or self._get_cookies('https://www.vrt.be')
}).decode() .get('vrtnu-site_profile_vt')
}, separators=(',', ':')).encode())['vrtPlayerToken'] .value
}
).encode(),
)
player_token = json_response['vrtPlayerToken']
return self._download_json( return self._download_json(
f'https://media-services-public.vrt.be/media-aggregator/{version}/media-items/{video_id}', f'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/{version}/videos/{video_id}',
video_id, 'Downloading API JSON', query={ video_id,
'Downloading API JSON',
'Failed to download API JSON',
query={
'vrtPlayerToken': player_token, 'vrtPlayerToken': player_token,
'client': client, 'client': client,
}, expected_status=400) },
)
class VRTIE(VRTBaseIE): class VRTLoginIE(VRTBaseIE):
_NETRC_MACHINE = 'vrtnu'
_authenticated = False
def _perform_login(self, username, password):
self._request_webpage(
'https://www.vrt.be/vrtnu/sso/login',
None,
note='Getting session cookies',
errnote='Failed to get session cookies',
)
self._download_json(
'https://login.vrt.be/perform_login',
None,
data=json.dumps(
{'loginID': username, 'password': password, 'clientId': 'vrtnu-site'}
).encode(),
headers={
'Content-Type': 'application/json',
'Oidcxsrf': self._get_cookies('https://login.vrt.be')
.get('OIDCXSRF')
.value,
},
note='Logging in',
errnote='Login failed',
)
self._authenticated = True
return
class VRTIE(VRTLoginIE):
IE_DESC = 'VRT NWS, Flanders News, Flandern Info and Sporza' IE_DESC = 'VRT NWS, Flanders News, Flandern Info and Sporza'
_VALID_URL = r'https?://(?:www\.)?(?P<site>vrt\.be/vrtnws|sporza\.be)/[a-z]{2}/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)' _VALID_URL = r'https?://(?:www\.)?(?P<site>vrt\.be/vrtnws|sporza\.be)/[a-z]{2}/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
_TESTS = [{ _TESTS = [
'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/', {
'info_dict': { 'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/',
'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd', 'info_dict': {
'ext': 'mp4', 'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd',
'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand', 'ext': 'mp4',
'description': 'md5:6fd85f999b2d1841aa5568f4bf02c3ff', 'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand',
'duration': 31.2, 'description': 'md5:6fd85f999b2d1841aa5568f4bf02c3ff',
'thumbnail': 'https://images.vrt.be/orig/2019/05/15/2d914d61-7710-11e9-abcc-02b7b76bf47f.jpg', 'duration': 31.2,
'thumbnail': 'https://images.vrt.be/orig/2019/05/15/2d914d61-7710-11e9-abcc-02b7b76bf47f.jpg',
},
'params': {'skip_download': 'm3u8'},
}, },
'params': {'skip_download': 'm3u8'}, {
}, { 'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/',
'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/', 'info_dict': {
'info_dict': { 'id': 'pbs-pub-e1d6e4ec-cbf4-451e-9e87-d835bb65cd28$vid-2ad45eb6-9bc8-40d4-ad72-5f25c0f59d75',
'id': 'pbs-pub-f2c86a46-8138-413a-a4b9-a0015a16ce2c$vid-1f112b31-e58e-4379-908d-aca6d80f8818', 'ext': 'mp4',
'ext': 'mp4', 'title': 'De Belgian Cats zijn klaar voor het EK',
'title': 'De Belgian Cats zijn klaar voor het EK', 'description': 'Video: De Belgian Cats zijn klaar voor het EK mét Ann Wauters | basketbal, sport in het journaal',
'description': 'Video: De Belgian Cats zijn klaar voor het EK mét Ann Wauters | basketbal, sport in het journaal', 'duration': 115.17,
'duration': 115.17, 'thumbnail': 'https://images.vrt.be/orig/2019/05/15/11c0dba3-770e-11e9-abcc-02b7b76bf47f.jpg',
'thumbnail': 'https://images.vrt.be/orig/2019/05/15/11c0dba3-770e-11e9-abcc-02b7b76bf47f.jpg', },
'params': {'skip_download': 'm3u8'},
}, },
'params': {'skip_download': 'm3u8'}, ]
}] _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
_CLIENT_MAP = { _CONTEXT_ID = 'R3595707040'
'vrt.be/vrtnws': 'vrtnieuws', _REST_API_BASE_TOKEN = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2'
'sporza.be': 'sporza', _REST_API_BASE_VIDEO = 'https://media-services-public.vrt.be/media-aggregator/v2'
_HLS_ENTRY_PROTOCOLS_MAP = {
'HLS': 'm3u8_native',
'HLS_AES': 'm3u8_native',
} }
def _real_extract(self, url): def _real_extract(self, url):
@ -147,16 +180,22 @@ class VRTIE(VRTBaseIE):
attrs = extract_attributes(get_element_html_by_class('vrtvideo', webpage) or '') attrs = extract_attributes(get_element_html_by_class('vrtvideo', webpage) or '')
asset_id = attrs.get('data-video-id') or attrs['data-videoid'] asset_id = attrs.get('data-video-id') or attrs['data-videoid']
publication_id = traverse_obj(attrs, 'data-publication-id', 'data-publicationid') publication_id = traverse_obj(
attrs, 'data-publication-id', 'data-publicationid'
)
if publication_id: if publication_id:
asset_id = f'{publication_id}${asset_id}' asset_id = f'{publication_id}${asset_id}'
client = traverse_obj(attrs, 'data-client-code', 'data-client') or self._CLIENT_MAP[site] client = (
traverse_obj(attrs, 'data-client-code', 'data-client')
or self._CLIENT_MAP[site]
)
data = self._call_api(asset_id, client) data = self._call_api(asset_id, client)
formats, subtitles = self._extract_formats_and_subtitles(data, asset_id) formats, subtitles = self._extract_formats_and_subtitles(data, asset_id)
description = self._html_search_meta( description = self._html_search_meta(
['og:description', 'twitter:description', 'description'], webpage) ['og:description', 'twitter:description', 'description'], webpage
)
if description == '': if description == '':
description = None description = None
@ -168,164 +207,155 @@ class VRTIE(VRTBaseIE):
'thumbnail': url_or_none(attrs.get('data-posterimage')), 'thumbnail': url_or_none(attrs.get('data-posterimage')),
'duration': float_or_none(attrs.get('data-duration'), 1000), 'duration': float_or_none(attrs.get('data-duration'), 1000),
'_old_archive_ids': [make_archive_id('Canvas', asset_id)], '_old_archive_ids': [make_archive_id('Canvas', asset_id)],
**traverse_obj(data, { **traverse_obj(
'title': ('title', {str}), data,
'description': ('shortDescription', {str}), {
'duration': ('duration', {functools.partial(float_or_none, scale=1000)}), 'title': ('title', {str}),
'thumbnail': ('posterImageUrl', {url_or_none}), 'description': ('shortDescription', {str}),
}), 'duration': (
'duration',
{functools.partial(float_or_none, scale=1000)},
),
'thumbnail': ('posterImageUrl', {url_or_none}),
},
),
} }
class VrtNUIE(VRTBaseIE): class VrtNUIE(VRTLoginIE):
IE_DESC = 'VRT MAX' IE_DESC = 'VRT MAX'
_VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)' _VALID_URL = (
_TESTS = [{ r'https?://(?:www\.)?vrt\.be/(vrtmax|vrtnu)/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
# CONTENT_IS_AGE_RESTRICTED )
'url': 'https://www.vrt.be/vrtnu/a-z/de-ideale-wereld/2023-vj/de-ideale-wereld-d20230116/', _TESTS = [
'info_dict': { {
'id': 'pbs-pub-855b00a8-6ce2-4032-ac4f-1fcf3ae78524$vid-d2243aa1-ec46-4e34-a55b-92568459906f', 'url': 'https://www.vrt.be/vrtmax/a-z/pano/trailer/pano-trailer-najaar-2023/',
'ext': 'mp4', 'info_dict': {
'title': 'Tom Waes', 'title': 'Pano - Nieuwe afleveringen vanaf 15 november - Trailer | VRT MAX',
'description': 'Satirisch actualiteitenmagazine met Ella Leyers. Tom Waes is te gast.', 'description': 'Duidingsmagazine met indringende reportages over de grote thema\'s van deze tijd. Een gedreven team van reporters diept de beste nieuwsverhalen uit en zoekt het antwoord op actuele vragen. Bekijk de trailer met VRT MAX via de site of app.',
'timestamp': 1673905125, 'timestamp': 1699246800,
'release_timestamp': 1673905125, 'release_timestamp': 1699246800,
'series': 'De ideale wereld', 'release_date': '20231106',
'season_id': '1672830988794', 'upload_date': '20231106',
'episode': 'Aflevering 1', 'series': 'Pano',
'episode_number': 1, 'season': 'Trailer',
'episode_id': '1672830988861', 'season_number': 2023,
'display_id': 'de-ideale-wereld-d20230116', 'season_id': '/vrtnu/a-z/pano/trailer/#tvseason',
'channel': 'VRT', 'episode_id': '3226122918145',
'duration': 1939.0, 'id': 'pbs-pub-5260ad6d-372c-46d3-a542-0e781fd5831a$vid-75fdb750-82f5-4157-8ea9-4485f303f20b',
'thumbnail': 'https://images.vrt.be/orig/2023/01/10/1bb39cb3-9115-11ed-b07d-02b7b76bf47f.jpg', 'channel': 'VRT',
'release_date': '20230116', 'duration': 37.16,
'upload_date': '20230116', 'thumbnail': 'https://images.vrt.be/orig/2023/11/03/f570eb9b-7a4e-11ee-91d7-02b7b76bf47f.jpg',
'age_limit': 12, 'ext': 'mp4',
},
}, },
}, { {
'url': 'https://www.vrt.be/vrtnu/a-z/buurman--wat-doet-u-nu-/6/buurman--wat-doet-u-nu--s6-trailer/', 'url': 'https://www.vrt.be/vrtnu/a-z/factcheckers/trailer/factcheckers-trailer-s4/',
'info_dict': { 'info_dict': {
'id': 'pbs-pub-ad4050eb-d9e5-48c2-9ec8-b6c355032361$vid-0465537a-34a8-4617-8352-4d8d983b4eee', 'title': 'Factcheckers - Nieuwe afleveringen vanaf 15 november - Trailer | VRT MAX',
'ext': 'mp4', 'season_number': 2023,
'title': 'Trailer seizoen 6 \'Buurman, wat doet u nu?\'', 'description': 'Infotainmentprogramma waarin Thomas Vanderveken, Jan Van Looveren en Britt Van Marsenille checken wat er nu eigenlijk klopt van de tsunami aan berichten, beweringen en weetjes die we dagelijks over ons heen krijgen. Bekijk de trailer met VRT MAX via de site of app.',
'description': 'md5:197424726c61384b4e5c519f16c0cf02', 'timestamp': 1699160400,
'timestamp': 1652940000, 'release_timestamp': 1699160400,
'release_timestamp': 1652940000, 'release_date': '20231105',
'series': 'Buurman, wat doet u nu?', 'upload_date': '20231105',
'season': 'Seizoen 6', 'series': 'Factcheckers',
'season_number': 6, 'episode': '0',
'season_id': '1652344200907', 'episode_number': 0,
'episode': 'Aflevering 0', 'season': 'Trailer',
'episode_number': 0, 'season_id': '/vrtnu/a-z/factcheckers/trailer/#tvseason',
'episode_id': '1652951873524', 'episode_id': '3179360900145',
'display_id': 'buurman--wat-doet-u-nu--s6-trailer', 'id': 'pbs-pub-aa9397e9-ec2b-45f9-9148-7ce71b690b45$vid-04c67438-4866-4f5c-8978-51d173c0074b',
'channel': 'VRT', 'channel': 'VRT',
'duration': 33.13, 'duration': 33.08,
'thumbnail': 'https://images.vrt.be/orig/2022/05/23/3c234d21-da83-11ec-b07d-02b7b76bf47f.jpg', 'thumbnail': 'https://images.vrt.be/orig/2023/11/07/37d244f0-7d8a-11ee-91d7-02b7b76bf47f.jpg',
'release_date': '20220519', 'ext': 'mp4',
'upload_date': '20220519', },
}, },
'params': {'skip_download': 'm3u8'}, ]
}]
_NETRC_MACHINE = 'vrtnu'
_authenticated = False
def _perform_login(self, username, password): _NETRC_MACHINE = 'vrtnu'
auth_info = self._gigya_login({
'APIKey': '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy',
'targetEnv': 'jssdk',
'loginID': username,
'password': password,
'authMode': 'cookie',
})
if auth_info.get('errorDetails'):
raise ExtractorError(f'Unable to login. VrtNU said: {auth_info["errorDetails"]}', expected=True)
# Sometimes authentication fails for no good reason, retry
for retry in self.RetryManager():
if retry.attempt > 1:
self._sleep(1, None)
try:
self._request_webpage(
'https://token.vrt.be/vrtnuinitlogin', None, note='Requesting XSRF Token',
errnote='Could not get XSRF Token', query={
'provider': 'site',
'destination': 'https://www.vrt.be/vrtnu/',
})
self._request_webpage(
'https://login.vrt.be/perform_login', None,
note='Performing login', errnote='Login failed',
query={'client_id': 'vrtnu-site'}, data=urlencode_postdata({
'UID': auth_info['UID'],
'UIDSignature': auth_info['UIDSignature'],
'signatureTimestamp': auth_info['signatureTimestamp'],
'_csrf': self._get_cookies('https://login.vrt.be').get('OIDCXSRF').value,
}))
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
retry.error = e
continue
raise
self._authenticated = True _VIDEOPAGE_QUERY = 'query VideoPage($pageId: ID!) {\n page(id: $pageId) {\n ... on EpisodePage {\n id\n title\n seo {\n ...seoFragment\n __typename\n }\n ldjson\n episode {\n onTimeRaw\n ageRaw\n name\n episodeNumberRaw\n program {\n title\n __typename\n }\n watchAction {\n streamId\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n}\nfragment seoFragment on SeoProperties {\n __typename\n title\n description\n}'
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
parsed_url = urllib.parse.urlparse(url) parsed_url = urllib.parse.urlparse(url)
details = self._download_json(
f'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path.rstrip("/")}.model.json', self._request_webpage(
display_id, 'Downloading asset JSON', 'Unable to download asset JSON')['details'] 'https://www.vrt.be/vrtnu/sso/login',
None,
watch_info = traverse_obj(details, ( note='Getting tokens',
'actions', lambda _, v: v['type'] == 'watch-episode', {dict}), get_all=False) or {} errnote='Failed to get tokens',
video_id = join_nonempty( )
'episodePublicationId', 'episodeVideoId', delim='$', from_dict=watch_info)
if '$' not in video_id: metadata = self._download_json(
raise ExtractorError('Unable to extract video ID') 'https://www.vrt.be/vrtnu-api/graphql/v1',
display_id,
vrtnutoken = self._download_json( 'Downloading asset JSON',
'https://token.vrt.be/refreshtoken', video_id, note='Retrieving vrtnutoken', 'Unable to download asset JSON',
errnote='Token refresh failed')['vrtnutoken'] if self._authenticated else None headers={
'Content-Type': 'application/json',
video_info = self._call_api(video_id, 'vrtnu-web@PROD', vrtnutoken) 'Authorization': f'Bearer {self._get_cookies("https://www.vrt.be").get("vrtnu-site_profile_at").value}',
},
if 'title' not in video_info: data=json.dumps(
code = video_info.get('code') {
if code in ('AUTHENTICATION_REQUIRED', 'CONTENT_IS_AGE_RESTRICTED'): 'operationName': 'VideoPage',
self.raise_login_required(code, method='password') 'query': self._VIDEOPAGE_QUERY,
elif code in ('INVALID_LOCATION', 'CONTENT_AVAILABLE_ONLY_IN_BE'): 'variables': {
self.raise_geo_restricted(countries=['BE']) 'pageId': f'{parsed_url.path.rstrip("/")}.model.json'
elif code == 'CONTENT_AVAILABLE_ONLY_FOR_BE_RESIDENTS_AND_EXPATS': },
if not self._authenticated: }
self.raise_login_required(code, method='password') ).encode(),
self.raise_geo_restricted(countries=['BE']) )['data']['page']
raise ExtractorError(code, expected=True)
video_id = metadata['episode']['watchAction']['streamId']
formats, subtitles = self._extract_formats_and_subtitles(video_info, video_id) try:
ld_json = json.loads(metadata['ldjson'][1])
except Exception:
ld_json = {}
streaming_info = self._call_api(video_id, client='vrtnu-web@PROD')
formats, subtitles = self._extract_formats_and_subtitles(
streaming_info, video_id
)
return { return {
**traverse_obj(details, { **traverse_obj(
'title': 'title', metadata,
'description': ('description', {clean_html}), {
'timestamp': ('data', 'episode', 'onTime', 'raw', {parse_iso8601}), 'title': ('seo', 'title', {str_or_none}),
'release_timestamp': ('data', 'episode', 'onTime', 'raw', {parse_iso8601}), 'season_number': (
'series': ('data', 'program', 'title'), 'episode',
'season': ('data', 'season', 'title', 'value'), 'onTimeRaw',
'season_number': ('data', 'season', 'title', 'raw', {int_or_none}), {lambda x: x[:4]},
'season_id': ('data', 'season', 'id', {str_or_none}), {int_or_none},
'episode': ('data', 'episode', 'number', 'value', {str_or_none}), ),
'episode_number': ('data', 'episode', 'number', 'raw', {int_or_none}), 'description': ('seo', 'description', {str_or_none}),
'episode_id': ('data', 'episode', 'id', {str_or_none}), 'timestamp': ('episode', 'onTimeRaw', {parse_iso8601}),
'age_limit': ('data', 'episode', 'age', 'raw', {parse_age_limit}), 'release_timestamp': ('episode', 'onTimeRaw', {parse_iso8601}),
}), 'release_date': ('episode', 'onTimeRaw', {unified_strdate}),
'upload_date': ('episode', 'onTimeRaw', {unified_strdate}),
'series': ('episode', 'program', 'title'),
'episode': ('episode', 'episodeNumberRaw', {str_or_none}),
'episode_number': ('episode', 'episodeNumberRaw', {int_or_none}),
'age_limit': ('episode', 'ageRaw', {parse_age_limit}),
'display_id': ('episode', 'name', {parse_age_limit}),
},
),
**traverse_obj(
ld_json,
{
'season': ('partOfSeason', 'name'),
'season_id': ('partOfSeason', '@id'),
'episode_id': ('@id', {str_or_none}),
},
),
'id': video_id, 'id': video_id,
'display_id': display_id,
'channel': 'VRT', 'channel': 'VRT',
'formats': formats, 'formats': formats,
'duration': float_or_none(video_info.get('duration'), 1000), 'duration': float_or_none(streaming_info.get('duration'), 1000),
'thumbnail': url_or_none(video_info.get('posterImageUrl')), 'thumbnail': url_or_none(streaming_info.get('posterImageUrl')),
'subtitles': subtitles, 'subtitles': subtitles,
'_old_archive_ids': [make_archive_id('Canvas', video_id)], '_old_archive_ids': [make_archive_id('Canvas', video_id)],
} }
@ -333,26 +363,30 @@ class VrtNUIE(VRTBaseIE):
class KetnetIE(VRTBaseIE): class KetnetIE(VRTBaseIE):
_VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)'
_TESTS = [{ _TESTS = [
'url': 'https://www.ketnet.be/kijken/m/meisjes/6/meisjes-s6a5', {
'info_dict': { 'url': 'https://www.ketnet.be/kijken/m/meisjes/6/meisjes-s6a5',
'id': 'pbs-pub-39f8351c-a0a0-43e6-8394-205d597d6162$vid-5e306921-a9aa-4fa9-9f39-5b82c8f1028e', 'info_dict': {
'ext': 'mp4', 'id': 'pbs-pub-39f8351c-a0a0-43e6-8394-205d597d6162$vid-5e306921-a9aa-4fa9-9f39-5b82c8f1028e',
'title': 'Meisjes', 'ext': 'mp4',
'episode': 'Reeks 6: Week 5', 'title': 'Meisjes',
'season': 'Reeks 6', 'episode': 'Reeks 6: Week 5',
'series': 'Meisjes', 'season': 'Reeks 6',
'timestamp': 1685251800, 'series': 'Meisjes',
'upload_date': '20230528', 'timestamp': 1685251800,
}, 'upload_date': '20230528',
'params': {'skip_download': 'm3u8'}, },
}] 'params': {'skip_download': 'm3u8'},
}
]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
video = self._download_json( video = self._download_json(
'https://senior-bff.ketnet.be/graphql', display_id, query={ 'https://senior-bff.ketnet.be/graphql',
display_id,
query={
'query': '''{ 'query': '''{
video(id: "content/ketnet/nl/%s.model.json") { video(id: "content/ketnet/nl/%s.model.json") {
description description
@ -365,8 +399,10 @@ class KetnetIE(VRTBaseIE):
subtitleVideodetail subtitleVideodetail
titleVideodetail titleVideodetail
} }
}''' % display_id, }'''
})['data']['video'] % display_id,
},
)['data']['video']
video_id = urllib.parse.unquote(video['mediaReference']) video_id = urllib.parse.unquote(video['mediaReference'])
data = self._call_api(video_id, 'ketnet@PROD', version='v1') data = self._call_api(video_id, 'ketnet@PROD', version='v1')
@ -377,39 +413,45 @@ class KetnetIE(VRTBaseIE):
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'_old_archive_ids': [make_archive_id('Canvas', video_id)], '_old_archive_ids': [make_archive_id('Canvas', video_id)],
**traverse_obj(video, { **traverse_obj(
'title': ('titleVideodetail', {str}), video,
'description': ('description', {str}), {
'thumbnail': ('thumbnail', {url_or_none}), 'title': ('titleVideodetail', {str}),
'timestamp': ('publicationDate', {parse_iso8601}), 'description': ('description', {str}),
'series': ('programTitle', {str}), 'thumbnail': ('thumbnail', {url_or_none}),
'season': ('seasonTitle', {str}), 'timestamp': ('publicationDate', {parse_iso8601}),
'episode': ('subtitleVideodetail', {str}), 'series': ('programTitle', {str}),
'episode_number': ('episodeNr', {int_or_none}), 'season': ('seasonTitle', {str}),
}), 'episode': ('subtitleVideodetail', {str}),
'episode_number': ('episodeNr', {int_or_none}),
},
),
} }
class DagelijkseKostIE(VRTBaseIE): class DagelijkseKostIE(VRTBaseIE):
IE_DESC = 'dagelijksekost.een.be' IE_DESC = 'dagelijksekost.een.be'
_VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [
'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof', {
'info_dict': { 'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa', 'info_dict': {
'ext': 'mp4', 'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
'title': 'Hachis parmentier met witloof', 'ext': 'mp4',
'description': 'md5:9960478392d87f63567b5b117688cdc5', 'title': 'Hachis parmentier met witloof',
'display_id': 'hachis-parmentier-met-witloof', 'description': 'md5:9960478392d87f63567b5b117688cdc5',
}, 'display_id': 'hachis-parmentier-met-witloof',
'params': {'skip_download': 'm3u8'}, },
}] 'params': {'skip_download': 'm3u8'},
}
]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_id = self._html_search_regex( video_id = self._html_search_regex(
r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id') r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id'
)
data = self._call_api(video_id, 'dako@prod', version='v1') data = self._call_api(video_id, 'dako@prod', version='v1')
formats, subtitles = self._extract_formats_and_subtitles(data, video_id) formats, subtitles = self._extract_formats_and_subtitles(data, video_id)
@ -419,11 +461,14 @@ class DagelijkseKostIE(VRTBaseIE):
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'display_id': display_id, 'display_id': display_id,
'title': strip_or_none(get_element_by_class( 'title': strip_or_none(
'dish-metadata__title', webpage) or self._html_search_meta('twitter:title', webpage)), get_element_by_class('dish-metadata__title', webpage)
'description': clean_html(get_element_by_class( or self._html_search_meta('twitter:title', webpage)
'dish-description', webpage)) or self._html_search_meta( ),
['description', 'twitter:description', 'og:description'], webpage), 'description': clean_html(get_element_by_class('dish-description', webpage))
or self._html_search_meta(
['description', 'twitter:description', 'og:description'], webpage
),
'_old_archive_ids': [make_archive_id('Canvas', video_id)], '_old_archive_ids': [make_archive_id('Canvas', video_id)],
} }

Loading…
Cancel
Save