From f9c443ba2c40746f56ccce18a8a29fff267bab7c Mon Sep 17 00:00:00 2001 From: tuxcoder Date: Fri, 12 Apr 2024 20:10:25 +0200 Subject: [PATCH 1/5] [on.orf.at] add subtitle parsing use the linked subtitles in the v4.3 api --- yt_dlp/extractor/orf.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py index 526e9acaf..5d6eda8e3 100644 --- a/yt_dlp/extractor/orf.py +++ b/yt_dlp/extractor/orf.py @@ -604,6 +604,19 @@ class ORFONIE(InfoExtractor): formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) + for subtitle_type in ['vtt']: # not working formats 'xml', 'srt', 'sami', 'ttml', 'stl' + subtitle_url = traverse_obj(api_json, ('_embedded', 'subtitle', f'{subtitle_type}_url'), {str}) + if subtitle_url is None: + continue + self._merge_subtitles({ + 'de': [ + { + 'url': subtitle_url, + 'ext': f'{subtitle_type}', + } + ], + }, target=subtitles) + return { 'id': video_id, 'formats': formats, From 8c6550ae434798c6e0ddbb16539b409510ad95da Mon Sep 17 00:00:00 2001 From: tuxcoder Date: Fri, 12 Apr 2024 20:12:02 +0200 Subject: [PATCH 2/5] [on.orf.at] fix url parsing some urls are without slug also some video IDs are shorter than 8 digits and could probably be bigger than 8 --- yt_dlp/extractor/orf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py index 5d6eda8e3..ba8d72b2b 100644 --- a/yt_dlp/extractor/orf.py +++ b/yt_dlp/extractor/orf.py @@ -569,7 +569,7 @@ class ORFFM4StoryIE(InfoExtractor): class ORFONIE(InfoExtractor): IE_NAME = 'orf:on' - _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d{8})/(?P<slug>[\w-]+)' + _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d+)(/(?P<slug>[\w-]+))?' 
_TESTS = [{ 'url': 'https://on.orf.at/video/14210000/school-of-champions-48', 'info_dict': { @@ -631,6 +631,8 @@ class ORFONIE(InfoExtractor): def _real_extract(self, url): video_id, display_id = self._match_valid_url(url).group('id', 'slug') + if display_id is None: + display_id = video_id webpage = self._download_webpage(url, display_id) return { From 501f7e068ec2e4f676ca37227e71426d67e27f5a Mon Sep 17 00:00:00 2001 From: tuxcoder Date: Fri, 12 Apr 2024 20:14:40 +0200 Subject: [PATCH 3/5] [on.orf.at] add better drm detection some video formats are not reported as DRM protected fixes: #9652 --- yt_dlp/extractor/orf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py index ba8d72b2b..9e024b5fb 100644 --- a/yt_dlp/extractor/orf.py +++ b/yt_dlp/extractor/orf.py @@ -590,6 +590,9 @@ class ORFONIE(InfoExtractor): api_json = self._download_json( f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id) + if traverse_obj(api_json, 'is_drm_protected'): + self.report_drm(video_id) + formats, subtitles = [], {} for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)): for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})): From 90a4ecb25aeede0bbbbbcb7f98e70d9d284ca3d6 Mon Sep 17 00:00:00 2001 From: tuxcoder Date: Fri, 12 Apr 2024 21:21:45 +0200 Subject: [PATCH 4/5] [on.orf.at] parse age_limit --- yt_dlp/extractor/orf.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py index 9e024b5fb..591d88140 100644 --- a/yt_dlp/extractor/orf.py +++ b/yt_dlp/extractor/orf.py @@ -620,10 +620,20 @@ class ORFONIE(InfoExtractor): ], }, target=subtitles) + age_classification = traverse_obj(api_json, ('age_classification'), {str}) + age_limit = None + if isinstance(age_classification, str) and len(age_classification) != 0: + # age_classification is in the format `<number>+` + # example: "6+" or 
"18+" + age_limit_str = age_classification[:-1] + if age_limit_str.isdigit(): + age_limit = int(age_limit_str) + return { 'id': video_id, 'formats': formats, 'subtitles': subtitles, + 'age_limit': age_limit, **traverse_obj(api_json, { 'duration': ('duration_second', {float_or_none}), 'title': (('title', 'headline'), {str}), From f51d16b307b22b90337eb26ec884f807cab45e9f Mon Sep 17 00:00:00 2001 From: tuxcoder Date: Fri, 12 Apr 2024 21:25:37 +0200 Subject: [PATCH 5/5] [on.orf.at] add better tests this test file has no kill_date, so I hope it will be online for longer the old one would no longer be available after "2024-08-12T21:05:00+02:00" --- yt_dlp/extractor/orf.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py index 591d88140..0a1b7ebb1 100644 --- a/yt_dlp/extractor/orf.py +++ b/yt_dlp/extractor/orf.py @@ -571,17 +571,18 @@ class ORFONIE(InfoExtractor): IE_NAME = 'orf:on' _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d+)(/(?P<slug>[\w-]+))?' _TESTS = [{ - 'url': 'https://on.orf.at/video/14210000/school-of-champions-48', + 'url': 'https://on.orf.at/video/3220355', + 'md5': 'f94d98e667cf9a3851317efb4e136662', 'info_dict': { - 'id': '14210000', + 'id': '3220355', 'ext': 'mp4', - 'duration': 2651.08, - 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0167/98/thumb_16697671_segments_highlight_teaser.jpeg', - 'title': 'School of Champions (4/8)', - 'description': 'md5:d09ad279fc2e8502611e7648484b6afd', + 'duration': 445.04, + 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0002/60/thumb_159573_segments_highlight_teaser.png', + 'title': '50 Jahre Burgenland: Der Festumzug', + 'description': 'md5:1560bf855119544ee8c4fa5376a2a6b0', 'media_type': 'episode', - 'timestamp': 52916400, + 'timestamp': 52916400, - 'upload_date': '20240128', + 'upload_date': '19710905', } }]