Update to ytdl-commit-8a158a9

[NHK] Use new API URL 6508688e88 Closes #2337, Closes #4063
2 years ago · 6d1b34896e
parent 7b2c3f47c6
commit 6d1b34896e
12 changed files with 295 additions and 154 deletions
--- a/README.md
+++ b/README.md
@ -71,7 +71,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
 # NEW FEATURES
-* Based on **youtube-dl 2021.12.17 [commit/6508688](https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a)** ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) and **youtube-dlc 2020.11.11-3 [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
+* Based on **youtube-dl 2021.12.17 [commit/8a158a9](https://github.com/ytdl-org/youtube-dl/commit/8a158a936c8b002ef536e9e2b778ded02c09c0fa)**<!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc 2020.11.11-3 [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
 * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
--- a/test/test_download.py
+++ b/test/test_download.py
@ -102,9 +102,10 @@ def generator(test_case, tname):
        def print_skipping(reason):
            print('Skipping %s: %s' % (test_case['name'], reason))
            self.skipTest(reason)
        if not ie.working():
            print_skipping('IE marked as not _WORKING')
            return
        for tc in test_cases:
            info_dict = tc.get('info_dict', {})
@ -118,11 +119,10 @@ def generator(test_case, tname):
        if 'skip' in test_case:
            print_skipping(test_case['skip'])
-            return
+
        for other_ie in other_ies:
            if not other_ie.working():
                print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
                return
        params = get_params(test_case.get('params', {}))
        params['outtmpl'] = tname + '_' + params['outtmpl']
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@ -38,6 +38,9 @@ class BaseTestSubtitles(unittest.TestCase):
        self.DL = FakeYDL()
        self.ie = self.IE()
        self.DL.add_info_extractor(self.ie)
        if not self.IE.working():
            print('Skipping: %s marked as not _WORKING' % self.IE.ie_key())
            self.skipTest('IE marked as not _WORKING')
    def getInfoDict(self):
        info_dict = self.DL.extract_info(self.url, download=False)
@ -57,6 +60,21 @@ class BaseTestSubtitles(unittest.TestCase):
@is_download_test
 class TestYoutubeSubtitles(BaseTestSubtitles):
    # Available subtitles for QRS8MkLhQmM:
    # Language formats
    # ru       vtt, ttml, srv3, srv2, srv1, json3
    # fr       vtt, ttml, srv3, srv2, srv1, json3
    # en       vtt, ttml, srv3, srv2, srv1, json3
    # nl       vtt, ttml, srv3, srv2, srv1, json3
    # de       vtt, ttml, srv3, srv2, srv1, json3
    # ko       vtt, ttml, srv3, srv2, srv1, json3
    # it       vtt, ttml, srv3, srv2, srv1, json3
    # zh-Hant  vtt, ttml, srv3, srv2, srv1, json3
    # hi       vtt, ttml, srv3, srv2, srv1, json3
    # pt-BR    vtt, ttml, srv3, srv2, srv1, json3
    # es-MX    vtt, ttml, srv3, srv2, srv1, json3
    # ja       vtt, ttml, srv3, srv2, srv1, json3
    # pl       vtt, ttml, srv3, srv2, srv1, json3
    url = 'QRS8MkLhQmM'
    IE = YoutubeIE
@ -65,47 +83,60 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles.keys()), 13)
-        self.assertEqual(md5(subtitles['en']), '688dd1ce0981683867e7fe6fde2a224b')
+        self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')
-        self.assertEqual(md5(subtitles['it']), '31324d30b8430b309f7f5979a504a769')
+        self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9')
        for lang in ['fr', 'de']:
            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
-    def test_youtube_subtitles_ttml_format(self):
+    def _test_subtitles_format(self, fmt, md5_hash, lang='en'):
        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitlesformat'] = 'ttml'
+        self.DL.params['subtitlesformat'] = fmt
        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), 'c97ddf1217390906fa9fbd34901f3da2')
+        self.assertEqual(md5(subtitles[lang]), md5_hash)
    def test_youtube_subtitles_ttml_format(self):
        self._test_subtitles_format('ttml', 'c97ddf1217390906fa9fbd34901f3da2')
    def test_youtube_subtitles_vtt_format(self):
-        self.DL.params['writesubtitles'] = True
+        self._test_subtitles_format('vtt', 'ae1bd34126571a77aabd4d276b28044d')
        self.DL.params['subtitlesformat'] = 'vtt'
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')
-    def test_youtube_automatic_captions(self):
+    def test_youtube_subtitles_json3_format(self):
-        self.url = '8YoUxe5ncPo'
+        self._test_subtitles_format('json3', '688dd1ce0981683867e7fe6fde2a224b')
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subtitles = self.getSubtitles()
        self.assertTrue(subtitles['it'] is not None)
-    def test_youtube_no_automatic_captions(self):
+    def _test_automatic_captions(self, url, lang):
-        self.url = 'QRS8MkLhQmM'
+        self.url = url
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = [lang]
        subtitles = self.getSubtitles()
-        self.assertTrue(not subtitles)
+        self.assertTrue(subtitles[lang] is not None)
    def test_youtube_automatic_captions(self):
        # Available automatic captions for 8YoUxe5ncPo:
        # Language formats (all in vtt, ttml, srv3, srv2, srv1, json3)
        # gu, zh-Hans, zh-Hant, gd, ga, gl, lb, la, lo, tt, tr,
        # lv, lt, tk, th, tg, te, fil, haw, yi, ceb, yo, de, da,
        # el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv,
        # bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy,
        # hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur,
        # mt, ms, mr, ug, ta, my, af, sw, is, am,
        #                                         *it*, iw, sv, ar,
        # su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi,
        # ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl,
        # ky, sd
        # ...
        self._test_automatic_captions('8YoUxe5ncPo', 'it')
    @unittest.skip('Video unavailable')
    def test_youtube_translated_subtitles(self):
-        # This video has a subtitles track, which can be translated
+        # This video has a subtitles track, which can be translated (#4555)
-        self.url = 'i0ZabxXmH4Y'
+        self._test_automatic_captions('Ky9eprVWzlI', 'it')
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subtitles = self.getSubtitles()
        self.assertTrue(subtitles['it'] is not None)
    def test_youtube_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
-        self.url = 'n5BB19UTcdA'
+        # Available automatic captions for 8YoUxe5ncPo:
        # ...
        # 8YoUxe5ncPo has no subtitles
        self.url = '8YoUxe5ncPo'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
@ -137,6 +168,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
@is_download_test
@unittest.skip('IE broken')
 class TestTedSubtitles(BaseTestSubtitles):
    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
    IE = TedTalkIE
@ -162,12 +194,12 @@ class TestVimeoSubtitles(BaseTestSubtitles):
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'de', 'en', 'es', 'fr'})
-        self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
+        self.assertEqual(md5(subtitles['en']), '386cbc9320b94e25cb364b97935e5dd1')
-        self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
+        self.assertEqual(md5(subtitles['fr']), 'c9b69eef35bc6641c0d4da8a04f9dfac')
    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
-        self.url = 'http://vimeo.com/56015672'
+        self.url = 'http://vimeo.com/68093876'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
@ -175,6 +207,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
@is_download_test
@unittest.skip('IE broken')
 class TestWallaSubtitles(BaseTestSubtitles):
    url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
    IE = WallaIE
@ -197,6 +230,7 @@ class TestWallaSubtitles(BaseTestSubtitles):
@is_download_test
@unittest.skip('IE broken')
 class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
    IE = CeskaTelevizeIE
@ -219,6 +253,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
@is_download_test
@unittest.skip('IE broken')
 class TestLyndaSubtitles(BaseTestSubtitles):
    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
    IE = LyndaIE
@ -232,6 +267,7 @@ class TestLyndaSubtitles(BaseTestSubtitles):
@is_download_test
@unittest.skip('IE broken')
 class TestNPOSubtitles(BaseTestSubtitles):
    url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
    IE = NPOIE
@ -245,6 +281,7 @@ class TestNPOSubtitles(BaseTestSubtitles):
@is_download_test
@unittest.skip('IE broken')
 class TestMTVSubtitles(BaseTestSubtitles):
    url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
    IE = ComedyCentralIE
@ -269,8 +306,8 @@ class TestNRKSubtitles(BaseTestSubtitles):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), {'no'})
+        self.assertEqual(set(subtitles.keys()), {'nb-ttv'})
-        self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
+        self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')
@is_download_test
@ -295,6 +332,7 @@ class TestRaiPlaySubtitles(BaseTestSubtitles):
@is_download_test
@unittest.skip('IE broken - DRM only')
 class TestVikiSubtitles(BaseTestSubtitles):
    url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
    IE = VikiIE
@ -323,6 +361,7 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
@is_download_test
@unittest.skip('IE broken')
 class TestThePlatformFeedSubtitles(BaseTestSubtitles):
    url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
    IE = ThePlatformFeedIE
@ -360,7 +399,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles):
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'en'})
-        self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
+        self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
    def test_subtitles_in_page(self):
        self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
@ -368,7 +407,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles):
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'en'})
-        self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
+        self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
@is_download_test
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@ -401,10 +401,15 @@ def validate_options(opts):
    if opts.no_sponsorblock:
        opts.sponsorblock_mark = opts.sponsorblock_remove = set()
    default_downloader = None
    for proto, path in opts.external_downloader.items():
-        if get_external_downloader(path) is None:
+        ed = get_external_downloader(path)
        if ed is None:
            raise ValueError(
                f'No such {format_field(proto, None, "%s ", ignore="default")}external downloader "{path}"')
        elif ed and proto == 'default':
            default_downloader = ed.get_basename()
    warnings, deprecation_warnings = [], []
    # Common mistake: -f best
@ -415,13 +420,18 @@ def validate_options(opts):
            'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning')))
    # --(postprocessor/downloader)-args without name
-    def report_args_compat(name, value, key1, key2=None):
+    def report_args_compat(name, value, key1, key2=None, where=None):
        if key1 in value and key2 not in value:
-            warnings.append(f'{name} arguments given without specifying name. The arguments will be given to all {name}s')
+            warnings.append(f'{name.title()} arguments given without specifying name. '
                            f'The arguments will be given to {where or f"all {name}s"}')
            return True
        return False
-    report_args_compat('external downloader', opts.external_downloader_args, 'default')
+    if report_args_compat('external downloader', opts.external_downloader_args,
                          'default', where=default_downloader) and default_downloader:
        # Compat with youtube-dl's behavior. See https://github.com/ytdl-org/youtube-dl/commit/49c5293014bc11ec8c009856cd63cffa6296c1e1
        opts.external_downloader_args.setdefault(default_downloader, opts.external_downloader_args.pop('default'))
    if report_args_compat('post-processor', opts.postprocessor_args, 'default-compat', 'default'):
        opts.postprocessor_args['default'] = opts.postprocessor_args.pop('default-compat')
        opts.postprocessor_args.setdefault('sponskrub', [])
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -752,6 +752,7 @@ from .kinja import KinjaEmbedIE
 from .kinopoisk import KinoPoiskIE
 from .konserthusetplay import KonserthusetPlayIE
 from .koo import KooIE
 from .kth import KTHIE
 from .krasview import KrasViewIE
 from .ku6 import Ku6IE
 from .kusi import KUSIIE
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@ -677,6 +677,11 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
            'vcodec': 'none'
        }]
        for a_format in formats:
            a_format.setdefault('http_headers', {}).update({
                'Referer': url,
            })
        song = self._call_api('song/info', au_id)
        title = song['title']
        statistic = song.get('statistic') or {}
--- a/yt_dlp/extractor/kaltura.py
+++ b/yt_dlp/extractor/kaltura.py
@ -382,5 +382,5 @@ class KalturaIE(InfoExtractor):
            'duration': info.get('duration'),
            'timestamp': info.get('createdAt'),
            'uploader_id': format_field(info, 'userId', ignore=('None', None)),
-            'view_count': info.get('plays'),
+            'view_count': int_or_none(info.get('plays')),
        }
--- a/yt_dlp/extractor/kth.py
+++ b/yt_dlp/extractor/kth.py
@ -0,0 +1,28 @@
 from .common import InfoExtractor
 from ..utils import smuggle_url
 class KTHIE(InfoExtractor):
    """Extractor for play.kth.se URLs that hands the media over as a ``kaltura:`` URL.

    ``_real_extract`` downloads nothing itself: it takes the media id from the
    URL and returns a URL result pointing at ``kaltura:308:<id>``.
    """
    # One or more path components, followed by the id, which is made of
    # lowercase letters, digits and underscores.
    _VALID_URL = r'https?://play\.kth\.se/(?:[^/]+/)+(?P<id>[a-z0-9_]+)'
    _TEST = {
        'url': 'https://play.kth.se/media/Lunch+breakA+De+nya+aff%C3%A4rerna+inom+Fordonsdalen/0_uoop6oz9',
        'md5': 'd83ada6d00ca98b73243a88efe19e8a6',
        'info_dict': {
            'id': '0_uoop6oz9',
            'ext': 'mp4',
            'title': 'md5:bd1d6931facb6828762a33e6ce865f37',
            'thumbnail': 're:https?://.+/thumbnail/.+',
            'duration': 3516,
            'timestamp': 1647345358,
            'upload_date': '20220315',
            'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f',
        }
    }
    def _real_extract(self, url):
        # Media id = the ``id`` group of _VALID_URL.
        video_id = self._match_id(url)
        # NOTE(review): 308 is presumably the Kaltura partner id for this
        # site -- confirm. The service URL is attached with smuggle_url;
        # presumably the Kaltura extractor reads it back to pick the API
        # host -- verify against that extractor.
        result = self.url_result(
            smuggle_url('kaltura:308:%s' % video_id, {
                'service_url': 'https://api.kaltura.nordu.net'}),
            'Kaltura')
        return result
--- a/yt_dlp/extractor/ndr.py
+++ b/yt_dlp/extractor/ndr.py
@ -1,11 +1,15 @@
 import re
 from .common import InfoExtractor
 from ..compat import compat_urllib_parse_urlparse
 from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
-    parse_duration,
+    merge_dicts,
    parse_iso8601,
    qualities,
    try_get,
    unified_strdate,
    urljoin,
 )
@ -14,120 +18,139 @@ class NDRBaseIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        display_id = next(group for group in mobj.groups() if group)
        id = mobj.group('id')
        webpage = self._download_webpage(url, display_id)
-        return self._extract_embed(webpage, display_id, id)
+        return self._extract_embed(webpage, display_id, url)
 class NDRIE(NDRBaseIE):
    IE_NAME = 'ndr'
    IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
-    _VALID_URL = r'https?://(?:www\.)?(?:daserste\.)?ndr\.de/(?:[^/]+/)*(?P<display_id>[^/?#]+),(?P<id>[\da-z]+)\.html'
+    _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
    _TESTS = [{
        # httpVideo, same content id
        'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
        'md5': '6515bc255dc5c5f8c85bbc38e035a659',
        'info_dict': {
            'id': 'hafengeburtstag988',
            'display_id': 'Party-Poette-und-Parade',
            'ext': 'mp4',
            'title': 'Party, Pötte und Parade',
            'thumbnail': 'https://www.ndr.de/fernsehen/hafengeburtstag990_v-contentxl.jpg',
            'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
-            'series': None,
+            'uploader': 'ndrtv',
-            'channel': 'NDR Fernsehen',
+            'timestamp': 1431255671,
-            'upload_date': '20150508',
+            'upload_date': '20150510',
            'duration': 3498,
        },
-    }, {
+        'params': {
-        'url': 'https://www.ndr.de/sport/fussball/Rostocks-Matchwinner-Froede-Ein-Hansa-Debuet-wie-im-Maerchen,hansa10312.html',
+            'skip_download': True,
        'only_matching': True
    }, {
        'url': 'https://www.ndr.de/nachrichten/niedersachsen/kommunalwahl_niedersachsen_2021/Grosse-Parteien-zufrieden-mit-Ergebnissen-der-Kommunalwahl,kommunalwahl1296.html',
        'info_dict': {
            'id': 'kommunalwahl1296',
            'ext': 'mp4',
            'title': 'Die Spitzenrunde: Die Wahl aus Sicht der Landespolitik',
            'thumbnail': 'https://www.ndr.de/fernsehen/screenshot1194912_v-contentxl.jpg',
            'description': 'md5:5c6e2ad744cef499135735a1036d7aa7',
            'series': 'Hallo Niedersachsen',
            'channel': 'NDR Fernsehen',
            'upload_date': '20210913',
            'duration': 438,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
-        'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
+        # httpVideo, different content id
        'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
        'md5': '1043ff203eab307f0c51702ec49e9a71',
        'info_dict': {
-            'id': 'sendung1091858',
+            'id': 'osna272',
            'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch',
            'ext': 'mp4',
-            'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
+            'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights',
-            'thumbnail': 'https://www.ndr.de/fernsehen/screenshot983938_v-contentxl.jpg',
+            'description': 'md5:32e9b800b3d2d4008103752682d5dc01',
-            'description': 'md5:700f6de264010585012a72f97b0ac0c9',
+            'uploader': 'ndrtv',
-            'series': 'extra 3',
+            'timestamp': 1442059200,
-            'channel': 'NDR Fernsehen',
+            'upload_date': '20150912',
-            'upload_date': '20201111',
+            'duration': 510,
-            'duration': 1749,
+        },
-        }
+        'params': {
            'skip_download': True,
        },
        'skip': 'No longer available',
    }, {
        # httpAudio, same content id
        'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
        'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
        'info_dict': {
            'id': 'audio51535',
            'display_id': 'La-Valette-entgeht-der-Hinrichtung',
            'ext': 'mp3',
            'title': 'La Valette entgeht der Hinrichtung',
            'thumbnail': 'https://www.ndr.de/mediathek/mediathekbild140_v-podcast.jpg',
            'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
-            'upload_date': '20140729',
+            'uploader': 'ndrinfo',
-            'duration': 884.0,
+            'timestamp': 1631711863,
            'upload_date': '20210915',
            'duration': 884,
        },
-        'expected_warnings': ['unable to extract json url'],
+        'params': {
            'skip_download': True,
        },
    }, {
        # with subtitles
        'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
        'info_dict': {
            'id': 'extra18674',
            'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
            'ext': 'mp4',
            'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
            'description': 'md5:700f6de264010585012a72f97b0ac0c9',
            'uploader': 'ndrtv',
            'upload_date': '20201207',
            'timestamp': 1614349457,
            'duration': 1749,
            'subtitles': {
                'de': [{
                    'ext': 'ttml',
                    'url': r're:^https://www\.ndr\.de.+',
                }],
            },
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
        'only_matching': True,
    }]
-    def _extract_embed(self, webpage, display_id, id):
+    def _extract_embed(self, webpage, display_id, url):
-        formats = []
+        embed_url = (
-        base_url = 'https://www.ndr.de'
+            self._html_search_meta(
-        json_url = self._search_regex(r'<iframe[^>]+src=\"([^\"]+)_theme-ndrde[^\.]*\.html\"', webpage,
+                'embedURL', webpage, 'embed URL',
-                                      'json url', fatal=False)
+                default=None)
-        if json_url:
+            or self._search_regex(
-            data_json = self._download_json(base_url + json_url.replace('ardplayer_image', 'ardjson_image') + '.json',
+                r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
-                                            id, fatal=False)
+                'embed URL', group='url', default=None)
-            info_json = data_json.get('_info', {})
+            or self._search_regex(
-            media_json = try_get(data_json, lambda x: x['_mediaArray'][0]['_mediaStreamArray'])
+                r'\bvar\s*sophoraID\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
-            for media in media_json:
+                'embed URL', group='url', default=''))
-                if media.get('_quality') == 'auto':
+        # some more work needed if we only found sophoraID
-                    formats.extend(self._extract_m3u8_formats(media['_stream'], id))
+        if re.match(r'^[a-z]+\d+$', embed_url):
-            subtitles = {}
+            # get the initial part of the url path, e.g. /panorama/archiv/2022/
-            sub_url = data_json.get('_subtitleUrl')
+            parsed_url = compat_urllib_parse_urlparse(url)
-            if sub_url:
+            path = self._search_regex(r'(.+/)%s' % display_id, parsed_url.path or '', 'embed URL', default='')
-                subtitles.setdefault('de', []).append({
+            # find tell-tale image with the actual ID
-                    'url': base_url + sub_url,
+            ndr_id = self._search_regex(r'%s([a-z]+\d+)(?!\.)\b' % (path, ), webpage, 'embed URL', default=None)
-                })
+            # or try to use special knowledge!
-            self._sort_formats(formats)
+            NDR_INFO_URL_TPL = 'https://www.ndr.de/info/%s-player.html'
-            return {
+            embed_url = 'ndr:%s' % (ndr_id, ) if ndr_id else NDR_INFO_URL_TPL % (embed_url, )
-                'id': id,
+        if not embed_url:
-                'title': info_json.get('clipTitle'),
+            raise ExtractorError('Unable to extract embedUrl')
-                'thumbnail': base_url + data_json.get('_previewImage'),
+
-                'description': info_json.get('clipDescription'),
+        description = self._search_regex(
-                'series': info_json.get('seriesTitle') or None,
+            r'<p[^>]+itemprop="description">([^<]+)</p>',
-                'channel': info_json.get('channelTitle'),
+            webpage, 'description', default=None) or self._og_search_description(webpage)
-                'upload_date': unified_strdate(info_json.get('clipDate')),
+        timestamp = parse_iso8601(
-                'duration': data_json.get('_duration'),
+            self._search_regex(
-                'formats': formats,
+                (r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="(?P<cont>[^"]+)"',
-                'subtitles': subtitles,
+                 r'\bvar\s*pdt\s*=\s*(?P<q>["\'])(?P<cont>(?:(?!(?P=q)).)+)(?P=q)', ),
-            }
+                webpage, 'upload date', group='cont', default=None))
-        else:
+        info = self._search_json_ld(webpage, display_id, default={})
-            json_url = base_url + self._search_regex(r'apiUrl\s?=\s?\'([^\']+)\'', webpage, 'json url').replace(
+        return merge_dicts({
-                '_belongsToPodcast-', '')
+            '_type': 'url_transparent',
-            data_json = self._download_json(json_url, id, fatal=False)
+            'url': embed_url,
-            return {
+            'display_id': display_id,
-                'id': id,
+            'description': description,
-                'title': data_json.get('title'),
+            'timestamp': timestamp,
-                'thumbnail': base_url + data_json.get('poster'),
+        }, info)
                'description': data_json.get('summary'),
                'upload_date': unified_strdate(data_json.get('publicationDate')),
                'duration': parse_duration(data_json.get('duration')),
                'formats': [{
                    'url': try_get(data_json, (lambda x: x['audio'][0]['url'], lambda x: x['files'][0]['url'])),
                    'vcodec': 'none',
                    'ext': 'mp3',
                }],
            }
 class NJoyIE(NDRBaseIE):
@ -151,19 +174,19 @@ class NJoyIE(NDRBaseIE):
        'params': {
            'skip_download': True,
        },
        'skip': 'No longer available',
    }, {
        # httpVideo, different content id
        'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
        'md5': '417660fffa90e6df2fda19f1b40a64d8',
        'info_dict': {
-            'id': 'dockville882',
+            'id': 'livestream283',
            'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
-            'ext': 'mp4',
+            'ext': 'mp3',
-            'title': '"Ich hab noch nie" mit Felix Jaehn',
+            'title': 'Das frueheste DJ Set des Nordens live mit Felix Jaehn',
-            'description': 'md5:85dd312d53be1b99e1f998a16452a2f3',
+            'description': 'md5:681698f527b8601e511e7b79edde7d2c',
            'uploader': 'njoy',
-            'upload_date': '20150822',
+            'upload_date': '20210830',
            'duration': 211,
        },
        'params': {
            'skip_download': True,
@ -173,18 +196,25 @@ class NJoyIE(NDRBaseIE):
        'only_matching': True,
    }]
-    def _extract_embed(self, webpage, display_id, id):
+    def _extract_embed(self, webpage, display_id, url=None):
        # find tell-tale URL with the actual ID, or ...
        video_id = self._search_regex(
-            r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
+            (r'''\bsrc\s*=\s*["']?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''',
-        description = self._search_regex(
+             r'<iframe[^>]+id="pp_([\da-z]+)"', ),
-            r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
+            webpage, 'NDR id', default=None)
-            webpage, 'description', fatal=False)
+
        description = (
            self._html_search_meta('description', webpage)
            or self._search_regex(
                r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
                webpage, 'description', fatal=False))
        return {
            '_type': 'url_transparent',
            'ie_key': 'NDREmbedBase',
            'url': 'ndr:%s' % video_id,
            'display_id': display_id,
            'description': description,
            'title': display_id.replace('-', ' ').strip(),
        }
@ -287,7 +317,7 @@ class NDREmbedBaseIE(InfoExtractor):
 class NDREmbedIE(NDREmbedBaseIE):
    IE_NAME = 'ndr:embed'
-    _VALID_URL = r'https?://(?:www\.)?(?:daserste\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
+    _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:(?:ard)?player|externalPlayer)\.html'
    _TESTS = [{
        'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
        'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
@ -300,6 +330,7 @@ class NDREmbedIE(NDREmbedBaseIE):
            'upload_date': '20150907',
            'duration': 132,
        },
        'skip': 'No longer available',
    }, {
        'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
        'md5': '002085c44bae38802d94ae5802a36e78',
@ -315,6 +346,7 @@ class NDREmbedIE(NDREmbedBaseIE):
        'params': {
            'skip_download': True,
        },
        'skip': 'No longer available',
    }, {
        'url': 'http://www.ndr.de/info/audio51535-player.html',
        'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
@ -324,7 +356,7 @@ class NDREmbedIE(NDREmbedBaseIE):
            'title': 'La Valette entgeht der Hinrichtung',
            'is_live': False,
            'uploader': 'ndrinfo',
-            'upload_date': '20140729',
+            'upload_date': '20210915',
            'duration': 884,
        },
        'params': {
@ -345,15 +377,17 @@ class NDREmbedIE(NDREmbedBaseIE):
        'params': {
            'skip_download': True,
        },
        'skip': 'No longer available',
    }, {
        # httpVideoLive
        'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
        'info_dict': {
            'id': 'livestream217',
-            'ext': 'flv',
+            'ext': 'mp4',
            'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'is_live': True,
-            'upload_date': '20150910',
+            'upload_date': '20210409',
            'uploader': 'ndrtv',
        },
        'params': {
            'skip_download': True,
@ -391,9 +425,10 @@ class NJoyEmbedIE(NDREmbedBaseIE):
            'ext': 'mp4',
            'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
            'is_live': False,
-            'upload_date': '20150807',
+            'upload_date': '20200826',
            'duration': 1011,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        # httpAudio
        'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
@ -410,6 +445,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
        'params': {
            'skip_download': True,
        },
        'skip': 'No longer available',
    }, {
        # httpAudioLive, no explicit ext
        'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
@ -419,7 +455,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
            'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'is_live': True,
            'uploader': 'njoy',
-            'upload_date': '20150810',
+            'upload_date': '20210830',
        },
        'params': {
            'skip_download': True,
--- a/yt_dlp/extractor/nhk.py
+++ b/yt_dlp/extractor/nhk.py
@ -11,7 +11,7 @@ from ..utils import (
 class NhkBaseIE(InfoExtractor):
-    _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/%s/%s/%s/all%s.json'
+    _API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
    _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
    _TYPE_REGEX = r'/(?P<type>video|audio)/'
@ -27,7 +27,7 @@ class NhkBaseIE(InfoExtractor):
    def _extract_episode_info(self, url, episode=None):
        fetch_episode = episode is None
        lang, m_type, episode_id = NhkVodIE._match_valid_url(url).groups()
-        if episode_id.isdigit():
+        if len(episode_id) == 7:
            episode_id = episode_id[:4] + '-' + episode_id[4:]
        is_video = m_type == 'video'
@ -89,7 +89,8 @@ class NhkBaseIE(InfoExtractor):
 class NhkVodIE(NhkBaseIE):
-    _VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
+    # the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], e.g. 9999a34
    _VALID_URL = r'%s%s(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
    # Content available only for a limited period of time. Visit
    # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
    _TESTS = [{
@ -129,6 +130,19 @@ class NhkVodIE(NhkBaseIE):
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
        'only_matching': True,
    }, {
        # video, alphabetic character in ID #29670
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
        'only_matching': True,
        'info_dict': {
            'id': 'qfjay6cg',
            'ext': 'mp4',
            'title': 'DESIGN TALKS plus - Fishermen’s Finery',
            'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
            'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
            'upload_date': '20210615',
            'timestamp': 1623722008,
        }
    }]
    def _real_extract(self, url):
--- a/yt_dlp/extractor/xhamster.py
+++ b/yt_dlp/extractor/xhamster.py
@ -21,7 +21,7 @@ from ..utils import (
 class XHamsterIE(InfoExtractor):
-    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com)'
+    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com)'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:.+?\.)?%s/
@ -32,7 +32,7 @@ class XHamsterIE(InfoExtractor):
                    ''' % _DOMAINS
    _TESTS = [{
        'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
-        'md5': '98b4687efb1ffd331c4197854dc09e8f',
+        'md5': '34e1ab926db5dc2750fed9e1f34304bb',
        'info_dict': {
            'id': '1509445',
            'display_id': 'femaleagent-shy-beauty-takes-the-bait',
@ -41,6 +41,7 @@ class XHamsterIE(InfoExtractor):
            'timestamp': 1350194821,
            'upload_date': '20121014',
            'uploader': 'Ruseful2011',
            'uploader_id': 'ruseful2011',
            'duration': 893,
            'age_limit': 18,
        },
@ -70,6 +71,7 @@ class XHamsterIE(InfoExtractor):
            'timestamp': 1454948101,
            'upload_date': '20160208',
            'uploader': 'parejafree',
            'uploader_id': 'parejafree',
            'duration': 72,
            'age_limit': 18,
        },
@ -115,6 +117,9 @@ class XHamsterIE(InfoExtractor):
    }, {
        'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx',
        'only_matching': True,
    }, {
        'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
        'only_matching': True,
    }]
    def _real_extract(self, url):
@ -244,7 +249,6 @@ class XHamsterIE(InfoExtractor):
                categories = None
            uploader_url = url_or_none(try_get(video, lambda x: x['author']['pageURL']))
            return {
                'id': video_id,
                'display_id': display_id,
@ -263,7 +267,7 @@ class XHamsterIE(InfoExtractor):
                'dislike_count': int_or_none(try_get(
                    video, lambda x: x['rating']['dislikes'], int)),
                'comment_count': int_or_none(video.get('views')),
-                'age_limit': age_limit,
+                'age_limit': age_limit if age_limit is not None else 18,
                'categories': categories,
                'formats': formats,
            }
@ -423,6 +427,9 @@ class XHamsterUserIE(InfoExtractor):
            'id': 'firatkaan',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://xhday.com/users/mobhunter',
        'only_matching': True,
    }]
    def _entries(self, user_id):
--- a/yt_dlp/extractor/youporn.py
+++ b/yt_dlp/extractor/youporn.py
@ -135,9 +135,10 @@ class YouPornIE(InfoExtractor):
            r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
            webpage, 'uploader', fatal=False)
        upload_date = unified_strdate(self._html_search_regex(
-            [r'UPLOADED:\s*<span>([^<]+)',
+            (r'UPLOADED:\s*<span>([^<]+)',
             r'Date\s+[Aa]dded:\s*<span>([^<]+)',
-             r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
+             r'''(?s)<div[^>]+class=["']videoInfo(?:Date|Time)\b[^>]*>(.+?)</div>''',
             r'(?s)<label\b[^>]*>Uploaded[^<]*</label>\s*<span\b[^>]*>(.+?)</span>'),
            webpage, 'upload date', fatal=False))
        age_limit = self._rta_search(webpage)