From 04417e89dee0c447eea5c8562ab5587c07192ca7 Mon Sep 17 00:00:00 2001 From: MinePlayersPE <20515340+MinePlayersPE@users.noreply.github.com> Date: Sat, 8 Oct 2022 15:45:27 +0700 Subject: [PATCH] [Roblox] Add extractor --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/roblox.py | 129 ++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+) create mode 100644 yt_dlp/extractor/roblox.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 1a355b2dc..dc5cb3a62 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1434,6 +1434,7 @@ from .reuters import ReutersIE from .reverbnation import ReverbNationIE from .rice import RICEIE from .rmcdecouverte import RMCDecouverteIE +from .roblox import RobloxIE from .rockstargames import RockstarGamesIE from .rokfin import ( RokfinIE, diff --git a/yt_dlp/extractor/roblox.py b/yt_dlp/extractor/roblox.py new file mode 100644 index 000000000..a411078d4 --- /dev/null +++ b/yt_dlp/extractor/roblox.py @@ -0,0 +1,129 @@ +from .common import InfoExtractor +from ..postprocessor.ffmpeg import FFmpegPostProcessor +from ..utils import ( + ExtractorError, + extract_attributes, + float_or_none, + get_element_by_id, + get_element_by_class, + get_element_html_by_class, + get_element_html_by_id, + int_or_none, + PostProcessingError, + strip_or_none, + str_or_none, + str_to_int, + traverse_obj, + try_call, + unified_timestamp, +) + + +class RobloxIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?roblox\.com/library/(?P\d+)' + _TESTS = [{ + # UGC Audio + 'url': 'https://www.roblox.com/library/7910582982/Backrooms-Ambiance-High-Quality', + 'md5': '', + 'info_dict': { + 'id': '7910582982', + 'ext': 'ogg', + 'title': 'Backrooms Ambiance (High Quality)', + 'description': 'Found an actual higher quality of the sound.', + 'uploader': 'ChaseDJ549', + 'uploader_id': '412014916', + 'categories': ['Horror'], + 'like_count': int, + 'timestamp': 1636142127, + 'modified_timestamp': 1656694893 + }, + }] + + def _real_extract(self, url): + asset_id = self._match_id(url) + webpage = self._download_webpage(url, asset_id) + + item_container_div = get_element_html_by_id('item-container', webpage) + item_container_attrs = extract_attributes(item_container_div[:item_container_div.find('>')+1]) + asset_type = item_container_attrs.get('data-asset-type') + if asset_type and (asset_type not in ('Audio', 'Video')): + raise ExtractorError('This asset is not an audio/video', expected=True) + asset_uploader_id, asset_uploader_name = self._search_regex( + r'>By \d+)[^"\']*["\'][^>]*>@?(?P\w+)