From bab753c54740c846dab390f3378da2470e255598 Mon Sep 17 00:00:00 2001 From: EGA-SUPREMO Date: Mon, 22 Apr 2024 11:04:45 -0400 Subject: [PATCH 01/10] add fyptt extrator empty file --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/fyptt.py | 41 +++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 yt_dlp/extractor/fyptt.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 42034275b..9ab41bde1 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -665,6 +665,7 @@ from .funimation import ( from .funk import FunkIE from .funker530 import Funker530IE from .fuyintv import FuyinTVIE +from .fyptt import FYPTTIE from .gab import ( GabTVIE, GabIE, diff --git a/yt_dlp/extractor/fyptt.py b/yt_dlp/extractor/fyptt.py new file mode 100644 index 000000000..7db877aa8 --- /dev/null +++ b/yt_dlp/extractor/fyptt.py @@ -0,0 +1,41 @@ +from .common import InfoExtractor + + +class FYPTTIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P[0-9]+)' + _TESTS = [{ + 'url': 'https://yourextractor.com/watch/42', + 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'info_dict': { + # For videos, only the 'id' and 'ext' fields are required to RUN the test: + 'id': '42', + 'ext': 'mp4', + # Then if the test run fails, it will output the missing/incorrect fields. + # Properties can be added as: + # * A value, e.g. + # 'title': 'Video title goes here', + # * MD5 checksum; start the string with 'md5:', e.g. + # 'description': 'md5:098f6bcd4621d373cade4e832627b4f6', + # * A regular expression; start the string with 're:', e.g. + # 'thumbnail': r're:^https?://.*\.jpg$', + # * A count of elements in a list; start the string with 'count:', e.g. + # 'tags': 'count:10', + # * Any Python type, e.g. + # 'view_count': int, + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + # TODO more code goes here, for example ... + title = self._html_search_regex(r'

(.+?)

', webpage, 'title') + + return { + 'id': video_id, + 'title': title, + 'description': self._og_search_description(webpage), + 'uploader': self._search_regex(r']+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), + # TODO more properties (see yt_dlp/extractor/common.py) + } \ No newline at end of file From c4790a0de826d3595ea18b5bcdb33fa5615cd7a2 Mon Sep 17 00:00:00 2001 From: EGA-SUPREMO Date: Mon, 22 Apr 2024 11:43:07 -0400 Subject: [PATCH 02/10] add valid url regex --- yt_dlp/extractor/fyptt.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/fyptt.py b/yt_dlp/extractor/fyptt.py index 7db877aa8..c77d0c423 100644 --- a/yt_dlp/extractor/fyptt.py +++ b/yt_dlp/extractor/fyptt.py @@ -2,13 +2,13 @@ from .common import InfoExtractor class FYPTTIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P[0-9]+)' + _VALID_URL = r'https?://(?:stream\.|)fyptt\.to/(?P[0-9a-zA-Z]+)(?:\.|/)' _TESTS = [{ - 'url': 'https://yourextractor.com/watch/42', + 'url': 'https://fyptt.to/203/gorgeous-naughty-blonde-with-beautiful-curves-shows-her-naked-boobies-on-nsfw-tiktok/', 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', 'info_dict': { # For videos, only the 'id' and 'ext' fields are required to RUN the test: - 'id': '42', + 'id': '203', 'ext': 'mp4', # Then if the test run fails, it will output the missing/incorrect fields. # Properties can be added as: @@ -22,12 +22,18 @@ class FYPTTIE(InfoExtractor): # 'tags': 'count:10', # * Any Python type, e.g. # 'view_count': int, - } + }, + }, { + 'url': 'https://fyptt.to/10382/beautiful-livestream-tits-and-nipples-slip-from-girls-who-loves-talking-with-their-viewers/', + 'only_matching': True, + }, { + 'url': 'https://fyptt.to/120/small-tits-fit-blonde-dancing-naked-at-the-front-door-on-tiktok', + 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + #video_id = self._match_id(url) + #webpage = self._download_webpage(url, video_id) # TODO more code goes here, for example ... title = self._html_search_regex(r'

(.+?)

', webpage, 'title') From 73b672a979a9161a8ebbc52c9971c35ca1f5c1c3 Mon Sep 17 00:00:00 2001 From: EGA-SUPREMO Date: Mon, 29 Apr 2024 14:15:24 -0400 Subject: [PATCH 03/10] add regex for title --- yt_dlp/extractor/fyptt.py | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/fyptt.py b/yt_dlp/extractor/fyptt.py index c77d0c423..42fcf4e9e 100644 --- a/yt_dlp/extractor/fyptt.py +++ b/yt_dlp/extractor/fyptt.py @@ -1,8 +1,7 @@ from .common import InfoExtractor - class FYPTTIE(InfoExtractor): - _VALID_URL = r'https?://(?:stream\.|)fyptt\.to/(?P[0-9a-zA-Z]+)(?:\.|/)' + _VALID_URL = r'https?://(?:stream\.|)fyptt\.to/(?P[0-9a-zA-Z]+)(?:|/)' _TESTS = [{ 'url': 'https://fyptt.to/203/gorgeous-naughty-blonde-with-beautiful-curves-shows-her-naked-boobies-on-nsfw-tiktok/', 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', @@ -32,16 +31,31 @@ class FYPTTIE(InfoExtractor): }] def _real_extract(self, url): - #video_id = self._match_id(url) - #webpage = self._download_webpage(url, video_id) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + formats = [] + +# format_url = self._html_search_regex(r'(.+?)', webpage, 'title') + #format_url = self._html_search_regex(r'', webpage, 'video URL') + print("format_url") + format_url = self._html_search_regex(r'', webpage, 'format_url') + print(format_url) + formats.append({ + 'url': format_url, + 'format_id': 'default', + }) + + title = self._html_search_regex(r'(.+?)', webpage, 'title') + +# + - # TODO more code goes here, for example ... - title = self._html_search_regex(r'

(.+?)

', webpage, 'title') return { 'id': video_id, 'title': title, 'description': self._og_search_description(webpage), - 'uploader': self._search_regex(r']+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), - # TODO more properties (see yt_dlp/extractor/common.py) + 'age_limit': 18, + 'formats': formats, } \ No newline at end of file From 16d3268cf9f030d3fc5e10d3c6d170bc144e9ad0 Mon Sep 17 00:00:00 2001 From: EGA-SUPREMO Date: Tue, 30 Apr 2024 08:17:27 -0400 Subject: [PATCH 04/10] add real format url to FYPTT --- yt_dlp/extractor/fyptt.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/fyptt.py b/yt_dlp/extractor/fyptt.py index 42fcf4e9e..fdbbc33a0 100644 --- a/yt_dlp/extractor/fyptt.py +++ b/yt_dlp/extractor/fyptt.py @@ -1,4 +1,7 @@ from .common import InfoExtractor +from ..utils import escapeHTML + +import re class FYPTTIE(InfoExtractor): _VALID_URL = r'https?://(?:stream\.|)fyptt\.to/(?P[0-9a-zA-Z]+)(?:|/)' @@ -35,11 +38,22 @@ class FYPTTIE(InfoExtractor): webpage = self._download_webpage(url, video_id) formats = [] + # format_url = self._html_search_regex(r'(.+?)', webpage, 'title') - #format_url = self._html_search_regex(r'', webpage, 'video URL') + format_url = self._html_search_regex(r'"embedURL":"([^"]+)"', webpage, 'video URL') + + # Remove invalid characters using regex + format_url = re.sub(r'\\', '', format_url) print("format_url") - format_url = self._html_search_regex(r'', webpage, 'format_url') print(format_url) + webpage_video = self._download_webpage(format_url, video_id) + + + # format_url = self._html_search_regex(r'(.+?)', webpage, 'title') -# - - return { 'id': video_id, From 3997c6d84b047767f3abd1fd2bde0cba43b2dae4 Mon Sep 17 00:00:00 2001 From: EGA-SUPREMO Date: Fri, 3 May 2024 07:35:03 -0400 Subject: [PATCH 05/10] clean code --- yt_dlp/extractor/fyptt.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/fyptt.py b/yt_dlp/extractor/fyptt.py index fdbbc33a0..424f1d630 100644 --- a/yt_dlp/extractor/fyptt.py +++ b/yt_dlp/extractor/fyptt.py @@ -32,28 +32,25 @@ class FYPTTIE(InfoExtractor): 'url': 'https://fyptt.to/120/small-tits-fit-blonde-dancing-naked-at-the-front-door-on-tiktok', 'only_matching': True, }] - + + def _download_webpage_handle(self, *args, **kwargs): + headers = kwargs.get('headers', {}).copy() + headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36' + kwargs['headers'] = headers + return super(FYPTTIE, self)._download_webpage_handle( + *args, **kwargs) + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) formats = [] - - -# format_url = self._html_search_regex(r'(.+?)', webpage, 'title') format_url = self._html_search_regex(r'"embedURL":"([^"]+)"', webpage, 'video URL') - # Remove invalid characters using regex format_url = re.sub(r'\\', '', format_url) - print("format_url") - print(format_url) webpage_video = self._download_webpage(format_url, video_id) - - # format_url = self._html_search_regex(r'(.+?)', webpage, 'title') - return { 'id': video_id, 'title': title, From f2c122a8b6f77e7f4b626828d2c6e6dd0d22a488 Mon Sep 17 00:00:00 2001 From: EGA-SUPREMO Date: Fri, 3 May 2024 08:01:49 -0400 Subject: [PATCH 06/10] add http headers --- yt_dlp/extractor/fyptt.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/fyptt.py b/yt_dlp/extractor/fyptt.py index 424f1d630..d79f1cffc 100644 --- a/yt_dlp/extractor/fyptt.py +++ b/yt_dlp/extractor/fyptt.py @@ -33,12 +33,12 @@ class FYPTTIE(InfoExtractor): 'only_matching': True, }] - def _download_webpage_handle(self, *args, **kwargs): - headers = kwargs.get('headers', {}).copy() - headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36' - kwargs['headers'] = headers - return super(FYPTTIE, self)._download_webpage_handle( - *args, **kwargs) +# def _download_webpage_handle(self, *args, **kwargs): +# headers = kwargs.get('headers', {}).copy() +# headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36' +# kwargs['headers'] = headers +# return super(FYPTTIE, self)._download_webpage_handle( +# *args, **kwargs) def _real_extract(self, url): video_id = self._match_id(url) @@ -57,11 +57,13 @@ class FYPTTIE(InfoExtractor): }) title = self._html_search_regex(r'(.+?)', webpage, 'title') + + http_headers = {'Referer':'https://fyptt.to/'} return { 'id': video_id, 'title': title, - 'description': self._og_search_description(webpage), 'age_limit': 18, 'formats': formats, + 'http_headers': http_headers } \ No newline at end of file From 51a3d602ce43160fa9f1f916634c1694d98d8dd2 Mon Sep 17 00:00:00 2001 From: EGA-SUPREMO Date: Fri, 3 May 2024 09:24:54 -0400 Subject: [PATCH 07/10] fix tests and clean code --- yt_dlp/extractor/fyptt.py | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/yt_dlp/extractor/fyptt.py b/yt_dlp/extractor/fyptt.py index d79f1cffc..7bea8af4d 100644 --- a/yt_dlp/extractor/fyptt.py +++ b/yt_dlp/extractor/fyptt.py @@ -7,23 +7,12 @@ class FYPTTIE(InfoExtractor): _VALID_URL = r'https?://(?:stream\.|)fyptt\.to/(?P[0-9a-zA-Z]+)(?:|/)' _TESTS = [{ 'url': 'https://fyptt.to/203/gorgeous-naughty-blonde-with-beautiful-curves-shows-her-naked-boobies-on-nsfw-tiktok/', - 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'md5': 'fc12bce4a9c1335f153500c8fea6e1a8', 'info_dict': { - # For videos, only the 'id' and 'ext' fields are required to RUN the test: 'id': '203', 'ext': 'mp4', - # Then if the test run fails, it will output the missing/incorrect fields. - # Properties can be added as: - # * A value, e.g. - # 'title': 'Video title goes here', - # * MD5 checksum; start the string with 'md5:', e.g. - # 'description': 'md5:098f6bcd4621d373cade4e832627b4f6', - # * A regular expression; start the string with 're:', e.g. - # 'thumbnail': r're:^https?://.*\.jpg$', - # * A count of elements in a list; start the string with 'count:', e.g. - # 'tags': 'count:10', - # * Any Python type, e.g. - # 'view_count': int, + 'title': 'Gorgeous, naughty blonde with beautiful curves shows her naked boobies on NSFW TikTok', + 'age_limit': 18 }, }, { 'url': 'https://fyptt.to/10382/beautiful-livestream-tits-and-nipples-slip-from-girls-who-loves-talking-with-their-viewers/', @@ -32,14 +21,7 @@ class FYPTTIE(InfoExtractor): 'url': 'https://fyptt.to/120/small-tits-fit-blonde-dancing-naked-at-the-front-door-on-tiktok', 'only_matching': True, }] - -# def _download_webpage_handle(self, *args, **kwargs): -# headers = kwargs.get('headers', {}).copy() -# headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36' -# kwargs['headers'] = headers -# return super(FYPTTIE, self)._download_webpage_handle( -# *args, **kwargs) - + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -57,13 +39,13 @@ class FYPTTIE(InfoExtractor): }) title = self._html_search_regex(r'(.+?)', webpage, 'title') - + http_headers = {'Referer':'https://fyptt.to/'} return { 'id': video_id, 'title': title, - 'age_limit': 18, 'formats': formats, + 'age_limit': 18, 'http_headers': http_headers - } \ No newline at end of file + } From 25fff48148490fab7d80a7fc4240aa463c046d77 Mon Sep 17 00:00:00 2001 From: EGA-SUPREMO Date: Fri, 3 May 2024 09:27:08 -0400 Subject: [PATCH 08/10] remove unused import --- yt_dlp/extractor/fyptt.py | 1 - 1 file changed, 1 deletion(-) diff --git a/yt_dlp/extractor/fyptt.py b/yt_dlp/extractor/fyptt.py index 7bea8af4d..cc6728c3f 100644 --- a/yt_dlp/extractor/fyptt.py +++ b/yt_dlp/extractor/fyptt.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..utils import escapeHTML import re From 68163f7530bfec591a291cf08cd3699bc1286250 Mon Sep 17 00:00:00 2001 From: EGA-SUPREMO Date: Fri, 3 May 2024 10:11:08 -0400 Subject: [PATCH 09/10] fix regex --- yt_dlp/extractor/fyptt.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/fyptt.py b/yt_dlp/extractor/fyptt.py index cc6728c3f..c057a4eff 100644 --- a/yt_dlp/extractor/fyptt.py +++ b/yt_dlp/extractor/fyptt.py @@ -24,13 +24,14 @@ class FYPTTIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + formats = [] format_url = self._html_search_regex(r'"embedURL":"([^"]+)"', webpage, 'video URL') - format_url = re.sub(r'\\', '', format_url) + webpage_video = self._download_webpage(format_url, video_id) - match = re.search(r' Date: Fri, 3 May 2024 10:21:47 -0400 Subject: [PATCH 10/10] clean code --- yt_dlp/extractor/fyptt.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/fyptt.py b/yt_dlp/extractor/fyptt.py index c057a4eff..d54fa276e 100644 --- a/yt_dlp/extractor/fyptt.py +++ b/yt_dlp/extractor/fyptt.py @@ -2,6 +2,7 @@ from .common import InfoExtractor import re + class FYPTTIE(InfoExtractor): _VALID_URL = r'https?://(?:stream\.|)fyptt\.to/(?P[0-9a-zA-Z]+)(?:|/)' _TESTS = [{ @@ -28,7 +29,7 @@ class FYPTTIE(InfoExtractor): formats = [] format_url = self._html_search_regex(r'"embedURL":"([^"]+)"', webpage, 'video URL') format_url = re.sub(r'\\', '', format_url) - + webpage_video = self._download_webpage(format_url, video_id) match = re.search(r'(https:\/\/[^"]+\.mp4)', webpage_video) @@ -40,7 +41,7 @@ class FYPTTIE(InfoExtractor): title = self._html_search_regex(r'(.+?)', webpage, 'title') - http_headers = {'Referer':'https://fyptt.to/'} + http_headers = {'Referer': 'https://fyptt.to/'} return { 'id': video_id,