[extractor/camtasia] Separate into own extractor (#4307)

Authored by: coletdjnz
pull/1649/head^2
pukkandan 2 years ago
parent f2e8dbcc00
commit 5fff2e576f

@ -219,6 +219,7 @@ from .camdemy import (
CamdemyFolderIE CamdemyFolderIE
) )
from .cammodels import CamModelsIE from .cammodels import CamModelsIE
from .camtasia import CamtasiaEmbedIE
from .camwithher import CamWithHerIE from .camwithher import CamWithHerIE
from .canalalpha import CanalAlphaIE from .canalalpha import CanalAlphaIE
from .canalplus import CanalplusIE from .canalplus import CanalplusIE

@ -0,0 +1,71 @@
import os
import urllib.parse
from .common import InfoExtractor
from ..utils import float_or_none
class CamtasiaEmbedIE(InfoExtractor):
_VALID_URL = False
_WEBPAGE_TESTS = [
{
'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
'playlist': [{
'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
'info_dict': {
'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
'ext': 'flv',
'duration': 2235.90,
}
}, {
'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
'info_dict': {
'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
'ext': 'flv',
'duration': 2235.93,
}
}],
'info_dict': {
'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
},
'skip': 'webpage dead'
},
]
def _extract_from_webpage(self, url, webpage):
camtasia_cfg = self._search_regex(
r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
webpage, 'camtasia configuration file', default=None)
if camtasia_cfg is None:
return None
title = self._html_search_meta('DC.title', webpage, fatal=True)
camtasia_url = urllib.parse.urljoin(url, camtasia_cfg)
camtasia_cfg = self._download_xml(
camtasia_url, self._generic_id(url),
note='Downloading camtasia configuration',
errnote='Failed to download camtasia configuration')
fileset_node = camtasia_cfg.find('./playlist/array/fileset')
entries = []
for n in fileset_node.getchildren():
url_n = n.find('./uri')
if url_n is None:
continue
entries.append({
'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
'title': f'{title} - {n.tag}',
'url': urllib.parse.urljoin(url, url_n.text),
'duration': float_or_none(n.find('./duration').text),
})
return {
'_type': 'playlist',
'entries': entries,
'title': title,
}

@ -933,30 +933,6 @@ class GenericIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
} }
}, },
# Camtasia studio
{
'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
'playlist': [{
'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
'info_dict': {
'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
'ext': 'flv',
'duration': 2235.90,
}
}, {
'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
'info_dict': {
'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
'ext': 'flv',
'duration': 2235.93,
}
}],
'info_dict': {
'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
}
},
# Flowplayer # Flowplayer
{ {
'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html', 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
@ -2680,43 +2656,6 @@ class GenericIE(InfoExtractor):
'entries': entries, 'entries': entries,
} }
def _extract_camtasia(self, url, video_id, webpage):
""" Returns None if no camtasia video can be found. """
camtasia_cfg = self._search_regex(
r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
webpage, 'camtasia configuration file', default=None)
if camtasia_cfg is None:
return None
title = self._html_search_meta('DC.title', webpage, fatal=True)
camtasia_url = urllib.parse.urljoin(url, camtasia_cfg)
camtasia_cfg = self._download_xml(
camtasia_url, video_id,
note='Downloading camtasia configuration',
errnote='Failed to download camtasia configuration')
fileset_node = camtasia_cfg.find('./playlist/array/fileset')
entries = []
for n in fileset_node.getchildren():
url_n = n.find('./uri')
if url_n is None:
continue
entries.append({
'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
'title': f'{title} - {n.tag}',
'url': urllib.parse.urljoin(url, url_n.text),
'duration': float_or_none(n.find('./duration').text),
})
return {
'_type': 'playlist',
'entries': entries,
'title': title,
}
def _kvs_getrealurl(self, video_url, license_code): def _kvs_getrealurl(self, video_url, license_code):
if not video_url.startswith('function/0/'): if not video_url.startswith('function/0/'):
return video_url # not obfuscated return video_url # not obfuscated
@ -2920,12 +2859,6 @@ class GenericIE(InfoExtractor):
except xml.etree.ElementTree.ParseError: except xml.etree.ElementTree.ParseError:
pass pass
# Is it a Camtasia project?
camtasia_res = self._extract_camtasia(url, video_id, webpage)
if camtasia_res is not None:
self.report_detected('Camtasia video')
return camtasia_res
info_dict.update({ info_dict.update({
# it's tempting to parse this further, but you would # it's tempting to parse this further, but you would
# have to take into account all the variations like # have to take into account all the variations like

Loading…
Cancel
Save