diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index eb5a75f30..7ca363d6e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1285,6 +1285,7 @@ from .ufctv import ( UFCTVIE, UFCArabiaIE, ) +from .ukdevilz import UKDevilzIE from .uktvplay import UKTVPlayIE from .digiteka import DigitekaIE from .dlive import ( diff --git a/youtube_dl/extractor/ukdevilz.py b/youtube_dl/extractor/ukdevilz.py new file mode 100644 index 000000000..9fd2b1ff7 --- /dev/null +++ b/youtube_dl/extractor/ukdevilz.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, +) +from ..compat import compat_urlparse + + +class UKDevilzIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ukdevilz\.com/watch/-(?P[\d]*_[\d]*)' + IE_DESC = 'UKDEVILZ' + _TEST = { + 'url': 'https://ukdevilz.com/watch/-160418850_456239050', + 'md5': 'fe608143263af08b0160932561ed1a8a', + 'info_dict': { + 'id': '160418850_456239050', + 'ext': 'mp4', + 'title': 'Twix hot beverage', + 'description': 'md5:7c941f7c8ae9c83d06a6cea1722ae859', + 'thumbnail': r're:^https?://.*\.jpg$' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + iframe_url = self._search_regex(r']+src=(["\'])(?P(?:https?:)?//ukdevilz\.com/player/[^"\']+)', webpage, 'iFrameUrl', group='iFrameUrl') + iframe = self._download_webpage(iframe_url, video_id) + + playlist_url = compat_urlparse.urljoin(url, self._search_regex(r'window.playlistUrl\s*=\s*["\'](?P[^"\']+)', iframe, 'playlistUrl')) + playlist = self._download_json(playlist_url, video_id) + + # playlist has no info about the width and file extension of the HLS stream + # the HLS stream seems to always be the highest quality of the other streams, so just use that + max_height = 0 + ext = '' + for source in playlist.get('sources'): + if source.get('label') and int(source.get('label')) > max_height: + max_height = int_or_none(source.get('label')) + ext = source.get('type') + + formats = [] + for source in playlist.get('sources'): + formats.append({ + 'url': source.get('file') if not source.get('file').startswith('/') else compat_urlparse.urljoin(url, source.get('file')), + 'ext': ext if source.get('type') == 'hls' else source.get('type'), + 'protocol': 'm3u8' if source.get('type') == 'hls' else compat_urlparse.urlparse(source.get('file')).scheme, + 'height': int_or_none(source.get('label') or max_height) + }) + + description = (self._search_regex(r'(?s)]+\bclass=["\']description["\'][^>]*>(.+?)', webpage, 'description', default='', fatal=False) + or self._og_search_description(webpage)) + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'description': description, + 'formats': formats, + 'thumbnail': self._og_search_thumbnail(webpage), + 'tags': self._html_search_meta('keywords', webpage) + }