From 910ef313e1c12f8c7fc8b84140ef8fc9dc98d8ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miha=20Frange=C5=BE?= Date: Wed, 10 Feb 2021 19:43:17 +0100 Subject: [PATCH] [RTV SLO 4D] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/rtvslo.py | 61 ++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 youtube_dl/extractor/rtvslo.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 84998316c..08cdd78d3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1006,6 +1006,7 @@ from .rts import RTSIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtvnh import RTVNHIE from .rtvs import RTVSIE +from .rtvslo import RTVSLO4DIE from .ruhd import RUHDIE from .rumble import RumbleEmbedIE from .rutube import ( diff --git a/youtube_dl/extractor/rtvslo.py b/youtube_dl/extractor/rtvslo.py new file mode 100644 index 000000000..5b6a3dbbd --- /dev/null +++ b/youtube_dl/extractor/rtvslo.py @@ -0,0 +1,61 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + unified_timestamp, + try_get, ExtractorError +) + + +class RTVSLO4DIE(InfoExtractor): + _VALID_URL = r'https?://(?:4d|www)\.rtvslo\.si/(?:arhiv/.+|embed|4d/arhiv)/(?P\d+)' + _TEST = { + 'url': 'https://4d.rtvslo.si/arhiv/seje-odbora-za-kmetijstvo-gozdarstvo-in-prehrano/174595438', + 'md5': 'b87e5a65be2365f83eb0d24d44131d0f', + 'info_dict': { + 'id': '174595438', + 'ext': 'mp4', + 'title': 'Krajčič o tatvini sendviča', + 'thumbnail': r're:https://img.rtvslo.si/.+\.jpg', + 'timestamp': 1549999614, + 'upload_date': '20190212', + 'duration': 85 + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + embed_url = 'https://4d.rtvslo.si/embed/' + video_id + embed_html = self._download_webpage(embed_url, video_id) + + client_id = self._search_regex(r'\[\'client\'\] = "(.+?)";', embed_html, 'clientId') + + info_url = 'https://api.rtvslo.si/ava/getRecordingDrm/' + video_id + '?client_id=' + client_id + video_info = self._download_json(info_url, video_id)['response'] + + if video_info["mediaType"] != "video": + raise ExtractorError("Downloading audio is not implemented for this source yet") + + jwt = video_info['jwt'] + + media_info_url = 'https://api.rtvslo.si/ava/getMedia/' + video_id + '?client_id=' + client_id + '&jwt=' + jwt + media_info = self._download_json(media_info_url, video_id)['response'] + + # TODO: Support for audio-only links (like radio shows) + # Instead of HLS, an mp3 URL is provided for those in ".mediaFiles[0].streams.https" + + formats = self._extract_m3u8_formats( + media_info['addaptiveMedia']['hls'], video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + + return { + 'id': video_id, + 'title': video_info['title'], + 'description': try_get(video_info, 'description'), + 'thumbnail': video_info.get('thumbnail_sec'), + 'timestamp': unified_timestamp(video_info['broadcastDate']), + 'duration': video_info.get('duration'), + 'formats': formats, + }