From 3ae878605dd28461896e62f56e20bc50336c45bd Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 4 Dec 2019 17:20:53 +0100 Subject: [PATCH 01/70] [ufctv] fix extraction and add support for UFC Arabia(closes #23312) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/imggaming.py | 109 +++++++++++++++++++++++++++++ youtube_dl/extractor/ufctv.py | 73 +++---------------- 3 files changed, 121 insertions(+), 66 deletions(-) create mode 100644 youtube_dl/extractor/imggaming.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 465d9d364..74bf58f38 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1234,7 +1234,10 @@ from .udemy import ( UdemyCourseIE ) from .udn import UDNEmbedIE -from .ufctv import UFCTVIE +from .ufctv import ( + UFCTVIE, + UFCArabiaIE, +) from .uktvplay import UKTVPlayIE from .digiteka import DigitekaIE from .dlive import ( diff --git a/youtube_dl/extractor/imggaming.py b/youtube_dl/extractor/imggaming.py new file mode 100644 index 000000000..96fddeac0 --- /dev/null +++ b/youtube_dl/extractor/imggaming.py @@ -0,0 +1,109 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ( + ExtractorError, + int_or_none, + try_get, +) + + +class ImgGamingBaseIE(InfoExtractor): + _API_BASE = 'https://dce-frontoffice.imggaming.com/api/v2/' + _API_KEY = '857a1e5d-e35e-4fdf-805b-a87b6f8364bf' + _HEADERS = None + _LOGIN_REQUIRED = True + _LOGIN_SUFFIX = '' + _MANIFEST_HEADERS = {'Accept-Encoding': 'identity'} + _REALM = None + _TOKEN = None + _VALID_URL_TEMPL = r'https?://%s/(?Plive|video)/(?P\d+)' + + def _real_initialize(self): + if not self._LOGIN_REQUIRED: + return + + self._HEADERS = { + 'Realm': 'dce.' + self._REALM, + 'x-api-key': self._API_KEY, + } + + email, password = self._get_login_info() + if email is None: + self.raise_login_required() + + p_headers = self._HEADERS.copy() + p_headers['Content-Type'] = 'application/json' + self._HEADERS['Authorization'] = 'Bearer ' + self._download_json( + self._API_BASE + 'login' + self._LOGIN_SUFFIX, + None, 'Logging in', data=json.dumps({ + 'id': email, + 'secret': password, + }).encode(), headers=p_headers)['authorisationToken'] + + def _extract_media_id(self, url, display_id): + return display_id + + def _extract_dve_api_url(self, media_id, media_type): + url = self._API_BASE + 'stream' + if media_type == 'video': + url += '/vod/' + media_id + else: + url += '?eventId=' + media_id + try: + return self._download_json( + url, media_id, headers=self._HEADERS)['playerUrlCallback'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + raise ExtractorError( + self._parse_json(e.cause.read().decode(), media_id)['messages'][0], + expected=True) + raise + + def _real_extract(self, url): + media_type, display_id = re.match(self._VALID_URL, url).groups() + media_id = self._extract_media_id(url, display_id) + dve_api_url = self._extract_dve_api_url(media_id, media_type) + video_data = self._download_json(dve_api_url, media_id) + is_live = media_type == 'live' + if is_live: + title = self._live_title(self._download_json( + self._API_BASE + 'event/' + media_id, + media_id, headers=self._HEADERS)['title']) + else: + title = video_data['name'] + + formats = [] + for proto in ('hls', 'dash'): + media_url = video_data.get(proto + 'Url') or try_get(video_data, lambda x: x[proto]['url']) + if not media_url: + continue + if proto == 'hls': + m3u8_formats = self._extract_m3u8_formats( + media_url, media_id, 'mp4', 'm3u8' if is_live else 'm3u8_native', + m3u8_id='hls', fatal=False, headers=self._MANIFEST_HEADERS) + for f in m3u8_formats: + f.setdefault('http_headers', {}).update(self._MANIFEST_HEADERS) + formats.append(f) + else: + formats.extend(self._extract_mpd_formats( + media_url, media_id, mpd_id='dash', fatal=False, + headers=self._MANIFEST_HEADERS)) + self._sort_formats(formats) + + return { + 'id': media_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + 'thumbnail': video_data.get('thumbnailUrl'), + 'description': video_data.get('description'), + 'duration': int_or_none(video_data.get('duration')), + 'tags': video_data.get('tags'), + 'is_live': is_live, + } diff --git a/youtube_dl/extractor/ufctv.py b/youtube_dl/extractor/ufctv.py index f3eaee6b3..160b0f104 100644 --- a/youtube_dl/extractor/ufctv.py +++ b/youtube_dl/extractor/ufctv.py @@ -1,73 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - parse_duration, - parse_iso8601, - urlencode_postdata, -) +from .imggaming import ImgGamingBaseIE -class UFCTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ufc\.tv/video/(?P[^/]+)' +class UFCTVIE(ImgGamingBaseIE): + _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:www\.)?ufc\.tv' _NETRC_MACHINE = 'ufctv' - _TEST = { - 'url': 'https://www.ufc.tv/video/ufc-219-countdown-full-episode', - 'info_dict': { - 'id': '34167', - 'ext': 'mp4', - 'title': 'UFC 219 Countdown: Full Episode', - 'description': 'md5:26d4e8bf4665ae5878842d7050c3c646', - 'timestamp': 1513962360, - 'upload_date': '20171222', - }, - 'params': { - # m3u8 download - 'skip_download': True, - } - } + _REALM = 'ufc' - def _real_initialize(self): - username, password = self._get_login_info() - if username is None: - return - code = self._download_json( - 'https://www.ufc.tv/secure/authenticate', - None, 'Logging in', data=urlencode_postdata({ - 'username': username, - 'password': password, - 'format': 'json', - })).get('code') - if code and code != 'loginsuccess': - raise ExtractorError(code, expected=True) - - def _real_extract(self, url): - display_id = self._match_id(url) - video_data = self._download_json(url, display_id, query={ - 'format': 'json', - }) - video_id = str(video_data['id']) - title = video_data['name'] - m3u8_url = self._download_json( - 'https://www.ufc.tv/service/publishpoint', video_id, query={ - 'type': 'video', - 'format': 'json', - 'id': video_id, - }, headers={ - 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1', - })['path'] - m3u8_url = m3u8_url.replace('_iphone.', '.') - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': video_data.get('description'), - 'duration': parse_duration(video_data.get('runtime')), - 'timestamp': parse_iso8601(video_data.get('releaseDate')), - 'formats': formats, - } +class UFCArabiaIE(ImgGamingBaseIE): + _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'app\.ufcarabia\.com' + _NETRC_MACHINE = 'ufcarabia' + _REALM = 'admufc' From 7d53fa475a97baf2a676d935847b3dc2af562a7c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 4 Dec 2019 20:56:23 +0100 Subject: [PATCH 02/70] [imggaming] add support for playlists and extract subtitles --- youtube_dl/extractor/imggaming.py | 56 +++++++++++++++++++++++++------ youtube_dl/extractor/ufctv.py | 2 ++ 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/imggaming.py b/youtube_dl/extractor/imggaming.py index 96fddeac0..8bb5de463 100644 --- a/youtube_dl/extractor/imggaming.py +++ b/youtube_dl/extractor/imggaming.py @@ -9,6 +9,7 @@ from ..compat import compat_HTTPError from ..utils import ( ExtractorError, int_or_none, + str_or_none, try_get, ) @@ -16,13 +17,14 @@ from ..utils import ( class ImgGamingBaseIE(InfoExtractor): _API_BASE = 'https://dce-frontoffice.imggaming.com/api/v2/' _API_KEY = '857a1e5d-e35e-4fdf-805b-a87b6f8364bf' + _DOMAIN = None _HEADERS = None _LOGIN_REQUIRED = True _LOGIN_SUFFIX = '' _MANIFEST_HEADERS = {'Accept-Encoding': 'identity'} _REALM = None _TOKEN = None - _VALID_URL_TEMPL = r'https?://%s/(?Plive|video)/(?P\d+)' + _VALID_URL_TEMPL = r'https?://%s/(?Plive|playlist|video)/(?P\d+)(?:\?.*?\bplaylistId=(?P\d+))?' def _real_initialize(self): if not self._LOGIN_REQUIRED: @@ -46,18 +48,22 @@ class ImgGamingBaseIE(InfoExtractor): 'secret': password, }).encode(), headers=p_headers)['authorisationToken'] + def _call_api(self, path, media_id): + return self._download_json( + self._API_BASE + path + media_id, media_id, headers=self._HEADERS) + def _extract_media_id(self, url, display_id): return display_id def _extract_dve_api_url(self, media_id, media_type): - url = self._API_BASE + 'stream' + stream_path = 'stream' if media_type == 'video': - url += '/vod/' + media_id + stream_path += '/vod/' else: - url += '?eventId=' + media_id + stream_path += '?eventId=' try: - return self._download_json( - url, media_id, headers=self._HEADERS)['playerUrlCallback'] + return self._call_api( + stream_path, media_id)['playerUrlCallback'] except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: raise ExtractorError( @@ -66,15 +72,35 @@ class ImgGamingBaseIE(InfoExtractor): raise def _real_extract(self, url): - media_type, display_id = re.match(self._VALID_URL, url).groups() + media_type, display_id, playlist_id = re.match(self._VALID_URL, url).groups() media_id = self._extract_media_id(url, display_id) + + if playlist_id: + if self._downloader.params.get('noplaylist'): + self.to_screen('Downloading just video %s because of --no-playlist' % media_id) + else: + self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id) + media_type, media_id = 'playlist', playlist_id + + if media_type == 'playlist': + playlist = self._call_api('vod/playlist/', media_id) + entries = [] + for video in try_get(playlist, lambda x: x['videos']['vods']) or []: + video_id = str_or_none(video.get('id')) + if not video_id: + continue + entries.append(self.url_result( + 'https://%s/video/%s' % (self._DOMAIN, video_id), + self.ie_key(), video_id)) + return self.playlist_result( + entries, media_id, playlist.get('title'), + playlist.get('description')) + dve_api_url = self._extract_dve_api_url(media_id, media_type) video_data = self._download_json(dve_api_url, media_id) is_live = media_type == 'live' if is_live: - title = self._live_title(self._download_json( - self._API_BASE + 'event/' + media_id, - media_id, headers=self._HEADERS)['title']) + title = self._live_title(self._call_api('event/', media_id)['title']) else: title = video_data['name'] @@ -96,6 +122,15 @@ class ImgGamingBaseIE(InfoExtractor): headers=self._MANIFEST_HEADERS)) self._sort_formats(formats) + subtitles = {} + for subtitle in video_data.get('subtitles', []): + subtitle_url = subtitle.get('url') + if not subtitle_url: + continue + subtitles.setdefault(subtitle.get('lang', 'en_US'), []).append({ + 'url': subtitle_url, + }) + return { 'id': media_id, 'display_id': display_id, @@ -106,4 +141,5 @@ class ImgGamingBaseIE(InfoExtractor): 'duration': int_or_none(video_data.get('duration')), 'tags': video_data.get('tags'), 'is_live': is_live, + 'subtitles': subtitles, } diff --git a/youtube_dl/extractor/ufctv.py b/youtube_dl/extractor/ufctv.py index 160b0f104..d07fa1280 100644 --- a/youtube_dl/extractor/ufctv.py +++ b/youtube_dl/extractor/ufctv.py @@ -7,10 +7,12 @@ from .imggaming import ImgGamingBaseIE class UFCTVIE(ImgGamingBaseIE): _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:www\.)?ufc\.tv' _NETRC_MACHINE = 'ufctv' + _DOMAIN = 'ufc.tv' _REALM = 'ufc' class UFCArabiaIE(ImgGamingBaseIE): _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'app\.ufcarabia\.com' _NETRC_MACHINE = 'ufcarabia' + _DOMAIN = 'app.ufcarabia.com' _REALM = 'admufc' From 4067a2327069c24915945fb5f5182e7fa987a57e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 6 Dec 2019 11:04:12 +0100 Subject: [PATCH 03/70] [ufctv] add support for more domains and remove compatibility code(closes #23332) --- youtube_dl/extractor/imggaming.py | 20 ++++---------------- youtube_dl/extractor/ufctv.py | 6 ++---- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/imggaming.py b/youtube_dl/extractor/imggaming.py index 8bb5de463..10d26adab 100644 --- a/youtube_dl/extractor/imggaming.py +++ b/youtube_dl/extractor/imggaming.py @@ -17,19 +17,12 @@ from ..utils import ( class ImgGamingBaseIE(InfoExtractor): _API_BASE = 'https://dce-frontoffice.imggaming.com/api/v2/' _API_KEY = '857a1e5d-e35e-4fdf-805b-a87b6f8364bf' - _DOMAIN = None _HEADERS = None - _LOGIN_REQUIRED = True - _LOGIN_SUFFIX = '' _MANIFEST_HEADERS = {'Accept-Encoding': 'identity'} _REALM = None - _TOKEN = None - _VALID_URL_TEMPL = r'https?://%s/(?Plive|playlist|video)/(?P\d+)(?:\?.*?\bplaylistId=(?P\d+))?' + _VALID_URL_TEMPL = r'https?://(?P(?:(?:app|www)\.)?%s)/(?Plive|playlist|video)/(?P\d+)(?:\?.*?\bplaylistId=(?P\d+))?' def _real_initialize(self): - if not self._LOGIN_REQUIRED: - return - self._HEADERS = { 'Realm': 'dce.' + self._REALM, 'x-api-key': self._API_KEY, @@ -42,7 +35,7 @@ class ImgGamingBaseIE(InfoExtractor): p_headers = self._HEADERS.copy() p_headers['Content-Type'] = 'application/json' self._HEADERS['Authorization'] = 'Bearer ' + self._download_json( - self._API_BASE + 'login' + self._LOGIN_SUFFIX, + self._API_BASE + 'login', None, 'Logging in', data=json.dumps({ 'id': email, 'secret': password, @@ -52,9 +45,6 @@ class ImgGamingBaseIE(InfoExtractor): return self._download_json( self._API_BASE + path + media_id, media_id, headers=self._HEADERS) - def _extract_media_id(self, url, display_id): - return display_id - def _extract_dve_api_url(self, media_id, media_type): stream_path = 'stream' if media_type == 'video': @@ -72,8 +62,7 @@ class ImgGamingBaseIE(InfoExtractor): raise def _real_extract(self, url): - media_type, display_id, playlist_id = re.match(self._VALID_URL, url).groups() - media_id = self._extract_media_id(url, display_id) + domain, media_type, media_id, playlist_id = re.match(self._VALID_URL, url).groups() if playlist_id: if self._downloader.params.get('noplaylist'): @@ -90,7 +79,7 @@ class ImgGamingBaseIE(InfoExtractor): if not video_id: continue entries.append(self.url_result( - 'https://%s/video/%s' % (self._DOMAIN, video_id), + 'https://%s/video/%s' % (domain, video_id), self.ie_key(), video_id)) return self.playlist_result( entries, media_id, playlist.get('title'), @@ -133,7 +122,6 @@ class ImgGamingBaseIE(InfoExtractor): return { 'id': media_id, - 'display_id': display_id, 'title': title, 'formats': formats, 'thumbnail': video_data.get('thumbnailUrl'), diff --git a/youtube_dl/extractor/ufctv.py b/youtube_dl/extractor/ufctv.py index d07fa1280..665eb1cb7 100644 --- a/youtube_dl/extractor/ufctv.py +++ b/youtube_dl/extractor/ufctv.py @@ -5,14 +5,12 @@ from .imggaming import ImgGamingBaseIE class UFCTVIE(ImgGamingBaseIE): - _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:www\.)?ufc\.tv' + _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:ufc\.tv|(?:ufc)?fightpass\.com)' _NETRC_MACHINE = 'ufctv' - _DOMAIN = 'ufc.tv' _REALM = 'ufc' class UFCArabiaIE(ImgGamingBaseIE): - _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'app\.ufcarabia\.com' + _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'ufcarabia\.(?:ae|com)' _NETRC_MACHINE = 'ufcarabia' - _DOMAIN = 'app.ufcarabia.com' _REALM = 'admufc' From 1d31b7ca048d0adf86946b8ace05e25d3216471e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 6 Dec 2019 15:34:35 +0100 Subject: [PATCH 04/70] [twitch] extract m3u8 formats frame rate(closes #23333) --- youtube_dl/extractor/twitch.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 8c0d70010..1f3df3112 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -327,6 +327,7 @@ class TwitchVodIE(TwitchItemBaseIE): 'allow_audio_only': 'true', 'allow_spectre': 'true', 'player': 'twitchweb', + 'playlist_include_framerate': 'true', 'nauth': access_token['token'], 'nauthsig': access_token['sig'], })), @@ -598,6 +599,7 @@ class TwitchStreamIE(TwitchBaseIE): 'allow_spectre': 'true', 'p': random.randint(1000000, 10000000), 'player': 'twitchweb', + 'playlist_include_framerate': 'true', 'segment_preference': '4', 'sig': access_token['sig'].encode('utf-8'), 'token': access_token['token'].encode('utf-8'), From 6633103f8e607b23530a2c5846aeb4c9c73f4031 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 7 Dec 2019 19:23:19 +0100 Subject: [PATCH 05/70] [ufctv] add support for ufcfightpass.imgdge.com and ufcfightpass.imggaming.com domains(closes #23343) --- youtube_dl/extractor/imggaming.py | 2 +- youtube_dl/extractor/ufctv.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/imggaming.py b/youtube_dl/extractor/imggaming.py index 10d26adab..e11f92053 100644 --- a/youtube_dl/extractor/imggaming.py +++ b/youtube_dl/extractor/imggaming.py @@ -20,7 +20,7 @@ class ImgGamingBaseIE(InfoExtractor): _HEADERS = None _MANIFEST_HEADERS = {'Accept-Encoding': 'identity'} _REALM = None - _VALID_URL_TEMPL = r'https?://(?P(?:(?:app|www)\.)?%s)/(?Plive|playlist|video)/(?P\d+)(?:\?.*?\bplaylistId=(?P\d+))?' + _VALID_URL_TEMPL = r'https?://(?P%s)/(?Plive|playlist|video)/(?P\d+)(?:\?.*?\bplaylistId=(?P\d+))?' def _real_initialize(self): self._HEADERS = { diff --git a/youtube_dl/extractor/ufctv.py b/youtube_dl/extractor/ufctv.py index 665eb1cb7..3d74ba071 100644 --- a/youtube_dl/extractor/ufctv.py +++ b/youtube_dl/extractor/ufctv.py @@ -5,12 +5,12 @@ from .imggaming import ImgGamingBaseIE class UFCTVIE(ImgGamingBaseIE): - _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:ufc\.tv|(?:ufc)?fightpass\.com)' + _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?(?:ufc\.tv|(?:ufc)?fightpass\.com)|ufcfightpass\.img(?:dge|gaming)\.com' _NETRC_MACHINE = 'ufctv' _REALM = 'ufc' class UFCArabiaIE(ImgGamingBaseIE): - _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'ufcarabia\.(?:ae|com)' + _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?ufcarabia\.(?:ae|com)' _NETRC_MACHINE = 'ufcarabia' _REALM = 'admufc' From ce709fcb00a5a35d72e43c588120e40d38b3020d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 7 Dec 2019 20:17:30 +0100 Subject: [PATCH 06/70] [musicplayon] remove extractor(closes #9225) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/musicplayon.py | 66 ----------------------------- 2 files changed, 67 deletions(-) delete mode 100644 youtube_dl/extractor/musicplayon.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 74bf58f38..4e6f2c442 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -657,7 +657,6 @@ from .mtv import ( MTVJapanIE, ) from .muenchentv import MuenchenTVIE -from .musicplayon import MusicPlayOnIE from .mwave import MwaveIE, MwaveMeetGreetIE from .mychannels import MyChannelsIE from .myspace import MySpaceIE, MySpaceAlbumIE diff --git a/youtube_dl/extractor/musicplayon.py b/youtube_dl/extractor/musicplayon.py deleted file mode 100644 index 1854d59a5..000000000 --- a/youtube_dl/extractor/musicplayon.py +++ /dev/null @@ -1,66 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - int_or_none, - js_to_json, - mimetype2ext, -) - - -class MusicPlayOnIE(InfoExtractor): - _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=\d+&play)=(?P\d+)' - - _TESTS = [{ - 'url': 'http://en.musicplayon.com/play?v=433377', - 'md5': '00cdcdea1726abdf500d1e7fd6dd59bb', - 'info_dict': { - 'id': '433377', - 'ext': 'mp4', - 'title': 'Rick Ross - Interview On Chelsea Lately (2014)', - 'description': 'Rick Ross Interview On Chelsea Lately', - 'duration': 342, - 'uploader': 'ultrafish', - }, - }, { - 'url': 'http://en.musicplayon.com/play?pl=102&play=442629', - 'only_matching': True, - }] - - _URL_TEMPLATE = 'http://en.musicplayon.com/play?v=%s' - - def _real_extract(self, url): - video_id = self._match_id(url) - url = self._URL_TEMPLATE % video_id - - page = self._download_webpage(url, video_id) - - title = self._og_search_title(page) - description = self._og_search_description(page) - thumbnail = self._og_search_thumbnail(page) - duration = self._html_search_meta('video:duration', page, 'duration', fatal=False) - view_count = self._og_search_property('count', page, fatal=False) - uploader = self._html_search_regex( - r'', page, 'uploader', fatal=False) - - sources = self._parse_json( - self._search_regex(r'setup\[\'_sources\'\]\s*=\s*([^;]+);', page, 'video sources'), - video_id, transform_source=js_to_json) - formats = [{ - 'url': compat_urlparse.urljoin(url, source['src']), - 'ext': mimetype2ext(source.get('type')), - 'format_note': source.get('data-res'), - } for source in sources] - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'duration': int_or_none(duration), - 'view_count': int_or_none(view_count), - 'formats': formats, - } From 9d4424afaafe96161af59a8a59a0f922bd666fee Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 8 Dec 2019 11:54:16 +0100 Subject: [PATCH 07/70] [videopremium] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/videopremium.py | 46 ---------------------------- 2 files changed, 47 deletions(-) delete mode 100644 youtube_dl/extractor/videopremium.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4e6f2c442..7f4044b4a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1290,7 +1290,6 @@ from .videomore import ( VideomoreVideoIE, VideomoreSeasonIE, ) -from .videopremium import VideoPremiumIE from .videopress import VideoPressIE from .vidio import VidioIE from .vidlii import VidLiiIE diff --git a/youtube_dl/extractor/videopremium.py b/youtube_dl/extractor/videopremium.py deleted file mode 100644 index cf690d7b0..000000000 --- a/youtube_dl/extractor/videopremium.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import unicode_literals - -import re -import random - -from .common import InfoExtractor - - -class VideoPremiumIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?videopremium\.(?:tv|me)/(?P\w+)(?:/.*)?' - _TEST = { - 'url': 'http://videopremium.tv/4w7oadjsf156', - 'info_dict': { - 'id': '4w7oadjsf156', - 'ext': 'f4v', - 'title': 'youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4' - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Test file has been deleted.', - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage_url = 'http://videopremium.tv/' + video_id - webpage = self._download_webpage(webpage_url, video_id) - - if re.match(r'^]*>window\.location\s*=', webpage): - # Download again, we need a cookie - webpage = self._download_webpage( - webpage_url, video_id, - note='Downloading webpage again (with cookie)') - - video_title = self._html_search_regex( - r'\s*(.+?)\s*<', webpage, 'video title') - - return { - 'id': video_id, - 'url': 'rtmp://e%d.md.iplay.md/play' % random.randint(1, 16), - 'play_path': 'mp4:%s.f4v' % video_id, - 'page_url': 'http://videopremium.tv/' + video_id, - 'player_url': 'http://videopremium.tv/uplayer/uppod.swf', - 'ext': 'f4v', - 'title': video_title, - } From d686cab084af88260bd28ad99673e27b36fcb4b2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 8 Dec 2019 12:38:21 +0100 Subject: [PATCH 08/70] [kontrtube] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/kontrtube.py | 73 ------------------------------ 2 files changed, 74 deletions(-) delete mode 100644 youtube_dl/extractor/kontrtube.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7f4044b4a..fd93730fa 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -511,7 +511,6 @@ from .kickstarter import KickStarterIE from .kinja import KinjaEmbedIE from .kinopoisk import KinoPoiskIE from .konserthusetplay import KonserthusetPlayIE -from .kontrtube import KontrTubeIE from .krasview import KrasViewIE from .ku6 import Ku6IE from .kusi import KUSIIE diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py deleted file mode 100644 index 1fda45107..000000000 --- a/youtube_dl/extractor/kontrtube.py +++ /dev/null @@ -1,73 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, -) - - -class KontrTubeIE(InfoExtractor): - IE_NAME = 'kontrtube' - IE_DESC = 'KontrTube.ru - Труба зовёт' - _VALID_URL = r'https?://(?:www\.)?kontrtube\.ru/videos/(?P\d+)/(?P[^/]+)/' - - _TEST = { - 'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/', - 'md5': '975a991a4926c9a85f383a736a2e6b80', - 'info_dict': { - 'id': '2678', - 'display_id': 'nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag', - 'ext': 'mp4', - 'title': 'Над олимпийской деревней в Сочи поднят российский флаг', - 'description': 'md5:80edc4c613d5887ae8ccf1d59432be41', - 'thumbnail': 'http://www.kontrtube.ru/contents/videos_screenshots/2000/2678/preview.mp4.jpg', - 'duration': 270, - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') - - webpage = self._download_webpage( - url, display_id, 'Downloading page') - - video_url = self._search_regex( - r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL') - thumbnail = self._search_regex( - r"preview_url\s*:\s*'(.+?)/?',", webpage, 'thumbnail', fatal=False) - title = self._html_search_regex( - r'(?s)

(.+?)

', webpage, 'title') - description = self._html_search_meta( - 'description', webpage, 'description') - - duration = self._search_regex( - r'Длительность: ([^<]+)', webpage, 'duration', fatal=False) - if duration: - duration = parse_duration(duration.replace('мин', 'min').replace('сек', 'sec')) - - view_count = self._search_regex( - r'Просмотров: ([^<]+)', - webpage, 'view count', fatal=False) - if view_count: - view_count = int_or_none(view_count.replace(' ', '')) - - comment_count = int_or_none(self._search_regex( - r'Комментарии \((\d+)\)<', webpage, ' comment count', fatal=False)) - - return { - 'id': video_id, - 'display_id': display_id, - 'url': video_url, - 'thumbnail': thumbnail, - 'title': title, - 'description': description, - 'duration': duration, - 'view_count': int_or_none(view_count), - 'comment_count': int_or_none(comment_count), - } From 0e6ec3caf6c20bb5b27c063b2b946686e0b5159f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 9 Dec 2019 09:13:02 +0100 Subject: [PATCH 09/70] [vk] improve extraction - fix User Videos extraction(closes #23356) - extract all videos for lists with more than 1000 videos(#23356) - add support for video albums(closes #14327)(closes #14492) --- youtube_dl/extractor/vk.py | 54 +++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index a5e4a3e67..00ec006c4 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import collections +import functools import re from .common import InfoExtractor @@ -11,6 +12,7 @@ from ..utils import ( ExtractorError, get_element_by_class, int_or_none, + OnDemandPagedList, orderedSet, str_or_none, str_to_int, @@ -477,14 +479,23 @@ class VKIE(VKBaseIE): class VKUserVideosIE(VKBaseIE): IE_NAME = 'vk:uservideos' IE_DESC = "VK - User's Videos" - _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)' + _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P
\w+))?|$)' _TEMPLATE_URL = 'https://vk.com/videos' _TESTS = [{ - 'url': 'http://vk.com/videos205387401', + 'url': 'https://vk.com/videos-767561', 'info_dict': { - 'id': '205387401', + 'id': '-767561_all', }, - 'playlist_mincount': 4, + 'playlist_mincount': 1150, + }, { + 'url': 'https://vk.com/videos-767561?section=uploaded', + 'info_dict': { + 'id': '-767561_uploaded', + }, + 'playlist_mincount': 425, + }, { + 'url': 'http://vk.com/videos205387401', + 'only_matching': True, }, { 'url': 'http://vk.com/videos-77521', 'only_matching': True, @@ -498,25 +509,33 @@ class VKUserVideosIE(VKBaseIE): 'url': 'http://new.vk.com/videos205387401', 'only_matching': True, }] - _VIDEO = collections.namedtuple( - 'Video', ['owner_id', 'id', 'thumb', 'title', 'flags', 'duration', 'hash', 'moder_acts', 'owner', 'date', 'views', 'platform', 'blocked', 'music_video_meta']) - - def _real_extract(self, url): - page_id = self._match_id(url) + _PAGE_SIZE = 1000 + _VIDEO = collections.namedtuple('Video', ['owner_id', 'id']) + def _fetch_page(self, page_id, section, page): l = self._download_payload('al_video', page_id, { 'act': 'load_videos_silent', + 'offset': page * self._PAGE_SIZE, 'oid': page_id, - })[0]['']['list'] + 'section': section, + })[0][section]['list'] - entries = [] for video in l: - v = self._VIDEO._make(video) + v = self._VIDEO._make(video[:2]) video_id = '%d_%d' % (v.owner_id, v.id) - entries.append(self.url_result( - 'http://vk.com/video' + video_id, 'VK', video_id=video_id)) + yield self.url_result( + 'http://vk.com/video' + video_id, VKIE.ie_key(), video_id) - return self.playlist_result(entries, page_id) + def _real_extract(self, url): + page_id, section = re.match(self._VALID_URL, url).groups() + if not section: + section = 'all' + + entries = OnDemandPagedList( + functools.partial(self._fetch_page, page_id, section), + self._PAGE_SIZE) + + return self.playlist_result(entries, '%s_%s' % (page_id, section)) class VKWallPostIE(VKBaseIE): @@ -580,8 +599,7 @@ class VKWallPostIE(VKBaseIE): 'only_matching': True, }] _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/=' - _AUDIO = collections.namedtuple( - 'Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads', 'subtitle', 'main_artists', 'feat_artists', 'album', 'track_code', 'restriction', 'album_part', 'new_stats', 'access_key']) + _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads']) def _decode(self, enc): dec = '' @@ -629,7 +647,7 @@ class VKWallPostIE(VKBaseIE): for audio in re.findall(r'data-audio="([^"]+)', webpage): audio = self._parse_json(unescapeHTML(audio), post_id) - a = self._AUDIO._make(audio) + a = self._AUDIO._make(audio[:16]) if not a.url: continue title = unescapeHTML(a.title) From cf80ff186eab6963fcfb108919a25b7ed28813d1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 9 Dec 2019 14:38:12 +0100 Subject: [PATCH 10/70] [soundcloud] add support for token protected embeds(#18954) --- youtube_dl/extractor/soundcloud.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 988dec4fa..c2ee54457 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -28,7 +28,12 @@ from ..utils import ( class SoundcloudEmbedIE(InfoExtractor): - _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?url=(?P.*)' + _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?\burl=(?P.+)' + _TEST = { + # from https://www.soundi.fi/uutiset/ennakkokuuntelussa-timo-kaukolammen-station-to-station-to-station-julkaisua-juhlitaan-tanaan-g-livelabissa/ + 'url': 'https://w.soundcloud.com/player/?visual=true&url=https%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F922213810&show_artwork=true&maxwidth=640&maxheight=960&dnt=1&secret_token=s-ziYey', + 'only_matching': True, + } @staticmethod def _extract_urls(webpage): @@ -37,8 +42,13 @@ class SoundcloudEmbedIE(InfoExtractor): webpage)] def _real_extract(self, url): - return self.url_result(compat_urlparse.parse_qs( - compat_urlparse.urlparse(url).query)['url'][0]) + query = compat_urlparse.parse_qs( + compat_urlparse.urlparse(url).query) + api_url = query['url'][0] + secret_token = query.get('secret_token') + if secret_token: + api_url = update_url_query(api_url, {'secret_token': secret_token[0]}) + return self.url_result(api_url) class SoundcloudIE(InfoExtractor): From 232ed8e6e0ec8b86156e68002e496a8bc89e6346 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 13 Dec 2019 11:00:31 +0100 Subject: [PATCH 11/70] [twitch] fix clip extraction(closes #23375) --- youtube_dl/extractor/twitch.py | 110 +++++++++++++++++++-------------- 1 file changed, 63 insertions(+), 47 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 1f3df3112..a8c2502af 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -17,12 +17,10 @@ from ..compat import ( from ..utils import ( clean_html, ExtractorError, - float_or_none, int_or_none, orderedSet, parse_duration, parse_iso8601, - qualities, try_get, unified_timestamp, update_url_query, @@ -676,63 +674,81 @@ class TwitchClipsIE(TwitchBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - status = self._download_json( - 'https://clips.twitch.tv/api/v2/clips/%s/status' % video_id, - video_id) + clip = self._download_json( + 'https://gql.twitch.tv/gql', video_id, data=json.dumps({ + 'query': '''{ + clip(slug: "%s") { + broadcaster { + displayName + } + createdAt + curator { + displayName + id + } + durationSeconds + id + tiny: thumbnailURL(width: 86, height: 45) + small: thumbnailURL(width: 260, height: 147) + medium: thumbnailURL(width: 480, height: 272) + title + videoQualities { + frameRate + quality + sourceURL + } + viewCount + } +}''' % video_id, + }).encode(), headers={ + 'Client-ID': self._CLIENT_ID, + })['data']['clip'] + + if not clip: + raise ExtractorError( + 'This clip is no longer available', expected=True) formats = [] - - for option in status['quality_options']: + for option in clip.get('videoQualities', []): if not isinstance(option, dict): continue - source = url_or_none(option.get('source')) + source = url_or_none(option.get('sourceURL')) if not source: continue formats.append({ 'url': source, 'format_id': option.get('quality'), 'height': int_or_none(option.get('quality')), - 'fps': int_or_none(option.get('frame_rate')), + 'fps': int_or_none(option.get('frameRate')), }) - self._sort_formats(formats) - info = { + thumbnails = [] + for thumbnail_id in ('tiny', 'small', 'medium'): + thumbnail_url = clip.get(thumbnail_id) + if not thumbnail_url: + continue + thumb = { + 'id': thumbnail_id, + 'url': thumbnail_url, + } + mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url) + if mobj: + thumb.update({ + 'height': int(mobj.group(2)), + 'width': int(mobj.group(1)), + }) + thumbnails.append(thumb) + + return { + 'id': clip.get('id') or video_id, + 'title': clip.get('title') or video_id, 'formats': formats, + 'duration': int_or_none(clip.get('durationSeconds')), + 'views': int_or_none(clip.get('viewCount')), + 'timestamp': unified_timestamp(clip.get('createdAt')), + 'thumbnails': thumbnails, + 'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str), + 'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str), + 'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str), } - - clip = self._call_api( - 'kraken/clips/%s' % video_id, video_id, fatal=False, headers={ - 'Accept': 'application/vnd.twitchtv.v5+json', - }) - - if clip: - quality_key = qualities(('tiny', 'small', 'medium')) - thumbnails = [] - thumbnails_dict = clip.get('thumbnails') - if isinstance(thumbnails_dict, dict): - for thumbnail_id, thumbnail_url in thumbnails_dict.items(): - thumbnails.append({ - 'id': thumbnail_id, - 'url': thumbnail_url, - 'preference': quality_key(thumbnail_id), - }) - - info.update({ - 'id': clip.get('tracking_id') or video_id, - 'title': clip.get('title') or video_id, - 'duration': float_or_none(clip.get('duration')), - 'views': int_or_none(clip.get('views')), - 'timestamp': unified_timestamp(clip.get('created_at')), - 'thumbnails': thumbnails, - 'creator': try_get(clip, lambda x: x['broadcaster']['display_name'], compat_str), - 'uploader': try_get(clip, lambda x: x['curator']['display_name'], compat_str), - 'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str), - }) - else: - info.update({ - 'title': video_id, - 'id': video_id, - }) - - return info From b33a05d2213020fd4a74a1790db728a367f82517 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 14 Dec 2019 19:29:04 +0100 Subject: [PATCH 12/70] [slideslive] fix extraction(closes #23413) --- youtube_dl/extractor/slideslive.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/slideslive.py b/youtube_dl/extractor/slideslive.py index ed84322c5..467af2cb3 100644 --- a/youtube_dl/extractor/slideslive.py +++ b/youtube_dl/extractor/slideslive.py @@ -14,9 +14,9 @@ class SlidesLiveIE(InfoExtractor): 'info_dict': { 'id': 'LMtgR8ba0b0', 'ext': 'mp4', - 'title': '38902413: external video', - 'description': '3890241320170925-9-1yd6ech.mp4', - 'uploader': 'SlidesLive Administrator', + 'title': 'GCC IA16 backend', + 'description': 'Watch full version of this video at https://slideslive.com/38902413.', + 'uploader': 'SlidesLive Videos - A', 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', 'upload_date': '20170925', } @@ -29,11 +29,18 @@ class SlidesLiveIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) video_data = self._download_json( - url, video_id, headers={'Accept': 'application/json'}) + 'https://ben.slideslive.com/player/' + video_id, video_id) service_name = video_data['video_service_name'].lower() if service_name == 'youtube': yt_video_id = video_data['video_service_id'] - return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id) + return { + '_type': 'url_transparent', + 'ie_key': 'Youtube', + 'id': yt_video_id, + 'thumbnail': video_data.get('thumbnail'), + 'title': video_data.get('title'), + 'url': yt_video_id, + } else: raise ExtractorError( 'Unsupported service name: {0}'.format(service_name), expected=True) From 73d8f3a63426e8517143e3a5554e12d614c5cdec Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 14 Dec 2019 21:35:31 +0100 Subject: [PATCH 13/70] [slideslive] add support for url and vimeo service names(closes #23414) --- youtube_dl/extractor/slideslive.py | 41 ++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/slideslive.py b/youtube_dl/extractor/slideslive.py index 467af2cb3..d9ea76831 100644 --- a/youtube_dl/extractor/slideslive.py +++ b/youtube_dl/extractor/slideslive.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import smuggle_url class SlidesLiveIE(InfoExtractor): @@ -24,6 +24,14 @@ class SlidesLiveIE(InfoExtractor): # video_service_name = youtube 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend', 'only_matching': True, + }, { + # video_service_name = url + 'url': 'https://slideslive.com/38922070/learning-transferable-skills-1', + 'only_matching': True, + }, { + # video_service_name = vimeo + 'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3', + 'only_matching': True, }] def _real_extract(self, url): @@ -31,16 +39,23 @@ class SlidesLiveIE(InfoExtractor): video_data = self._download_json( 'https://ben.slideslive.com/player/' + video_id, video_id) service_name = video_data['video_service_name'].lower() - if service_name == 'youtube': - yt_video_id = video_data['video_service_id'] - return { - '_type': 'url_transparent', - 'ie_key': 'Youtube', - 'id': yt_video_id, - 'thumbnail': video_data.get('thumbnail'), - 'title': video_data.get('title'), - 'url': yt_video_id, - } + assert service_name in ('url', 'vimeo', 'youtube') + service_id = video_data['video_service_id'] + info = { + 'id': video_id, + 'thumbnail': video_data.get('thumbnail'), + 'url': service_id, + } + if service_name == 'url': + info['title'] = video_data['title'] else: - raise ExtractorError( - 'Unsupported service name: {0}'.format(service_name), expected=True) + info.update({ + '_type': 'url_transparent', + 'ie_key': service_name.capitalize(), + 'title': video_data.get('title'), + }) + if service_name == 'vimeo': + info['url'] = smuggle_url( + 'https://player.vimeo.com/video/' + service_id, + {'http_headers': {'Referer': url}}) + return info From 42db58ec7367e7ee6555e5f14107712add61d013 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Dec 2019 23:15:24 +0700 Subject: [PATCH 14/70] [utils] Improve str_to_int --- test/test_utils.py | 5 +++++ youtube_dl/utils.py | 8 +++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index fed94a906..0896f4150 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -500,6 +500,11 @@ class TestUtil(unittest.TestCase): self.assertEqual(str_to_int('123,456'), 123456) self.assertEqual(str_to_int('123.456'), 123456) self.assertEqual(str_to_int(523), 523) + # Python 3 has no long + if sys.version_info < (3, 0): + eval('self.assertEqual(str_to_int(123456L), 123456)') + self.assertEqual(str_to_int('noninteger'), None) + self.assertEqual(str_to_int([]), None) def test_url_basename(self): self.assertEqual(url_basename('http://foo.de/'), '') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 328f037a8..f6204692a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -46,6 +46,7 @@ from .compat import ( compat_html_entities, compat_html_entities_html5, compat_http_client, + compat_integer_types, compat_kwargs, compat_os_name, compat_parse_qs, @@ -3519,10 +3520,11 @@ def str_or_none(v, default=None): def str_to_int(int_str): """ A more relaxed version of int_or_none """ - if not isinstance(int_str, compat_str): + if isinstance(int_str, compat_integer_types): return int_str - int_str = re.sub(r'[,\.\+]', '', int_str) - return int(int_str) + elif isinstance(int_str, compat_str): + int_str = re.sub(r'[,\.\+]', '', int_str) + return int_or_none(int_str) def float_or_none(v, scale=1, invscale=1, default=None): From fab01080f402dbfad00122b73714d92b5d1deb24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Dec 2019 00:08:18 +0700 Subject: [PATCH 15/70] [tv2dk:bornholm:play] Add extractor (closes #23291) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/tv2dk.py | 74 +++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index fd93730fa..376d07727 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1168,7 +1168,10 @@ from .tv2 import ( TV2ArticleIE, KatsomoIE, ) -from .tv2dk import TV2DKIE +from .tv2dk import ( + TV2DKIE, + TV2DKBornholmPlayIE, +) from .tv2hu import TV2HuIE from .tv4 import TV4IE from .tv5mondeplus import TV5MondePlusIE diff --git a/youtube_dl/extractor/tv2dk.py b/youtube_dl/extractor/tv2dk.py index eb39424df..611fdc0c6 100644 --- a/youtube_dl/extractor/tv2dk.py +++ b/youtube_dl/extractor/tv2dk.py @@ -1,10 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals +import json import re from .common import InfoExtractor -from ..utils import extract_attributes +from ..utils import ( + determine_ext, + extract_attributes, + js_to_json, + url_or_none, +) class TV2DKIE(InfoExtractor): @@ -80,3 +86,69 @@ class TV2DKIE(InfoExtractor): 'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura', video_id=kaltura_id)) return self.playlist_result(entries) + + +class TV2DKBornholmPlayIE(InfoExtractor): + _VALID_URL = r'https?://play\.tv2bornholm\.dk/\?.*?\bid=(?P\d+)' + _TEST = { + 'url': 'http://play.tv2bornholm.dk/?area=specifikTV&id=781021', + 'info_dict': { + 'id': '781021', + 'ext': 'mp4', + 'title': '12Nyheder-27.11.19', + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_json( + 'http://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id, + data=json.dumps({ + 'playlist_id': video_id, + 'serienavn': '', + }).encode(), headers={ + 'X-Requested-With': 'XMLHttpRequest', + 'Content-Type': 'application/json; charset=UTF-8', + })['d'] + + # TODO: generalize flowplayer + title = self._search_regex( + r'title\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', video, 'title', + group='value') + sources = self._parse_json(self._search_regex( + r'(?s)sources:\s*(\[.+?\]),', video, 'sources'), + video_id, js_to_json) + + formats = [] + srcs = set() + for source in sources: + src = url_or_none(source.get('src')) + if not src: + continue + if src in srcs: + continue + srcs.add(src) + ext = determine_ext(src) + src_type = source.get('type') + if src_type == 'application/x-mpegurl' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif src_type == 'application/dash+xml' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + src, video_id, mpd_id='dash', fatal=False)) + else: + formats.append({ + 'url': src, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + } From 2dbc0967f26425acc204395bc69c9446d9ebd682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Dec 2019 00:40:34 +0700 Subject: [PATCH 16/70] [ChangeLog] Actualize [ci skip] --- ChangeLog | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index d4f809fc6..d2f17ee06 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,55 @@ +version + +Core +* [utils] Improve str_to_int ++ [downloader/hls] Add ability to override AES decryption key URL (#17521) + +Extractors ++ [tv2dk:bornholm:play] Add support for play.tv2bornholm.dk (#23291) ++ [slideslive] Add support for url and vimeo service names (#23414) +* [slideslive] Fix extraction (#23413) +* [twitch:clips] Fix extraction (#23375) ++ [soundcloud] Add support for token protected embeds (#18954) +* [vk] Improve extraction + * Fix User Videos extraction (#23356) + * Extract all videos for lists with more than 1000 videos (#23356) + + Add support for video albums (#14327, #14492) +- [kontrtube] Remove extractor +- [videopremium] Remove extractor +- [musicplayon] Remove extractor (#9225) ++ [ufctv] Add support for ufcfightpass.imgdge.com and + ufcfightpass.imggaming.com (#23343) ++ [twitch] Extract m3u8 formats frame rate (#23333) ++ [imggaming] Add support for playlists and extract subtitles ++ [ufcarabia] Add support for UFC Arabia (#23312) +* [ufctv] Fix extraction +* [yahoo] Fix gyao brightcove player id (#23303) +* [vzaar] Override AES decryption key URL (#17521) ++ [vzaar] Add support for AES HLS manifests (#17521, #23299) +* [nrl] Fix extraction +* [teachingchannel] Fix extraction +* [nintendo] Fix extraction and partially add support for Nintendo Direct + videos (#4592) ++ [ooyala] Add better fallback values for domain and streams variables ++ [youtube] Add support youtubekids.com (#23272) +* [tv2] Detect DRM protection ++ [tv2] Add support for katsomo.fi and mtv.fi (#10543) +* [tv2] Fix tv2.no article extraction +* [msn] Improve extraction + + Add support for YouTube and NBCSports embeds + + Add support for articles with multiple videos + * Improve AOL embed support + * Improve format extraction +* [abcotvs] Relax URL regular expression and improve metadata extraction + (#18014) +* [channel9] Reduce response size +* [adobetv] Improve extaction + * Use OnDemandPagedList for list extractors + * Reduce show extraction requests + * Extract original video format and subtitles + + Add support for adobe tv embeds + + version 2019.11.28 Core @@ -583,7 +635,7 @@ Extractors version 2019.04.17 Extractors -* [openload] Randomize User-Agent (closes #20688) +* [openload] Randomize User-Agent (#20688) + [openload] Add support for oladblock domains (#20471) * [adn] Fix subtitle extraction (#12724) + [aol] Add support for localized websites @@ -1148,7 +1200,7 @@ Extractors + [youtube] Extract channel meta fields (#9676, #12939) * [porntube] Fix extraction (#17541) * [asiancrush] Fix extraction (#15630) -+ [twitch:clips] Extend URL regular expression (closes #17559) ++ [twitch:clips] Extend URL regular expression (#17559) + [vzaar] Add support for HLS * [tube8] Fix metadata extraction (#17520) * [eporner] Extract JSON-LD (#17519) From 9b6e72fd06e8669cc1a7200f3318eec51d06b4a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 24 Dec 2019 23:51:08 +0700 Subject: [PATCH 17/70] [mediaset] Fix parse formats (closes #23508) --- youtube_dl/extractor/mediaset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py index f976506f4..027a790b8 100644 --- a/youtube_dl/extractor/mediaset.py +++ b/youtube_dl/extractor/mediaset.py @@ -123,7 +123,7 @@ class MediasetIE(ThePlatformBaseIE): def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): for video in smil.findall(self._xpath_ns('.//video', namespace)): video.attrib['src'] = re.sub(r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?.mpd)\?.+', r'\1\2', video.attrib['src']) - return super()._parse_smil_formats(smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url) + return super(MediasetIE, self)._parse_smil_formats(smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url) def _real_extract(self, url): guid = self._match_id(url) From b1a92520a345178f82ff2ccd694700d4491c8b41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Dec 2019 00:52:11 +0700 Subject: [PATCH 18/70] [ChangeLog] Actualize [ci skip] --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index d2f17ee06..b90e78c63 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,6 +5,7 @@ Core + [downloader/hls] Add ability to override AES decryption key URL (#17521) Extractors +* [mediaset] Fix parse formats (#23508) + [tv2dk:bornholm:play] Add support for play.tv2bornholm.dk (#23291) + [slideslive] Add support for url and vimeo service names (#23414) * [slideslive] Fix extraction (#23413) From 80e43af5bff8a7e91ff39b9e3f39e7251340692f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Dec 2019 01:16:49 +0700 Subject: [PATCH 19/70] release 2019.12.25 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 15 ++++++++------- youtube_dl/version.py | 2 +- 8 files changed, 22 insertions(+), 21 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 3a94bd621..e6b82fda3 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.11.28** +- [ ] I've verified that I'm running youtube-dl version **2019.12.25** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.11.28 + [debug] youtube-dl version 2019.12.25 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 72bee12aa..9096af717 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.11.28** +- [ ] I've verified that I'm running youtube-dl version **2019.12.25** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index ddf67e951..5c235df0a 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.11.28** +- [ ] I've verified that I'm running youtube-dl version **2019.12.25** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 7122e2714..fe6ab9aa0 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.11.28** +- [ ] I've verified that I'm running youtube-dl version **2019.12.25** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.11.28 + [debug] youtube-dl version 2019.12.25 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index a93882b39..76b028de4 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.11.28** +- [ ] I've verified that I'm running youtube-dl version **2019.12.25** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index b90e78c63..18080575b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.12.25 Core * [utils] Improve str_to_int diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2744dfca8..04956c546 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -28,10 +28,11 @@ - **acast:channel** - **ADN**: Anime Digital Network - **AdobeConnect** - - **AdobeTV** - - **AdobeTVChannel** - - **AdobeTVShow** - - **AdobeTVVideo** + - **adobetv** + - **adobetv:channel** + - **adobetv:embed** + - **adobetv:show** + - **adobetv:video** - **AdultSwim** - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault - **afreecatv**: afreecatv.com @@ -396,6 +397,7 @@ - **Kankan** - **Karaoketv** - **KarriereVideos** + - **Katsomo** - **KeezMovies** - **Ketnet** - **KhanAcademy** @@ -403,7 +405,6 @@ - **KinjaEmbed** - **KinoPoisk** - **KonserthusetPlay** - - **kontrtube**: KontrTube.ru - Труба зовёт - **KrasView**: Красвью - **Ku6** - **KUSI** @@ -513,7 +514,6 @@ - **mtvjapan** - **mtvservices:embedded** - **MuenchenTV**: münchen.tv - - **MusicPlayOn** - **mva**: Microsoft Virtual Academy videos - **mva:course**: Microsoft Virtual Academy courses - **Mwave** @@ -913,6 +913,7 @@ - **tv2.hu** - **TV2Article** - **TV2DK** + - **TV2DKBornholmPlay** - **TV4**: tv4.se and tv4play.se - **TV5MondePlus**: TV5MONDE+ - **TVA** @@ -954,6 +955,7 @@ - **udemy** - **udemy:course** - **UDNEmbed**: 聯合影音 + - **UFCArabia** - **UFCTV** - **UKTVPlay** - **umg:de**: Universal Music Deutschland @@ -993,7 +995,6 @@ - **videomore** - **videomore:season** - **videomore:video** - - **VideoPremium** - **VideoPress** - **Vidio** - **VidLii** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 1227abc0a..606dbe1fb 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.11.28' +__version__ = '2019.12.25' From 278be57be26b842712e2755f422b83b70a2e135b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Dec 2019 04:28:34 +0700 Subject: [PATCH 20/70] [mailru] Relax _VALID_URLs (#23509) --- youtube_dl/extractor/mailru.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py index 6b0e64b7f..50234798b 100644 --- a/youtube_dl/extractor/mailru.py +++ b/youtube_dl/extractor/mailru.py @@ -20,10 +20,10 @@ class MailRuIE(InfoExtractor): IE_DESC = 'Видео@Mail.Ru' _VALID_URL = r'''(?x) https?:// - (?:(?:www|m)\.)?my\.mail\.ru/ + (?:(?:www|m)\.)?my\.mail\.ru/+ (?: video/.*\#video=/?(?P(?:[^/]+/){3}\d+)| - (?:(?P(?:[^/]+/){2})video/(?P[^/]+/\d+))\.html| + (?:(?P(?:[^/]+/+){2})video/(?P[^/]+/\d+))\.html| (?:video/embed|\+/video/meta)/(?P\d+) ) ''' @@ -85,6 +85,14 @@ class MailRuIE(InfoExtractor): { 'url': 'http://my.mail.ru/+/video/meta/7949340477499637815', 'only_matching': True, + }, + { + 'url': 'https://my.mail.ru//list/sinyutin10/video/_myvideo/4.html', + 'only_matching': True, + }, + { + 'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html', + 'only_matching': True, } ] @@ -237,7 +245,7 @@ class MailRuMusicSearchBaseIE(InfoExtractor): class MailRuMusicIE(MailRuMusicSearchBaseIE): IE_NAME = 'mailru:music' IE_DESC = 'Музыка@Mail.Ru' - _VALID_URL = r'https?://my\.mail\.ru/music/songs/[^/?#&]+-(?P[\da-f]+)' + _VALID_URL = r'https?://my\.mail\.ru/+music/+songs/+[^/?#&]+-(?P[\da-f]+)' _TESTS = [{ 'url': 'https://my.mail.ru/music/songs/%D0%BC8%D0%BB8%D1%82%D1%85-l-a-h-luciferian-aesthetics-of-herrschaft-single-2017-4e31f7125d0dfaef505d947642366893', 'md5': '0f8c22ef8c5d665b13ac709e63025610', @@ -273,7 +281,7 @@ class MailRuMusicIE(MailRuMusicSearchBaseIE): class MailRuMusicSearchIE(MailRuMusicSearchBaseIE): IE_NAME = 'mailru:music:search' IE_DESC = 'Музыка@Mail.Ru' - _VALID_URL = r'https?://my\.mail\.ru/music/search/(?P[^/?#&]+)' + _VALID_URL = r'https?://my\.mail\.ru/+music/+search/+(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://my.mail.ru/music/search/black%20shadow', 'info_dict': { From d1b27220959921c8c96bfd92c946edb52c78c39b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Dec 2019 22:39:50 +0700 Subject: [PATCH 21/70] [soundcloud] Update client id (closes #23516) --- youtube_dl/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index c2ee54457..2128e5957 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -255,7 +255,7 @@ class SoundcloudIE(InfoExtractor): _API_BASE = 'https://api.soundcloud.com/' _API_V2_BASE = 'https://api-v2.soundcloud.com/' _BASE_URL = 'https://soundcloud.com/' - _CLIENT_ID = 'UW9ajvMgVdMMW3cdeBi8lPfN6dvOVGji' + _CLIENT_ID = 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk' _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg' _ARTWORK_MAP = { From 18ff573e503ce96585f853600765e49e90696c1e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 25 Dec 2019 20:01:25 +0100 Subject: [PATCH 22/70] [mitele] fix extraction(closes #21354)(closes #23456) --- youtube_dl/extractor/mitele.py | 99 +++++++++++++--------------------- 1 file changed, 36 insertions(+), 63 deletions(-) diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 40f214a87..ad9da9612 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -4,8 +4,8 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( int_or_none, + parse_iso8601, smuggle_url, - parse_duration, ) @@ -18,16 +18,18 @@ class MiTeleIE(InfoExtractor): 'info_dict': { 'id': 'FhYW1iNTE6J6H7NkQRIEzfne6t2quqPg', 'ext': 'mp4', - 'title': 'Tor, la web invisible', - 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', + 'title': 'Diario de La redacción Programa 144', + 'description': 'md5:07c35a7b11abb05876a6a79185b58d27', 'series': 'Diario de', - 'season': 'La redacción', + 'season': 'Season 14', 'season_number': 14, - 'season_id': 'diario_de_t14_11981', - 'episode': 'Programa 144', + 'episode': 'Tor, la web invisible', 'episode_number': 3, 'thumbnail': r're:(?i)^https?://.*\.jpg$', 'duration': 2913, + 'age_limit': 16, + 'timestamp': 1471209401, + 'upload_date': '20160814', }, 'add_ie': ['Ooyala'], }, { @@ -39,13 +41,15 @@ class MiTeleIE(InfoExtractor): 'title': 'Cuarto Milenio Temporada 6 Programa 226', 'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f', 'series': 'Cuarto Milenio', - 'season': 'Temporada 6', + 'season': 'Season 6', 'season_number': 6, - 'season_id': 'cuarto_milenio_t06_12715', - 'episode': 'Programa 226', + 'episode': 'Episode 24', 'episode_number': 24, 'thumbnail': r're:(?i)^https?://.*\.jpg$', 'duration': 7313, + 'age_limit': 12, + 'timestamp': 1471209021, + 'upload_date': '20160814', }, 'params': { 'skip_download': True, @@ -54,67 +58,36 @@ class MiTeleIE(InfoExtractor): }, { 'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player', 'only_matching': True, + }, { + 'url': 'https://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144-40_1006364575251/player/', + 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) - - paths = self._download_json( - 'https://www.mitele.es/amd/agp/web/metadata/general_configuration', - video_id, 'Downloading paths JSON') - - ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search'] - base_url = ooyala_s.get('base_url', 'cdn-search-mediaset.carbyne.ps.ooyala.com') - full_path = ooyala_s.get('full_path', '/search/v1/full/providers/') - source = self._download_json( - '%s://%s%s%s/docs/%s' % ( - ooyala_s.get('protocol', 'https'), base_url, full_path, - ooyala_s.get('provider_id', '104951'), video_id), - video_id, 'Downloading data JSON', query={ - 'include_titles': 'Series,Season', - 'product_name': ooyala_s.get('product_name', 'test'), - 'format': 'full', - })['hits']['hits'][0]['_source'] - - embedCode = source['offers'][0]['embed_codes'][0] - titles = source['localizable_titles'][0] - - title = titles.get('title_medium') or titles['title_long'] - - description = titles.get('summary_long') or titles.get('summary_medium') - - def get(key1, key2): - value1 = source.get(key1) - if not value1 or not isinstance(value1, list): - return - if not isinstance(value1[0], dict): - return - return value1[0].get(key2) - - series = get('localizable_titles_series', 'title_medium') - - season = get('localizable_titles_season', 'title_medium') - season_number = int_or_none(source.get('season_number')) - season_id = source.get('season_id') - - episode = titles.get('title_sort_name') - episode_number = int_or_none(source.get('episode_number')) - - duration = parse_duration(get('videos', 'duration')) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + pre_player = self._parse_json(self._search_regex( + r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})', + webpage, 'Pre Player'), display_id)['prePlayer'] + title = pre_player['title'] + video = pre_player['video'] + video_id = video['dataMediaId'] + content = pre_player.get('content') or {} + info = content.get('info') or {} return { '_type': 'url_transparent', # for some reason only HLS is supported - 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8,dash'}), + 'url': smuggle_url('ooyala:' + video_id, {'supportedformats': 'm3u8,dash'}), 'id': video_id, 'title': title, - 'description': description, - 'series': series, - 'season': season, - 'season_number': season_number, - 'season_id': season_id, - 'episode': episode, - 'episode_number': episode_number, - 'duration': duration, - 'thumbnail': get('images', 'url'), + 'description': info.get('synopsis'), + 'series': content.get('title'), + 'season_number': int_or_none(info.get('season_number')), + 'episode': content.get('subtitle'), + 'episode_number': int_or_none(info.get('episode_number')), + 'duration': int_or_none(info.get('duration')), + 'thumbnail': video.get('dataPoster'), + 'age_limit': int_or_none(info.get('rating')), + 'timestamp': parse_iso8601(pre_player.get('publishedTime')), } From 7ea55819ac9fdd6f06f527cf3302ab550bcf3219 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 26 Dec 2019 15:25:04 +0100 Subject: [PATCH 23/70] [scrippsnetworks] Add new extractor(closes #19857)(closes #22981) --- youtube_dl/extractor/scrippsnetworks.py | 44 +++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py index 8b3275735..afab9591d 100644 --- a/youtube_dl/extractor/scrippsnetworks.py +++ b/youtube_dl/extractor/scrippsnetworks.py @@ -7,6 +7,7 @@ import re from .aws import AWSIE from .anvato import AnvatoIE +from .common import InfoExtractor from ..utils import ( smuggle_url, urlencode_postdata, @@ -102,3 +103,46 @@ class ScrippsNetworksWatchIE(AWSIE): 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id, {'geo_countries': ['US']}), AnvatoIE.ie_key(), video_id=mcp_id) + + +class ScrippsNetworksIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?Pcookingchanneltv|(?:diy|food)network|hgtv|travelchannel)\.com/videos/[0-9a-z-]+-(?P\d+)' + _TESTS = [{ + 'url': 'https://www.cookingchanneltv.com/videos/the-best-of-the-best-0260338', + 'info_dict': { + 'id': '0260338', + 'ext': 'mp4', + 'title': 'The Best of the Best', + 'description': 'Catch a new episode of MasterChef Canada Tuedsay at 9/8c.', + 'timestamp': 1475678834, + 'upload_date': '20161005', + 'uploader': 'SCNI-SCND', + }, + 'add_ie': ['ThePlatform'], + }, { + 'url': 'https://www.diynetwork.com/videos/diy-barnwood-tablet-stand-0265790', + 'only_matching': True, + }, { + 'url': 'https://www.foodnetwork.com/videos/chocolate-strawberry-cake-roll-7524591', + 'only_matching': True, + }, { + 'url': 'https://www.hgtv.com/videos/cookie-decorating-101-0301929', + 'only_matching': True, + }, { + 'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184', + 'only_matching': True, + }] + _ACCOUNT_MAP = { + 'cookingchanneltv': 2433005105, + 'diynetwork': 2433004575, + 'foodnetwork': 2433005105, + 'hgtv': 2433004575, + 'travelchannel': 2433005739, + } + _TP_TEMPL = 'https://link.theplatform.com/s/ip77QC/media/guid/%d/%s?mbr=true' + + def _real_extract(self, url): + site, guid = re.match(self._VALID_URL, url).groups() + return self.url_result(smuggle_url( + self._TP_TEMPL % (self._ACCOUNT_MAP[site], guid), + {'force_smil_url': True}), 'ThePlatform', guid) From f8a12427a9ccdb8506be64c2b56eee7f8872ac3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Dec 2019 00:18:37 +0700 Subject: [PATCH 24/70] [teachable] Improve locked lessons detection (#23528) --- youtube_dl/extractor/teachable.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/teachable.py b/youtube_dl/extractor/teachable.py index 7d2e34b3b..b82414c3d 100644 --- a/youtube_dl/extractor/teachable.py +++ b/youtube_dl/extractor/teachable.py @@ -165,7 +165,10 @@ class TeachableIE(TeachableBaseIE): if any(re.search(p, webpage) for p in ( r'class=["\']lecture-contents-locked', r'>\s*Lecture contents locked', - r'id=["\']lecture-locked')): + r'id=["\']lecture-locked', + # https://academy.tailoredtutors.co.uk/courses/108779/lectures/1955313 + r'class=["\'](?:inner-)?lesson-locked', + r'>LESSON LOCKED<')): self.raise_login_required('Lecture contents locked') title = self._og_search_title(webpage, default=None) From 941e359e9512c1c75d42cb5b4b248816e16edb82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Dec 2019 00:26:12 +0700 Subject: [PATCH 25/70] [teachable] Fail with error message if no video URL found --- youtube_dl/extractor/teachable.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/teachable.py b/youtube_dl/extractor/teachable.py index b82414c3d..6b7f13b43 100644 --- a/youtube_dl/extractor/teachable.py +++ b/youtube_dl/extractor/teachable.py @@ -170,6 +170,7 @@ class TeachableIE(TeachableBaseIE): r'class=["\'](?:inner-)?lesson-locked', r'>LESSON LOCKED<')): self.raise_login_required('Lecture contents locked') + raise ExtractorError('Unable to find video URL') title = self._og_search_title(webpage, default=None) From cb7e053e0a6542b2db145c16291361e1f2d5ba2c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 29 Dec 2019 19:25:21 +0100 Subject: [PATCH 26/70] [extractors] add missing import for ScrippsNetworksIE --- youtube_dl/extractor/extractors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 376d07727..7b05f5410 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -963,7 +963,10 @@ from .savefrom import SaveFromIE from .sbs import SBSIE from .screencast import ScreencastIE from .screencastomatic import ScreencastOMaticIE -from .scrippsnetworks import ScrippsNetworksWatchIE +from .scrippsnetworks import ( + ScrippsNetworksWatchIE, + ScrippsNetworksIE, +) from .scte import ( SCTEIE, SCTECourseIE, From 75ef77c1b18e943933a635ba28a47ec4c9671504 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 29 Dec 2019 19:30:50 +0100 Subject: [PATCH 27/70] [brightcove] cache brightcove player policy keys --- youtube_dl/extractor/brightcove.py | 36 ++++++++++++++++-------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 8e2f7217a..9553f82d6 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -586,24 +586,26 @@ class BrightcoveNewIE(AdobePassIE): account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage( - 'http://players.brightcove.net/%s/%s_%s/index.min.js' - % (account_id, player_id, embed), video_id) - - policy_key = None - - catalog = self._search_regex( - r'catalog\(({.+?})\);', webpage, 'catalog', default=None) - if catalog: - catalog = self._parse_json( - js_to_json(catalog), video_id, fatal=False) - if catalog: - policy_key = catalog.get('policyKey') - + policy_key_id = '%s_%s' % (account_id, player_id) + policy_key = self._downloader.cache.load('brightcove', policy_key_id) if not policy_key: - policy_key = self._search_regex( - r'policyKey\s*:\s*(["\'])(?P.+?)\1', - webpage, 'policy key', group='pk') + webpage = self._download_webpage( + 'http://players.brightcove.net/%s/%s_%s/index.min.js' + % (account_id, player_id, embed), video_id) + + catalog = self._search_regex( + r'catalog\(({.+?})\);', webpage, 'catalog', default=None) + if catalog: + catalog = self._parse_json( + js_to_json(catalog), video_id, fatal=False) + if catalog: + policy_key = catalog.get('policyKey') + + if not policy_key: + policy_key = self._search_regex( + r'policyKey\s*:\s*(["\'])(?P.+?)\1', + webpage, 'policy key', group='pk') + self._downloader.cache.store('brightcove', policy_key_id, policy_key) api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) headers = { From 0c15a56f1c7afa77347b5e3b1ae9811662291f25 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 30 Dec 2019 22:31:11 +0100 Subject: [PATCH 28/70] [prosiebensat1] improve geo restriction handling(closes #23571) --- youtube_dl/extractor/prosiebensat1.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index e19a470a5..1bc4f9b6b 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -16,7 +16,7 @@ from ..utils import ( class ProSiebenSat1BaseIE(InfoExtractor): - _GEO_COUNTRIES = ['DE'] + _GEO_BYPASS = False _ACCESS_ID = None _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear' _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get' @@ -39,14 +39,18 @@ class ProSiebenSat1BaseIE(InfoExtractor): formats = [] if self._ACCESS_ID: raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID - server_token = (self._download_json( + protocols = self._download_json( self._V4_BASE_URL + 'protocols', clip_id, 'Downloading protocols JSON', headers=self.geo_verification_headers(), query={ 'access_id': self._ACCESS_ID, 'client_token': sha1((raw_ct).encode()).hexdigest(), 'video_id': clip_id, - }, fatal=False) or {}).get('server_token') + }, fatal=False, expected_status=(403,)) or {} + error = protocols.get('error') or {} + if error.get('title') == 'Geo check failed': + self.raise_geo_restricted(countries=['AT', 'CH', 'DE']) + server_token = protocols.get('server_token') if server_token: urls = (self._download_json( self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={ From 3bed621750b7fe25afc04a0131664bbbc610c563 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 31 Dec 2019 09:49:29 +0100 Subject: [PATCH 29/70] [soundcloud] automatically update client id on failing requests --- youtube_dl/extractor/soundcloud.py | 45 +++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 2128e5957..b3ffef8df 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -9,6 +9,8 @@ from .common import ( SearchInfoExtractor ) from ..compat import ( + compat_HTTPError, + compat_kwargs, compat_str, compat_urlparse, ) @@ -255,7 +257,6 @@ class SoundcloudIE(InfoExtractor): _API_BASE = 'https://api.soundcloud.com/' _API_V2_BASE = 'https://api-v2.soundcloud.com/' _BASE_URL = 'https://soundcloud.com/' - _CLIENT_ID = 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk' _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg' _ARTWORK_MAP = { @@ -271,9 +272,39 @@ class SoundcloudIE(InfoExtractor): 'original': 0, } + def _update_client_id(self): + webpage = self._download_webpage('https://soundcloud.com/', None) + for src in reversed(re.findall(r']+src="([^"]+)"', webpage)): + script = self._download_webpage(src, None, fatal=False) + if script: + client_id = self._search_regex( + r'client_id\s*:\s*"([0-9a-zA-Z]{32})"', + script, 'client id', default=None) + if client_id: + self._CLIENT_ID = client_id + self._downloader.cache.store('soundcloud', 'client_id', client_id) + return + raise ExtractorError('Unable to extract client id') + + def _download_json(self, *args, **kwargs): + query = kwargs.get('query', {}).copy() + for _ in range(2): + query['client_id'] = self._CLIENT_ID + kwargs['query'] = query + try: + return super(SoundcloudIE, self)._download_json(*args, **compat_kwargs(kwargs)) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + self._update_client_id() + continue + raise + + def _real_initialize(self): + self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk' + @classmethod def _resolv_url(cls, url): - return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url + '&client_id=' + cls._CLIENT_ID + return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url def _extract_info_dict(self, info, full_title=None, secret_token=None, version=2): track_id = compat_str(info['id']) @@ -451,9 +482,7 @@ class SoundcloudIE(InfoExtractor): track_id = mobj.group('track_id') - query = { - 'client_id': self._CLIENT_ID, - } + query = {} if track_id: info_json_url = self._API_V2_BASE + 'tracks/' + track_id full_title = track_id @@ -536,7 +565,6 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): def _extract_playlist(self, base_url, playlist_id, playlist_title): COMMON_QUERY = { 'limit': 2000000000, - 'client_id': self._CLIENT_ID, 'linked_partitioning': '1', } @@ -722,9 +750,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): mobj = re.match(self._VALID_URL, url) playlist_id = mobj.group('id') - query = { - 'client_id': self._CLIENT_ID, - } + query = {} token = mobj.group('token') if token: query['secret_token'] = token @@ -761,7 +787,6 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): self._MAX_RESULTS_PER_PAGE) query.update({ 'limit': limit, - 'client_id': self._CLIENT_ID, 'linked_partitioning': 1, 'offset': 0, }) From 2b845c408653683f8266665f03b145ecaad76f4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 31 Dec 2019 22:16:39 +0700 Subject: [PATCH 30/70] [spankbang] Fix extraction (closes #23307, closes #23423, closes #23444) --- youtube_dl/extractor/spankbang.py | 36 +++++++++++++++++-------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index e040ada29..d02ce6b57 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -4,6 +4,7 @@ import re from .common import InfoExtractor from ..utils import ( + determine_ext, ExtractorError, merge_dicts, orderedSet, @@ -75,11 +76,20 @@ class SpankBangIE(InfoExtractor): if not f_url: return f = parse_resolution(format_id) - f.update({ - 'url': f_url, - 'format_id': format_id, - }) - formats.append(f) + ext = determine_ext(f_url) + if format_id.startswith('m3u8') or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + f_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif format_id.startswith('mpd') or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + f_url, video_id, mpd_id='dash', fatal=False)) + elif ext == 'mp4' or f.get('width') or f.get('height'): + f.update({ + 'url': f_url, + 'format_id': format_id, + }) + formats.append(f) STREAM_URL_PREFIX = 'stream_url_' @@ -93,28 +103,22 @@ class SpankBangIE(InfoExtractor): r'data-streamkey\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, 'stream key', group='value') - sb_csrf_session = self._get_cookies( - 'https://spankbang.com')['sb_csrf_session'].value - stream = self._download_json( 'https://spankbang.com/api/videos/stream', video_id, 'Downloading stream JSON', data=urlencode_postdata({ 'id': stream_key, 'data': 0, - 'sb_csrf_session': sb_csrf_session, }), headers={ 'Referer': url, - 'X-CSRFToken': sb_csrf_session, + 'X-Requested-With': 'XMLHttpRequest', }) for format_id, format_url in stream.items(): - if format_id.startswith(STREAM_URL_PREFIX): - if format_url and isinstance(format_url, list): - format_url = format_url[0] - extract_format( - format_id[len(STREAM_URL_PREFIX):], format_url) + if format_url and isinstance(format_url, list): + format_url = format_url[0] + extract_format(format_id, format_url) - self._sort_formats(formats) + self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id')) info = self._search_json_ld(webpage, video_id, default={}) From 0a02732b566c080434dc88e68f75a5e3c0239c6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 31 Dec 2019 22:18:01 +0700 Subject: [PATCH 31/70] [spankbang] Improve removed video detection (#23423) --- youtube_dl/extractor/spankbang.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index d02ce6b57..61ca902ce 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -65,7 +65,7 @@ class SpankBangIE(InfoExtractor): url.replace('/%s/embed' % video_id, '/%s/video' % video_id), video_id, headers={'Cookie': 'country=US'}) - if re.search(r'<[^>]+\bid=["\']video_removed', webpage): + if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage): raise ExtractorError( 'Video %s is not available' % video_id, expected=True) From 060680874654e77cfd03d150a834b58213379c8c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 31 Dec 2019 16:42:56 +0100 Subject: [PATCH 32/70] [brightcove] update policy key on failing requests --- youtube_dl/extractor/brightcove.py | 42 ++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 9553f82d6..5e0c4bc3e 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -588,11 +588,15 @@ class BrightcoveNewIE(AdobePassIE): policy_key_id = '%s_%s' % (account_id, player_id) policy_key = self._downloader.cache.load('brightcove', policy_key_id) - if not policy_key: + policy_key_extracted = False + + def extract_policy_key(): webpage = self._download_webpage( 'http://players.brightcove.net/%s/%s_%s/index.min.js' % (account_id, player_id, embed), video_id) + policy_key = None + catalog = self._search_regex( r'catalog\(({.+?})\);', webpage, 'catalog', default=None) if catalog: @@ -605,28 +609,38 @@ class BrightcoveNewIE(AdobePassIE): policy_key = self._search_regex( r'policyKey\s*:\s*(["\'])(?P.+?)\1', webpage, 'policy key', group='pk') + self._downloader.cache.store('brightcove', policy_key_id, policy_key) + return policy_key api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) - headers = { - 'Accept': 'application/json;pk=%s' % policy_key, - } + headers = {} referrer = smuggled_data.get('referrer') if referrer: headers.update({ 'Referer': referrer, 'Origin': re.search(r'https?://[^/]+', referrer).group(0), }) - try: - json_data = self._download_json(api_url, video_id, headers=headers) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - json_data = self._parse_json(e.cause.read().decode(), video_id)[0] - message = json_data.get('message') or json_data['error_code'] - if json_data.get('error_subcode') == 'CLIENT_GEO': - self.raise_geo_restricted(msg=message) - raise ExtractorError(message, expected=True) - raise + + for _ in range(2): + if not policy_key: + policy_key = extract_policy_key() + policy_key_extracted = True + headers['Accept'] = 'application/json;pk=%s' % policy_key + try: + json_data = self._download_json(api_url, video_id, headers=headers) + break + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): + json_data = self._parse_json(e.cause.read().decode(), video_id)[0] + message = json_data.get('message') or json_data['error_code'] + if json_data.get('error_subcode') == 'CLIENT_GEO': + self.raise_geo_restricted(msg=message) + elif json_data.get('error_code') == 'INVALID_POLICY_KEY' and not policy_key_extracted: + policy_key = None + continue + raise ExtractorError(message, expected=True) + raise errors = json_data.get('errors') if errors and errors[0].get('error_subcode') == 'TVE_AUTH': From f41347260c2c2cf723bc2bb8a5c11f67a22175d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 31 Dec 2019 23:29:06 +0700 Subject: [PATCH 33/70] [pornhub] Fix extraction and add support for m3u8 formats (closes #22749, closes #23082) --- youtube_dl/extractor/pornhub.py | 47 ++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index ba0ad7da2..75ed69cde 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -227,12 +227,13 @@ class PornHubIE(PornHubBaseIE): else: thumbnail, duration = [None] * 2 - if not video_urls: - tv_webpage = dl_webpage('tv') - + def extract_js_vars(webpage, pattern, fatal=True): assignments = self._search_regex( - r'(var.+?mediastring.+?)', tv_webpage, - 'encoded url').split(';') + pattern, webpage, 'encoded url', fatal=fatal) + if not assignments: + return {} + + assignments = assignments.split(';') js_vars = {} @@ -254,11 +255,31 @@ class PornHubIE(PornHubBaseIE): assn = re.sub(r'var\s+', '', assn) vname, value = assn.split('=', 1) js_vars[vname] = parse_js_value(value) + return js_vars - video_url = js_vars['mediastring'] - if video_url not in video_urls_set: - video_urls.append((video_url, None)) - video_urls_set.add(video_url) + def add_video_url(video_url): + v_url = url_or_none(video_url) + if not v_url: + return + if v_url in video_urls_set: + return + video_urls.append((v_url, None)) + video_urls_set.add(v_url) + + if not video_urls: + FORMAT_PREFIXES = ('media', 'quality') + js_vars = extract_js_vars( + webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES), + fatal=False) + if js_vars: + for key, format_url in js_vars.items(): + if any(key.startswith(p) for p in FORMAT_PREFIXES): + add_video_url(format_url) + + if not video_urls: + js_vars = extract_js_vars( + dl_webpage('tv'), r'(var.+?mediastring.+?)') + add_video_url(js_vars['mediastring']) for mobj in re.finditer( r']+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P(?:(?!\1).)+)\1', @@ -276,10 +297,16 @@ class PornHubIE(PornHubBaseIE): r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None) if upload_date: upload_date = upload_date.replace('/', '') - if determine_ext(video_url) == 'mpd': + ext = determine_ext(video_url) + if ext == 'mpd': formats.extend(self._extract_mpd_formats( video_url, video_id, mpd_id='dash', fatal=False)) continue + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + continue tbr = None mobj = re.search(r'(?P\d+)[pP]?_(?P\d+)[kK]', video_url) if mobj: From 0164cd5dacf76b0fd295e82412fda60e7c60df61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 31 Dec 2019 23:43:43 +0700 Subject: [PATCH 34/70] [pornhub] Improve locked videos detection (closes #22449, closes #22780) --- youtube_dl/extractor/pornhub.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 75ed69cde..b3251ccd9 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -17,6 +17,7 @@ from ..utils import ( determine_ext, ExtractorError, int_or_none, + NO_DEFAULT, orderedSet, remove_quotes, str_to_int, @@ -227,9 +228,9 @@ class PornHubIE(PornHubBaseIE): else: thumbnail, duration = [None] * 2 - def extract_js_vars(webpage, pattern, fatal=True): + def extract_js_vars(webpage, pattern, default=NO_DEFAULT): assignments = self._search_regex( - pattern, webpage, 'encoded url', fatal=fatal) + pattern, webpage, 'encoded url', default=default) if not assignments: return {} @@ -270,11 +271,15 @@ class PornHubIE(PornHubBaseIE): FORMAT_PREFIXES = ('media', 'quality') js_vars = extract_js_vars( webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES), - fatal=False) + default=None) if js_vars: for key, format_url in js_vars.items(): if any(key.startswith(p) for p in FORMAT_PREFIXES): add_video_url(format_url) + if not video_urls and re.search( + r'<[^>]+\bid=["\']lockedPlayer', webpage): + raise ExtractorError( + 'Video %s is locked' % video_id, expected=True) if not video_urls: js_vars = extract_js_vars( From 2d30b92e116d097f5d9d794ad97f71ef6aadf8a9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 31 Dec 2019 19:48:40 +0100 Subject: [PATCH 35/70] [brightcove] invalidate policy key cache on failing requests --- youtube_dl/extractor/brightcove.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 5e0c4bc3e..85001b3ad 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -589,6 +589,7 @@ class BrightcoveNewIE(AdobePassIE): policy_key_id = '%s_%s' % (account_id, player_id) policy_key = self._downloader.cache.load('brightcove', policy_key_id) policy_key_extracted = False + store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x) def extract_policy_key(): webpage = self._download_webpage( @@ -610,7 +611,7 @@ class BrightcoveNewIE(AdobePassIE): r'policyKey\s*:\s*(["\'])(?P.+?)\1', webpage, 'policy key', group='pk') - self._downloader.cache.store('brightcove', policy_key_id, policy_key) + store_pk(policy_key) return policy_key api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) @@ -638,6 +639,7 @@ class BrightcoveNewIE(AdobePassIE): self.raise_geo_restricted(msg=message) elif json_data.get('error_code') == 'INVALID_POLICY_KEY' and not policy_key_extracted: policy_key = None + store_pk(None) continue raise ExtractorError(message, expected=True) raise From de7aade2f872d6de2dbd0d82624e51c24968e057 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 31 Dec 2019 21:31:22 +0100 Subject: [PATCH 36/70] [soundcloud] fix client id extraction for non fatal requests --- youtube_dl/extractor/soundcloud.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index b3ffef8df..62e9d8643 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -15,6 +15,7 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + error_to_compat_str, ExtractorError, float_or_none, HEADRequest, @@ -272,6 +273,9 @@ class SoundcloudIE(InfoExtractor): 'original': 0, } + def _store_client_id(self, client_id): + self._downloader.cache.store('soundcloud', 'client_id', client_id) + def _update_client_id(self): webpage = self._download_webpage('https://soundcloud.com/', None) for src in reversed(re.findall(r']+src="([^"]+)"', webpage)): @@ -282,11 +286,14 @@ class SoundcloudIE(InfoExtractor): script, 'client id', default=None) if client_id: self._CLIENT_ID = client_id - self._downloader.cache.store('soundcloud', 'client_id', client_id) + self._store_client_id(client_id) return raise ExtractorError('Unable to extract client id') def _download_json(self, *args, **kwargs): + non_fatal = kwargs.get('fatal') is False + if non_fatal: + del kwargs['fatal'] query = kwargs.get('query', {}).copy() for _ in range(2): query['client_id'] = self._CLIENT_ID @@ -295,8 +302,12 @@ class SoundcloudIE(InfoExtractor): return super(SoundcloudIE, self)._download_json(*args, **compat_kwargs(kwargs)) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + self._store_client_id(None) self._update_client_id() continue + elif non_fatal: + self._downloader.report_warning(error_to_compat_str(e)) + return False raise def _real_initialize(self): From d6bf9cbd46c1eb65f7f79e5e1fde78ec665369e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 1 Jan 2020 04:13:32 +0700 Subject: [PATCH 37/70] [ChangeLog] Actualize [ci skip] --- ChangeLog | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/ChangeLog b/ChangeLog index 18080575b..c0b536be7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,24 @@ +version + +Extractors +* [brightcove] Invalidate policy key cache on failing requests +* [pornhub] Improve locked videos detection (#22449, #22780) ++ [pornhub] Add support for m3u8 formats +* [pornhub] Fix extraction (#22749, #23082) +* [brightcove] Update policy key on failing requests +* [spankbang] Improve removed video detection (#23423) +* [spankbang] Fix extraction (#23307, #23423, #23444) +* [soundcloud] Automatically update client id on failing requests +* [prosiebensat1] Improve geo restriction handling (#23571) +* [brightcove] Cache brightcove player policy keys +* [teachable] Fail with error message if no video URL found +* [teachable] Improve locked lessons detection (#23528) ++ [scrippsnetworks] Add support for Scripps Networks sites (#19857, #22981) +* [mitele] Fix extraction (#21354, #23456) +* [soundcloud] Update client id (#23516) +* [mailru] Relax URL regular expressions (#23509) + + version 2019.12.25 Core From 0d5c415e1f4be8364bf842ac7548f09b472d72d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 1 Jan 2020 05:20:48 +0700 Subject: [PATCH 38/70] [devscripts/create-github-release] Switch to using PAT for authentication Basic authentication will be deprecated soon --- devscripts/create-github-release.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py index 428111b3f..6464ef322 100644 --- a/devscripts/create-github-release.py +++ b/devscripts/create-github-release.py @@ -15,7 +15,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.compat import ( compat_basestring, - compat_input, compat_getpass, compat_print, compat_urllib_request, @@ -40,28 +39,20 @@ class GitHubReleaser(object): try: info = netrc.netrc().authenticators(self._NETRC_MACHINE) if info is not None: - self._username = info[0] - self._password = info[2] + self._token = info[2] compat_print('Using GitHub credentials found in .netrc...') return else: compat_print('No GitHub credentials found in .netrc') except (IOError, netrc.NetrcParseError): compat_print('Unable to parse .netrc') - self._username = compat_input( - 'Type your GitHub username or email address and press [Return]: ') - self._password = compat_getpass( - 'Type your GitHub password and press [Return]: ') + self._token = compat_getpass( + 'Type your GitHub PAT (personal access token) and press [Return]: ') def _call(self, req): if isinstance(req, compat_basestring): req = sanitized_Request(req) - # Authorizing manually since GitHub does not response with 401 with - # WWW-Authenticate header set (see - # https://developer.github.com/v3/#basic-authentication) - b64 = base64.b64encode( - ('%s:%s' % (self._username, self._password)).encode('utf-8')).decode('ascii') - req.add_header('Authorization', 'Basic %s' % b64) + req.add_header('Authorization', 'token %s' % self._token) response = self._opener.open(req).read().decode('utf-8') return json.loads(response) From ca069f68816c5da790c5745713b38c70df6abf65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 1 Jan 2020 05:24:58 +0700 Subject: [PATCH 39/70] release 2020.01.01 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 8 files changed, 15 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index e6b82fda3..97b8afcf9 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.12.25** +- [ ] I've verified that I'm running youtube-dl version **2020.01.01** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.12.25 + [debug] youtube-dl version 2020.01.01 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 9096af717..de6c44a65 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.12.25** +- [ ] I've verified that I'm running youtube-dl version **2020.01.01** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 5c235df0a..a9dd5ca52 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.12.25** +- [ ] I've verified that I'm running youtube-dl version **2020.01.01** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index fe6ab9aa0..8347903ea 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.12.25** +- [ ] I've verified that I'm running youtube-dl version **2020.01.01** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.12.25 + [debug] youtube-dl version 2020.01.01 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 76b028de4..92228513c 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.12.25** +- [ ] I've verified that I'm running youtube-dl version **2020.01.01** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index c0b536be7..c33169cd8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2020.01.01 Extractors * [brightcove] Invalidate policy key cache on failing requests diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 04956c546..e471aa79a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -761,6 +761,7 @@ - **screen.yahoo:search**: Yahoo screen search - **Screencast** - **ScreencastOMatic** + - **ScrippsNetworks** - **scrippsnetworks:watch** - **SCTE** - **SCTECourse** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 606dbe1fb..8ad2df674 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.12.25' +__version__ = '2020.01.01' From 484637a9ccede2967a709d2026d29d7b61560e43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 2 Jan 2020 22:45:42 +0700 Subject: [PATCH 40/70] [redtube] Detect private videos (#23518) --- youtube_dl/extractor/redtube.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 5c84028ef..b1bde1e81 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -43,8 +43,15 @@ class RedTubeIE(InfoExtractor): webpage = self._download_webpage( 'http://www.redtube.com/%s' % video_id, video_id) - if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']): - raise ExtractorError('Video %s has been removed' % video_id, expected=True) + ERRORS = ( + (('video-deleted-info', '>This video has been removed'), 'has been removed'), + (('private_video_text', '>This video is private', '>Send a friend request to its owner to be able to view it'), 'is private'), + ) + + for patterns, message in ERRORS: + if any(p in webpage for p in patterns): + raise ExtractorError( + 'Video %s %s' % (video_id, message), expected=True) info = self._search_json_ld(webpage, video_id, default={}) From 44b434e4e3c4e64b25363bec1a3ededb7f667d72 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 5 Jan 2020 16:32:43 +0100 Subject: [PATCH 41/70] [vice] improve extraction(closes #23631) --- youtube_dl/extractor/vice.py | 212 +++++++++++++++++------------------ 1 file changed, 106 insertions(+), 106 deletions(-) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 8fdfd743d..e37499512 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -1,35 +1,50 @@ # coding: utf-8 from __future__ import unicode_literals -import re -import time +import functools import hashlib import json import random +import re +import time from .adobepass import AdobePassIE -from .youtube import YoutubeIE from .common import InfoExtractor +from .youtube import YoutubeIE from ..compat import ( compat_HTTPError, compat_str, ) from ..utils import ( + clean_html, ExtractorError, int_or_none, + OnDemandPagedList, parse_age_limit, str_or_none, try_get, ) -class ViceIE(AdobePassIE): +class ViceBaseIE(InfoExtractor): + def _call_api(self, resource, resource_key, resource_id, locale, fields, args=''): + return self._download_json( + 'https://video.vice.com/api/v1/graphql', resource_id, query={ + 'query': '''{ + %s(locale: "%s", %s: "%s"%s) { + %s + } +}''' % (resource, locale, resource_key, resource_id, args, fields), + })['data'][resource] + + +class ViceIE(ViceBaseIE, AdobePassIE): IE_NAME = 'vice' - _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?viceland)\.com/(?P[^/]+)/(?:video/[^/]+|embed)/(?P[\da-f]+)' + _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P[^/]+)/(?:video/[^/]+|embed)/(?P[\da-f]{24})' _TESTS = [{ 'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7', 'info_dict': { - 'id': '5e647f0125e145c9aef2069412c0cbde', + 'id': '58c69e38a55424f1227dc3f7', 'ext': 'mp4', 'title': '10 Questions You Always Wanted To Ask: Pet Cremator', 'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5', @@ -43,17 +58,16 @@ class ViceIE(AdobePassIE): # m3u8 download 'skip_download': True, }, - 'add_ie': ['UplynkPreplay'], }, { # geo restricted to US 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56', 'info_dict': { - 'id': '930c0ad1f47141cc955087eecaddb0e2', + 'id': '5816510690b70e6c5fd39a56', 'ext': 'mp4', - 'uploader': 'waypoint', + 'uploader': 'vice', 'title': 'The Signal From Tölva', 'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5', - 'uploader_id': '57f7d621e05ca860fa9ccaf9', + 'uploader_id': '57a204088cb727dec794c67b', 'timestamp': 1477941983, 'upload_date': '20161031', }, @@ -61,15 +75,14 @@ class ViceIE(AdobePassIE): # m3u8 download 'skip_download': True, }, - 'add_ie': ['UplynkPreplay'], }, { 'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f', 'info_dict': { 'id': '581b12b60a0e1f4c0fb6ea2f', 'ext': 'mp4', 'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1', - 'description': '

Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.

', - 'uploader': 'VICE', + 'description': 'Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.', + 'uploader': 'vice', 'uploader_id': '57a204088cb727dec794c67b', 'timestamp': 1485368119, 'upload_date': '20170125', @@ -78,9 +91,7 @@ class ViceIE(AdobePassIE): 'params': { # AES-encrypted m3u8 'skip_download': True, - 'proxy': '127.0.0.1:8118', }, - 'add_ie': ['UplynkPreplay'], }, { 'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4', 'only_matching': True, @@ -98,7 +109,7 @@ class ViceIE(AdobePassIE): @staticmethod def _extract_urls(webpage): return re.findall( - r']+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]+)', + r']+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]{24})', webpage) @staticmethod @@ -109,31 +120,16 @@ class ViceIE(AdobePassIE): def _real_extract(self, url): locale, video_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage( - 'https://video.vice.com/%s/embed/%s' % (locale, video_id), - video_id) - - video = self._parse_json( - self._search_regex( - r'PREFETCH_DATA\s*=\s*({.+?})\s*;\s*\n', webpage, - 'app state'), video_id)['video'] - video_id = video.get('vms_id') or video.get('id') or video_id - title = video['title'] - is_locked = video.get('locked') + video = self._call_api('videos', 'id', video_id, locale, '''body + locked + rating + thumbnail_url + title''')[0] + title = video['title'].strip() rating = video.get('rating') - thumbnail = video.get('thumbnail_url') - duration = int_or_none(video.get('duration')) - series = try_get( - video, lambda x: x['episode']['season']['show']['title'], - compat_str) - episode_number = try_get( - video, lambda x: x['episode']['episode_number']) - season_number = try_get( - video, lambda x: x['episode']['season']['season_number']) - uploader = None query = {} - if is_locked: + if video.get('locked'): resource = self._get_mvpd_resource( 'VICELAND', title, video_id, rating) query['tvetoken'] = self._extract_mvpd_auth( @@ -148,12 +144,9 @@ class ViceIE(AdobePassIE): query.update({ 'exp': exp, 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(), - '_ad_blocked': None, - '_ad_unit': '', - '_debug': '', + 'skipadstitching': 1, 'platform': 'desktop', 'rn': random.randint(10000, 100000), - 'fbprebidtoken': '', }) try: @@ -169,85 +162,94 @@ class ViceIE(AdobePassIE): raise video_data = preplay['video'] - base = video_data['base'] - uplynk_preplay_url = preplay['preplayURL'] - episode = video_data.get('episode', {}) - channel = video_data.get('channel', {}) + formats = self._extract_m3u8_formats( + preplay['playURL'], video_id, 'mp4', 'm3u8_native') + self._sort_formats(formats) + episode = video_data.get('episode') or {} + channel = video_data.get('channel') or {} + season = video_data.get('season') or {} subtitles = {} - cc_url = preplay.get('ccURL') - if cc_url: - subtitles['en'] = [{ + for subtitle in preplay.get('subtitleURLs', []): + cc_url = subtitle.get('url') + if not cc_url: + continue + language_code = try_get(subtitle, lambda x: x['languages'][0]['language_code'], compat_str) or 'en' + subtitles.setdefault(language_code, []).append({ 'url': cc_url, - }] + }) return { - '_type': 'url_transparent', - 'url': uplynk_preplay_url, + 'formats': formats, 'id': video_id, 'title': title, - 'description': base.get('body') or base.get('display_body'), - 'thumbnail': thumbnail, - 'duration': int_or_none(video_data.get('video_duration')) or duration, + 'description': clean_html(video.get('body')), + 'thumbnail': video.get('thumbnail_url'), + 'duration': int_or_none(video_data.get('video_duration')), 'timestamp': int_or_none(video_data.get('created_at'), 1000), - 'age_limit': parse_age_limit(video_data.get('video_rating')), - 'series': video_data.get('show_title') or series, - 'episode_number': int_or_none(episode.get('episode_number') or episode_number), + 'age_limit': parse_age_limit(video_data.get('video_rating') or rating), + 'series': try_get(video_data, lambda x: x['show']['base']['display_title'], compat_str), + 'episode_number': int_or_none(episode.get('episode_number')), 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')), - 'season_number': int_or_none(season_number), - 'season_id': str_or_none(episode.get('season_id')), - 'uploader': channel.get('base', {}).get('title') or channel.get('name') or uploader, + 'season_number': int_or_none(season.get('season_number')), + 'season_id': str_or_none(season.get('id') or video_data.get('season_id')), + 'uploader': channel.get('name'), 'uploader_id': str_or_none(channel.get('id')), 'subtitles': subtitles, - 'ie_key': 'UplynkPreplay', } -class ViceShowIE(InfoExtractor): +class ViceShowIE(ViceBaseIE): IE_NAME = 'vice:show' - _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P[^/?#&]+)' - - _TEST = { - 'url': 'https://munchies.vice.com/en/show/fuck-thats-delicious-2', + _VALID_URL = r'https?://(?:video\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P[^/]+)/show/(?P[^/?#&]+)' + _PAGE_SIZE = 25 + _TESTS = [{ + 'url': 'https://video.vice.com/en_us/show/fck-thats-delicious', 'info_dict': { - 'id': 'fuck-thats-delicious-2', - 'title': "Fuck, That's Delicious", - 'description': 'Follow the culinary adventures of rapper Action Bronson during his ongoing world tour.', + 'id': '57a2040c8cb727dec794c901', + 'title': 'F*ck, That’s Delicious', + 'description': 'The life and eating habits of rap’s greatest bon vivant, Action Bronson.', }, - 'playlist_count': 17, - } + 'playlist_mincount': 64, + }, { + 'url': 'https://www.vicetv.com/en_us/show/fck-thats-delicious', + 'only_matching': True, + }] + + def _fetch_page(self, locale, show_id, page): + videos = self._call_api('videos', 'show_id', show_id, locale, '''body + id + url''', ', page: %d, per_page: %d' % (page + 1, self._PAGE_SIZE)) + for video in videos: + yield self.url_result( + video['url'], ViceIE.ie_key(), video.get('id')) def _real_extract(self, url): - show_id = self._match_id(url) - webpage = self._download_webpage(url, show_id) + locale, display_id = re.match(self._VALID_URL, url).groups() + show = self._call_api('shows', 'slug', display_id, locale, '''dek + id + title''')[0] + show_id = show['id'] - entries = [ - self.url_result(video_url, ViceIE.ie_key()) - for video_url, _ in re.findall( - r']+class="article-title"[^>]+data-id="\d+"[^>]*>\s*]+href="(%s.*?)"' - % ViceIE._VALID_URL, webpage)] + entries = OnDemandPagedList( + functools.partial(self._fetch_page, locale, show_id), + self._PAGE_SIZE) - title = self._search_regex( - r'(.+?)', webpage, 'title', default=None) - if title: - title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip() - description = self._html_search_meta( - 'description', webpage, 'description') - - return self.playlist_result(entries, show_id, title, description) + return self.playlist_result( + entries, show_id, show.get('title'), show.get('dek')) -class ViceArticleIE(InfoExtractor): +class ViceArticleIE(ViceBaseIE): IE_NAME = 'vice:article' - _VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P[^?#]+)' + _VALID_URL = r'https://(?:www\.)?vice\.com/(?P[^/]+)/article/(?:[0-9a-z]{6}/)?(?P[^?#]+)' _TESTS = [{ 'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah', 'info_dict': { - 'id': '41eae2a47b174a1398357cec55f1f6fc', + 'id': '58dc0a3dee202d2a0ccfcbd8', 'ext': 'mp4', - 'title': 'Mormon War on Porn ', - 'description': 'md5:6394a8398506581d0346b9ab89093fef', + 'title': 'Mormon War on Porn', + 'description': 'md5:1c5d91fe25fa8aa304f9def118b92dbf', 'uploader': 'vice', 'uploader_id': '57a204088cb727dec794c67b', 'timestamp': 1491883129, @@ -258,10 +260,10 @@ class ViceArticleIE(InfoExtractor): # AES-encrypted m3u8 'skip_download': True, }, - 'add_ie': ['UplynkPreplay'], + 'add_ie': [ViceIE.ie_key()], }, { 'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car', - 'md5': '7fe8ebc4fa3323efafc127b82bd821d9', + 'md5': '13010ee0bc694ea87ec40724397c2349', 'info_dict': { 'id': '3jstaBeXgAs', 'ext': 'mp4', @@ -271,15 +273,15 @@ class ViceArticleIE(InfoExtractor): 'uploader_id': 'MotherboardTV', 'upload_date': '20140529', }, - 'add_ie': ['Youtube'], + 'add_ie': [YoutubeIE.ie_key()], }, { 'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded', 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2', 'info_dict': { - 'id': 'e2ed435eb67e43efb66e6ef9a6930a88', + 'id': '57f41d3556a0a80f54726060', 'ext': 'mp4', 'title': "Making The World's First Male Sex Doll", - 'description': 'md5:916078ef0e032d76343116208b6cc2c4', + 'description': 'md5:19b00b215b99961cf869c40fbe9df755', 'uploader': 'vice', 'uploader_id': '57a204088cb727dec794c67b', 'timestamp': 1476919911, @@ -288,6 +290,7 @@ class ViceArticleIE(InfoExtractor): }, 'params': { 'skip_download': True, + 'format': 'bestvideo', }, 'add_ie': [ViceIE.ie_key()], }, { @@ -299,14 +302,11 @@ class ViceArticleIE(InfoExtractor): }] def _real_extract(self, url): - display_id = self._match_id(url) + locale, display_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, display_id) - - prefetch_data = self._parse_json(self._search_regex( - r'__APP_STATE\s*=\s*({.+?})(?:\s*\|\|\s*{}\s*)?;\s*\n', - webpage, 'app state'), display_id)['pageData'] - body = prefetch_data['body'] + article = self._call_api('articles', 'slug', display_id, locale, '''body + embed_code''')[0] + body = article['body'] def _url_res(video_url, ie_key): return { @@ -316,7 +316,7 @@ class ViceArticleIE(InfoExtractor): 'ie_key': ie_key, } - vice_url = ViceIE._extract_url(webpage) + vice_url = ViceIE._extract_url(body) if vice_url: return _url_res(vice_url, ViceIE.ie_key()) @@ -332,6 +332,6 @@ class ViceArticleIE(InfoExtractor): video_url = self._html_search_regex( r'data-video-url="([^"]+)"', - prefetch_data['embed_code'], 'video URL') + article['embed_code'], 'video URL') return _url_res(video_url, ViceIE.ie_key()) From 259ad381730c1b3479c604820bf8333f06f65c64 Mon Sep 17 00:00:00 2001 From: nmeum Date: Sun, 5 Jan 2020 19:26:22 +0100 Subject: [PATCH 42/70] [devscripts/create-github-release] Remove unused import --- devscripts/create-github-release.py | 1 - 1 file changed, 1 deletion(-) diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py index 6464ef322..2ddfa1096 100644 --- a/devscripts/create-github-release.py +++ b/devscripts/create-github-release.py @@ -1,7 +1,6 @@ #!/usr/bin/env python from __future__ import unicode_literals -import base64 import io import json import mimetypes From 233826f68f75ec8ee93c5762bf0cd6fceffab0bb Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 5 Jan 2020 21:08:50 +0100 Subject: [PATCH 43/70] [wistia] improve format extraction and extract subtitles(closes #22590) --- youtube_dl/extractor/wistia.py | 68 +++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 0fbc888ec..085514d47 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -13,8 +13,7 @@ from ..utils import ( class WistiaIE(InfoExtractor): _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P[a-z0-9]{10})' - _API_URL = 'http://fast.wistia.com/embed/medias/%s.json' - _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s' + _EMBED_BASE_URL = 'http://fast.wistia.com/embed/' _TESTS = [{ 'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt', @@ -67,10 +66,10 @@ class WistiaIE(InfoExtractor): video_id = self._match_id(url) data_json = self._download_json( - self._API_URL % video_id, video_id, + self._EMBED_BASE_URL + 'medias/%s.json' % video_id, video_id, # Some videos require this. headers={ - 'Referer': url if url.startswith('http') else self._IFRAME_URL % video_id, + 'Referer': url if url.startswith('http') else self._EMBED_BASE_URL + 'iframe/' + video_id, }) if data_json.get('error'): @@ -95,27 +94,61 @@ class WistiaIE(InfoExtractor): 'url': aurl, 'width': int_or_none(a.get('width')), 'height': int_or_none(a.get('height')), + 'filesize': int_or_none(a.get('size')), }) else: aext = a.get('ext') - is_m3u8 = a.get('container') == 'm3u8' or aext == 'm3u8' - formats.append({ - 'format_id': atype, + display_name = a.get('display_name') + format_id = atype + if atype and atype.endswith('_video') and display_name: + format_id = '%s-%s' % (atype[:-6], display_name) + f = { + 'format_id': format_id, 'url': aurl, - 'tbr': int_or_none(a.get('bitrate')), - 'vbr': int_or_none(a.get('opt_vbitrate')), - 'width': int_or_none(a.get('width')), - 'height': int_or_none(a.get('height')), - 'filesize': int_or_none(a.get('size')), - 'vcodec': a.get('codec'), - 'container': a.get('container'), - 'ext': 'mp4' if is_m3u8 else aext, - 'protocol': 'm3u8' if is_m3u8 else None, + 'tbr': int_or_none(a.get('bitrate')) or None, 'preference': 1 if atype == 'original' else None, - }) + } + if display_name == 'Audio': + f.update({ + 'vcodec': 'none', + }) + else: + f.update({ + 'width': int_or_none(a.get('width')), + 'height': int_or_none(a.get('height')), + 'vcodec': a.get('codec'), + }) + if a.get('container') == 'm3u8' or aext == 'm3u8': + ts_f = f.copy() + ts_f.update({ + 'ext': 'ts', + 'format_id': f['format_id'].replace('hls-', 'ts-'), + 'url': f['url'].replace('.bin', '.ts'), + }) + formats.append(ts_f) + f.update({ + 'ext': 'mp4', + 'protocol': 'm3u8_native', + }) + else: + f.update({ + 'container': a.get('container'), + 'ext': aext, + 'filesize': int_or_none(a.get('size')), + }) + formats.append(f) self._sort_formats(formats) + subtitles = {} + for caption in data.get('captions', []): + language = caption.get('language') + if not language: + continue + subtitles[language] = [{ + 'url': self._EMBED_BASE_URL + 'captions/' + video_id + '.vtt?language=' + language, + }] + return { 'id': video_id, 'title': title, @@ -124,4 +157,5 @@ class WistiaIE(InfoExtractor): 'thumbnails': thumbnails, 'duration': float_or_none(data.get('duration')), 'timestamp': int_or_none(data.get('createdAt')), + 'subtitles': subtitles, } From 0d2306d02beb4c1e50c3f279c109ab25f94ae421 Mon Sep 17 00:00:00 2001 From: Roxedus Date: Mon, 6 Jan 2020 00:34:36 +0100 Subject: [PATCH 44/70] [nrktv:seriebase] Fix extraction (closes #23625) (#23537) --- youtube_dl/extractor/nrk.py | 48 +++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 60933f069..94115534b 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -12,6 +12,7 @@ from ..utils import ( ExtractorError, int_or_none, JSON_LD_RE, + js_to_json, NO_DEFAULT, parse_age_limit, parse_duration, @@ -105,6 +106,7 @@ class NRKBaseIE(InfoExtractor): MESSAGES = { 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet', 'ProgramRightsHasExpired': 'Programmet har gått ut', + 'NoProgramRights': 'Ikke tilgjengelig', 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', } message_type = data.get('messageType', '') @@ -255,6 +257,17 @@ class NRKTVIE(NRKBaseIE): ''' % _EPISODE_RE _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no') _TESTS = [{ + 'url': 'https://tv.nrk.no/program/MDDP12000117', + 'md5': '8270824df46ec629b66aeaa5796b36fb', + 'info_dict': { + 'id': 'MDDP12000117AA', + 'ext': 'mp4', + 'title': 'Alarm Trolltunga', + 'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce', + 'duration': 2223, + 'age_limit': 6, + }, + }, { 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', 'md5': '9a167e54d04671eb6317a37b7bc8a280', 'info_dict': { @@ -266,6 +279,7 @@ class NRKTVIE(NRKBaseIE): 'series': '20 spørsmål', 'episode': '23.05.2014', }, + 'skip': 'NoProgramRights', }, { 'url': 'https://tv.nrk.no/program/mdfp15000514', 'info_dict': { @@ -370,7 +384,24 @@ class NRKTVIE(NRKBaseIE): class NRKTVEpisodeIE(InfoExtractor): _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P[^/]+/sesong/\d+/episode/\d+)' - _TEST = { + _TESTS = [{ + 'url': 'https://tv.nrk.no/serie/hellums-kro/sesong/1/episode/2', + 'info_dict': { + 'id': 'MUHH36005220BA', + 'ext': 'mp4', + 'title': 'Kro, krig og kjærlighet 2:6', + 'description': 'md5:b32a7dc0b1ed27c8064f58b97bda4350', + 'duration': 1563, + 'series': 'Hellums kro', + 'season_number': 1, + 'episode_number': 2, + 'episode': '2:6', + 'age_limit': 6, + }, + 'params': { + 'skip_download': True, + }, + }, { 'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8', 'info_dict': { 'id': 'MSUI14000816AA', @@ -386,7 +417,8 @@ class NRKTVEpisodeIE(InfoExtractor): 'params': { 'skip_download': True, }, - } + 'skip': 'ProgramRightsHasExpired', + }] def _real_extract(self, url): display_id = self._match_id(url) @@ -409,7 +441,7 @@ class NRKTVSerieBaseIE(InfoExtractor): (r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;', r'({.+?})\s*,\s*"[^"]+"\s*\)\s*'), webpage, 'config', default='{}' if not fatal else NO_DEFAULT), - display_id, fatal=False) + display_id, fatal=False, transform_source=js_to_json) if not config: return return try_get( @@ -479,6 +511,14 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE): _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P[^/]+)' _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P\d+)' _TESTS = [{ + 'url': 'https://tv.nrk.no/serie/blank', + 'info_dict': { + 'id': 'blank', + 'title': 'Blank', + 'description': 'md5:7664b4e7e77dc6810cd3bca367c25b6e', + }, + 'playlist_mincount': 30, + }, { # new layout, seasons 'url': 'https://tv.nrk.no/serie/backstage', 'info_dict': { @@ -648,7 +688,7 @@ class NRKSkoleIE(InfoExtractor): _TESTS = [{ 'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099', - 'md5': '6bc936b01f9dd8ed45bc58b252b2d9b6', + 'md5': '18c12c3d071953c3bf8d54ef6b2587b7', 'info_dict': { 'id': '6021', 'ext': 'mp4', From 2f7aa680b79b60d707d7b09818e3ec55748448b2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 6 Jan 2020 14:24:13 +0100 Subject: [PATCH 45/70] [discovery] fix anonymous token extraction(closes #23650) --- youtube_dl/extractor/discovery.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index 6a2712cc5..e0139cc86 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -13,8 +13,8 @@ from ..compat import compat_HTTPError class DiscoveryIE(DiscoveryGoBaseIE): _VALID_URL = r'''(?x)https?:// (?P - (?:(?:www|go)\.)?discovery| - (?:www\.)? + go\.discovery| + www\. (?: investigationdiscovery| discoverylife| @@ -22,8 +22,7 @@ class DiscoveryIE(DiscoveryGoBaseIE): ahctv| destinationamerica| sciencechannel| - tlc| - velocity + tlc )| watch\. (?: @@ -83,7 +82,7 @@ class DiscoveryIE(DiscoveryGoBaseIE): 'authRel': 'authorization', 'client_id': '3020a40c2356a645b4b4', 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), - 'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site, + 'redirectUri': 'https://www.discovery.com/', })['access_token'] headers = self.geo_verification_headers() From 0264903574f78ef5d950081a1afa542f6a063157 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 6 Jan 2020 14:25:23 +0100 Subject: [PATCH 46/70] [scrippsnetworks] add support for www.discovery.com videos --- youtube_dl/extractor/scrippsnetworks.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py index afab9591d..36e1b67a9 100644 --- a/youtube_dl/extractor/scrippsnetworks.py +++ b/youtube_dl/extractor/scrippsnetworks.py @@ -106,7 +106,7 @@ class ScrippsNetworksWatchIE(AWSIE): class ScrippsNetworksIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?Pcookingchanneltv|(?:diy|food)network|hgtv|travelchannel)\.com/videos/[0-9a-z-]+-(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?(?Pcookingchanneltv|discovery|(?:diy|food)network|hgtv|travelchannel)\.com/videos/[0-9a-z-]+-(?P\d+)' _TESTS = [{ 'url': 'https://www.cookingchanneltv.com/videos/the-best-of-the-best-0260338', 'info_dict': { @@ -131,9 +131,13 @@ class ScrippsNetworksIE(InfoExtractor): }, { 'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184', 'only_matching': True, + }, { + 'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184', + 'only_matching': True, }] _ACCOUNT_MAP = { 'cookingchanneltv': 2433005105, + 'discovery': 2706091867, 'diynetwork': 2433004575, 'foodnetwork': 2433005105, 'hgtv': 2433004575, From 7bac77413d2fbd7d9c79100ba85b59b08960e6f0 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 6 Jan 2020 14:30:02 +0100 Subject: [PATCH 47/70] [scrippsnetworks] correct test case URL --- youtube_dl/extractor/scrippsnetworks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py index 36e1b67a9..b40b4c4af 100644 --- a/youtube_dl/extractor/scrippsnetworks.py +++ b/youtube_dl/extractor/scrippsnetworks.py @@ -132,7 +132,7 @@ class ScrippsNetworksIE(InfoExtractor): 'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184', 'only_matching': True, }, { - 'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184', + 'url': 'https://www.discovery.com/videos/guardians-of-the-glades-cooking-with-tom-cobb-5578368', 'only_matching': True, }] _ACCOUNT_MAP = { From b2771a28530dab483848a7389616f1b52e96090c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 7 Jan 2020 13:03:32 +0100 Subject: [PATCH 48/70] [dctp] fix format extraction(closes #23656) --- youtube_dl/extractor/dctp.py | 50 +++++++++++++++--------------------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py index 04ff214f7..e700f8d86 100644 --- a/youtube_dl/extractor/dctp.py +++ b/youtube_dl/extractor/dctp.py @@ -16,10 +16,11 @@ class DctpTvIE(InfoExtractor): _TESTS = [{ # 4x3 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/', + 'md5': '3ffbd1556c3fe210724d7088fad723e3', 'info_dict': { 'id': '95eaa4f33dad413aa17b4ee613cccc6c', 'display_id': 'videoinstallation-fuer-eine-kaufhausfassade', - 'ext': 'flv', + 'ext': 'm4v', 'title': 'Videoinstallation für eine Kaufhausfassade', 'description': 'Kurzfilm', 'thumbnail': r're:^https?://.*\.jpg$', @@ -27,10 +28,6 @@ class DctpTvIE(InfoExtractor): 'timestamp': 1302172322, 'upload_date': '20110407', }, - 'params': { - # rtmp download - 'skip_download': True, - }, }, { # 16x9 'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/', @@ -59,33 +56,26 @@ class DctpTvIE(InfoExtractor): uuid = media['uuid'] title = media['title'] - ratio = '16x9' if media.get('is_wide') else '4x3' - play_path = 'mp4:%s_dctp_0500_%s.m4v' % (uuid, ratio) + is_wide = media.get('is_wide') + formats = [] - servers = self._download_json( - 'http://www.dctp.tv/streaming_servers/', display_id, - note='Downloading server list JSON', fatal=False) + def add_formats(suffix): + templ = 'https://%%s/%s_dctp_%s.m4v' % (uuid, suffix) + formats.extend([{ + 'format_id': 'hls-' + suffix, + 'url': templ % 'cdn-segments.dctp.tv' + '/playlist.m3u8', + 'protocol': 'm3u8_native', + }, { + 'format_id': 's3-' + suffix, + 'url': templ % 'completed-media.s3.amazonaws.com', + }, { + 'format_id': 'http-' + suffix, + 'url': templ % 'cdn-media.dctp.tv', + }]) - if servers: - endpoint = next( - server['endpoint'] - for server in servers - if url_or_none(server.get('endpoint')) - and 'cloudfront' in server['endpoint']) - else: - endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/' - - app = self._search_regex( - r'^rtmpe?://[^/]+/(?P.*)$', endpoint, 'app') - - formats = [{ - 'url': endpoint, - 'app': app, - 'play_path': play_path, - 'page_url': url, - 'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-110.swf', - 'ext': 'flv', - }] + add_formats('0500_' + ('16x9' if is_wide else '4x3')) + if is_wide: + add_formats('720p') thumbnails = [] images = media.get('images') From 3cb05b86de3887cfd2f5ebf41fedc09ff3ae6ff3 Mon Sep 17 00:00:00 2001 From: Singwai Chan Date: Tue, 7 Jan 2020 07:11:03 -0700 Subject: [PATCH 49/70] [pandatv] Remove extractor (#23630) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/pandatv.py | 99 ------------------------------ 2 files changed, 100 deletions(-) delete mode 100644 youtube_dl/extractor/pandatv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7b05f5410..1cab440f4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -808,7 +808,6 @@ from .packtpub import ( PacktPubIE, PacktPubCourseIE, ) -from .pandatv import PandaTVIE from .pandoratv import PandoraTVIE from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE diff --git a/youtube_dl/extractor/pandatv.py b/youtube_dl/extractor/pandatv.py deleted file mode 100644 index 4219802d5..000000000 --- a/youtube_dl/extractor/pandatv.py +++ /dev/null @@ -1,99 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - qualities, -) - - -class PandaTVIE(InfoExtractor): - IE_DESC = '熊猫TV' - _VALID_URL = r'https?://(?:www\.)?panda\.tv/(?P[0-9]+)' - _TESTS = [{ - 'url': 'http://www.panda.tv/66666', - 'info_dict': { - 'id': '66666', - 'title': 're:.+', - 'uploader': '刘杀鸡', - 'ext': 'flv', - 'is_live': True, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Live stream is offline', - }, { - 'url': 'https://www.panda.tv/66666', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - config = self._download_json( - 'https://www.panda.tv/api_room_v2?roomid=%s' % video_id, video_id) - - error_code = config.get('errno', 0) - if error_code != 0: - raise ExtractorError( - '%s returned error %s: %s' - % (self.IE_NAME, error_code, config['errmsg']), - expected=True) - - data = config['data'] - video_info = data['videoinfo'] - - # 2 = live, 3 = offline - if video_info.get('status') != '2': - raise ExtractorError( - 'Live stream is offline', expected=True) - - title = data['roominfo']['name'] - uploader = data.get('hostinfo', {}).get('name') - room_key = video_info['room_key'] - stream_addr = video_info.get( - 'stream_addr', {'OD': '1', 'HD': '1', 'SD': '1'}) - - # Reverse engineered from web player swf - # (http://s6.pdim.gs/static/07153e425f581151.swf at the moment of - # writing). - plflag0, plflag1 = video_info['plflag'].split('_') - plflag0 = int(plflag0) - 1 - if plflag1 == '21': - plflag0 = 10 - plflag1 = '4' - live_panda = 'live_panda' if plflag0 < 1 else '' - - plflag_auth = self._parse_json(video_info['plflag_list'], video_id) - sign = plflag_auth['auth']['sign'] - ts = plflag_auth['auth']['time'] - rid = plflag_auth['auth']['rid'] - - quality_key = qualities(['OD', 'HD', 'SD']) - suffix = ['_small', '_mid', ''] - formats = [] - for k, v in stream_addr.items(): - if v != '1': - continue - quality = quality_key(k) - if quality <= 0: - continue - for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))): - formats.append({ - 'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s?sign=%s&ts=%s&rid=%s' - % (pl, plflag1, room_key, live_panda, suffix[quality], ext, sign, ts, rid), - 'format_id': '%s-%s' % (k, ext), - 'quality': quality, - 'source_preference': pref, - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': self._live_title(title), - 'uploader': uploader, - 'formats': formats, - 'is_live': True, - } From c88debff5d355cf345837fdd7f869db1ce8b9db3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 8 Jan 2020 10:54:05 +0100 Subject: [PATCH 50/70] [naver] improve extraction - improve geo-restriction handling - extract automatic captions - extract uploader metadata - extract VLive HLS formats --- youtube_dl/extractor/naver.py | 158 +++++++++++++++++++++------------- youtube_dl/extractor/vlive.py | 56 ++---------- 2 files changed, 107 insertions(+), 107 deletions(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index bb3d94413..f265fc929 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -1,68 +1,33 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( + clean_html, + dict_get, ExtractorError, + get_element_by_class, int_or_none, + try_get, update_url_query, ) -class NaverIE(InfoExtractor): - _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P\d+)' +class NaverBaseIE(InfoExtractor): + _CAPTION_EXT_RE = r'\.(?:ttml|vtt)' - _TESTS = [{ - 'url': 'http://tv.naver.com/v/81652', - 'info_dict': { - 'id': '81652', - 'ext': 'mp4', - 'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번', - 'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.', - 'upload_date': '20130903', - }, - }, { - 'url': 'http://tv.naver.com/v/395837', - 'md5': '638ed4c12012c458fefcddfd01f173cd', - 'info_dict': { - 'id': '395837', - 'ext': 'mp4', - 'title': '9년이 지나도 아픈 기억, 전효성의 아버지', - 'description': 'md5:5bf200dcbf4b66eb1b350d1eb9c753f7', - 'upload_date': '20150519', - }, - 'skip': 'Georestricted', - }, { - 'url': 'http://tvcast.naver.com/v/81652', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - vid = self._search_regex( - r'videoId["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, - 'video id', fatal=None, group='value') - in_key = self._search_regex( - r'inKey["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, - 'key', default=None, group='value') - - if not vid or not in_key: - error = self._html_search_regex( - r'(?s)
\s*(?:)?\s*

(?P.+?)

\s*
', - webpage, 'error', default=None) - if error: - raise ExtractorError(error, expected=True) - raise ExtractorError('couldn\'t extract vid and key') + def _extract_video_info(self, video_id, vid, key): video_data = self._download_json( 'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid, video_id, query={ - 'key': in_key, + 'key': key, }) meta = video_data['meta'] title = meta['subject'] formats = [] + get_list = lambda x: try_get(video_data, lambda y: y[x + 's']['list'], list) or [] def extract_formats(streams, stream_type, query={}): for stream in streams: @@ -73,7 +38,7 @@ class NaverIE(InfoExtractor): encoding_option = stream.get('encodingOption', {}) bitrate = stream.get('bitrate', {}) formats.append({ - 'format_id': '%s_%s' % (stream.get('type') or stream_type, encoding_option.get('id') or encoding_option.get('name')), + 'format_id': '%s_%s' % (stream.get('type') or stream_type, dict_get(encoding_option, ('name', 'id'))), 'url': stream_url, 'width': int_or_none(encoding_option.get('width')), 'height': int_or_none(encoding_option.get('height')), @@ -83,7 +48,7 @@ class NaverIE(InfoExtractor): 'protocol': 'm3u8_native' if stream_type == 'HLS' else None, }) - extract_formats(video_data.get('videos', {}).get('list', []), 'H264') + extract_formats(get_list('video'), 'H264') for stream_set in video_data.get('streams', []): query = {} for param in stream_set.get('keys', []): @@ -101,28 +66,101 @@ class NaverIE(InfoExtractor): 'mp4', 'm3u8_native', m3u8_id=stream_type, fatal=False)) self._sort_formats(formats) + replace_ext = lambda x, y: re.sub(self._CAPTION_EXT_RE, '.' + y, x) + + def get_subs(caption_url): + if re.search(self._CAPTION_EXT_RE, caption_url): + return [{ + 'url': replace_ext(caption_url, 'ttml'), + }, { + 'url': replace_ext(caption_url, 'vtt'), + }] + else: + return [{'url': caption_url}] + + automatic_captions = {} subtitles = {} - for caption in video_data.get('captions', {}).get('list', []): + for caption in get_list('caption'): caption_url = caption.get('source') if not caption_url: continue - subtitles.setdefault(caption.get('language') or caption.get('locale'), []).append({ - 'url': caption_url, - }) + sub_dict = automatic_captions if caption.get('type') == 'auto' else subtitles + sub_dict.setdefault(dict_get(caption, ('locale', 'language')), []).extend(get_subs(caption_url)) - upload_date = self._search_regex( - r']+class="date".*?(\d{4}\.\d{2}\.\d{2})', - webpage, 'upload date', fatal=False) - if upload_date: - upload_date = upload_date.replace('.', '') + user = meta.get('user', {}) return { 'id': video_id, 'title': title, 'formats': formats, 'subtitles': subtitles, - 'description': self._og_search_description(webpage), - 'thumbnail': meta.get('cover', {}).get('source') or self._og_search_thumbnail(webpage), + 'automatic_captions': automatic_captions, + 'thumbnail': try_get(meta, lambda x: x['cover']['source']), 'view_count': int_or_none(meta.get('count')), - 'upload_date': upload_date, + 'uploader_id': user.get('id'), + 'uploader': user.get('name'), + 'uploader_url': user.get('url'), } + + +class NaverIE(NaverBaseIE): + _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/(?:v|embed)/(?P\d+)' + _GEO_BYPASS = False + _TESTS = [{ + 'url': 'http://tv.naver.com/v/81652', + 'info_dict': { + 'id': '81652', + 'ext': 'mp4', + 'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번', + 'description': '메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.', + 'upload_date': '20130903', + 'uploader': '메가스터디, 합격불변의 법칙', + 'uploader_id': 'megastudy', + }, + }, { + 'url': 'http://tv.naver.com/v/395837', + 'md5': '8a38e35354d26a17f73f4e90094febd3', + 'info_dict': { + 'id': '395837', + 'ext': 'mp4', + 'title': '9년이 지나도 아픈 기억, 전효성의 아버지', + 'description': 'md5:eb6aca9d457b922e43860a2a2b1984d3', + 'upload_date': '20150519', + 'uploader': '4가지쇼 시즌2', + 'uploader_id': 'wrappinguser29', + }, + 'skip': 'Georestricted', + }, { + 'url': 'http://tvcast.naver.com/v/81652', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + content = self._download_json( + 'https://tv.naver.com/api/contents/json/v/' + video_id, + video_id, headers=self.geo_verification_headers()) + player_json = content.get('playerJson') or {} + + vid = player_json.get('videoId') + in_key = player_json.get('inKey') + + if not vid or not in_key: + player_auth = player_json.get('playerAuth') + if player_auth == 'notCountry': + self.raise_geo_restricted(countries=['KR']) + elif player_auth == 'notLogin': + self.raise_login_required() + raise ExtractorError('couldn\'t extract vid and key') + info = self._extract_video_info(video_id, vid, in_key) + + clip_info_html = content.get('clipInfoHtml') + if clip_info_html: + info['description'] = clean_html(get_element_by_class('desc', clip_info_html)) + upload_date = self._search_regex( + r']+class="date".*?(\d{4}\.\d{2}\.\d{2})', + clip_info_html, 'upload date', fatal=False) + if upload_date: + info['upload_date'] = upload_date.replace('.', '') + + return info diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index c3429f723..f79531e6f 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -6,22 +6,18 @@ import time import itertools from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlencode, - compat_str, -) +from .naver import NaverBaseIE +from ..compat import compat_str from ..utils import ( - dict_get, ExtractorError, - float_or_none, - int_or_none, + merge_dicts, remove_start, try_get, urlencode_postdata, ) -class VLiveIE(InfoExtractor): +class VLiveIE(NaverBaseIE): IE_NAME = 'vlive' _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P[0-9]+)' _NETRC_MACHINE = 'vlive' @@ -34,6 +30,7 @@ class VLiveIE(InfoExtractor): 'title': "[V LIVE] Girl's Day's Broadcast", 'creator': "Girl's Day", 'view_count': int, + 'uploader_id': 'muploader_a', }, }, { 'url': 'http://www.vlive.tv/video/16937', @@ -44,6 +41,7 @@ class VLiveIE(InfoExtractor): 'creator': 'EXO', 'view_count': int, 'subtitles': 'mincount:12', + 'uploader_id': 'muploader_j', }, 'params': { 'skip_download': True, @@ -187,45 +185,9 @@ class VLiveIE(InfoExtractor): 'This video is only available for CH+ subscribers') long_video_id, key = video_info['vid'], video_info['inkey'] - playinfo = self._download_json( - 'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s' - % compat_urllib_parse_urlencode({ - 'videoId': long_video_id, - 'key': key, - 'ptc': 'http', - 'doct': 'json', # document type (xml or json) - 'cpt': 'vtt', # captions type (vtt or ttml) - }), video_id) - - formats = [{ - 'url': vid['source'], - 'format_id': vid.get('encodingOption', {}).get('name'), - 'abr': float_or_none(vid.get('bitrate', {}).get('audio')), - 'vbr': float_or_none(vid.get('bitrate', {}).get('video')), - 'width': int_or_none(vid.get('encodingOption', {}).get('width')), - 'height': int_or_none(vid.get('encodingOption', {}).get('height')), - 'filesize': int_or_none(vid.get('size')), - } for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')] - self._sort_formats(formats) - - view_count = int_or_none(playinfo.get('meta', {}).get('count')) - - subtitles = {} - for caption in playinfo.get('captions', {}).get('list', []): - lang = dict_get(caption, ('locale', 'language', 'country', 'label')) - if lang and caption.get('source'): - subtitles[lang] = [{ - 'ext': 'vtt', - 'url': caption['source']}] - - info = self._get_common_fields(webpage) - info.update({ - 'id': video_id, - 'formats': formats, - 'view_count': view_count, - 'subtitles': subtitles, - }) - return info + return merge_dicts( + self._get_common_fields(webpage), + self._extract_video_info(video_id, long_video_id, key)) def _download_init_page(self, video_id): return self._download_webpage( From 838171630da3691ad4df8a11eeab9b2632fb0bcf Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 8 Jan 2020 12:55:33 +0100 Subject: [PATCH 51/70] [naver] improve metadata extraction --- youtube_dl/extractor/naver.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index f265fc929..61fc59126 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -8,8 +8,8 @@ from ..utils import ( clean_html, dict_get, ExtractorError, - get_element_by_class, int_or_none, + parse_duration, try_get, update_url_query, ) @@ -113,6 +113,7 @@ class NaverIE(NaverBaseIE): 'ext': 'mp4', 'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번', 'description': '메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.', + 'timestamp': 1378200754, 'upload_date': '20130903', 'uploader': '메가스터디, 합격불변의 법칙', 'uploader_id': 'megastudy', @@ -125,6 +126,7 @@ class NaverIE(NaverBaseIE): 'ext': 'mp4', 'title': '9년이 지나도 아픈 기억, 전효성의 아버지', 'description': 'md5:eb6aca9d457b922e43860a2a2b1984d3', + 'timestamp': 1432030253, 'upload_date': '20150519', 'uploader': '4가지쇼 시즌2', 'uploader_id': 'wrappinguser29', @@ -138,29 +140,27 @@ class NaverIE(NaverBaseIE): def _real_extract(self, url): video_id = self._match_id(url) content = self._download_json( - 'https://tv.naver.com/api/contents/json/v/' + video_id, + 'https://tv.naver.com/api/json/v/' + video_id, video_id, headers=self.geo_verification_headers()) - player_json = content.get('playerJson') or {} + player_info_json = content.get('playerInfoJson') or {} + current_clip = player_info_json.get('currentClip') or {} - vid = player_json.get('videoId') - in_key = player_json.get('inKey') + vid = current_clip.get('videoId') + in_key = current_clip.get('inKey') if not vid or not in_key: - player_auth = player_json.get('playerAuth') + player_auth = try_get(player_info_json, lambda x: x['playerOption']['auth']) if player_auth == 'notCountry': self.raise_geo_restricted(countries=['KR']) elif player_auth == 'notLogin': self.raise_login_required() raise ExtractorError('couldn\'t extract vid and key') info = self._extract_video_info(video_id, vid, in_key) - - clip_info_html = content.get('clipInfoHtml') - if clip_info_html: - info['description'] = clean_html(get_element_by_class('desc', clip_info_html)) - upload_date = self._search_regex( - r']+class="date".*?(\d{4}\.\d{2}\.\d{2})', - clip_info_html, 'upload date', fatal=False) - if upload_date: - info['upload_date'] = upload_date.replace('.', '') - + info.update({ + 'description': clean_html(current_clip.get('description')), + 'timestamp': int_or_none(current_clip.get('firstExposureTime'), 1000), + 'duration': parse_duration(current_clip.get('displayPlayTime')), + 'like_count': int_or_none(current_clip.get('recommendPoint')), + 'age_limit': 19 if current_clip.get('adult') else None, + }) return info From a71c1d1a5a54afc7f24acf3af7f1afd610c648f2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 8 Jan 2020 22:42:53 +0100 Subject: [PATCH 52/70] [cloudflarestream] improve extraction - add support for bytehighway.net domain - add support for signed URLs - extract thumbnail --- youtube_dl/extractor/cloudflarestream.py | 25 +++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/cloudflarestream.py b/youtube_dl/extractor/cloudflarestream.py index 8ff2c6531..9026c7c90 100644 --- a/youtube_dl/extractor/cloudflarestream.py +++ b/youtube_dl/extractor/cloudflarestream.py @@ -1,20 +1,22 @@ # coding: utf-8 from __future__ import unicode_literals +import base64 import re from .common import InfoExtractor class CloudflareStreamIE(InfoExtractor): + _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)' _VALID_URL = r'''(?x) https?:// (?: - (?:watch\.)?(?:cloudflarestream\.com|videodelivery\.net)/| - embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo= + (?:watch\.)?%s/| + embed\.%s/embed/[^/]+\.js\?.*?\bvideo= ) - (?P[\da-f]+) - ''' + (?P[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+) + ''' % (_DOMAIN_RE, _DOMAIN_RE) _TESTS = [{ 'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717', 'info_dict': { @@ -46,18 +48,23 @@ class CloudflareStreamIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net' + base_url = 'https://%s/%s/' % (domain, video_id) + if '.' in video_id: + video_id = self._parse_json(base64.urlsafe_b64decode( + video_id.split('.')[1]), video_id)['sub'] + manifest_base_url = base_url + 'manifest/video.' formats = self._extract_m3u8_formats( - 'https://cloudflarestream.com/%s/manifest/video.m3u8' % video_id, - video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', - fatal=False) + manifest_base_url + 'm3u8', video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False) formats.extend(self._extract_mpd_formats( - 'https://cloudflarestream.com/%s/manifest/video.mpd' % video_id, - video_id, mpd_id='dash', fatal=False)) + manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False)) self._sort_formats(formats) return { 'id': video_id, 'title': video_id, + 'thumbnail': base_url + 'thumbnails/thumbnail.jpg', 'formats': formats, } From 483b858d49eabaad2c521425eb892c1330d4f525 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 8 Jan 2020 23:07:41 +0100 Subject: [PATCH 53/70] [cloudflarestream] import embed URL extraction --- youtube_dl/extractor/cloudflarestream.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/cloudflarestream.py b/youtube_dl/extractor/cloudflarestream.py index 9026c7c90..2fdcfbb3a 100644 --- a/youtube_dl/extractor/cloudflarestream.py +++ b/youtube_dl/extractor/cloudflarestream.py @@ -9,14 +9,16 @@ from .common import InfoExtractor class CloudflareStreamIE(InfoExtractor): _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)' + _EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE + _ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+' _VALID_URL = r'''(?x) https?:// (?: (?:watch\.)?%s/| - embed\.%s/embed/[^/]+\.js\?.*?\bvideo= + %s ) - (?P[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+) - ''' % (_DOMAIN_RE, _DOMAIN_RE) + (?P%s) + ''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE) _TESTS = [{ 'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717', 'info_dict': { @@ -43,7 +45,7 @@ class CloudflareStreamIE(InfoExtractor): return [ mobj.group('url') for mobj in re.finditer( - r']+\bsrc=(["\'])(?P(?:https?:)?//embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1', + r']+\bsrc=(["\'])(?P(?:https?:)?//%s(?:%s).*?)\1' % (CloudflareStreamIE._EMBED_RE, CloudflareStreamIE._ID_RE), webpage)] def _real_extract(self, url): From d4e0cd69efe6240a188eac9cce296b57b06661ca Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 10 Jan 2020 05:06:45 +0100 Subject: [PATCH 54/70] [lego] fix extraction and extract subtitle(closes #23687) --- youtube_dl/extractor/lego.py | 183 +++++++++++++++++++---------------- 1 file changed, 102 insertions(+), 81 deletions(-) diff --git a/youtube_dl/extractor/lego.py b/youtube_dl/extractor/lego.py index b312e77f1..1e3c19dfd 100644 --- a/youtube_dl/extractor/lego.py +++ b/youtube_dl/extractor/lego.py @@ -2,23 +2,24 @@ from __future__ import unicode_literals import re +import uuid from .common import InfoExtractor -from ..compat import compat_str +from ..compat import compat_HTTPError from ..utils import ( - unescapeHTML, - parse_duration, - get_element_by_class, + ExtractorError, + int_or_none, + qualities, ) class LEGOIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?lego\.com/(?P[^/]+)/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P[0-9a-f]+)' + _VALID_URL = r'https?://(?:www\.)?lego\.com/(?P[a-z]{2}-[a-z]{2})/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P[0-9a-f]{32})' _TESTS = [{ 'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1', 'md5': 'f34468f176cfd76488767fc162c405fa', 'info_dict': { - 'id': '55492d823b1b4d5e985787fa8c2973b1', + 'id': '55492d82-3b1b-4d5e-9857-87fa8c2973b1_en-US', 'ext': 'mp4', 'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi', 'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi', @@ -26,103 +27,123 @@ class LEGOIE(InfoExtractor): }, { # geo-restricted but the contentUrl contain a valid url 'url': 'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399', - 'md5': '4c3fec48a12e40c6e5995abc3d36cc2e', + 'md5': 'c7420221f7ffd03ff056f9db7f8d807c', 'info_dict': { - 'id': '13bdc2299ab24d9685701a915b3d71e7', + 'id': '13bdc229-9ab2-4d96-8570-1a915b3d71e7_nl-NL', 'ext': 'mp4', - 'title': 'Aflevering 20 - Helden van het koninkrijk', + 'title': 'Aflevering 20: Helden van het koninkrijk', 'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941', + 'age_limit': 5, }, }, { - # special characters in title - 'url': 'http://www.lego.com/en-us/starwars/videos/lego-star-wars-force-surprise-9685ee9d12e84ff38e84b4e3d0db533d', + # with subtitle + 'url': 'https://www.lego.com/nl-nl/kids/videos/classic/creative-storytelling-the-little-puppy-aa24f27c7d5242bc86102ebdc0f24cba', 'info_dict': { - 'id': '9685ee9d12e84ff38e84b4e3d0db533d', + 'id': 'aa24f27c-7d52-42bc-8610-2ebdc0f24cba_nl-NL', 'ext': 'mp4', - 'title': 'Force Surprise – LEGO® Star Wars™ Microfighters', - 'description': 'md5:9c673c96ce6f6271b88563fe9dc56de3', + 'title': 'De kleine puppy', + 'description': 'md5:5b725471f849348ac73f2e12cfb4be06', + 'age_limit': 1, + 'subtitles': { + 'nl': [{ + 'ext': 'srt', + 'url': r're:^https://.+\.srt$', + }], + }, }, 'params': { 'skip_download': True, }, }] - _BITRATES = [256, 512, 1024, 1536, 2560] + _QUALITIES = { + 'Lowest': (64, 180, 320), + 'Low': (64, 270, 480), + 'Medium': (96, 360, 640), + 'High': (128, 540, 960), + 'Highest': (128, 720, 1280), + } def _real_extract(self, url): locale, video_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, video_id) - title = get_element_by_class('video-header', webpage).strip() - progressive_base = 'https://lc-mediaplayerns-live-s.legocdn.com/' - streaming_base = 'http://legoprod-f.akamaihd.net/' - content_url = self._html_search_meta('contentUrl', webpage) - path = self._search_regex( - r'(?:https?:)?//[^/]+/(?:[iz]/s/)?public/(.+)_[0-9,]+\.(?:mp4|webm)', - content_url, 'video path', default=None) - if not path: - player_url = self._proto_relative_url(self._search_regex( - r']+src="((?:https?)?//(?:www\.)?lego\.com/[^/]+/mediaplayer/video/[^"]+)', - webpage, 'player url', default=None)) - if not player_url: - base_url = self._proto_relative_url(self._search_regex( - r'data-baseurl="([^"]+)"', webpage, 'base url', - default='http://www.lego.com/%s/mediaplayer/video/' % locale)) - player_url = base_url + video_id - player_webpage = self._download_webpage(player_url, video_id) - video_data = self._parse_json(unescapeHTML(self._search_regex( - r"video='([^']+)'", player_webpage, 'video data')), video_id) - progressive_base = self._search_regex( - r'data-video-progressive-url="([^"]+)"', - player_webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/') - streaming_base = self._search_regex( - r'data-video-streaming-url="([^"]+)"', - player_webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/') - item_id = video_data['ItemId'] + countries = [locale.split('-')[1].upper()] + self._initialize_geo_bypass({ + 'countries': countries, + }) - net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]]) - base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])]) - path = '/'.join([net_storage_path, base_path]) - streaming_path = ','.join(map(lambda bitrate: compat_str(bitrate), self._BITRATES)) + try: + item = self._download_json( + # https://contentfeed.services.lego.com/api/v2/item/[VIDEO_ID]?culture=[LOCALE]&contentType=Video + 'https://services.slingshot.lego.com/mediaplayer/v2', + video_id, query={ + 'videoId': '%s_%s' % (uuid.UUID(video_id), locale), + }, headers=self.geo_verification_headers()) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 451: + self.raise_geo_restricted(countries=countries) + raise - formats = self._extract_akamai_formats( - '%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base, path, streaming_path), video_id) - m3u8_formats = list(filter( - lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none', - formats)) - if len(m3u8_formats) == len(self._BITRATES): - self._sort_formats(m3u8_formats) - for bitrate, m3u8_format in zip(self._BITRATES, m3u8_formats): - progressive_base_url = '%spublic/%s_%d.' % (progressive_base, path, bitrate) - mp4_f = m3u8_format.copy() - mp4_f.update({ - 'url': progressive_base_url + 'mp4', - 'format_id': m3u8_format['format_id'].replace('hls', 'mp4'), - 'protocol': 'http', - }) - web_f = { - 'url': progressive_base_url + 'webm', - 'format_id': m3u8_format['format_id'].replace('hls', 'webm'), - 'width': m3u8_format['width'], - 'height': m3u8_format['height'], - 'tbr': m3u8_format.get('tbr'), - 'ext': 'webm', + video = item['Video'] + video_id = video['Id'] + title = video['Title'] + + q = qualities(['Lowest', 'Low', 'Medium', 'High', 'Highest']) + formats = [] + for video_source in item.get('VideoFormats', []): + video_source_url = video_source.get('Url') + if not video_source_url: + continue + video_source_format = video_source.get('Format') + if video_source_format == 'F4M': + formats.extend(self._extract_f4m_formats( + video_source_url, video_id, + f4m_id=video_source_format, fatal=False)) + elif video_source_format == 'M3U8': + formats.extend(self._extract_m3u8_formats( + video_source_url, video_id, 'mp4', 'm3u8_native', + m3u8_id=video_source_format, fatal=False)) + else: + video_source_quality = video_source.get('Quality') + format_id = [] + for v in (video_source_format, video_source_quality): + if v: + format_id.append(v) + f = { + 'format_id': '-'.join(format_id), + 'quality': q(video_source_quality), + 'url': video_source_url, } - formats.extend([web_f, mp4_f]) - else: - for bitrate in self._BITRATES: - for ext in ('web', 'mp4'): - formats.append({ - 'format_id': '%s-%s' % (ext, bitrate), - 'url': '%spublic/%s_%d.%s' % (progressive_base, path, bitrate, ext), - 'tbr': bitrate, - 'ext': ext, - }) + quality = self._QUALITIES.get(video_source_quality) + if quality: + f.update({ + 'abr': quality[0], + 'height': quality[1], + 'width': quality[2], + }), + formats.append(f) self._sort_formats(formats) + subtitles = {} + sub_file_id = video.get('SubFileId') + if sub_file_id and sub_file_id != '00000000-0000-0000-0000-000000000000': + net_storage_path = video.get('NetstoragePath') + invariant_id = video.get('InvariantId') + video_file_id = video.get('VideoFileId') + video_version = video.get('VideoVersion') + if net_storage_path and invariant_id and video_file_id and video_version: + subtitles.setdefault(locale[:2], []).append({ + 'url': 'https://lc-mediaplayerns-live-s.legocdn.com/public/%s/%s_%s_%s_%s_sub.srt' % (net_storage_path, invariant_id, video_file_id, locale, video_version), + }) + return { 'id': video_id, 'title': title, - 'description': self._html_search_meta('description', webpage), - 'thumbnail': self._html_search_meta('thumbnail', webpage), - 'duration': parse_duration(self._html_search_meta('duration', webpage)), + 'description': video.get('Description'), + 'thumbnail': video.get('GeneratedCoverImage') or video.get('GeneratedThumbnail'), + 'duration': int_or_none(video.get('Length')), 'formats': formats, + 'subtitles': subtitles, + 'age_limit': int_or_none(video.get('AgeFrom')), + 'season': video.get('SeasonTitle'), + 'season_number': int_or_none(video.get('Season')) or None, + 'episode_number': int_or_none(video.get('Episode')) or None, } From 3fdf573148abaa4e3b0dc9a8ae7ee7d1bdab2289 Mon Sep 17 00:00:00 2001 From: cdarlint Date: Sat, 11 Jan 2020 02:34:26 +0800 Subject: [PATCH 55/70] [safari] Fix kaltura session extraction (closes #23679) (#23670) --- youtube_dl/extractor/safari.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index bd9ee1647..4942437c7 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -165,7 +165,8 @@ class SafariIE(SafariBaseIE): kaltura_session = self._download_json( '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id), video_id, 'Downloading kaltura session JSON', - 'Unable to download kaltura session JSON', fatal=False) + 'Unable to download kaltura session JSON', fatal=False, + headers={'Accept': 'application/json'}) if kaltura_session: session = kaltura_session.get('session') if session: From 9ba179c1fabe8f1700f5cbd778ef2030fa246d6e Mon Sep 17 00:00:00 2001 From: Johannes N <31795504+jonolt@users.noreply.github.com> Date: Fri, 10 Jan 2020 19:51:15 +0100 Subject: [PATCH 56/70] [orf:fm4] Fix extraction (#23599) --- youtube_dl/extractor/orf.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 3425f7602..816bd212e 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -161,9 +161,6 @@ class ORFRadioIE(InfoExtractor): show_date = mobj.group('date') show_id = mobj.group('show') - if station == 'fm4': - show_id = '4%s' % show_id - data = self._download_json( 'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s' % (station, show_id, show_date), show_id @@ -195,10 +192,10 @@ class ORFRadioIE(InfoExtractor): class ORFFM4IE(ORFRadioIE): IE_NAME = 'orf:fm4' IE_DESC = 'radio FM4' - _VALID_URL = r'https?://(?Pfm4)\.orf\.at/player/(?P[0-9]+)/(?P\w+)' + _VALID_URL = r'https?://(?Pfm4)\.orf\.at/player/(?P[0-9]+)/(?P4\w+)' _TEST = { - 'url': 'http://fm4.orf.at/player/20170107/CC', + 'url': 'http://fm4.orf.at/player/20170107/4CC', 'md5': '2b0be47375432a7ef104453432a19212', 'info_dict': { 'id': '2017-01-07_2100_tl_54_7DaysSat18_31295', From aca2fd222fc951dd8e177d4c5584f5276e719825 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 11 Jan 2020 02:18:36 +0700 Subject: [PATCH 57/70] [orf:radio] Clean description and improve extraction --- youtube_dl/extractor/orf.py | 46 ++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 816bd212e..45fc745a3 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -6,12 +6,14 @@ import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + clean_html, determine_ext, float_or_none, HEADRequest, int_or_none, orderedSet, remove_end, + str_or_none, strip_jsonp, unescapeHTML, unified_strdate, @@ -162,30 +164,37 @@ class ORFRadioIE(InfoExtractor): show_id = mobj.group('show') data = self._download_json( - 'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s' % (station, show_id, show_date), - show_id - ) + 'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s' + % (station, show_id, show_date), show_id) - def extract_entry_dict(info, title, subtitle): - return { - 'id': info['loopStreamId'].replace('.mp3', ''), - 'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (station, info['loopStreamId']), + entries = [] + for info in data['streams']: + loop_stream_id = str_or_none(info.get('loopStreamId')) + if not loop_stream_id: + continue + title = str_or_none(data.get('title')) + if not title: + continue + start = int_or_none(info.get('start'), scale=1000) + end = int_or_none(info.get('end'), scale=1000) + duration = end - start if end and start else None + entries.append({ + 'id': loop_stream_id.replace('.mp3', ''), + 'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (station, loop_stream_id), 'title': title, - 'description': subtitle, - 'duration': (info['end'] - info['start']) / 1000, - 'timestamp': info['start'] / 1000, + 'description': clean_html(data.get('subtitle')), + 'duration': duration, + 'timestamp': start, 'ext': 'mp3', - 'series': data.get('programTitle') - } - - entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']] + 'series': data.get('programTitle'), + }) return { '_type': 'playlist', 'id': show_id, - 'title': data['title'], - 'description': data['subtitle'], - 'entries': entries + 'title': data.get('title'), + 'description': clean_html(data.get('subtitle')), + 'entries': entries, } @@ -206,7 +215,8 @@ class ORFFM4IE(ORFRadioIE): 'timestamp': 1483819257, 'upload_date': '20170107', }, - 'skip': 'Shows from ORF radios are only available for 7 days.' + 'skip': 'Shows from ORF radios are only available for 7 days.', + 'only_matching': True, } From 10a5091e58077cc0fd916a21df4c702f8d519702 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 12 Jan 2020 11:55:11 +0100 Subject: [PATCH 58/70] [twitter] add support for promo_video_website cards(closes #23711) --- youtube_dl/extractor/twitter.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 5f8d90fb4..01468981c 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -251,10 +251,10 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '700207533655363584', 'ext': 'mp4', - 'title': 'Simon Vertugo - BEAT PROD: @suhmeduh #Damndaniel', + 'title': 'simon vetugo - BEAT PROD: @suhmeduh #Damndaniel', 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ', 'thumbnail': r're:^https?://.*\.jpg', - 'uploader': 'Simon Vertugo', + 'uploader': 'simon vetugo', 'uploader_id': 'simonvertugo', 'duration': 30.0, 'timestamp': 1455777459, @@ -376,6 +376,10 @@ class TwitterIE(TwitterBaseIE): # Twitch Clip Embed 'url': 'https://twitter.com/GunB1g/status/1163218564784017422', 'only_matching': True, + }, { + # promo_video_website card + 'url': 'https://twitter.com/GunB1g/status/1163218564784017422', + 'only_matching': True, }] def _real_extract(self, url): @@ -458,10 +462,11 @@ class TwitterIE(TwitterBaseIE): return try_get(o, lambda x: x[x['type'].lower() + '_value']) card_name = card['name'].split(':')[-1] - if card_name == 'amplify': - formats = self._extract_formats_from_vmap_url( - get_binding_value('amplify_url_vmap'), - get_binding_value('amplify_content_id') or twid) + if card_name in ('amplify', 'promo_video_website'): + is_amplify = card_name == 'amplify' + vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url') + content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player')) + formats = self._extract_formats_from_vmap_url(vmap_url, content_id or twid) self._sort_formats(formats) thumbnails = [] From bd2c211fcc3b0390a1b404fa01409d455859caa8 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 12 Jan 2020 17:34:21 +0100 Subject: [PATCH 59/70] [vodplatform] add support for embed.kwikmotion.com domain --- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/vodplatform.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 743ef47db..a4aef106f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2960,7 +2960,7 @@ class GenericIE(InfoExtractor): # Look for VODPlatform embeds mobj = re.search( - r']+src=(["\'])(?P(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1', + r']+src=(["\'])(?P(?:https?:)?//(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/.+?)\1', webpage) if mobj is not None: return self.url_result( diff --git a/youtube_dl/extractor/vodplatform.py b/youtube_dl/extractor/vodplatform.py index 239644340..74d2257e7 100644 --- a/youtube_dl/extractor/vodplatform.py +++ b/youtube_dl/extractor/vodplatform.py @@ -6,8 +6,8 @@ from ..utils import unescapeHTML class VODPlatformIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vod-platform\.net/[eE]mbed/(?P[^/?#]+)' - _TEST = { + _VALID_URL = r'https?://(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/(?P[^/?#]+)' + _TESTS = [{ # from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar 'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw', 'md5': '1db2b7249ce383d6be96499006e951fc', @@ -16,7 +16,10 @@ class VODPlatformIE(InfoExtractor): 'ext': 'mp4', 'title': 'LBCi News_ النصرة في ضيافة الـ "سي.أن.أن"', } - } + }, { + 'url': 'http://embed.kwikmotion.com/embed/RufMcytHDolTH1MuKHY9Fw', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From 3fc56635b7d375b262ac3c15aaae549227b5227e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Jan 2020 21:46:56 +0700 Subject: [PATCH 60/70] [ndr:base:embed] Improve thumbnails extraction (closes #23731) --- youtube_dl/extractor/ndr.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index aec2ea133..9c8bf05af 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -9,6 +9,8 @@ from ..utils import ( int_or_none, parse_iso8601, qualities, + try_get, + urljoin, ) @@ -220,11 +222,17 @@ class NDREmbedBaseIE(InfoExtractor): upload_date = ppjson.get('config', {}).get('publicationDate') duration = int_or_none(config.get('duration')) - thumbnails = [{ - 'id': thumbnail.get('quality') or thumbnail_id, - 'url': thumbnail['src'], - 'preference': quality_key(thumbnail.get('quality')), - } for thumbnail_id, thumbnail in config.get('poster', {}).items() if thumbnail.get('src')] + thumbnails = [] + poster = try_get(config, lambda x: x['poster'], dict) or {} + for thumbnail_id, thumbnail in poster.items(): + thumbnail_url = urljoin(url, thumbnail.get('src')) + if not thumbnail_url: + continue + thumbnails.append({ + 'id': thumbnail.get('quality') or thumbnail_id, + 'url': thumbnail_url, + 'preference': quality_key(thumbnail.get('quality')), + }) return { 'id': video_id, From 628e5bc0b715c239e5fe367bc538a1c1fa563787 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Jan 2020 23:48:36 +0700 Subject: [PATCH 61/70] [canvas] Add support for new API endpoint and update tests (closes #17680, closes #18629) --- youtube_dl/extractor/canvas.py | 83 +++++++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py index c506bc5dd..8667a0d04 100644 --- a/youtube_dl/extractor/canvas.py +++ b/youtube_dl/extractor/canvas.py @@ -13,6 +13,8 @@ from ..utils import ( int_or_none, merge_dicts, parse_iso8601, + str_or_none, + url_or_none, ) @@ -20,15 +22,15 @@ class CanvasIE(InfoExtractor): _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?Pcanvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', - 'md5': '90139b746a0a9bd7bb631283f6e2a64e', + 'md5': '68993eda72ef62386a15ea2cf3c93107', 'info_dict': { 'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', 'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Nachtwacht: De Greystook', - 'description': 'md5:1db3f5dc4c7109c821261e7512975be7', + 'description': 'Nachtwacht: De Greystook', 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 1468.03, + 'duration': 1468.04, }, 'expected_warnings': ['is not a supported codec', 'Unknown MIME type'], }, { @@ -39,23 +41,45 @@ class CanvasIE(InfoExtractor): 'HLS': 'm3u8_native', 'HLS_AES': 'm3u8', } + _REST_API_BASE = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) site_id, video_id = mobj.group('site_id'), mobj.group('id') + # Old API endpoint, serves more formats but may fail for some videos data = self._download_json( 'https://mediazone.vrt.be/api/v1/%s/assets/%s' - % (site_id, video_id), video_id) + % (site_id, video_id), video_id, 'Downloading asset JSON', + 'Unable to download asset JSON', fatal=False) + + # New API endpoint + if not data: + token = self._download_json( + '%s/tokens' % self._REST_API_BASE, video_id, + 'Downloading token', data=b'', + headers={'Content-Type': 'application/json'})['vrtPlayerToken'] + data = self._download_json( + '%s/videos/%s' % (self._REST_API_BASE, video_id), + video_id, 'Downloading video JSON', fatal=False, query={ + 'vrtPlayerToken': token, + 'client': '%s@PROD' % site_id, + }, expected_status=400) + message = data.get('message') + if message and not data.get('title'): + if data.get('code') == 'AUTHENTICATION_REQUIRED': + self.raise_login_required(message) + raise ExtractorError(message, expected=True) title = data['title'] description = data.get('description') formats = [] for target in data['targetUrls']: - format_url, format_type = target.get('url'), target.get('type') + format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type')) if not format_url or not format_type: continue + format_type = format_type.upper() if format_type in self._HLS_ENTRY_PROTOCOLS_MAP: formats.extend(self._extract_m3u8_formats( format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type], @@ -134,20 +158,20 @@ class CanvasEenIE(InfoExtractor): }, 'skip': 'Pagina niet gevonden', }, { - 'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles', + 'url': 'https://www.een.be/thuis/emma-pakt-thilly-aan', 'info_dict': { - 'id': 'mz-ast-11a587f8-b921-4266-82e2-0bce3e80d07f', - 'display_id': 'herbekijk-sorry-voor-alles', + 'id': 'md-ast-3a24ced2-64d7-44fb-b4ed-ed1aafbf90b8', + 'display_id': 'emma-pakt-thilly-aan', 'ext': 'mp4', - 'title': 'Herbekijk Sorry voor alles', - 'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3', + 'title': 'Emma pakt Thilly aan', + 'description': 'md5:c5c9b572388a99b2690030afa3f3bad7', 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 3788.06, + 'duration': 118.24, }, 'params': { 'skip_download': True, }, - 'skip': 'Episode no longer available', + 'expected_warnings': ['is not a supported codec'], }, { 'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend', 'only_matching': True, @@ -183,19 +207,44 @@ class VrtNUIE(GigyaBaseIE): IE_DESC = 'VrtNU.be' _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?Pvrtnu)/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ + # Available via old API endpoint 'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/', 'info_dict': { 'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'De zwarte weduwe', - 'description': 'md5:d90c21dced7db869a85db89a623998d4', + 'description': 'md5:db1227b0f318c849ba5eab1fef895ee4', 'duration': 1457.04, 'thumbnail': r're:^https?://.*\.jpg$', - 'season': '1', + 'season': 'Season 1', 'season_number': 1, 'episode_number': 1, }, - 'skip': 'This video is only available for registered users' + 'skip': 'This video is only available for registered users', + 'params': { + 'username': '', + 'password': '', + }, + 'expected_warnings': ['is not a supported codec'], + }, { + # Only available via new API endpoint + 'url': 'https://www.vrt.be/vrtnu/a-z/kamp-waes/1/kamp-waes-s1a5/', + 'info_dict': { + 'id': 'pbs-pub-0763b56c-64fb-4d38-b95b-af60bf433c71$vid-ad36a73c-4735-4f1f-b2c0-a38e6e6aa7e1', + 'ext': 'mp4', + 'title': 'Aflevering 5', + 'description': 'Wie valt door de mand tijdens een missie?', + 'duration': 2967.06, + 'season': 'Season 1', + 'season_number': 1, + 'episode_number': 5, + }, + 'skip': 'This video is only available for registered users', + 'params': { + 'username': '', + 'password': '', + }, + 'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'], }] _NETRC_MACHINE = 'vrtnu' _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy' From 14bb191634e2e8ab89a2e94e9e4d009b6406c8b2 Mon Sep 17 00:00:00 2001 From: jnozsc Date: Tue, 14 Jan 2020 10:09:08 -0800 Subject: [PATCH 62/70] [travis] Add flake8 job (#23720) --- .travis.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 14d95fa84..51afd469a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ dist: trusty env: - YTDL_TEST_SET=core - YTDL_TEST_SET=download -matrix: +jobs: include: - python: 3.7 dist: xenial @@ -35,6 +35,11 @@ matrix: env: YTDL_TEST_SET=download - env: JYTHON=true; YTDL_TEST_SET=core - env: JYTHON=true; YTDL_TEST_SET=download + - name: flake8 + python: 3.8 + dist: xenial + install: pip install flake8 + script: flake8 . fast_finish: true allow_failures: - env: YTDL_TEST_SET=download From bfdc8340c90e0ce495d2927e7d555daa5ac05670 Mon Sep 17 00:00:00 2001 From: Moritz Patelscheck Date: Fri, 20 Dec 2019 00:02:39 +0100 Subject: [PATCH 63/70] [yourporn] Fix extraction (closes #21645, closes #22255, closes #23459) --- youtube_dl/extractor/yourporn.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/yourporn.py index 8a2d5f63b..98347491e 100644 --- a/youtube_dl/extractor/yourporn.py +++ b/youtube_dl/extractor/yourporn.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( parse_duration, urljoin, @@ -8,9 +9,9 @@ from ..utils import ( class YourPornIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:yourporn\.sexy|sxyprn\.com)/post/(?P[^/?#&.]+)' + _VALID_URL = r'https?://(?:www\.)?sxyprn\.com/post/(?P[^/?#&.]+)' _TESTS = [{ - 'url': 'https://yourporn.sexy/post/57ffcb2e1179b.html', + 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html', 'md5': '6f8682b6464033d87acaa7a8ff0c092e', 'info_dict': { 'id': '57ffcb2e1179b', @@ -33,11 +34,19 @@ class YourPornIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - video_url = urljoin(url, self._parse_json( + parts = self._parse_json( self._search_regex( r'data-vnfo=(["\'])(?P{.+?})\1', webpage, 'data info', group='data'), - video_id)[video_id]).replace('/cdn/', '/cdn5/') + video_id)[video_id].split('/') + + num = 0 + for c in parts[6] + parts[7]: + if c.isnumeric(): + num += int(c) + parts[5] = compat_str(int(parts[5]) - num) + parts[1] += '8' + video_url = urljoin(url, '/'.join(parts)) title = (self._search_regex( r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title', @@ -54,4 +63,5 @@ class YourPornIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': duration, 'age_limit': 18, + 'ext': 'mp4', } From d7c55f226dd8fafb424eefb078f41b3fc410588b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 15 Jan 2020 01:34:01 +0700 Subject: [PATCH 64/70] [ChangeLog] Actualize [ci skip] --- ChangeLog | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/ChangeLog b/ChangeLog index c33169cd8..cdf4dbc96 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,35 @@ +version + +Extractors +* [yourporn] Fix extraction (#21645, #22255, #23459) ++ [canvas] Add support for new API endpoint (#17680, #18629) +* [ndr:base:embed] Improve thumbnails extraction (#23731) ++ [vodplatform] Add support for embed.kwikmotion.com domain ++ [twitter] Add support for promo_video_website cards (#23711) +* [orf:radio] Clean description and improve extraction +* [orf:fm4] Fix extraction (#23599) +* [safari] Fix kaltura session extraction (#23679, #23670) +* [lego] Fix extraction and extract subtitle (#23687) +* [cloudflarestream] Improve extraction + + Add support for bytehighway.net domain + + Add support for signed URLs + + Extract thumbnail +* [naver] Improve extraction + * Improve geo-restriction handling + + Extract automatic captions + + Extract uploader metadata + + Extract VLive HLS formats + * Improve metadata extraction +- [pandatv] Remove extractor (#23630) +* [dctp] Fix format extraction (#23656) ++ [scrippsnetworks] Add support for www.discovery.com videos +* [discovery] Fix anonymous token extraction (#23650) +* [nrktv:seriebase] Fix extraction (#23625, #23537) +* [wistia] Improve format extraction and extract subtitles (#22590) +* [vice] Improve extraction (#23631) +* [redtube] Detect private videos (#23518) + + version 2020.01.01 Extractors From e8cf0dbdd8aa4f2dcd521d0bf7e7798e87867b52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 15 Jan 2020 01:37:29 +0700 Subject: [PATCH 65/70] release 2020.01.15 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 1 - youtube_dl/version.py | 2 +- 8 files changed, 14 insertions(+), 15 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 97b8afcf9..cf8e6e411 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2020.01.01** +- [ ] I've verified that I'm running youtube-dl version **2020.01.15** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.01.01 + [debug] youtube-dl version 2020.01.15 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index de6c44a65..babbda464 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2020.01.01** +- [ ] I've verified that I'm running youtube-dl version **2020.01.15** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index a9dd5ca52..5498983ff 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2020.01.01** +- [ ] I've verified that I'm running youtube-dl version **2020.01.15** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 8347903ea..d46735951 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2020.01.01** +- [ ] I've verified that I'm running youtube-dl version **2020.01.15** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.01.01 + [debug] youtube-dl version 2020.01.15 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 92228513c..748b64756 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2020.01.01** +- [ ] I've verified that I'm running youtube-dl version **2020.01.15** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index cdf4dbc96..cc7fc4323 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2020.01.15 Extractors * [yourporn] Fix extraction (#21645, #22255, #23459) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e471aa79a..e9a8cc27a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -628,7 +628,6 @@ - **OutsideTV** - **PacktPub** - **PacktPubCourse** - - **PandaTV**: 熊猫TV - **pandora.tv**: 판도라TV - **ParamountNetwork** - **parliamentlive.tv**: UK parliament videos diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8ad2df674..932b138a9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.01.01' +__version__ = '2020.01.15' From e4e5fa6e3c1c2ca2d48dfb5a8b1f734bd627b2dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 15 Jan 2020 04:13:10 +0700 Subject: [PATCH 66/70] [soundcloud] Restore previews extraction (closes #23739) --- youtube_dl/extractor/soundcloud.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 62e9d8643..a0b09f5b1 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -96,7 +96,7 @@ class SoundcloudIE(InfoExtractor): 'repost_count': int, } }, - # not streamable song + # not streamable song, preview { 'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep', 'info_dict': { @@ -119,7 +119,6 @@ class SoundcloudIE(InfoExtractor): # rtmp 'skip_download': True, }, - 'skip': 'Preview', }, # private link { @@ -346,9 +345,9 @@ class SoundcloudIE(InfoExtractor): }) def invalid_url(url): - return not url or url in format_urls or re.search(r'/(?:preview|playlist)/0/30/', url) + return not url or url in format_urls - def add_format(f, protocol): + def add_format(f, protocol, is_preview=False): mobj = re.search(r'\.(?P\d+)\.(?P[0-9a-z]{3,4})(?=[/?])', stream_url) if mobj: for k, v in mobj.groupdict().items(): @@ -361,12 +360,16 @@ class SoundcloudIE(InfoExtractor): v = f.get(k) if v: format_id_list.append(v) + preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url']) + if preview: + format_id_list.append('preview') abr = f.get('abr') if abr: f['abr'] = int(abr) f.update({ 'format_id': '_'.join(format_id_list), 'protocol': 'm3u8_native' if protocol == 'hls' else 'http', + 'preference': -10 if preview else None, }) formats.append(f) @@ -377,7 +380,7 @@ class SoundcloudIE(InfoExtractor): if not isinstance(t, dict): continue format_url = url_or_none(t.get('url')) - if not format_url or t.get('snipped') or '/preview/' in format_url: + if not format_url: continue stream = self._download_json( format_url, track_id, query=query, fatal=False) @@ -400,7 +403,8 @@ class SoundcloudIE(InfoExtractor): add_format({ 'url': stream_url, 'ext': ext, - }, 'http' if protocol == 'progressive' else protocol) + }, 'http' if protocol == 'progressive' else protocol, + t.get('snipped') or '/preview/' in format_url) if not formats: # Old API, does not work for some tracks (e.g. From 90ea83c64d904587992105fb4506e80f6abb28b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 15 Jan 2020 04:32:05 +0700 Subject: [PATCH 67/70] [orf:tvthek] Improve geo restricted videos detection (closes #23741) --- youtube_dl/extractor/orf.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 45fc745a3..d54b8ace6 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -90,8 +90,11 @@ class ORFTVthekIE(InfoExtractor): format_id = '-'.join(format_id_list) ext = determine_ext(src) if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - src, video_id, 'mp4', m3u8_id=format_id, fatal=False)) + m3u8_formats = self._extract_m3u8_formats( + src, video_id, 'mp4', m3u8_id=format_id, fatal=False) + if any('/geoprotection' in f['url'] for f in m3u8_formats): + self.raise_geo_restricted() + formats.extend(m3u8_formats) elif ext == 'f4m': formats.extend(self._extract_f4m_formats( src, video_id, f4m_id=format_id, fatal=False)) From a9866c0366bd6399b0f757527425466a3be4d128 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 15 Jan 2020 14:02:57 +0100 Subject: [PATCH 68/70] [zype] improve extraction - extract subtitles(closes #21258) - support URLs with alternative keys/tokens(#21258) - extract more metadata --- youtube_dl/extractor/generic.py | 3 + youtube_dl/extractor/trunews.py | 49 ++------------ youtube_dl/extractor/zype.py | 111 +++++++++++++++++++++++++++----- 3 files changed, 101 insertions(+), 62 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a4aef106f..3c002472f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2098,6 +2098,9 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'title': 'Smoky Barbecue Favorites', 'thumbnail': r're:^https?://.*\.jpe?g', + 'description': 'md5:5ff01e76316bd8d46508af26dc86023b', + 'upload_date': '20170909', + 'timestamp': 1504915200, }, 'add_ie': [ZypeIE.ie_key()], 'params': { diff --git a/youtube_dl/extractor/trunews.py b/youtube_dl/extractor/trunews.py index b0c7caabf..cca5b5ceb 100644 --- a/youtube_dl/extractor/trunews.py +++ b/youtube_dl/extractor/trunews.py @@ -1,21 +1,12 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - dict_get, - float_or_none, - int_or_none, - unified_timestamp, - update_url_query, - url_or_none, -) class TruNewsIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?trunews\.com/stream/(?P[^/?#&]+)' _TEST = { 'url': 'https://www.trunews.com/stream/will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech', - 'md5': 'a19c024c3906ff954fac9b96ce66bb08', 'info_dict': { 'id': '5c5a21e65d3c196e1c0020cc', 'display_id': 'will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech', @@ -28,48 +19,16 @@ class TruNewsIE(InfoExtractor): }, 'add_ie': ['Zype'], } + _ZYPE_TEMPL = 'https://player.zype.com/embed/%s.js?api_key=X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt' def _real_extract(self, url): display_id = self._match_id(url) - video = self._download_json( + zype_id = self._download_json( 'https://api.zype.com/videos', display_id, query={ 'app_key': 'PUVKp9WgGUb3-JUw6EqafLx8tFVP6VKZTWbUOR-HOm__g4fNDt1bCsm_LgYf_k9H', 'per_page': 1, 'active': 'true', 'friendly_title': display_id, - })['response'][0] - - zype_id = video['_id'] - - thumbnails = [] - thumbnails_list = video.get('thumbnails') - if isinstance(thumbnails_list, list): - for thumbnail in thumbnails_list: - if not isinstance(thumbnail, dict): - continue - thumbnail_url = url_or_none(thumbnail.get('url')) - if not thumbnail_url: - continue - thumbnails.append({ - 'url': thumbnail_url, - 'width': int_or_none(thumbnail.get('width')), - 'height': int_or_none(thumbnail.get('height')), - }) - - return { - '_type': 'url_transparent', - 'url': update_url_query( - 'https://player.zype.com/embed/%s.js' % zype_id, - {'api_key': 'X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt'}), - 'ie_key': 'Zype', - 'id': zype_id, - 'display_id': display_id, - 'title': video.get('title'), - 'description': dict_get(video, ('description', 'ott_description', 'short_description')), - 'duration': int_or_none(video.get('duration')), - 'timestamp': unified_timestamp(video.get('published_at')), - 'average_rating': float_or_none(video.get('rating')), - 'view_count': int_or_none(video.get('request_count')), - 'thumbnails': thumbnails, - } + })['response'][0]['_id'] + return self.url_result(self._ZYPE_TEMPL % zype_id, 'Zype', zype_id) diff --git a/youtube_dl/extractor/zype.py b/youtube_dl/extractor/zype.py index 3b16e703b..2e2e97a0c 100644 --- a/youtube_dl/extractor/zype.py +++ b/youtube_dl/extractor/zype.py @@ -4,10 +4,20 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ( + dict_get, + ExtractorError, + int_or_none, + js_to_json, + parse_iso8601, +) class ZypeIE(InfoExtractor): - _VALID_URL = r'https?://player\.zype\.com/embed/(?P[\da-fA-F]+)\.js\?.*?api_key=[^&]+' + _ID_RE = r'[\da-fA-F]+' + _COMMON_RE = r'//player\.zype\.com/embed/%s\.(?:js|json|html)\?.*?(?:access_token|(?:ap[ip]|player)_key)=' + _VALID_URL = r'https?:%s[^&]+' % (_COMMON_RE % ('(?P%s)' % _ID_RE)) _TEST = { 'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false', 'md5': 'eaee31d474c76a955bdaba02a505c595', @@ -16,6 +26,9 @@ class ZypeIE(InfoExtractor): 'ext': 'mp4', 'title': 'Smoky Barbecue Favorites', 'thumbnail': r're:^https?://.*\.jpe?g', + 'description': 'md5:5ff01e76316bd8d46508af26dc86023b', + 'timestamp': 1504915200, + 'upload_date': '20170909', }, } @@ -24,34 +37,98 @@ class ZypeIE(InfoExtractor): return [ mobj.group('url') for mobj in re.finditer( - r']+\bsrc=(["\'])(?P(?:https?:)?//player\.zype\.com/embed/[\da-fA-F]+\.js\?.*?api_key=.+?)\1', + r']+\bsrc=(["\'])(?P(?:https?:)?%s.+?)\1' % (ZypeIE._COMMON_RE % ZypeIE._ID_RE), webpage)] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + try: + response = self._download_json(re.sub( + r'\.(?:js|html)\?', '.json?', url), video_id)['response'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 403): + raise ExtractorError(self._parse_json( + e.cause.read().decode(), video_id)['message'], expected=True) + raise - title = self._search_regex( - r'video_title\s*[:=]\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, - 'title', group='value') + body = response['body'] + video = response['video'] + title = video['title'] - m3u8_url = self._search_regex( - r'(["\'])(?P(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1', webpage, - 'm3u8 url', group='url') - - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') + if isinstance(body, dict): + formats = [] + for output in body.get('outputs', []): + output_url = output.get('url') + if not output_url: + continue + name = output.get('name') + if name == 'm3u8': + formats = self._extract_m3u8_formats( + output_url, video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False) + else: + f = { + 'format_id': name, + 'tbr': int_or_none(output.get('bitrate')), + 'url': output_url, + } + if name in ('m4a', 'mp3'): + f['vcodec'] = 'none' + else: + f.update({ + 'height': int_or_none(output.get('height')), + 'width': int_or_none(output.get('width')), + }) + formats.append(f) + text_tracks = body.get('subtitles') or [] + else: + m3u8_url = self._search_regex( + r'(["\'])(?P(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1', + body, 'm3u8 url', group='url') + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') + text_tracks = self._search_regex( + r'textTracks\s*:\s*(\[[^]]+\])', + body, 'text tracks', default=None) + if text_tracks: + text_tracks = self._parse_json( + text_tracks, video_id, js_to_json, False) self._sort_formats(formats) - thumbnail = self._search_regex( - r'poster\s*[:=]\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, 'thumbnail', - default=False, group='url') + subtitles = {} + if text_tracks: + for text_track in text_tracks: + tt_url = dict_get(text_track, ('file', 'src')) + if not tt_url: + continue + subtitles.setdefault(text_track.get('label') or 'English', []).append({ + 'url': tt_url, + }) + + thumbnails = [] + for thumbnail in video.get('thumbnails', []): + thumbnail_url = thumbnail.get('url') + if not thumbnail_url: + continue + thumbnails.append({ + 'url': thumbnail_url, + 'width': int_or_none(thumbnail.get('width')), + 'height': int_or_none(thumbnail.get('height')), + }) return { 'id': video_id, + 'display_id': video.get('friendly_title'), 'title': title, - 'thumbnail': thumbnail, + 'thumbnails': thumbnails, + 'description': dict_get(video, ('description', 'ott_description', 'short_description')), + 'timestamp': parse_iso8601(video.get('published_at')), + 'duration': int_or_none(video.get('duration')), + 'view_count': int_or_none(video.get('request_count')), + 'average_rating': int_or_none(video.get('rating')), + 'season_number': int_or_none(video.get('season')), + 'episode_number': int_or_none(video.get('episode')), 'formats': formats, + 'subtitles': subtitles, } From 2c482bff7c91c364c55b74846a3ae416cf588df3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 15 Jan 2020 14:16:58 +0100 Subject: [PATCH 69/70] [americastestkitchen] fix extraction --- youtube_dl/extractor/americastestkitchen.py | 42 ++++++++------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/americastestkitchen.py b/youtube_dl/extractor/americastestkitchen.py index 8b32aa886..9c9d77ae1 100644 --- a/youtube_dl/extractor/americastestkitchen.py +++ b/youtube_dl/extractor/americastestkitchen.py @@ -5,6 +5,7 @@ from .common import InfoExtractor from ..utils import ( clean_html, int_or_none, + js_to_json, try_get, unified_strdate, ) @@ -13,22 +14,21 @@ from ..utils import ( class AmericasTestKitchenIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P\d+)' _TESTS = [{ - 'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party', + 'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers', 'md5': 'b861c3e365ac38ad319cfd509c30577f', 'info_dict': { - 'id': '1_5g5zua6e', - 'title': 'Summer Dinner Party', + 'id': '5b400b9ee338f922cb06450c', + 'title': 'Weeknight Japanese Suppers', 'ext': 'mp4', - 'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec', - 'thumbnail': r're:^https?://.*\.jpg', - 'timestamp': 1497285541, - 'upload_date': '20170612', - 'uploader_id': 'roger.metcalf@americastestkitchen.com', - 'release_date': '20170617', + 'description': 'md5:3d0c1a44bb3b27607ce82652db25b4a8', + 'thumbnail': r're:^https?://', + 'timestamp': 1523664000, + 'upload_date': '20180414', + 'release_date': '20180414', 'series': "America's Test Kitchen", - 'season_number': 17, - 'episode': 'Summer Dinner Party', - 'episode_number': 24, + 'season_number': 18, + 'episode': 'Weeknight Japanese Suppers', + 'episode_number': 15, }, 'params': { 'skip_download': True, @@ -47,7 +47,7 @@ class AmericasTestKitchenIE(InfoExtractor): self._search_regex( r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*', webpage, 'initial context'), - video_id) + video_id, js_to_json) ep_data = try_get( video_data, @@ -55,17 +55,7 @@ class AmericasTestKitchenIE(InfoExtractor): lambda x: x['videoDetail']['content']['data']), dict) ep_meta = ep_data.get('full_video', {}) - zype_id = ep_meta.get('zype_id') - if zype_id: - embed_url = 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id - ie_key = 'Zype' - else: - partner_id = self._search_regex( - r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)', - webpage, 'kaltura partner id') - external_id = ep_data.get('external_id') or ep_meta['external_id'] - embed_url = 'kaltura:%s:%s' % (partner_id, external_id) - ie_key = 'Kaltura' + zype_id = ep_data.get('zype_id') or ep_meta['zype_id'] title = ep_data.get('title') or ep_meta.get('title') description = clean_html(ep_meta.get('episode_description') or ep_data.get( @@ -79,8 +69,8 @@ class AmericasTestKitchenIE(InfoExtractor): return { '_type': 'url_transparent', - 'url': embed_url, - 'ie_key': ie_key, + 'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id, + 'ie_key': 'Zype', 'title': title, 'description': description, 'thumbnail': thumbnail, From 48ff5590c160b89e4596b706f2b33c69557063a0 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 16 Jan 2020 15:37:16 +0100 Subject: [PATCH 70/70] [nbc] add support for nbc multi network URLs(closes #23049) --- youtube_dl/extractor/nbc.py | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 5bc39d002..6f3cb3003 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -87,11 +87,25 @@ class NBCIE(AdobePassIE): def _real_extract(self, url): permalink, video_id = re.match(self._VALID_URL, url).groups() permalink = 'http' + compat_urllib_parse_unquote(permalink) - response = self._download_json( + video_data = self._download_json( 'https://friendship.nbc.co/v2/graphql', video_id, query={ - 'query': '''{ - page(name: "%s", platform: web, type: VIDEO, userId: "0") { - data { + 'query': '''query bonanzaPage( + $app: NBCUBrands! = nbc + $name: String! + $oneApp: Boolean + $platform: SupportedPlatforms! = web + $type: EntityPageType! = VIDEO + $userId: String! +) { + bonanzaPage( + app: $app + name: $name + oneApp: $oneApp + platform: $platform + type: $type + userId: $userId + ) { + metadata { ... on VideoPageData { description episodeNumber @@ -100,15 +114,20 @@ class NBCIE(AdobePassIE): mpxAccountId mpxGuid rating + resourceId seasonNumber secondaryTitle seriesShortTitle } } } -}''' % permalink, - }) - video_data = response['data']['page']['data'] +}''', + 'variables': json.dumps({ + 'name': permalink, + 'oneApp': True, + 'userId': '0', + }), + })['data']['bonanzaPage']['metadata'] query = { 'mbr': 'true', 'manifest': 'm3u', @@ -117,8 +136,8 @@ class NBCIE(AdobePassIE): title = video_data['secondaryTitle'] if video_data.get('locked'): resource = self._get_mvpd_resource( - 'nbcentertainment', title, video_id, - video_data.get('rating')) + video_data.get('resourceId') or 'nbcentertainment', + title, video_id, video_data.get('rating')) query['auth'] = self._extract_mvpd_auth( url, video_id, 'nbcentertainment', resource) theplatform_url = smuggle_url(update_url_query(