From fa4ac365f69cbd51e4c9801984ebea49a12825b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 17 Jan 2019 10:24:44 +0700 Subject: [PATCH 001/785] [youtube] Extend JS player signature function name regexes (closes #18890, closes #18891, closes #18893) --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 730935657..c8bf98b58 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1198,8 +1198,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): funcname = self._search_regex( (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(', - r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') From f53cecd796dbb698abbde6ac2f7f973dba78f8a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 17 Jan 2019 10:25:50 +0700 Subject: [PATCH 002/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index 13019bf2b..171034a75 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +version + +Extractors +* [youtube] Extend JS player signature function name regular expressions + (#18890, #18891, #18893) + + version 2019.01.16 Core From 29639b363ddab7903ceae096912a0227c8017533 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 17 Jan 2019 10:27:17 +0700 Subject: [PATCH 003/785] release 2019.01.17 --- .github/ISSUE_TEMPLATE.md | 6 +++--- 
ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 650f78511..841bca914 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.16*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.17*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.17** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.16 +[debug] youtube-dl version 2019.01.17 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 171034a75..902301765 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.01.17 
Extractors * [youtube] Extend JS player signature function name regular expressions diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c13f3a38a..ea3f62928 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.16' +__version__ = '2019.01.17' From 79fec976b0c250446ea9a9eb7323fb2045ee37fe Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 17 Jan 2019 09:44:08 +0100 Subject: [PATCH 004/785] [vimeo] fix extraction for password protected player URLs(closes #18889) --- youtube_dl/extractor/vimeo.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 5e15f060b..fd37f919b 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import base64 import json import re import itertools @@ -392,6 +393,22 @@ class VimeoIE(VimeoBaseInfoExtractor): 'skip_download': True, }, }, + { + 'url': 'http://player.vimeo.com/video/68375962', + 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7', + 'info_dict': { + 'id': '68375962', + 'ext': 'mp4', + 'title': 'youtube-dl password protected test video', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128', + 'uploader_id': 'user18948128', + 'uploader': 'Jaime Marquínez Ferrándiz', + 'duration': 10, + }, + 'params': { + 'videopassword': 'youtube-dl', + }, + }, { 'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741', 'only_matching': True, @@ -452,7 +469,9 @@ class VimeoIE(VimeoBaseInfoExtractor): password = self._downloader.params.get('videopassword') if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option') - data = urlencode_postdata({'password': password}) + data = urlencode_postdata({ + 'password': base64.b64encode(password.encode()), + }) pass_url = url + '/check-password' 
password_request = sanitized_Request(pass_url, data) password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') From e2dd132f054df5b6c09b7c274752a77d8ba44f8d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 17 Jan 2019 23:56:37 +0100 Subject: [PATCH 005/785] [cartoonnetwork] fix extraction(closes #15664)(closes #17224) --- youtube_dl/extractor/cartoonnetwork.py | 56 +++++++++++++++++--------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py index 6aeebd7b3..48b33617f 100644 --- a/youtube_dl/extractor/cartoonnetwork.py +++ b/youtube_dl/extractor/cartoonnetwork.py @@ -1,20 +1,19 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .turner import TurnerBaseIE +from ..utils import int_or_none class CartoonNetworkIE(TurnerBaseIE): _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P[^/?#]+)-(?:clip|episode)\.html' _TEST = { - 'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html', + 'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html', 'info_dict': { - 'id': '8a250ab04ed07e6c014ef3f1e2f9016c', + 'id': '6e3375097f63874ebccec7ef677c1c3845fa850e', 'ext': 'mp4', - 'title': 'Starfire the Cat Lady', - 'description': 'Robin decides to become a cat so that Starfire will finally love him.', + 'title': 'How to Draw Upgrade', + 'description': 'md5:2061d83776db7e8be4879684eefe8c0f', }, 'params': { # m3u8 download @@ -25,18 +24,39 @@ class CartoonNetworkIE(TurnerBaseIE): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - id_type, video_id = re.search(r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';", webpage).groups() - query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id - return self._extract_cvp_info( - 
'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, { - 'secure': { - 'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big', - 'tokenizer_src': 'https://token.vgtf.net/token/token_mobile', - }, - }, { + + def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False): + metadata_re = '' + if content_re: + metadata_re = r'|video_metadata\.content_' + content_re + return self._search_regex( + r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, value_re), + webpage, name, fatal=fatal) + + media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True) + title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True) + + info = self._extract_ngtv_info( + media_id, {'networkId': 'cartoonnetwork'}, { 'url': url, 'site_name': 'CartoonNetwork', - 'auth_required': self._search_regex( - r'_cnglobal\.cvpFullOrPreviewAuth\s*=\s*(true|false);', - webpage, 'auth required', default='false') == 'true', + 'auth_required': find_field('authType', 'auth type') != 'unauth', }) + + series = find_field( + 'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage) + info.update({ + 'id': media_id, + 'display_id': display_id, + 'title': title, + 'description': self._html_search_meta('description', webpage), + 'series': series, + 'episode': title, + }) + + for field in ('season', 'episode'): + field_name = field + 'Number' + info[field + '_number'] = int_or_none(find_field( + field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage)) + + return info From 2bfc1d9d68dec097fd8093dc0284dd0cd64beb2e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 19 Jan 2019 21:25:15 +0100 Subject: [PATCH 006/785] [extractor/common] imporove HLS video only format detection(closes #18923) --- youtube_dl/extractor/common.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git 
a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9e7febcad..af621b74b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1596,6 +1596,7 @@ class InfoExtractor(object): # References: # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21 # 2. https://github.com/rg3/youtube-dl/issues/12211 + # 3. https://github.com/rg3/youtube-dl/issues/18923 # We should try extracting formats only from master playlists [1, 4.3.4], # i.e. playlists that describe available qualities. On the other hand @@ -1667,11 +1668,16 @@ class InfoExtractor(object): rendition = stream_group[0] return rendition.get('NAME') or stream_group_id + # parse EXT-X-MEDIA tags before EXT-X-STREAM-INF inorder to have the + # chance to detect video only formats when EXT-X-STREAM-INF tags + # precede EXT-X-MEDIA tags in HLS manifest such as [3]. + for line in m3u8_doc.splitlines(): + if line.startswith('#EXT-X-MEDIA:'): + extract_media(line) + for line in m3u8_doc.splitlines(): if line.startswith('#EXT-X-STREAM-INF:'): last_stream_inf = parse_m3u8_attributes(line) - elif line.startswith('#EXT-X-MEDIA:'): - extract_media(line) elif line.startswith('#') or not line.strip(): continue else: From f28363ad1fb45b4450ae6f09ff9aff8f93227e40 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 19 Jan 2019 21:25:53 +0100 Subject: [PATCH 007/785] [ted] correct acodec for http formats(#18923) --- youtube_dl/extractor/ted.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index d3e4205f5..645942dfd 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -265,6 +265,8 @@ class TEDIE(InfoExtractor): 'format_id': m3u8_format['format_id'].replace('hls', 'http'), 'protocol': 'http', }) + if f.get('acodec') == 'none': + del f['acodec'] formats.append(f) audio_download = talk_info.get('audioDownload') From 379306ef55b64c966392c072b17a450831fec252 Mon Sep 17 00:00:00 2001 From: 
Remita Amine Date: Sat, 19 Jan 2019 21:35:02 +0100 Subject: [PATCH 008/785] [extractor/common] fix typo --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index af621b74b..6e36e6778 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1668,7 +1668,7 @@ class InfoExtractor(object): rendition = stream_group[0] return rendition.get('NAME') or stream_group_id - # parse EXT-X-MEDIA tags before EXT-X-STREAM-INF inorder to have the + # parse EXT-X-MEDIA tags before EXT-X-STREAM-INF in order to have the # chance to detect video only formats when EXT-X-STREAM-INF tags # precede EXT-X-MEDIA tags in HLS manifest such as [3]. for line in m3u8_doc.splitlines(): From 2cc779f497ae20d6e0e28fc546a25723cfea631a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Jan 2019 13:48:09 +0700 Subject: [PATCH 009/785] [YoutubeDL] Add negation support for string comparisons in format selection expressions (closes #18600, closes #18805) --- README.md | 3 ++- test/test_YoutubeDL.py | 46 +++++++++++++++++++++++++++++++++++++++++ youtube_dl/YoutubeDL.py | 9 +++++--- 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 70bcfaccf..886696015 100644 --- a/README.md +++ b/README.md @@ -667,13 +667,14 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, ` - `asr`: Audio sampling rate in Hertz - `fps`: Frame rate -Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begins with), `$=` (ends with), `*=` (contains) and following string meta fields: +Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and following string meta fields: - `ext`: File extension - `acodec`: Name of the audio codec in use - `vcodec`: Name of the video codec in use - `container`: Name of the container format - `protocol`: 
The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`) - `format_id`: A short description of the format +Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster. diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index f0f5a8470..df8994b84 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -239,6 +239,52 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot') + def test_format_selection_string_ops(self): + formats = [ + {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + # equals (=) + ydl = YDL({'format': '[format_id=abc-cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not equal (!=) + ydl = YDL({'format': '[format_id!=abc-cba]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + # starts with (^=) + ydl = YDL({'format': '[format_id^=abc]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not start with (!^=) + ydl = YDL({'format': '[format_id!^=abc-cba]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + # ends with ($=) + ydl = YDL({'format': '[format_id$=cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not end with (!$=) 
+ ydl = YDL({'format': '[format_id!$=abc-cba]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + # contains (*=) + ydl = YDL({'format': '[format_id*=-]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not contain (!*=) + ydl = YDL({'format': '[format_id!*=-]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + def test_youtube_format_selection(self): order = [ '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13', diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4493fd0e1..a827414dc 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1063,21 +1063,24 @@ class YoutubeDL(object): if not m: STR_OPERATORS = { '=': operator.eq, - '!=': operator.ne, '^=': lambda attr, value: attr.startswith(value), '$=': lambda attr, value: attr.endswith(value), '*=': lambda attr, value: value in attr, } str_operator_rex = re.compile(r'''(?x) \s*(?Pext|acodec|vcodec|container|protocol|format_id) - \s*(?P%s)(?P\s*\?)? + \s*(?P!\s*)?(?P%s)(?P\s*\?)? 
\s*(?P<value>[a-zA-Z0-9._-]+) \s*$ ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) m = str_operator_rex.search(filter_spec) if m: comparison_value = m.group('value') - op = STR_OPERATORS[m.group('op')] + str_op = STR_OPERATORS[m.group('op')] + if m.group('negation'): + op = lambda attr, value: not str_op(attr, value) + else: + op = str_op if not m: raise ValueError('Invalid filter specification %r' % filter_spec) From 4e58d9fabbfef80b2ecc0d20959bdf9dd3705e73 Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sun, 20 Jan 2019 14:23:35 +0700 Subject: [PATCH 010/785] [README.md] Fix formatting --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 886696015..4ba982907 100644 --- a/README.md +++ b/README.md @@ -674,6 +674,7 @@ Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends - `container`: Name of the container format - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`) - `format_id`: A short description of the format + Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster. 
From fc746c3fdd7d40935a11e72b5cb69a8aee840e94 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 20 Jan 2019 09:04:51 +0100 Subject: [PATCH 011/785] [test/test_InfoExtractor] add test for #18923 --- test/test_InfoExtractor.py | 59 ++++++++++++++++++++++++++++++- test/testdata/m3u8/ted_18923.m3u8 | 28 +++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 test/testdata/m3u8/ted_18923.m3u8 diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 06be72616..75fa0bbb7 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -497,7 +497,64 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'width': 1280, 'height': 720, }] - ) + ), + ( + # https://github.com/rg3/youtube-dl/issues/18923 + # https://www.ted.com/talks/boris_hesser_a_grassroots_healthcare_revolution_in_africa + 'ted_18923', + 'http://hls.ted.com/talks/31241.m3u8', + [{ + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '600k-Audio', + 'vcodec': 'none', + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '68', + 'vcodec': 'none', + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '163', + 'acodec': 'none', + 'width': 320, + 'height': 180, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '481', + 'acodec': 'none', + 'width': 512, + 'height': 288, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '769', + 'acodec': 'none', + 'width': 512, + 'height': 288, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '984', + 'acodec': 'none', + 'width': 512, + 'height': 288, + }, 
{ + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '1255', + 'acodec': 'none', + 'width': 640, + 'height': 360, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '1693', + 'acodec': 'none', + 'width': 853, + 'height': 480, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '2462', + 'acodec': 'none', + 'width': 1280, + 'height': 720, + }] + ), ] for m3u8_file, m3u8_url, expected_formats in _TEST_CASES: diff --git a/test/testdata/m3u8/ted_18923.m3u8 b/test/testdata/m3u8/ted_18923.m3u8 new file mode 100644 index 000000000..52a27118b --- /dev/null +++ b/test/testdata/m3u8/ted_18923.m3u8 @@ -0,0 +1,28 @@ +#EXTM3U +#EXT-X-VERSION:4 +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1255659,PROGRAM-ID=1,CODECS="avc1.42c01e,mp4a.40.2",RESOLUTION=640x360 +/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=163154,PROGRAM-ID=1,CODECS="avc1.42c00c,mp4a.40.2",RESOLUTION=320x180 +/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=481701,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288 +/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=769968,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288 +/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=984037,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288 +/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1693925,PROGRAM-ID=1,CODECS="avc1.4d401f,mp4a.40.2",RESOLUTION=853x480 +/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b 
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=2462469,PROGRAM-ID=1,CODECS="avc1.640028,mp4a.40.2",RESOLUTION=1280x720 +/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=68101,PROGRAM-ID=1,CODECS="mp4a.40.2",DEFAULT=YES +/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b + +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=74298,PROGRAM-ID=1,CODECS="avc1.42c00c",RESOLUTION=320x180,URI="/videos/BorisHesser_2018S/video/64k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=216200,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/180k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=304717,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/320k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=350933,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/450k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=495850,PROGRAM-ID=1,CODECS="avc1.42c01e",RESOLUTION=640x360,URI="/videos/BorisHesser_2018S/video/600k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=810750,PROGRAM-ID=1,CODECS="avc1.4d401f",RESOLUTION=853x480,URI="/videos/BorisHesser_2018S/video/950k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=1273700,PROGRAM-ID=1,CODECS="avc1.640028",RESOLUTION=1280x720,URI="/videos/BorisHesser_2018S/video/1500k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" + +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="600k",LANGUAGE="en",NAME="Audio",AUTOSELECT=YES,DEFAULT=YES,URI="/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b",BANDWIDTH=614400 From 15870747f02effba9376fd1a1500c32d36d1b811 Mon Sep 17 00:00:00 2001 From: aviperes Date: Sun, 20 Jan 2019 10:15:01 +0200 Subject: [PATCH 
012/785] [odnoklassniki] Detect paid videos --- youtube_dl/extractor/odnoklassniki.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 190d8af4d..114b93c07 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -115,6 +115,10 @@ class OdnoklassnikiIE(InfoExtractor): }, { 'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#', 'only_matching': True, + }, { + # Paid video + 'url': 'https://ok.ru/video/954886983203', + 'only_matching': True, }] def _real_extract(self, url): @@ -244,6 +248,11 @@ class OdnoklassnikiIE(InfoExtractor): 'ext': 'flv', }) + if not formats: + payment_info = metadata.get('paymentInfo') + if payment_info: + raise ExtractorError('This video is paid, subscribe to download it', expected=True) + self._sort_formats(formats) info['formats'] = formats From 31fbedc06a349bc555ab934588544f75734e3a55 Mon Sep 17 00:00:00 2001 From: jhwgh1968 Date: Sun, 20 Jan 2019 09:10:46 +0000 Subject: [PATCH 013/785] [instagram] Add base extractor for playlists and tag extractor --- youtube_dl/extractor/extractors.py | 6 +- youtube_dl/extractor/instagram.py | 136 +++++++++++++++++++++-------- 2 files changed, 105 insertions(+), 37 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index de38c6641..24361def4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -494,7 
+494,11 @@ from .ina import InaIE from .inc import IncIE from .indavideo import IndavideoEmbedIE from .infoq import InfoQIE -from .instagram import InstagramIE, InstagramUserIE +from .instagram import ( + InstagramIE, + InstagramUserIE, + InstagramTagIE, +) from .internazionale import InternazionaleIE from .internetvideoarchive import InternetVideoArchiveIE from .iprima import IPrimaIE diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 7e0e838f0..ffd87b55f 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -227,44 +227,37 @@ class InstagramIE(InfoExtractor): } -class InstagramUserIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P[^/]{2,})/?(?:$|[?#])' - IE_DESC = 'Instagram user profile' - IE_NAME = 'instagram:user' - _TEST = { - 'url': 'https://instagram.com/porsche', - 'info_dict': { - 'id': 'porsche', - 'title': 'porsche', - }, - 'playlist_count': 5, - 'params': { - 'extract_flat': True, - 'skip_download': True, - 'playlistend': 5, - } - } +class InstagramPlaylistIE(InfoExtractor): + # A superclass for handling any kind of query based on GraphQL which + # results in a playlist. - _gis_tmpl = None + _gis_tmpl = None # used to cache GIS request type - def _entries(self, data): + def _parse_graphql(self, webpage, item_id): + # Reads a webpage and returns its GraphQL data. + return self._parse_json( + self._search_regex( + r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'), + item_id) + + def _extract_graphql(self, data, url): + # Parses GraphQL queries containing videos and generates a playlist. 
def get_count(suffix): return int_or_none(try_get( node, lambda x: x['edge_media_' + suffix]['count'])) - uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id'] + uploader_id = self._match_id(url) csrf_token = data['config']['csrf_token'] rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8' - self._set_cookie('instagram.com', 'ig_pr', '1') - cursor = '' for page_num in itertools.count(1): - variables = json.dumps({ - 'id': uploader_id, + variables = { 'first': 12, 'after': cursor, - }) + } + variables.update(self._query_vars_for(data)) + variables = json.dumps(variables) if self._gis_tmpl: gis_tmpls = [self._gis_tmpl] @@ -276,21 +269,26 @@ class InstagramUserIE(InfoExtractor): '%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']), ] + # try all of the ways to generate a GIS query, and not only use the + # first one that works, but cache it for future requests for gis_tmpl in gis_tmpls: try: - media = self._download_json( + json_data = self._download_json( 'https://www.instagram.com/graphql/query/', uploader_id, 'Downloading JSON page %d' % page_num, headers={ 'X-Requested-With': 'XMLHttpRequest', 'X-Instagram-GIS': hashlib.md5( ('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest(), }, query={ - 'query_hash': '42323d64886122307be10013ad2dcc44', + 'query_hash': self._QUERY_HASH, 'variables': variables, - })['data']['user']['edge_owner_to_timeline_media'] + }) + media = self._parse_timeline_from(json_data) self._gis_tmpl = gis_tmpl break except ExtractorError as e: + # if it's an error caused by a bad query, and there are + # more GIS templates to try, ignore it and keep trying if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if gis_tmpl != gis_tmpls[-1]: continue @@ -348,14 +346,80 @@ class InstagramUserIE(InfoExtractor): break def _real_extract(self, url): - username = self._match_id(url) + user_or_tag = self._match_id(url) + webpage = self._download_webpage(url, user_or_tag) + data = 
self._parse_graphql(webpage, user_or_tag) - webpage = self._download_webpage(url, username) - - data = self._parse_json( - self._search_regex( - r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'), - username) + self._set_cookie('instagram.com', 'ig_pr', '1') return self.playlist_result( - self._entries(data), username, username) + self._extract_graphql(data, url), user_or_tag, user_or_tag) + + +class InstagramUserIE(InstagramPlaylistIE): + _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P[^/]{2,})/?(?:$|[?#])' + IE_DESC = 'Instagram user profile' + IE_NAME = 'instagram:user' + _TEST = { + 'url': 'https://instagram.com/porsche', + 'info_dict': { + 'id': 'porsche', + 'title': 'porsche', + }, + 'playlist_count': 5, + 'params': { + 'extract_flat': True, + 'skip_download': True, + 'playlistend': 5, + } + } + + _QUERY_HASH = '42323d64886122307be10013ad2dcc44', + + @staticmethod + def _parse_timeline_from(data): + # extracts the media timeline data from a GraphQL result + return data['data']['user']['edge_owner_to_timeline_media'] + + @staticmethod + def _query_vars_for(data): + # returns a dictionary of variables to add to the timeline query based + # on the GraphQL of the original page + return { + 'id': data['entry_data']['ProfilePage'][0]['graphql']['user']['id'] + } + + +class InstagramTagIE(InstagramPlaylistIE): + _VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P[^/]+)' + IE_DESC = 'Instagram hashtag search' + IE_NAME = 'instagram:tag' + _TEST = { + 'url': 'https://instagram.com/explore/tags/lolcats', + 'info_dict': { + 'id': 'lolcats', + 'title': 'lolcats', + }, + 'playlist_count': 50, + 'params': { + 'extract_flat': True, + 'skip_download': True, + 'playlistend': 50, + } + } + + _QUERY_HASH = 'f92f56d47dc7a55b606908374b43a314', + + @staticmethod + def _parse_timeline_from(data): + # extracts the media timeline data from a GraphQL result + return data['data']['hashtag']['edge_hashtag_to_media'] + + @staticmethod + def 
_query_vars_for(data): + # returns a dictionary of variables to add to the timeline query based + # on the GraphQL of the original page + return { + 'tag_name': + data['entry_data']['TagPage'][0]['graphql']['hashtag']['name'] + } From 6ca3fa898cd066422daea3d5be53efdae22187d8 Mon Sep 17 00:00:00 2001 From: yonaikerlol Date: Sun, 20 Jan 2019 05:24:21 -0400 Subject: [PATCH 014/785] [streamango] Add support for fruithosts.net --- youtube_dl/extractor/streamango.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/streamango.py b/youtube_dl/extractor/streamango.py index fcaa5ac0b..efb259f96 100644 --- a/youtube_dl/extractor/streamango.py +++ b/youtube_dl/extractor/streamango.py @@ -14,7 +14,7 @@ from ..utils import ( class StreamangoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f|embed)/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net)/(?:f|embed)/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4', 'md5': 'e992787515a182f55e38fc97588d802a', @@ -38,6 +38,9 @@ class StreamangoIE(InfoExtractor): }, { 'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4', 'only_matching': True, + }, { + 'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4', + 'only_matching': True, }] def _real_extract(self, url): From 289ef490f77cb35c43d98b9d2ea4cf529c24895e Mon Sep 17 00:00:00 2001 From: Anthony Fok Date: Sun, 30 Dec 2018 02:44:40 -0700 Subject: [PATCH 015/785] [hketv] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/hketv.py | 185 +++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+) create mode 100644 youtube_dl/extractor/hketv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 24361def4..574a47e6d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -452,6 
+452,7 @@ from .hellporno import HellPornoIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE from .hgtv import HGTVComShowIE +from .hketv import HKETVIE from .hidive import HiDiveIE from .historicfilms import HistoricFilmsIE from .hitbox import HitboxIE, HitboxLiveIE diff --git a/youtube_dl/extractor/hketv.py b/youtube_dl/extractor/hketv.py new file mode 100644 index 000000000..b5790cdee --- /dev/null +++ b/youtube_dl/extractor/hketv.py @@ -0,0 +1,185 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + clean_html, + ExtractorError, + int_or_none, + merge_dicts, + str_or_none, + str_to_int, + try_get, + unified_strdate, + urlencode_postdata, + urljoin, +) + + +class HKETVIE(InfoExtractor): + IE_NAME = 'hketv' + IE_DESC = '香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau' + _GEO_BYPASS = False + _GEO_COUNTRIES = ['HK'] + _VALID_URL = r'https?://(?:www\.)?hkedcity\.net/etv/resource/(?P[0-9]+)' + _TESTS = [{ + 'url': 'https://www.hkedcity.net/etv/resource/2932360618', + 'md5': 'f193712f5f7abb208ddef3c5ea6ed0b7', + 'info_dict': { + 'id': '2932360618', + 'ext': 'mp4', + 'title': '喜閱一生(共享閱讀樂) (中、英文字幕可供選擇)', + 'description': '本節目輯錄了「閱讀滿Fun嘉年華」和「二○一八響應世界閱讀日――悅愛閱讀・愈讀愈愛」的活動花絮,並由學者、作家、演藝界人士等,分享培養子女閱讀興趣和習慣的方法,以及呼籲大家一同分享閱讀的樂趣。', + 'upload_date': '20181024', + 'duration': 900, + 'subtitles': { + 'en': [{ + 'url': 'https://apps.hkedcity.net/media/mediaplayer/caption.php?f=74395&lang=en', + 'ext': 'srt', + }], + 'zh-Hant': [{ + 'url': 'https://apps.hkedcity.net/media/mediaplayer/caption.php?f=74395&lang=qmt', + 'ext': 'srt', + }], + } + }, + }, { + 'url': 'https://www.hkedcity.net/etv/resource/972641418', + 'md5': '1ed494c1c6cf7866a8290edad9b07dc9', + 'info_dict': { + 'id': '972641418', + 'ext': 'mp4', + 'title': '衣冠楚楚 (天使系列之一)', + 'description': 
'天國仙境,有兩位可愛的天使小姐妹。她們對幾千年來天使衣著一成不變頗有不滿。她們下望人世間:只見人們穿著七彩繽紛、款式各異的服裝,漂亮極了。天使姐妹決定下凡考察衣著,以設計天使新裝。 下到人間,姐妹試穿各式各樣的衣著,引發連串奇特有趣的情節:她們穿著校服在街上閒逛時,被女警誤認為逃學而送回學校,到校後又被體育老師誤認為是新同學,匆匆忙忙換上運動服後在操場上大顯神通。她們穿著護士服在醫院散步時,又被誤認為當班護士,而投入追尋失蹤病童、治病救人的工作中去。姐妹倆還到過玩具店,與布娃娃們談論衣著。她們也去過服裝設計學校,被當成小模特兒而試穿各式服裝。最令姐妹倆興奮的是一場盛大的民族服裝表演會。身穿盛裝的十二個民族的少女在台上翩翩起舞,各種服飾七彩繽紛、美不勝收。姐妹們情不自禁地穿上民族服裝,小天使變成了少數民族姑娘……最後天使姐妹回到天上,對於天使究竟穿甚麼樣的衣服好,她們還是拿不定主意。 節目通過天使姐妹的奇特經歷,反復示範各式衣服鞋襪的正確讀音及談論衣著時的常用句式,並以盛大的民族服裝表演活動,帶出有關服裝的文化知識。內容豐富而饒有趣味。', + 'upload_date': '20070109', + 'duration': 907, + 'subtitles': {}, + }, + }] + + _CC_LANGS = { + '中文(繁體中文)': 'zh-Hant', + '中文(简体中文)': 'zh-Hans', + 'English': 'en', + 'Bahasa Indonesia': 'id', + '\u0939\u093f\u0928\u094d\u0926\u0940': 'hi', + '\u0928\u0947\u092a\u093e\u0932\u0940': 'ne', + 'Tagalog': 'tl', + '\u0e44\u0e17\u0e22': 'th', + '\u0627\u0631\u062f\u0648': 'ur', + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + title = self._html_search_meta('ed_title', webpage, fatal=True) + + file_id = self._html_search_regex(r'post_var\["file_id"\]\s*=\s*(.+?);', webpage, 'file ID') + curr_url = self._html_search_regex(r'post_var\["curr_url"\]\s*=\s*"(.+?)";', webpage, 'curr URL') + data = { + 'action': 'get_info', + 'curr_url': curr_url, + 'file_id': file_id, + 'video_url': file_id, + } + _APPS_BASE_URL = 'https://apps.hkedcity.net' + handler_url = _APPS_BASE_URL + '/media/play/handler.php' + + response = self._download_json( + handler_url, video_id, + data=urlencode_postdata(data), + headers=merge_dicts({'Content-Type': 'application/x-www-form-urlencoded'}, + self.geo_verification_headers())) + + result = response['result'] + + formats = [] + subtitles = {} + + if response.get('success') and response.get('access'): + width = int_or_none(result.get('width')) + height = int_or_none(result.get('height')) + + playlist0 = try_get(result, lambda x: x['playlist'][0], dict) + fmts = playlist0.get('sources') + for fmt in fmts: + 
file_path = fmt.get('file') + if file_path: + file_url = urljoin(_APPS_BASE_URL, file_path) + # If we ever wanted to provide the final resolved URL that + # does not require cookies, albeit with a shorter lifespan: + # urlh = self._downloader.urlopen(file_url) + # resolved_url = urlh.geturl() + + label = fmt.get('label') + w = None + h = None + if label == 'HD': + h = 720 + elif label == 'SD': + h = 360 + if h: + if width and height: + w = h * width // height + else: + w = h * 4 // 3 + + formats.append({ + 'format_id': label, + 'ext': fmt.get('type'), + 'url': file_url, + 'width': w, + 'height': h, + }) + + tracks = playlist0.get('tracks', []) + for track in tracks: + if not isinstance(track, dict): + continue + track_kind = str_or_none(track.get('kind')) + if not track_kind or not isinstance(track_kind, compat_str): + continue + if track_kind.lower() not in ('captions', 'subtitles'): + continue + track_url = urljoin(_APPS_BASE_URL, track.get('file')) + if not track_url: + continue + track_label = track.get('label') + subtitles.setdefault(self._CC_LANGS.get(track_label, track_label), []).append({ + 'url': self._proto_relative_url(track_url), + 'ext': 'srt', + }) + + else: + error = clean_html(response.get('access_err_msg')) + if 'Video streaming is not available in your country' in error: + self.raise_geo_restricted(msg=error, countries=self._GEO_COUNTRIES) + else: + raise ExtractorError(error) + + # Likes + emotion = self._download_json( + 'https://emocounter.hkedcity.net/handler.php', + video_id, + data=urlencode_postdata({ + 'action': 'get_emotion', + 'data[bucket_id]': 'etv', + 'data[identifier]': video_id, + }), + headers={'Content-Type': 'application/x-www-form-urlencoded'}, + fatal=False) + like_count = int_or_none(try_get(emotion, lambda x: x['data']['emotion_data'][0]['count'])) + + return { + 'id': video_id, + 'title': title, + 'description': self._html_search_meta('description', webpage, fatal=False), + 'upload_date': 
unified_strdate(self._html_search_meta('ed_date', webpage, fatal=False), day_first=False), + 'duration': int_or_none(result.get('length')), + 'formats': formats, + 'subtitles': subtitles, + 'thumbnail': urljoin(_APPS_BASE_URL, result.get('image')), + 'view_count': str_to_int(result.get('view_count')), + 'like_count': like_count, + } From 73c19aaa9f74e9383a7aaf0dfb3c608727d5b6b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Jan 2019 17:42:25 +0700 Subject: [PATCH 016/785] [hketv] Improve and simplify (closes #18696) --- youtube_dl/extractor/hketv.py | 180 ++++++++++++++++++---------------- 1 file changed, 93 insertions(+), 87 deletions(-) diff --git a/youtube_dl/extractor/hketv.py b/youtube_dl/extractor/hketv.py index b5790cdee..b57927fc1 100644 --- a/youtube_dl/extractor/hketv.py +++ b/youtube_dl/extractor/hketv.py @@ -8,8 +8,8 @@ from ..utils import ( ExtractorError, int_or_none, merge_dicts, + parse_count, str_or_none, - str_to_int, try_get, unified_strdate, urlencode_postdata, @@ -30,20 +30,12 @@ class HKETVIE(InfoExtractor): 'id': '2932360618', 'ext': 'mp4', 'title': '喜閱一生(共享閱讀樂) (中、英文字幕可供選擇)', - 'description': '本節目輯錄了「閱讀滿Fun嘉年華」和「二○一八響應世界閱讀日――悅愛閱讀・愈讀愈愛」的活動花絮,並由學者、作家、演藝界人士等,分享培養子女閱讀興趣和習慣的方法,以及呼籲大家一同分享閱讀的樂趣。', + 'description': 'md5:d5286d05219ef50e0613311cbe96e560', 'upload_date': '20181024', 'duration': 900, - 'subtitles': { - 'en': [{ - 'url': 'https://apps.hkedcity.net/media/mediaplayer/caption.php?f=74395&lang=en', - 'ext': 'srt', - }], - 'zh-Hant': [{ - 'url': 'https://apps.hkedcity.net/media/mediaplayer/caption.php?f=74395&lang=qmt', - 'ext': 'srt', - }], - } + 'subtitles': 'count:2', }, + 'skip': 'Geo restricted to HK', }, { 'url': 'https://www.hkedcity.net/etv/resource/972641418', 'md5': '1ed494c1c6cf7866a8290edad9b07dc9', @@ -51,11 +43,15 @@ class HKETVIE(InfoExtractor): 'id': '972641418', 'ext': 'mp4', 'title': '衣冠楚楚 (天使系列之一)', - 'description': 
'天國仙境,有兩位可愛的天使小姐妹。她們對幾千年來天使衣著一成不變頗有不滿。她們下望人世間:只見人們穿著七彩繽紛、款式各異的服裝,漂亮極了。天使姐妹決定下凡考察衣著,以設計天使新裝。 下到人間,姐妹試穿各式各樣的衣著,引發連串奇特有趣的情節:她們穿著校服在街上閒逛時,被女警誤認為逃學而送回學校,到校後又被體育老師誤認為是新同學,匆匆忙忙換上運動服後在操場上大顯神通。她們穿著護士服在醫院散步時,又被誤認為當班護士,而投入追尋失蹤病童、治病救人的工作中去。姐妹倆還到過玩具店,與布娃娃們談論衣著。她們也去過服裝設計學校,被當成小模特兒而試穿各式服裝。最令姐妹倆興奮的是一場盛大的民族服裝表演會。身穿盛裝的十二個民族的少女在台上翩翩起舞,各種服飾七彩繽紛、美不勝收。姐妹們情不自禁地穿上民族服裝,小天使變成了少數民族姑娘……最後天使姐妹回到天上,對於天使究竟穿甚麼樣的衣服好,她們還是拿不定主意。 節目通過天使姐妹的奇特經歷,反復示範各式衣服鞋襪的正確讀音及談論衣著時的常用句式,並以盛大的民族服裝表演活動,帶出有關服裝的文化知識。內容豐富而饒有趣味。', + 'description': 'md5:10bb3d659421e74f58e5db5691627b0f', 'upload_date': '20070109', 'duration': 907, 'subtitles': {}, }, + 'params': { + 'geo_verification_proxy': '', + }, + 'skip': 'Geo restricted to HK', }] _CC_LANGS = { @@ -69,117 +65,127 @@ class HKETVIE(InfoExtractor): '\u0e44\u0e17\u0e22': 'th', '\u0627\u0631\u062f\u0648': 'ur', } + _FORMAT_HEIGHTS = { + 'SD': 360, + 'HD': 720, + } + _APPS_BASE_URL = 'https://apps.hkedcity.net' def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_meta('ed_title', webpage, fatal=True) - file_id = self._html_search_regex(r'post_var\["file_id"\]\s*=\s*(.+?);', webpage, 'file ID') - curr_url = self._html_search_regex(r'post_var\["curr_url"\]\s*=\s*"(.+?)";', webpage, 'curr URL') + title = ( + self._html_search_meta( + ('ed_title', 'search.ed_title'), webpage, default=None) or + self._search_regex( + r'data-favorite_title_(?:eng|chi)=(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'title', default=None, group='url') or + self._html_search_regex( + r'

([^<]+)

', webpage, 'title', default=None) or + self._og_search_title(webpage) + ) + + file_id = self._search_regex( + r'post_var\[["\']file_id["\']\s*\]\s*=\s*(.+?);', + webpage, 'file ID') + curr_url = self._search_regex( + r'post_var\[["\']curr_url["\']\s*\]\s*=\s*"(.+?)";', + webpage, 'curr URL') data = { 'action': 'get_info', 'curr_url': curr_url, 'file_id': file_id, 'video_url': file_id, } - _APPS_BASE_URL = 'https://apps.hkedcity.net' - handler_url = _APPS_BASE_URL + '/media/play/handler.php' response = self._download_json( - handler_url, video_id, + self._APPS_BASE_URL + '/media/play/handler.php', video_id, data=urlencode_postdata(data), - headers=merge_dicts({'Content-Type': 'application/x-www-form-urlencoded'}, - self.geo_verification_headers())) + headers=merge_dicts({ + 'Content-Type': 'application/x-www-form-urlencoded'}, + self.geo_verification_headers())) result = response['result'] + if not response.get('success') or not response.get('access'): + error = clean_html(response.get('access_err_msg')) + if 'Video streaming is not available in your country' in error: + self.raise_geo_restricted( + msg=error, countries=self._GEO_COUNTRIES) + else: + raise ExtractorError(error, expected=True) + formats = [] + + width = int_or_none(result.get('width')) + height = int_or_none(result.get('height')) + + playlist0 = result['playlist'][0] + for fmt in playlist0['sources']: + file_url = urljoin(self._APPS_BASE_URL, fmt.get('file')) + if not file_url: + continue + # If we ever wanted to provide the final resolved URL that + # does not require cookies, albeit with a shorter lifespan: + # urlh = self._downloader.urlopen(file_url) + # resolved_url = urlh.geturl() + label = fmt.get('label') + h = self._FORMAT_HEIGHTS.get(label) + w = h * width // height if h and width and height else None + formats.append({ + 'format_id': label, + 'ext': fmt.get('type'), + 'url': file_url, + 'width': w, + 'height': h, + }) + self._sort_formats(formats) + subtitles = {} - - if 
response.get('success') and response.get('access'): - width = int_or_none(result.get('width')) - height = int_or_none(result.get('height')) - - playlist0 = try_get(result, lambda x: x['playlist'][0], dict) - fmts = playlist0.get('sources') - for fmt in fmts: - file_path = fmt.get('file') - if file_path: - file_url = urljoin(_APPS_BASE_URL, file_path) - # If we ever wanted to provide the final resolved URL that - # does not require cookies, albeit with a shorter lifespan: - # urlh = self._downloader.urlopen(file_url) - # resolved_url = urlh.geturl() - - label = fmt.get('label') - w = None - h = None - if label == 'HD': - h = 720 - elif label == 'SD': - h = 360 - if h: - if width and height: - w = h * width // height - else: - w = h * 4 // 3 - - formats.append({ - 'format_id': label, - 'ext': fmt.get('type'), - 'url': file_url, - 'width': w, - 'height': h, - }) - - tracks = playlist0.get('tracks', []) - for track in tracks: - if not isinstance(track, dict): - continue - track_kind = str_or_none(track.get('kind')) - if not track_kind or not isinstance(track_kind, compat_str): - continue - if track_kind.lower() not in ('captions', 'subtitles'): - continue - track_url = urljoin(_APPS_BASE_URL, track.get('file')) - if not track_url: - continue - track_label = track.get('label') - subtitles.setdefault(self._CC_LANGS.get(track_label, track_label), []).append({ + tracks = try_get(playlist0, lambda x: x['tracks'], list) or [] + for track in tracks: + if not isinstance(track, dict): + continue + track_kind = str_or_none(track.get('kind')) + if not track_kind or not isinstance(track_kind, compat_str): + continue + if track_kind.lower() not in ('captions', 'subtitles'): + continue + track_url = urljoin(self._APPS_BASE_URL, track.get('file')) + if not track_url: + continue + track_label = track.get('label') + subtitles.setdefault(self._CC_LANGS.get( + track_label, track_label), []).append({ 'url': self._proto_relative_url(track_url), 'ext': 'srt', }) - else: - error = 
clean_html(response.get('access_err_msg')) - if 'Video streaming is not available in your country' in error: - self.raise_geo_restricted(msg=error, countries=self._GEO_COUNTRIES) - else: - raise ExtractorError(error) - # Likes emotion = self._download_json( - 'https://emocounter.hkedcity.net/handler.php', - video_id, + 'https://emocounter.hkedcity.net/handler.php', video_id, data=urlencode_postdata({ 'action': 'get_emotion', 'data[bucket_id]': 'etv', 'data[identifier]': video_id, }), headers={'Content-Type': 'application/x-www-form-urlencoded'}, - fatal=False) - like_count = int_or_none(try_get(emotion, lambda x: x['data']['emotion_data'][0]['count'])) + fatal=False) or {} + like_count = int_or_none(try_get( + emotion, lambda x: x['data']['emotion_data'][0]['count'])) return { 'id': video_id, 'title': title, - 'description': self._html_search_meta('description', webpage, fatal=False), - 'upload_date': unified_strdate(self._html_search_meta('ed_date', webpage, fatal=False), day_first=False), + 'description': self._html_search_meta( + 'description', webpage, fatal=False), + 'upload_date': unified_strdate(self._html_search_meta( + 'ed_date', webpage, fatal=False), day_first=False), 'duration': int_or_none(result.get('length')), 'formats': formats, 'subtitles': subtitles, - 'thumbnail': urljoin(_APPS_BASE_URL, result.get('image')), - 'view_count': str_to_int(result.get('view_count')), + 'thumbnail': urljoin(self._APPS_BASE_URL, result.get('image')), + 'view_count': parse_count(result.get('view_count')), 'like_count': like_count, } From a1a460759815414c6194bc921ac77a5533b6e02e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Jan 2019 18:21:31 +0700 Subject: [PATCH 017/785] [vimeo] Fix video password verification for videos protected by Referer HTTP header --- youtube_dl/extractor/vimeo.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py 
index fd37f919b..6215b3258 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -435,6 +435,8 @@ class VimeoIE(VimeoBaseInfoExtractor): 'url': 'https://vimeo.com/160743502/abd0e13fb4', 'only_matching': True, } + # https://gettingthingsdone.com/workflowmap/ + # vimeo embed with check-password page protected by Referer header ] @staticmethod @@ -465,20 +467,22 @@ class VimeoIE(VimeoBaseInfoExtractor): urls = VimeoIE._extract_urls(url, webpage) return urls[0] if urls else None - def _verify_player_video_password(self, url, video_id): + def _verify_player_video_password(self, url, video_id, headers): password = self._downloader.params.get('videopassword') if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option') data = urlencode_postdata({ 'password': base64.b64encode(password.encode()), }) - pass_url = url + '/check-password' - password_request = sanitized_Request(pass_url, data) - password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') - password_request.add_header('Referer', url) - return self._download_json( - password_request, video_id, - 'Verifying the password', 'Wrong password') + headers = merge_dicts(headers, { + 'Content-Type': 'application/x-www-form-urlencoded', + }) + checked = self._download_json( + url + '/check-password', video_id, + 'Verifying the password', data=data, headers=headers) + if checked is False: + raise ExtractorError('Wrong video password', expected=True) + return checked def _real_initialize(self): self._login() @@ -591,7 +595,7 @@ class VimeoIE(VimeoBaseInfoExtractor): cause=e) else: if config.get('view') == 4: - config = self._verify_player_video_password(redirect_url, video_id) + config = self._verify_player_video_password(redirect_url, video_id, headers) vod = config.get('video', {}).get('vod', {}) From 29cfcb43da8ac60e6c2eddad095a41c800d4d95a Mon Sep 17 00:00:00 2001 From: Alexandre Huot Date: Sun, 20 Jan 2019 06:33:09 
-0500 Subject: [PATCH 018/785] [radiocanada] Relax DRM check --- youtube_dl/extractor/radiocanada.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index b952e59b4..302f67d96 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -49,6 +49,16 @@ class RadioCanadaIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + }, + { + # with protectionType but not actually DRM protected + 'url': 'radiocanada:toutv:140872', + 'info_dict': { + 'id': '140872', + 'title': 'Épisode 1', + 'series': 'District 31', + }, + 'only_matching': True, } ] @@ -67,8 +77,10 @@ class RadioCanadaIE(InfoExtractor): el = find_xpath_attr(metadata, './/Meta', 'name', name) return el.text if el is not None else None + # protectionType does not necessarily mean the video is DRM protected (see + # https://github.com/rg3/youtube-dl/pull/18609). if get_meta('protectionType'): - raise ExtractorError('This video is DRM protected.', expected=True) + self.report_warning('This video is probably DRM protected.') device_types = ['ipad'] if not smuggled_data: From 6945b9e78f38284eb4e440b7badea2fc60b66c2f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 20 Jan 2019 13:31:41 +0100 Subject: [PATCH 019/785] [extractor/common] improve jwplayer relative url handling(closes #18892) --- youtube_dl/extractor/common.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6e36e6778..95456b291 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2630,7 +2630,7 @@ class InfoExtractor(object): 'id': this_video_id, 'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')), 'description': video_data.get('description'), - 'thumbnail': self._proto_relative_url(video_data.get('image')), + 'thumbnail': 
urljoin(base_url, self._proto_relative_url(video_data.get('image'))), 'timestamp': int_or_none(video_data.get('pubdate')), 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), 'subtitles': subtitles, @@ -2657,12 +2657,9 @@ class InfoExtractor(object): for source in jwplayer_sources_data: if not isinstance(source, dict): continue - source_url = self._proto_relative_url(source.get('file')) - if not source_url: - continue - if base_url: - source_url = compat_urlparse.urljoin(base_url, source_url) - if source_url in urls: + source_url = urljoin( + base_url, self._proto_relative_url(source.get('file'))) + if not source_url or source_url in urls: continue urls.append(source_url) source_type = source.get('type') or '' From fad4ceb53404227f471af2f3544c4c14a5df4acb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Jan 2019 20:21:24 +0700 Subject: [PATCH 020/785] [utils] Fix urljoin for paths with non-http(s) schemes --- test/test_utils.py | 2 ++ youtube_dl/utils.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 9e28e008f..409482c3b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -507,6 +507,8 @@ class TestUtil(unittest.TestCase): self.assertEqual(urljoin('http://foo.de/', ''), None) self.assertEqual(urljoin('http://foo.de/', ['foobar']), None) self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt') + self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de') + self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de') def test_url_or_none(self): self.assertEqual(url_or_none(None), None) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d2d3c1a9f..d0cb65814 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1868,7 +1868,7 @@ def urljoin(base, path): path = path.decode('utf-8') if not isinstance(path, compat_str) or not path: return None - if 
re.match(r'^(?:https?:)?//', path): + if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path): return path if isinstance(base, bytes): base = base.decode('utf-8') From 07f9febc4b86a9cd819329f3a7daafdbe9455f40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Jan 2019 22:07:01 +0700 Subject: [PATCH 021/785] [tnaflix] Pass Referer in metadata request (closes #18925) --- youtube_dl/extractor/tnaflix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 6798ef4c3..b3573c6e0 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -96,7 +96,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor): cfg_xml = self._download_xml( cfg_url, display_id, 'Downloading metadata', - transform_source=fix_xml_ampersands) + transform_source=fix_xml_ampersands, headers={'Referer': url}) formats = [] From 19d6991312405f5af108af28b3721966720fc72d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 22 Jan 2019 03:03:53 +0700 Subject: [PATCH 022/785] [videomore] Improve extraction and fix season extractor (closes #18908) --- youtube_dl/extractor/videomore.py | 96 ++++++++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py index 9b56630de..e3eda3327 100644 --- a/youtube_dl/extractor/videomore.py +++ b/youtube_dl/extractor/videomore.py @@ -4,8 +4,14 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( int_or_none, + orderedSet, + parse_duration, + str_or_none, + unified_strdate, + url_or_none, xpath_element, xpath_text, ) @@ -13,7 +19,19 @@ from ..utils import ( class VideomoreIE(InfoExtractor): IE_NAME = 'videomore' - _VALID_URL = 
r'videomore:(?P\d+)$|https?://videomore\.ru/(?:(?:embed|[^/]+/[^/]+)/|[^/]+\?.*\btrack_id=)(?P\d+)(?:[/?#&]|\.(?:xml|json)|$)' + _VALID_URL = r'''(?x) + videomore:(?P\d+)$| + https?://(?:player\.)?videomore\.ru/ + (?: + (?: + embed| + [^/]+/[^/]+ + )/| + [^/]*\?.*?\btrack_id= + ) + (?P\d+) + (?:[/?#&]|\.(?:xml|json)|$) + ''' _TESTS = [{ 'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617', 'md5': '44455a346edc0d509ac5b5a5b531dc35', @@ -79,6 +97,9 @@ class VideomoreIE(InfoExtractor): }, { 'url': 'videomore:367617', 'only_matching': True, + }, { + 'url': 'https://player.videomore.ru/?partner_id=97&track_id=736234&autoplay=0&userToken=', + 'only_matching': True, }] @staticmethod @@ -136,7 +157,7 @@ class VideomoreIE(InfoExtractor): class VideomoreVideoIE(InfoExtractor): IE_NAME = 'videomore:video' - _VALID_URL = r'https?://videomore\.ru/(?:(?:[^/]+/){2})?(?P[^/?#&]+)[/?#&]*$' + _VALID_URL = r'https?://videomore\.ru/(?:(?:[^/]+/){2})?(?P[^/?#&]+)(?:/*|[?#&].*?)$' _TESTS = [{ # single video with og:video:iframe 'url': 'http://videomore.ru/elki_3', @@ -176,6 +197,9 @@ class VideomoreVideoIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://videomore.ru/molodezhka/6_sezon/29_seriya?utm_so', + 'only_matching': True, }] @classmethod @@ -196,13 +220,16 @@ class VideomoreVideoIE(InfoExtractor): r'track-id=["\'](\d+)', r'xcnt_product_id\s*=\s*(\d+)'), webpage, 'video id') video_url = 'videomore:%s' % video_id + else: + video_id = None - return self.url_result(video_url, VideomoreIE.ie_key()) + return self.url_result( + video_url, ie=VideomoreIE.ie_key(), video_id=video_id) class VideomoreSeasonIE(InfoExtractor): IE_NAME = 'videomore:season' - _VALID_URL = r'https?://videomore\.ru/(?!embed)(?P[^/]+/[^/?#&]+)[/?#&]*$' + _VALID_URL = r'https?://videomore\.ru/(?!embed)(?P[^/]+/[^/?#&]+)(?:/*|[?#&].*?)$' _TESTS = [{ 'url': 'http://videomore.ru/molodezhka/sezon_promo', 'info_dict': { @@ -210,8 +237,16 @@ class VideomoreSeasonIE(InfoExtractor): 
'title': 'Молодежка Промо', }, 'playlist_mincount': 12, + }, { + 'url': 'http://videomore.ru/molodezhka/sezon_promo?utm_so', + 'only_matching': True, }] + @classmethod + def suitable(cls, url): + return (False if (VideomoreIE.suitable(url) or VideomoreVideoIE.suitable(url)) + else super(VideomoreSeasonIE, cls).suitable(url)) + def _real_extract(self, url): display_id = self._match_id(url) @@ -219,9 +254,54 @@ class VideomoreSeasonIE(InfoExtractor): title = self._og_search_title(webpage) - entries = [ - self.url_result(item) for item in re.findall( - r']+href="((?:https?:)?//videomore\.ru/%s/[^/]+)"[^>]+class="widget-item-desc"' - % display_id, webpage)] + data = self._parse_json( + self._html_search_regex( + r'\bclass=["\']seasons-tracks["\'][^>]+\bdata-custom-data=(["\'])(?P{.+?})\1', + webpage, 'data', default='{}', group='value'), + display_id, fatal=False) + + entries = [] + + if data: + episodes = data.get('episodes') + if isinstance(episodes, list): + for ep in episodes: + if not isinstance(ep, dict): + continue + ep_id = int_or_none(ep.get('id')) + ep_url = url_or_none(ep.get('url')) + if ep_id: + e = { + 'url': 'videomore:%s' % ep_id, + 'id': compat_str(ep_id), + } + elif ep_url: + e = {'url': ep_url} + else: + continue + e.update({ + '_type': 'url', + 'ie_key': VideomoreIE.ie_key(), + 'title': str_or_none(ep.get('title')), + 'thumbnail': url_or_none(ep.get('image')), + 'duration': parse_duration(ep.get('duration')), + 'episode_number': int_or_none(ep.get('number')), + 'upload_date': unified_strdate(ep.get('date')), + }) + entries.append(e) + + if not entries: + entries = [ + self.url_result( + 'videomore:%s' % video_id, ie=VideomoreIE.ie_key(), + video_id=video_id) + for video_id in orderedSet(re.findall( + r':(?:id|key)=["\'](\d+)["\']', webpage))] + + if not entries: + entries = [ + self.url_result(item) for item in re.findall( + r']+href="((?:https?:)?//videomore\.ru/%s/[^/]+)"[^>]+class="widget-item-desc"' + % display_id, webpage)] return 
self.playlist_result(entries, display_id, title) From 4b85f0f9db9329ef1774a66c3e2fd4da558a5201 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 22 Jan 2019 14:38:29 +0100 Subject: [PATCH 023/785] [vrv] add support for authentication(closes #14307) --- youtube_dl/extractor/vrv.py | 95 ++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index 483a3be3a..014513051 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -11,10 +11,12 @@ import time from .common import InfoExtractor from ..compat import ( + compat_HTTPError, compat_urllib_parse_urlencode, compat_urllib_parse, ) from ..utils import ( + ExtractorError, float_or_none, int_or_none, ) @@ -24,29 +26,41 @@ class VRVBaseIE(InfoExtractor): _API_DOMAIN = None _API_PARAMS = {} _CMS_SIGNING = {} + _TOKEN = None + _TOKEN_SECRET = '' def _call_api(self, path, video_id, note, data=None): + # https://tools.ietf.org/html/rfc5849#section-3 base_url = self._API_DOMAIN + '/core/' + path - encoded_query = compat_urllib_parse_urlencode({ + query = { 'oauth_consumer_key': self._API_PARAMS['oAuthKey'], 'oauth_nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), 'oauth_signature_method': 'HMAC-SHA1', 'oauth_timestamp': int(time.time()), - 'oauth_version': '1.0', - }) + } + if self._TOKEN: + query['oauth_token'] = self._TOKEN + encoded_query = compat_urllib_parse_urlencode(query) headers = self.geo_verification_headers() if data: data = json.dumps(data).encode() headers['Content-Type'] = 'application/json' - method = 'POST' if data else 'GET' - base_string = '&'.join([method, compat_urllib_parse.quote(base_url, ''), compat_urllib_parse.quote(encoded_query, '')]) + base_string = '&'.join([ + 'POST' if data else 'GET', + compat_urllib_parse.quote(base_url, ''), + compat_urllib_parse.quote(encoded_query, '')]) oauth_signature = base64.b64encode(hmac.new( - 
(self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'), + (self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'), base_string.encode(), hashlib.sha1).digest()).decode() encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '') - return self._download_json( - '?'.join([base_url, encoded_query]), video_id, - note='Downloading %s JSON metadata' % note, headers=headers, data=data) + try: + return self._download_json( + '?'.join([base_url, encoded_query]), video_id, + note='Downloading %s JSON metadata' % note, headers=headers, data=data) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + raise ExtractorError(json.loads(e.cause.read().decode())['message'], expected=True) + raise def _call_cms(self, path, video_id, note): if not self._CMS_SIGNING: @@ -55,19 +69,22 @@ class VRVBaseIE(InfoExtractor): self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING, note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers()) - def _set_api_params(self, webpage, video_id): - if not self._API_PARAMS: - self._API_PARAMS = self._parse_json(self._search_regex( - r'window\.__APP_CONFIG__\s*=\s*({.+?})', - webpage, 'api config'), video_id)['cxApiParams'] - self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co') - def _get_cms_resource(self, resource_key, video_id): return self._call_api( 'cms_resource', video_id, 'resource path', data={ 'resource_key': resource_key, })['__links__']['cms_resource']['href'] + def _real_initialize(self): + webpage = self._download_webpage( + 'https://vrv.co/', None, headers=self.geo_verification_headers()) + self._API_PARAMS = self._parse_json(self._search_regex( + [ + r'window\.__APP_CONFIG__\s*=\s*({.+?})(?:|;)', + r'window\.__APP_CONFIG__\s*=\s*({.+})' + ], webpage, 'app config'), None)['cxApiParams'] + self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co') + class VRVIE(VRVBaseIE): IE_NAME = 
'vrv' @@ -86,6 +103,22 @@ class VRVIE(VRVBaseIE): 'skip_download': True, }, }] + _NETRC_MACHINE = 'vrv' + + def _real_initialize(self): + super(VRVIE, self)._real_initialize() + + email, password = self._get_login_info() + if email is None: + return + + token_credentials = self._call_api( + 'authenticate/by:credentials', None, 'Token Credentials', data={ + 'email': email, + 'password': password, + }) + self._TOKEN = token_credentials['oauth_token'] + self._TOKEN_SECRET = token_credentials['oauth_token_secret'] def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang): if not url or stream_format not in ('hls', 'dash'): @@ -116,28 +149,16 @@ class VRVIE(VRVBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage( - url, video_id, - headers=self.geo_verification_headers()) - media_resource = self._parse_json(self._search_regex( - [ - r'window\.__INITIAL_STATE__\s*=\s*({.+?})(?:|;)', - r'window\.__INITIAL_STATE__\s*=\s*({.+})' - ], webpage, 'inital state'), video_id).get('watch', {}).get('mediaResource') or {} - video_data = media_resource.get('json') - if not video_data: - self._set_api_params(webpage, video_id) - episode_path = self._get_cms_resource( - 'cms:/episodes/' + video_id, video_id) - video_data = self._call_cms(episode_path, video_id, 'video') + episode_path = self._get_cms_resource( + 'cms:/episodes/' + video_id, video_id) + video_data = self._call_cms(episode_path, video_id, 'video') title = video_data['title'] - streams_json = media_resource.get('streams', {}).get('json', {}) - if not streams_json: - self._set_api_params(webpage, video_id) - streams_path = video_data['__links__']['streams']['href'] - streams_json = self._call_cms(streams_path, video_id, 'streams') + streams_path = video_data['__links__'].get('streams', {}).get('href') + if not streams_path: + self.raise_login_required() + streams_json = self._call_cms(streams_path, video_id, 'streams') audio_locale = 
streams_json.get('audio_locale') formats = [] @@ -202,11 +223,7 @@ class VRVSeriesIE(VRVBaseIE): def _real_extract(self, url): series_id = self._match_id(url) - webpage = self._download_webpage( - url, series_id, - headers=self.geo_verification_headers()) - self._set_api_params(webpage, series_id) seasons_path = self._get_cms_resource( 'cms:/seasons?series_id=' + series_id, series_id) seasons_data = self._call_cms(seasons_path, series_id, 'seasons') From 503b604a316837b9dd6ef32045e4e9bbfb6a1363 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 22 Jan 2019 18:21:37 +0100 Subject: [PATCH 024/785] [vrv] fix oauth signing for python 2(#14307) --- youtube_dl/extractor/vrv.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index 014513051..6c060ae76 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -32,14 +32,14 @@ class VRVBaseIE(InfoExtractor): def _call_api(self, path, video_id, note, data=None): # https://tools.ietf.org/html/rfc5849#section-3 base_url = self._API_DOMAIN + '/core/' + path - query = { - 'oauth_consumer_key': self._API_PARAMS['oAuthKey'], - 'oauth_nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), - 'oauth_signature_method': 'HMAC-SHA1', - 'oauth_timestamp': int(time.time()), - } + query = [ + ('oauth_consumer_key', self._API_PARAMS['oAuthKey']), + ('oauth_nonce', ''.join([random.choice(string.ascii_letters) for _ in range(32)])), + ('oauth_signature_method', 'HMAC-SHA1'), + ('oauth_timestamp', int(time.time())), + ] if self._TOKEN: - query['oauth_token'] = self._TOKEN + query.append(('oauth_token', self._TOKEN)) encoded_query = compat_urllib_parse_urlencode(query) headers = self.geo_verification_headers() if data: From 278d061a0c5eae20963c0a6df4b9b13fd1537186 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Jan 2019 03:51:29 +0700 Subject: [PATCH 025/785] [pornhub] Bypass scrape 
detection (closes #5930) --- youtube_dl/extractor/pornhub.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index e377de196..f5f3e6593 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -10,7 +10,9 @@ from .common import InfoExtractor from ..compat import ( compat_HTTPError, compat_str, + compat_urllib_request, ) +from .openload import PhantomJSwrapper from ..utils import ( ExtractorError, int_or_none, @@ -126,6 +128,26 @@ class PornHubIE(InfoExtractor): 'only_matching': True, }] + def _download_webpage_handle(self, *args, **kwargs): + def dl(*args, **kwargs): + return super(PornHubIE, self)._download_webpage_handle(*args, **kwargs) + + webpage, urlh = dl(*args, **kwargs) + + if any(re.search(p, webpage) for p in ( + r']+\bonload=["\']go\(\)', + r'document\.cookie\s*=\s*["\']RNKEY=', + r'document\.location\.reload\(true\)')): + url_or_request = args[0] + url = (url_or_request.get_full_url() + if isinstance(url_or_request, compat_urllib_request.Request) + else url_or_request) + phantom = PhantomJSwrapper(self, required_version='2.0') + phantom.get(url, html=webpage) + webpage, urlh = dl(*args, **kwargs) + + return webpage, urlh + @staticmethod def _extract_urls(webpage): return re.findall( From 6510a3aa971c00525969040ad654249c0c73f125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Jan 2019 03:55:41 +0700 Subject: [PATCH 026/785] [crunchyroll] Extend _VALID_URL (closes #18955) --- youtube_dl/extractor/crunchyroll.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 4a68d092b..5e2cbe41d 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -144,7 +144,7 @@ class CrunchyrollBaseIE(InfoExtractor): class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): IE_NAME = 'crunchyroll' - 
_VALID_URL = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P[0-9]+))(?:[/?&]|$)' + _VALID_URL = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P[0-9]+))(?:[/?&]|$)' _TESTS = [{ 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', 'info_dict': { @@ -269,6 +269,9 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): }, { 'url': 'http://www.crunchyroll.com/media-723735', 'only_matching': True, + }, { + 'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921', + 'only_matching': True, }] _FORMAT_IDS = { From 71a1f61700789fb0d61fc6ad9681b6f0899d2f51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Jan 2019 04:12:06 +0700 Subject: [PATCH 027/785] [pornhub] Apply scrape detection bypass for all extractors --- youtube_dl/extractor/pornhub.py | 46 +++++++++++++++++---------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index f5f3e6593..be93d5d48 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -24,7 +24,29 @@ from ..utils import ( ) -class PornHubIE(InfoExtractor): +class PornHubBaseIE(InfoExtractor): + def _download_webpage_handle(self, *args, **kwargs): + def dl(*args, **kwargs): + return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs) + + webpage, urlh = dl(*args, **kwargs) + + if any(re.search(p, webpage) for p in ( + r']+\bonload=["\']go\(\)', + r'document\.cookie\s*=\s*["\']RNKEY=', + r'document\.location\.reload\(true\)')): + url_or_request = args[0] + url = (url_or_request.get_full_url() + if isinstance(url_or_request, compat_urllib_request.Request) + else url_or_request) + phantom = PhantomJSwrapper(self, required_version='2.0') + phantom.get(url, html=webpage) + webpage, urlh = 
dl(*args, **kwargs) + + return webpage, urlh + + +class PornHubIE(PornHubBaseIE): IE_DESC = 'PornHub and Thumbzilla' _VALID_URL = r'''(?x) https?:// @@ -128,26 +150,6 @@ class PornHubIE(InfoExtractor): 'only_matching': True, }] - def _download_webpage_handle(self, *args, **kwargs): - def dl(*args, **kwargs): - return super(PornHubIE, self)._download_webpage_handle(*args, **kwargs) - - webpage, urlh = dl(*args, **kwargs) - - if any(re.search(p, webpage) for p in ( - r']+\bonload=["\']go\(\)', - r'document\.cookie\s*=\s*["\']RNKEY=', - r'document\.location\.reload\(true\)')): - url_or_request = args[0] - url = (url_or_request.get_full_url() - if isinstance(url_or_request, compat_urllib_request.Request) - else url_or_request) - phantom = PhantomJSwrapper(self, required_version='2.0') - phantom.get(url, html=webpage) - webpage, urlh = dl(*args, **kwargs) - - return webpage, urlh - @staticmethod def _extract_urls(webpage): return re.findall( @@ -329,7 +331,7 @@ class PornHubIE(InfoExtractor): } -class PornHubPlaylistBaseIE(InfoExtractor): +class PornHubPlaylistBaseIE(PornHubBaseIE): def _extract_entries(self, webpage, host): # Only process container div with main playlist content skipping # drop-down menu that uses similar pattern for videos (see From 0670bdd8f2bca39fdeadc63e1cd53b5d5b3e638c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Jan 2019 04:43:55 +0700 Subject: [PATCH 028/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/ChangeLog b/ChangeLog index 902301765..687796a0e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,32 @@ +version + +Core +* [utils] Fix urljoin for paths with non-http(s) schemes +* [extractor/common] Improve jwplayer relative URL handling (#18892) ++ [YoutubeDL] Add negation support for string comparisons in format selection + expressions (#18600, #18805) +* [extractor/common] Improve HLS video-only format detection (#18923) + 
+Extractors +* [crunchyroll] Extend URL regular expression (#18955) +* [pornhub] Bypass scrape detection (#4822, #5930, #7074, #10175, #12722, + #17197, #18338 #18842, #18899) ++ [vrv] Add support for authentication (#14307) +* [videomore:season] Fix extraction +* [videomore] Improve extraction (#18908) ++ [tnaflix] Pass Referer in metadata request (#18925) +* [radiocanada] Relax DRM check (#18608, #18609) +* [vimeo] Fix video password verification for videos protected by + Referer HTTP header ++ [hketv] Add support for hkedcity.net (#18696) ++ [streamango] Add support for fruithosts.net (#18710) ++ [instagram] Add support for tags (#18757) ++ [odnoklassniki] Detect paid videos (#18876) +* [ted] Correct acodec for HTTP formats (#18923) +* [cartoonnetwork] Fix extraction (#15664, #17224) +* [vimeo] Fix extraction for password protected player URLs (#18889) + + version 2019.01.17 Extractors From 435e382423f860aca82a58d7c3db58cbfa242b40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Jan 2019 04:46:55 +0700 Subject: [PATCH 029/785] release 2019.01.23 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 841bca914..db3ebaeed 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.17*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.17** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.23*. 
If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.23** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.17 +[debug] youtube-dl version 2019.01.23 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 687796a0e..55ad44315 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.01.23 Core * [utils] Fix urljoin for paths with non-http(s) schemes diff --git a/docs/supportedsites.md b/docs/supportedsites.md index c01409419..d759d0273 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -361,6 +361,7 @@ - **hitbox** - **hitbox:live** - **HitRecord** + - **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau - **HornBunny** - **HotNewHipHop** - **hotstar** @@ -386,6 +387,7 @@ - **IndavideoEmbed** - **InfoQ** - **Instagram** + - **instagram:tag**: Instagram hashtag search - **instagram:user**: Instagram user profile - **Internazionale** - **InternetVideoArchive** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ea3f62928..d77949eed 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 
@@ from __future__ import unicode_literals -__version__ = '2019.01.17' +__version__ = '2019.01.23' From e118a8794ffe5a3a414afd489726f34d753b0b23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 24 Jan 2019 01:34:41 +0700 Subject: [PATCH 030/785] [YoutubeDL] Fix typo in string negation implementation and add more tests (closes #18961) --- test/test_YoutubeDL.py | 30 +++++++++++++++++++++++++++--- youtube_dl/YoutubeDL.py | 2 +- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index df8994b84..1d7452744 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -242,6 +242,7 @@ class TestFormatSelection(unittest.TestCase): def test_format_selection_string_ops(self): formats = [ {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL}, + {'format_id': 'zxc-cxz', 'ext': 'webm', 'url': TEST_URL}, ] info_dict = _make_result(formats) @@ -253,6 +254,11 @@ class TestFormatSelection(unittest.TestCase): # does not equal (!=) ydl = YDL({'format': '[format_id!=abc-cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!=abc-cba][format_id!=zxc-cxz]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) # starts with (^=) @@ -262,7 +268,12 @@ class TestFormatSelection(unittest.TestCase): self.assertEqual(downloaded['format_id'], 'abc-cba') # does not start with (!^=) - ydl = YDL({'format': '[format_id!^=abc-cba]'}) + ydl = YDL({'format': '[format_id!^=abc]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!^=abc][format_id!^=zxc]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) # ends with ($=) @@ -272,16 +283,29 @@ class TestFormatSelection(unittest.TestCase): 
self.assertEqual(downloaded['format_id'], 'abc-cba') # does not end with (!$=) - ydl = YDL({'format': '[format_id!$=abc-cba]'}) + ydl = YDL({'format': '[format_id!$=cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!$=cba][format_id!$=cxz]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) # contains (*=) - ydl = YDL({'format': '[format_id*=-]'}) + ydl = YDL({'format': '[format_id*=bc-cb]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'abc-cba') # does not contain (!*=) + ydl = YDL({'format': '[format_id!*=bc-cb]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!*=abc][format_id!*=zxc]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + ydl = YDL({'format': '[format_id!*=-]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a827414dc..80ed8d7e5 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1078,7 +1078,7 @@ class YoutubeDL(object): comparison_value = m.group('value') str_op = STR_OPERATORS[m.group('op')] if m.group('negation'): - op = lambda attr, value: not str_op + op = lambda attr, value: not str_op(attr, value) else: op = str_op From 7d311586eda9eae9430da3f6d18932d79127daa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 24 Jan 2019 01:44:09 +0700 Subject: [PATCH 031/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 55ad44315..a28c6e951 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Core +* [YoutubeDL] Fix negation for 
string operators in format selection (#18961) + + version 2019.01.23 Core From a1e171233d86a064865353cc820969c10cb1f251 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 24 Jan 2019 01:46:23 +0700 Subject: [PATCH 032/785] release 2019.01.24 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index db3ebaeed..63aefe013 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.23** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.24*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.24** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.23 +[debug] youtube-dl version 2019.01.24 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index a28c6e951..1fda747bb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.01.24 Core * [YoutubeDL] Fix negation for string operators in format selection (#18961) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d77949eed..18c1f8d4c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.23' +__version__ = '2019.01.24' From 9713d1d1e0a7eff5c1b9873a2f4f054111a568ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 24 Jan 2019 02:30:12 +0700 Subject: [PATCH 033/785] [openload] Add support for oload.club (closes #18969) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index cf51e4770..b713e78b8 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -249,7 +249,7 @@ class OpenloadIE(InfoExtractor): (?:www\.)? 
(?: openload\.(?:co|io|link)| - oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun) + oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club) ) )/ (?:f|embed)/ @@ -334,6 +334,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.fun/f/gb6G1H4sHXY', 'only_matching': True, + }, { + 'url': 'https://oload.club/f/Nr1L-aZ2dbQ', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From 118afcf52ff23726e5f0c436083710f5c63230fa Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 23 Jan 2019 22:16:21 +0100 Subject: [PATCH 034/785] [go] fix adobe pass requests for Disney Now(closes #18901) --- youtube_dl/extractor/go.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index ec9dd6e3a..206d89e82 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -25,15 +25,15 @@ class GoIE(AdobePassIE): }, 'watchdisneychannel': { 'brand': '004', - 'requestor_id': 'Disney', + 'resource_id': 'Disney', }, 'watchdisneyjunior': { 'brand': '008', - 'requestor_id': 'DisneyJunior', + 'resource_id': 'DisneyJunior', }, 'watchdisneyxd': { 'brand': '009', - 'requestor_id': 'DisneyXD', + 'resource_id': 'DisneyXD', } } _VALID_URL = r'https?://(?:(?P%s)\.)?go\.com/(?:(?:[^/]+/)*(?Pvdka\w+)|(?:[^/]+/)*(?P[^/?#]+))'\ @@ -130,8 +130,8 @@ class GoIE(AdobePassIE): 'device': '001', } if video_data.get('accesslevel') == '1': - requestor_id = site_info['requestor_id'] - resource = self._get_mvpd_resource( + requestor_id = site_info.get('requestor_id', 'DisneyChannels') + resource = site_info.get('resource_id') or self._get_mvpd_resource( requestor_id, title, video_id, None) auth = self._extract_mvpd_auth( url, video_id, requestor_id, resource) From eb35b163adf61f8ff0ee6c504e98bc94db16e705 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 24 Jan 2019 20:23:04 +0100 
Subject: [PATCH 035/785] [postprocessor/ffmpeg] fallback to ffmpeg/avconv for audio codec detection(closes #681) --- youtube_dl/postprocessor/ffmpeg.py | 53 +++++++++++++++++++----------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 39a905380..b952b0970 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -9,9 +9,6 @@ import re from .common import AudioConversionError, PostProcessor -from ..compat import ( - compat_subprocess_get_DEVNULL, -) from ..utils import ( encodeArgument, encodeFilename, @@ -165,27 +162,45 @@ class FFmpegPostProcessor(PostProcessor): return self._paths[self.probe_basename] def get_audio_codec(self, path): - if not self.probe_available: - raise PostProcessingError('ffprobe or avprobe not found. Please install one.') + if not self.probe_available and not self.available: + raise PostProcessingError('ffprobe/avprobe and ffmpeg/avconv not found. 
Please install one.') try: - cmd = [ - encodeFilename(self.probe_executable, True), - encodeArgument('-show_streams'), - encodeFilename(self._ffmpeg_filename_argument(path), True)] + if self.probe_available: + cmd = [ + encodeFilename(self.probe_executable, True), + encodeArgument('-show_streams')] + else: + cmd = [ + encodeFilename(self.executable, True), + encodeArgument('-i')] + cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True)) if self._downloader.params.get('verbose', False): - self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd))) - handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE) - output = handle.communicate()[0] - if handle.wait() != 0: + self._downloader.to_screen( + '[debug] %s command line: %s' % (self.basename, shell_quote(cmd))) + handle = subprocess.Popen( + cmd, stderr=subprocess.PIPE, + stdout=subprocess.PIPE, stdin=subprocess.PIPE) + stdout_data, stderr_data = handle.communicate() + expected_ret = 0 if self.probe_available else 1 + if handle.wait() != expected_ret: return None except (IOError, OSError): return None - audio_codec = None - for line in output.decode('ascii', 'ignore').split('\n'): - if line.startswith('codec_name='): - audio_codec = line.split('=')[1].strip() - elif line.strip() == 'codec_type=audio' and audio_codec is not None: - return audio_codec + output = (stdout_data if self.probe_available else stderr_data).decode('ascii', 'ignore') + if self.probe_available: + audio_codec = None + for line in output.split('\n'): + if line.startswith('codec_name='): + audio_codec = line.split('=')[1].strip() + elif line.strip() == 'codec_type=audio' and audio_codec is not None: + return audio_codec + else: + # Stream #FILE_INDEX:STREAM_INDEX[STREAM_ID](LANGUAGE): CODEC_TYPE: CODEC_NAME + mobj = re.search( + r'Stream\s*#\d+:\d+(?:\[0x[0-9a-f]+\])?(?:\([a-z]{3}\))?:\s*Audio:\s*([0-9a-z]+)', + output) + if mobj: + return 
mobj.group(1) return None def run_ffmpeg_multiple_files(self, input_paths, out_path, opts): From 0eba178fce80923515f4a9ac411e46648a19d78c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 25 Jan 2019 04:04:58 +0700 Subject: [PATCH 036/785] [nhk] Extend _VALID_URL (closes #18968) --- youtube_dl/extractor/nhk.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index 5c8cd76dc..d4acbcc3e 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -5,8 +5,8 @@ from ..utils import ExtractorError class NhkVodIE(InfoExtractor): - _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P[^/]+/[^/?#&]+)' - _TEST = { + _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/(?:vod|ondemand)/(?P[^/]+/[^/?#&]+)' + _TESTS = [{ # Videos available only for a limited period of time. Visit # http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples. 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815', @@ -19,7 +19,10 @@ class NhkVodIE(InfoExtractor): 'episode': 'The Kimono as Global Fashion', }, 'skip': 'Videos available only for a limited period of time', - } + }, { + 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/', + 'only_matching': True, + }] _API_URL = 'http://api.nhk.or.jp/nhkworld/vodesdlist/v1/all/all/all.json?apikey=EJfK8jdS57GqlupFgAfAAwr573q01y6k' def _real_extract(self, url): From 1602a240a7742f9e0a02b3f0effd215d00d859f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 25 Jan 2019 04:16:49 +0700 Subject: [PATCH 037/785] [drtv] Fix extraction (closes #18989) --- youtube_dl/extractor/drtv.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index f757745ba..8d63ca433 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -77,10 +77,9 @@ class DRTVIE(InfoExtractor): 
r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'), webpage, 'video id') - programcard = self._download_json( - 'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, - video_id, 'Downloading video JSON') - data = programcard['Data'][0] + data = self._download_json( + 'https://www.dr.dk/mu-online/api/1.4/programcard/%s' % video_id, + video_id, 'Downloading video JSON', query={'expanded': 'true'}) title = remove_end(self._og_search_title( webpage, default=None), ' | TV | DR') or data['Title'] @@ -97,7 +96,7 @@ class DRTVIE(InfoExtractor): formats = [] subtitles = {} - for asset in data['Assets']: + for asset in [data['PrimaryAsset']]: kind = asset.get('Kind') if kind == 'Image': thumbnail = asset.get('Uri') From ae18d58297c16a300a89693360efd19be4a97e92 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 25 Jan 2019 11:01:27 +0100 Subject: [PATCH 038/785] [usatoday] fix extraction for videos with custom brightcove partner id(closes #18990) --- youtube_dl/extractor/usatoday.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/usatoday.py b/youtube_dl/extractor/usatoday.py index e5678dc78..b2103448d 100644 --- a/youtube_dl/extractor/usatoday.py +++ b/youtube_dl/extractor/usatoday.py @@ -3,21 +3,23 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + ExtractorError, get_element_by_attribute, parse_duration, + try_get, update_url_query, - ExtractorError, ) from ..compat import compat_str class USATodayIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?usatoday\.com/(?:[^/]+/)*(?P[^?/#]+)' - _TEST = { + _TESTS = [{ + # Brightcove Partner ID = 29906170001 'url': 'http://www.usatoday.com/media/cinematic/video/81729424/us-france-warn-syrian-regime-ahead-of-new-peace-talks/', - 'md5': '4d40974481fa3475f8bccfd20c5361f8', + 'md5': '033587d2529dc3411a1ab3644c3b8827', 'info_dict': { - 'id': '81729424', + 'id': '4799374959001', 'ext': 'mp4', 'title': 
'US, France warn Syrian regime ahead of new peace talks', 'timestamp': 1457891045, @@ -25,8 +27,20 @@ class USATodayIE(InfoExtractor): 'uploader_id': '29906170001', 'upload_date': '20160313', } - } - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/29906170001/38a9eecc-bdd8-42a3-ba14-95397e48b3f8_default/index.html?videoId=%s' + }, { + # ui-video-data[asset_metadata][items][brightcoveaccount] = 28911775001 + 'url': 'https://www.usatoday.com/story/tech/science/2018/08/21/yellowstone-supervolcano-eruption-stop-worrying-its-blow/973633002/', + 'info_dict': { + 'id': '5824495846001', + 'ext': 'mp4', + 'title': 'Yellowstone more likely to crack rather than explode', + 'timestamp': 1534790612, + 'description': 'md5:3715e7927639a4f16b474e9391687c62', + 'uploader_id': '28911775001', + 'upload_date': '20180820', + } + }] + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' def _real_extract(self, url): display_id = self._match_id(url) @@ -35,10 +49,11 @@ class USATodayIE(InfoExtractor): if not ui_video_data: raise ExtractorError('no video on the webpage', expected=True) video_data = self._parse_json(ui_video_data, display_id) + item = try_get(video_data, lambda x: x['asset_metadata']['items'], dict) or {} return { '_type': 'url_transparent', - 'url': self.BRIGHTCOVE_URL_TEMPLATE % video_data['brightcove_id'], + 'url': self.BRIGHTCOVE_URL_TEMPLATE % (item.get('brightcoveaccount', '29906170001'), item.get('brightcoveid') or video_data['brightcove_id']), 'id': compat_str(video_data['id']), 'title': video_data['title'], 'thumbnail': video_data.get('thumbnail'), From 252abb1e8b881aa9d3942c436711ac33235b37cd Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sat, 26 Jan 2019 15:29:19 +0700 Subject: [PATCH 039/785] [README.md] Mention more convenience extraction functions --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4ba982907..c1572f771 100644 --- a/README.md +++ 
b/README.md @@ -1213,7 +1213,7 @@ Incorrect: 'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' ``` -### Use safe conversion functions +### Use convenience conversion and parsing functions Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. @@ -1221,6 +1221,8 @@ Use `url_or_none` for safe URL processing. Use `try_get` for safe metadata extraction from parsed JSON. +Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. + Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. 
#### More examples From 845333acf6280761d19f91b3e018c418d922a0de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Jan 2019 04:14:54 +0700 Subject: [PATCH 040/785] [wakanim] Add extractor (closes #14374) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/wakanim.py | 55 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/wakanim.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 574a47e6d..2ffcffa9e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1373,6 +1373,7 @@ from .vuclip import VuClipIE from .vvvvid import VVVVIDIE from .vyborymos import VyboryMosIE from .vzaar import VzaarIE +from .wakanim import WakanimIE from .walla import WallaIE from .washingtonpost import ( WashingtonPostIE, diff --git a/youtube_dl/extractor/wakanim.py b/youtube_dl/extractor/wakanim.py new file mode 100644 index 000000000..1d588bdd6 --- /dev/null +++ b/youtube_dl/extractor/wakanim.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + merge_dicts, + urljoin, +) + + +class WakanimIE(InfoExtractor): + _VALID_URL = r'https://(?:www\.)?wakanim\.tv/[^/]+/v2/catalogue/episode/(?P\d+)' + _TEST = { + 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/2997/the-asterisk-war-omu-staffel-1-episode-02-omu', + 'info_dict': { + 'id': '2997', + 'ext': 'mp4', + 'title': 'Episode 02', + 'description': 'md5:2927701ea2f7e901de8bfa8d39b2852d', + 'series': 'The Asterisk War (OmU.)', + 'season_number': 1, + 'episode': 'Episode 02', + 'episode_number': 2, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + m3u8_url = urljoin(url, self._search_regex( + r'file\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', 
webpage, 'm3u8 url', + group='url')) + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + + info = self._search_json_ld(webpage, video_id, default={}) + + title = self._search_regex( + (r']+\bclass=["\']episode_h1[^>]+\btitle=(["\'])(?P(?:(?!\1).)+)\1', + r'<span[^>]+\bclass=["\']episode_title["\'][^>]*>(?P<title>[^<]+)'), + webpage, 'title', default=None, group='title') + + return merge_dicts(info, { + 'id': video_id, + 'title': title, + 'formats': formats, + }) From 458fd30f56785d514862dcb8a604a329d8e29ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Jan 2019 04:36:58 +0700 Subject: [PATCH 041/785] [extractor/common] Extract season in _json_ld --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 95456b291..c4ea2882f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1249,7 +1249,10 @@ class InfoExtractor(object): info['title'] = episode_name part_of_season = e.get('partOfSeason') if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'): - info['season_number'] = int_or_none(part_of_season.get('seasonNumber')) + info.update({ + 'season': unescapeHTML(part_of_season.get('name')), + 'season_number': int_or_none(part_of_season.get('seasonNumber')), + }) part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries') if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'): info['series'] = unescapeHTML(part_of_series.get('name')) From 30cd1a5f3920d7485225a5d57b6ce41be4cde672 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 26 Jan 2019 22:52:54 +0100 Subject: [PATCH 042/785] [wakanim] detect DRM protected videos --- youtube_dl/extractor/wakanim.py | 15 
+++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/wakanim.py b/youtube_dl/extractor/wakanim.py index 1d588bdd6..f9a2395d9 100644 --- a/youtube_dl/extractor/wakanim.py +++ b/youtube_dl/extractor/wakanim.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + ExtractorError, merge_dicts, urljoin, ) @@ -10,7 +11,7 @@ from ..utils import ( class WakanimIE(InfoExtractor): _VALID_URL = r'https://(?:www\.)?wakanim\.tv/[^/]+/v2/catalogue/episode/(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/2997/the-asterisk-war-omu-staffel-1-episode-02-omu', 'info_dict': { 'id': '2997', @@ -26,7 +27,11 @@ class WakanimIE(InfoExtractor): 'format': 'bestvideo', 'skip_download': True, }, - } + }, { + # DRM Protected + 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/7843/sword-art-online-alicization-omu-arc-2-folge-15-omu', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -36,6 +41,12 @@ class WakanimIE(InfoExtractor): m3u8_url = urljoin(url, self._search_regex( r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'm3u8 url', group='url')) + # https://docs.microsoft.com/en-us/azure/media-services/previous/media-services-content-protection-overview#streaming-urls + encryption = self._search_regex( + r'encryption%3D(c(?:enc|bc(?:s-aapl)?))', + m3u8_url, 'encryption', default=None) + if encryption and encryption in ('cenc', 'cbcs-aapl'): + raise ExtractorError('This video is DRM protected.', expected=True) formats = self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', From 1fcc91663bc84a599cc613ff1fa0e4bc15f42a9e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 27 Jan 2019 10:53:38 +0100 Subject: [PATCH 043/785] [vice] fix extraction for locked videos(closes #16248) --- youtube_dl/extractor/vice.py | 4 +--- 1 file changed, 1 
insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 538258617..8fdfd743d 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -94,7 +94,6 @@ class ViceIE(AdobePassIE): 'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1', 'only_matching': True, }] - _PREPLAY_HOST = 'vms.vice' @staticmethod def _extract_urls(webpage): @@ -158,9 +157,8 @@ class ViceIE(AdobePassIE): }) try: - host = 'www.viceland' if is_locked else self._PREPLAY_HOST preplay = self._download_json( - 'https://%s.com/%s/video/preplay/%s' % (host, locale, video_id), + 'https://vms.vice.com/%s/video/preplay/%s' % (locale, video_id), video_id, query=query) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401): From bf8ebc9cfe1ae2b62baf2116f84748db03c4df7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Jan 2019 21:25:43 +0700 Subject: [PATCH 044/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ChangeLog b/ChangeLog index 1fda747bb..9d5c25273 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +version <unreleased> + +Core ++ [extractor/common] Extract season in _json_ld +* [postprocessor/ffmpeg] Fallback to ffmpeg/avconv for audio codec detection + (#681) + +Extractors +* [vice] Fix extraction for locked videos (#16248) ++ [wakanim] Detect DRM protected videos ++ [wakanim] Add support for wakanim.tv (#14374) +* [usatoday] Fix extraction for videos with custom brightcove partner id + (#18990) +* [drtv] Fix extraction (#18989) +* [nhk] Extend URL regular expression (#18968) +* [go] Fix Adobe Pass requests for Disney Now (#18901) ++ [openload] Add support for oload.club (#18969) + + version 2019.01.24 Core From e71be6ee9f239308765443d49d91358fa306e48a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= 
<dstftw@gmail.com> Date: Sun, 27 Jan 2019 21:28:09 +0700 Subject: [PATCH 045/785] release 2019.01.27 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 4 +++- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 63aefe013..f529e3f4b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.24*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.24** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.27*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.27** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.24 +[debug] youtube-dl version 2019.01.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a71b045d0..6c1739860 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -339,7 +339,7 @@ Incorrect: 'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' ``` -### Use safe conversion functions +### Use convenience conversion and parsing functions Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. @@ -347,6 +347,8 @@ Use `url_or_none` for safe URL processing. Use `try_get` for safe metadata extraction from parsed JSON. +Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. 
+ Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. #### More examples diff --git a/ChangeLog b/ChangeLog index 9d5c25273..d94fe36ec 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.01.27 Core + [extractor/common] Extract season in _json_ld diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d759d0273..6377bf815 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1069,6 +1069,7 @@ - **VVVVID** - **VyboryMos** - **Vzaar** + - **Wakanim** - **Walla** - **WalyTV** - **washingtonpost** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 18c1f8d4c..ec89cfc64 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.24' +__version__ = '2019.01.27' From 2b3afe6b0fee226bb1e61027d815df088cb40757 Mon Sep 17 00:00:00 2001 From: Andrew Udvare <audvare@gmail.com> Date: Sun, 27 Jan 2019 22:24:37 -0500 Subject: [PATCH 046/785] [postprocessor/ffmpeg] Disable "Last message repeated" messages which cause non-zero exit status --- youtube_dl/postprocessor/ffmpeg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index b952b0970..8ef03f43b 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -218,6 +218,7 @@ class FFmpegPostProcessor(PostProcessor): encodeFilename(self._ffmpeg_filename_argument(path), True) ]) cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] + + ['-loglevel', 'repeat+info'] + files_cmd + [encodeArgument(o) for o in opts] + [encodeFilename(self._ffmpeg_filename_argument(out_path), True)]) From 7f903dd8bfbc9c2de129d5b0be23ef62d8ca3df3 Mon Sep 17 00:00:00 2001 From: Tatsh <Tatsh@users.noreply.github.com> Date: Mon, 28 Jan 2019 10:57:14 -0500 Subject: [PATCH 047/785] 
[postprocessor/ffmpeg] Do not copy Apple TV chapter tracks while embedding subtitles (closes #19042) Related issue: https://trac.ffmpeg.org/ticket/6016 --- youtube_dl/postprocessor/ffmpeg.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index b952b0970..fff2021ff 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -407,6 +407,9 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): # Don't copy the existing subtitles, we may be running the # postprocessor a second time '-map', '-0:s', + # Don't copy Apple TV chapters track, bin_data (see #19042, #19024, + # https://trac.ffmpeg.org/ticket/6016) + '-map', '-0:d', ] if information['ext'] == 'mp4': opts += ['-c:s', 'mov_text'] From 61ff92e11ea876532697451b1ed727f42274b109 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Jan 2019 01:59:56 +0700 Subject: [PATCH 048/785] [postprocessor/ffmpeg] Wrap loglevel args in encodeArgument --- youtube_dl/postprocessor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 33dbcad9f..88b9ae9be 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -218,7 +218,7 @@ class FFmpegPostProcessor(PostProcessor): encodeFilename(self._ffmpeg_filename_argument(path), True) ]) cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] + - ['-loglevel', 'repeat+info'] + + [encodeArgument('-loglevel'), encodeArgument('repeat+info')] + files_cmd + [encodeArgument(o) for o in opts] + [encodeFilename(self._ffmpeg_filename_argument(out_path), True)]) From a81daba2311cb4d6c5bc7e62b47438a78aa5c10f Mon Sep 17 00:00:00 2001 From: Alexander Seiler <seileralex@gmail.com> Date: Mon, 28 Jan 2019 20:20:46 +0100 Subject: [PATCH 049/785] [zattoo] Add support for tv.salt.ch --- 
youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/zattoo.py | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2ffcffa9e..9d776ff45 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1497,6 +1497,7 @@ from .zattoo import ( QuantumTVIE, QuicklineIE, QuicklineLiveIE, + SaltTVIE, SAKTVIE, VTXTVIE, WalyTVIE, diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index 896276301..ee514666b 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -420,3 +420,14 @@ class EinsUndEinsTVIE(ZattooIE): 'url': 'https://www.1und1.tv/watch/abc/123-abc', 'only_matching': True, }] + + +class SaltTVIE(ZattooIE): + _NETRC_MACHINE = 'salttv' + _HOST = 'tv.salt.ch' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://tv.salt.ch/watch/abc/123-abc', + 'only_matching': True, + }] From 41c2c254d3c30afde395e8abbe0ced2c53485a78 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 28 Jan 2019 22:39:08 +0100 Subject: [PATCH 050/785] [fox] fix extraction for free videos(#19060) --- youtube_dl/extractor/fox.py | 46 ++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index b1c91f095..2d6c97ec9 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -1,10 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals -# import json -# import uuid +import json +import uuid from .adobepass import AdobePassIE +from ..compat import compat_str from ..utils import ( int_or_none, parse_age_limit, @@ -47,38 +48,31 @@ class FOXIE(AdobePassIE): 'url': 'https://www.nationalgeographic.com/tv/watch/f690e05ebbe23ab79747becd0cc223d1/', 'only_matching': True, }] - # _access_token = None + _access_token = None - # def 
_call_api(self, path, video_id, data=None): - # headers = { - # 'X-Api-Key': '238bb0a0c2aba67922c48709ce0c06fd', - # } - # if self._access_token: - # headers['Authorization'] = 'Bearer ' + self._access_token - # return self._download_json( - # 'https://api2.fox.com/v2.0/' + path, video_id, data=data, headers=headers) + def _call_api(self, path, video_id, data=None): + headers = { + 'X-Api-Key': '238bb0a0c2aba67922c48709ce0c06fd', + } + if self._access_token: + headers['Authorization'] = 'Bearer ' + self._access_token + return self._download_json( + 'https://api2.fox.com/v2.0/' + path, + video_id, data=data, headers=headers) - # def _real_initialize(self): - # self._access_token = self._call_api( - # 'login', None, json.dumps({ - # 'deviceId': compat_str(uuid.uuid4()), - # }).encode())['accessToken'] + def _real_initialize(self): + self._access_token = self._call_api( + 'login', None, json.dumps({ + 'deviceId': compat_str(uuid.uuid4()), + }).encode())['accessToken'] def _real_extract(self, url): video_id = self._match_id(url) - video = self._download_json( - 'https://api.fox.com/fbc-content/v1_5/video/%s' % video_id, - video_id, headers={ - 'apikey': 'abdcbed02c124d393b39e818a4312055', - 'Content-Type': 'application/json', - 'Referer': url, - }) - # video = self._call_api('vodplayer/' + video_id, video_id) + video = self._call_api('vodplayer/' + video_id, video_id) title = video['name'] - release_url = video['videoRelease']['url'] - # release_url = video['url'] + release_url = video['url'] data = try_get( video, lambda x: x['trackingData']['properties'], dict) or {} From 6df196f32e68ec22bd854c4d779b9d94e04e63b2 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 29 Jan 2019 00:31:49 +0100 Subject: [PATCH 051/785] [fox] add support for locked videos using cookies(closes #19060) --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/fox.py | 50 +++++++++++----------- youtube_dl/extractor/nationalgeographic.py | 22 ++++++++++ 3 
files changed, 51 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9d776ff45..b40be42e6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -692,7 +692,10 @@ from .myvi import ( MyviEmbedIE, ) from .myvidster import MyVidsterIE -from .nationalgeographic import NationalGeographicVideoIE +from .nationalgeographic import ( + NationalGeographicVideoIE, + NationalGeographicTVIE, +) from .naver import NaverIE from .nba import NBAIE from .nbc import ( diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index 2d6c97ec9..568656542 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -5,19 +5,23 @@ import json import uuid from .adobepass import AdobePassIE -from ..compat import compat_str +from ..compat import ( + compat_HTTPError, + compat_str, + compat_urllib_parse_unquote, +) from ..utils import ( + ExtractorError, int_or_none, parse_age_limit, parse_duration, try_get, unified_timestamp, - update_url_query, ) class FOXIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?(?:fox\.com|nationalgeographic\.com/tv)/watch/(?P<id>[\da-fA-F]+)' + _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)' _TESTS = [{ # clip 'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/', @@ -32,6 +36,7 @@ class FOXIE(AdobePassIE): 'upload_date': '20170901', 'creator': 'FOX', 'series': 'Gotham', + 'age_limit': 14, }, 'params': { 'skip_download': True, @@ -44,15 +49,14 @@ class FOXIE(AdobePassIE): # episode, geo-restricted, tv provided required 'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/', 'only_matching': True, - }, { - 'url': 'https://www.nationalgeographic.com/tv/watch/f690e05ebbe23ab79747becd0cc223d1/', - 'only_matching': True, }] + _HOME_PAGE_URL = 'https://www.fox.com/' + _API_KEY = 'abdcbed02c124d393b39e818a4312055' _access_token = None def _call_api(self, path, video_id, data=None): 
headers = { - 'X-Api-Key': '238bb0a0c2aba67922c48709ce0c06fd', + 'X-Api-Key': self._API_KEY, } if self._access_token: headers['Authorization'] = 'Bearer ' + self._access_token @@ -61,10 +65,16 @@ class FOXIE(AdobePassIE): video_id, data=data, headers=headers) def _real_initialize(self): - self._access_token = self._call_api( - 'login', None, json.dumps({ - 'deviceId': compat_str(uuid.uuid4()), - }).encode())['accessToken'] + if not self._access_token: + mvpd_auth = self._get_cookies(self._HOME_PAGE_URL).get('mvpd-auth') + if mvpd_auth: + self._access_token = (self._parse_json(compat_urllib_parse_unquote( + mvpd_auth.value), None, fatal=False) or {}).get('accessToken') + if not self._access_token: + self._access_token = self._call_api( + 'login', None, json.dumps({ + 'deviceId': compat_str(uuid.uuid4()), + }).encode())['accessToken'] def _real_extract(self, url): video_id = self._match_id(url) @@ -73,25 +83,15 @@ class FOXIE(AdobePassIE): title = video['name'] release_url = video['url'] - - data = try_get( - video, lambda x: x['trackingData']['properties'], dict) or {} - - rating = video.get('contentRating') - if data.get('authRequired'): - resource = self._get_mvpd_resource( - 'fbc-fox', title, video.get('guid'), rating) - release_url = update_url_query( - release_url, { - 'auth': self._extract_mvpd_auth( - url, video_id, 'fbc-fox', resource) - }) m3u8_url = self._download_json(release_url, video_id)['playURL'] formats = self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') self._sort_formats(formats) + data = try_get( + video, lambda x: x['trackingData']['properties'], dict) or {} + duration = int_or_none(video.get('durationInSeconds')) or int_or_none( video.get('duration')) or parse_duration(video.get('duration')) timestamp = unified_timestamp(video.get('datePublished')) @@ -117,7 +117,7 @@ class FOXIE(AdobePassIE): 'description': video.get('description'), 'duration': duration, 'timestamp': timestamp, - 'age_limit': 
parse_age_limit(rating), + 'age_limit': parse_age_limit(video.get('contentRating')), 'creator': creator, 'series': series, 'season_number': int_or_none(video.get('seasonNumber')), diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index 165964ca0..ee12e2b47 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .fox import FOXIE from ..utils import ( smuggle_url, url_basename, @@ -58,3 +59,24 @@ class NationalGeographicVideoIE(InfoExtractor): {'force_smil_url': True}), 'id': guid, } + + +class NationalGeographicTVIE(FOXIE): + _VALID_URL = r'https?://(?:www\.)?nationalgeographic\.com/tv/watch/(?P<id>[\da-fA-F]+)' + _TESTS = [{ + 'url': 'https://www.nationalgeographic.com/tv/watch/6a875e6e734b479beda26438c9f21138/', + 'info_dict': { + 'id': '6a875e6e734b479beda26438c9f21138', + 'ext': 'mp4', + 'title': 'Why Nat Geo? 
Valley of the Boom', + 'description': 'The lives of prominent figures in the tech world, including their friendships, rivalries, victories and failures.', + 'timestamp': 1542662458, + 'upload_date': '20181119', + 'age_limit': 14, + }, + 'params': { + 'skip_download': True, + }, + }] + _HOME_PAGE_URL = 'https://www.nationalgeographic.com/tv/' + _API_KEY = '238bb0a0c2aba67922c48709ce0c06fd' From a2d821d7112fb1423f99ddf309a843c80cc3be2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Jan 2019 23:33:09 +0700 Subject: [PATCH 052/785] [drtv] Improve extraction (closes #19039) + Add support for EncryptedUri videos + Extract more metadata * Fix subtitles extraction --- youtube_dl/extractor/drtv.py | 133 +++++++++++++++++++++++++++-------- 1 file changed, 102 insertions(+), 31 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 8d63ca433..c5f211128 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -1,15 +1,25 @@ # coding: utf-8 from __future__ import unicode_literals +import binascii +import hashlib +import re + + from .common import InfoExtractor +from ..aes import aes_cbc_decrypt +from ..compat import compat_urllib_parse_unquote from ..utils import ( + bytes_to_intlist, ExtractorError, int_or_none, + intlist_to_bytes, float_or_none, mimetype2ext, - parse_iso8601, - remove_end, + str_or_none, + unified_timestamp, update_url_query, + url_or_none, ) @@ -20,23 +30,31 @@ class DRTVIE(InfoExtractor): IE_NAME = 'drtv' _TESTS = [{ 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10', - 'md5': '7ae17b4e18eb5d29212f424a7511c184', + 'md5': '25e659cccc9a2ed956110a299fdf5983', 'info_dict': { 'id': 'klassen-darlig-taber-10', 'ext': 'mp4', 'title': 'Klassen - Dårlig taber (10)', 'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa', - 'timestamp': 1471991907, - 'upload_date': '20160823', + 'timestamp': 1539085800, + 'upload_date': 
'20181009', 'duration': 606.84, + 'series': 'Klassen', + 'season': 'Klassen I', + 'season_number': 1, + 'season_id': 'urn:dr:mu:bundle:57d7e8216187a4031cfd6f6b', + 'episode': 'Episode 10', + 'episode_number': 10, + 'release_year': 2016, }, + 'expected_warnings': ['Unable to download f4m manifest'], }, { # embed 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang', 'info_dict': { - 'id': 'christiania-pusher-street-ryddes-drdkrjpo', + 'id': 'urn:dr:mu:programcard:57c926176187a50a9c6e83c6', 'ext': 'mp4', - 'title': 'LIVE Christianias rydning af Pusher Street er i gang', + 'title': 'christiania pusher street ryddes drdkrjpo', 'description': 'md5:2a71898b15057e9b97334f61d04e6eb5', 'timestamp': 1472800279, 'upload_date': '20160902', @@ -45,17 +63,18 @@ class DRTVIE(InfoExtractor): 'params': { 'skip_download': True, }, + 'expected_warnings': ['Unable to download f4m manifest'], }, { # with SignLanguage formats 'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder', 'info_dict': { 'id': 'historien-om-danmark-stenalder', 'ext': 'mp4', - 'title': 'Historien om Danmark: Stenalder (1)', + 'title': 'Historien om Danmark: Stenalder', 'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a', - 'timestamp': 1490401996, - 'upload_date': '20170325', - 'duration': 3502.04, + 'timestamp': 1546628400, + 'upload_date': '20190104', + 'duration': 3502.56, 'formats': 'mincount:20', }, 'params': { @@ -74,19 +93,26 @@ class DRTVIE(InfoExtractor): video_id = self._search_regex( (r'data-(?:material-identifier|episode-slug)="([^"]+)"', - r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'), - webpage, 'video id') + r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'), + webpage, 'video id', default=None) + + if not video_id: + video_id = compat_urllib_parse_unquote(self._search_regex( + r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)', + webpage, 'urn')) data = self._download_json( 
'https://www.dr.dk/mu-online/api/1.4/programcard/%s' % video_id, video_id, 'Downloading video JSON', query={'expanded': 'true'}) - title = remove_end(self._og_search_title( - webpage, default=None), ' | TV | DR') or data['Title'] + title = str_or_none(data.get('Title')) or re.sub( + r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '', + self._og_search_title(webpage)) description = self._og_search_description( webpage, default=None) or data.get('Description') - timestamp = parse_iso8601(data.get('CreatedTime')) + timestamp = unified_timestamp( + data.get('PrimaryBroadcastStartTime') or data.get('SortDateTime')) thumbnail = None duration = None @@ -96,16 +122,51 @@ class DRTVIE(InfoExtractor): formats = [] subtitles = {} - for asset in [data['PrimaryAsset']]: + assets = [] + primary_asset = data.get('PrimaryAsset') + if isinstance(primary_asset, dict): + assets.append(primary_asset) + secondary_assets = data.get('SecondaryAssets') + if isinstance(secondary_assets, list): + for secondary_asset in secondary_assets: + if isinstance(secondary_asset, dict): + assets.append(secondary_asset) + + def hex_to_bytes(hex): + return binascii.a2b_hex(hex.encode('ascii')) + + def decrypt_uri(e): + n = int(e[2:10], 16) + a = e[10 + n:] + data = bytes_to_intlist(hex_to_bytes(e[10:10 + n])) + key = bytes_to_intlist(hashlib.sha256( + ('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest()) + iv = bytes_to_intlist(hex_to_bytes(a)) + decrypted = aes_cbc_decrypt(data, key, iv) + return intlist_to_bytes( + decrypted[:-decrypted[-1]]).decode('utf-8').split('?')[0] + + for asset in assets: kind = asset.get('Kind') if kind == 'Image': - thumbnail = asset.get('Uri') + thumbnail = url_or_none(asset.get('Uri')) elif kind in ('VideoResource', 'AudioResource'): duration = float_or_none(asset.get('DurationInMilliseconds'), 1000) restricted_to_denmark = asset.get('RestrictedToDenmark') asset_target = asset.get('Target') for link in asset.get('Links', []): uri = link.get('Uri') + if not uri: + encrypted_uri 
= link.get('EncryptedUri') + if not encrypted_uri: + continue + try: + uri = decrypt_uri(encrypted_uri) + except Exception: + self.report_warning( + 'Unable to decrypt EncryptedUri', video_id) + continue + uri = url_or_none(uri) if not uri: continue target = link.get('Target') @@ -139,19 +200,22 @@ class DRTVIE(InfoExtractor): 'vcodec': 'none' if kind == 'AudioResource' else None, 'preference': preference, }) - subtitles_list = asset.get('SubtitlesList') - if isinstance(subtitles_list, list): - LANGS = { - 'Danish': 'da', - } - for subs in subtitles_list: - if not subs.get('Uri'): - continue - lang = subs.get('Language') or 'da' - subtitles.setdefault(LANGS.get(lang, lang), []).append({ - 'url': subs['Uri'], - 'ext': mimetype2ext(subs.get('MimeType')) or 'vtt' - }) + subtitles_list = asset.get('SubtitlesList') or asset.get('Subtitleslist') + if isinstance(subtitles_list, list): + LANGS = { + 'Danish': 'da', + } + for subs in subtitles_list: + if not isinstance(subs, dict): + continue + sub_uri = url_or_none(subs.get('Uri')) + if not sub_uri: + continue + lang = subs.get('Language') or 'da' + subtitles.setdefault(LANGS.get(lang, lang), []).append({ + 'url': sub_uri, + 'ext': mimetype2ext(subs.get('MimeType')) or 'vtt' + }) if not formats and restricted_to_denmark: self.raise_geo_restricted( @@ -169,6 +233,13 @@ class DRTVIE(InfoExtractor): 'duration': duration, 'formats': formats, 'subtitles': subtitles, + 'series': str_or_none(data.get('SeriesTitle')), + 'season': str_or_none(data.get('SeasonTitle')), + 'season_number': int_or_none(data.get('SeasonNumber')), + 'season_id': str_or_none(data.get('SeasonUrn')), + 'episode': str_or_none(data.get('EpisodeTitle')), + 'episode_number': int_or_none(data.get('EpisodeNumber')), + 'release_year': int_or_none(data.get('ProductionYear')), } From 41cff90c41006b30213c7f676bd3920a1612b717 Mon Sep 17 00:00:00 2001 From: JChris246 <43832407+JChris246@users.noreply.github.com> Date: Mon, 28 Jan 2019 19:42:49 -0400 Subject: [PATCH 
053/785] [yourporn] Fix extraction and extract duration (closes #18815, closes #18852) change cdn to cdn4 for the video_url --- youtube_dl/extractor/yourporn.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/yourporn.py index c8dc29bd8..01e5f0c0e 100644 --- a/youtube_dl/extractor/yourporn.py +++ b/youtube_dl/extractor/yourporn.py @@ -1,7 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import urljoin +from ..utils import ( + parse_duration, + urljoin +) class YourPornIE(InfoExtractor): @@ -27,17 +30,21 @@ class YourPornIE(InfoExtractor): self._search_regex( r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info', group='data'), - video_id)[video_id]).replace('/cdn/', '/cdn3/') + video_id)[video_id]).replace('/cdn/', '/cdn4/') title = (self._search_regex( r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title', default=None) or self._og_search_description(webpage)).strip() + thumbnail = self._og_search_thumbnail(webpage) + duration = parse_duration(self._search_regex(r'duration:[^0-9]*([0-9:]+)', + webpage, 'duration', default=None)) return { 'id': video_id, 'url': video_url, 'title': title, + 'duration': duration, 'thumbnail': thumbnail, 'age_limit': 18 } From 9868f1ab1853484d7a6c38cd6fa0d94a11914cae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Jan 2019 23:56:42 +0700 Subject: [PATCH 054/785] [yourporn] Improve (closes #19061) --- youtube_dl/extractor/yourporn.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/yourporn.py index 01e5f0c0e..2c63f9752 100644 --- a/youtube_dl/extractor/yourporn.py +++ b/youtube_dl/extractor/yourporn.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( parse_duration, - urljoin + urljoin, 
) @@ -17,7 +17,11 @@ class YourPornIE(InfoExtractor): 'ext': 'mp4', 'title': 'md5:c9f43630bd968267672651ba905a7d35', 'thumbnail': r're:^https?://.*\.jpg$', - 'age_limit': 18 + 'duration': 165, + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, }, } @@ -35,16 +39,16 @@ class YourPornIE(InfoExtractor): title = (self._search_regex( r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title', default=None) or self._og_search_description(webpage)).strip() - thumbnail = self._og_search_thumbnail(webpage) + duration = parse_duration(self._search_regex( + r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration', + default=None)) - duration = parse_duration(self._search_regex(r'duration:[^0-9]*([0-9:]+)', - webpage, 'duration', default=None)) return { 'id': video_id, 'url': video_url, 'title': title, - 'duration': duration, 'thumbnail': thumbnail, - 'age_limit': 18 + 'duration': duration, + 'age_limit': 18, } From 5496754ae4c9097f37cfd9b307261cbbca438260 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 30 Jan 2019 00:03:19 +0700 Subject: [PATCH 055/785] [fox] Remove unused imports --- youtube_dl/extractor/fox.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index 568656542..0ffceeb7c 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -6,12 +6,10 @@ import uuid from .adobepass import AdobePassIE from ..compat import ( - compat_HTTPError, compat_str, compat_urllib_parse_unquote, ) from ..utils import ( - ExtractorError, int_or_none, parse_age_limit, parse_duration, From ca01e5f9039dd6c0d5abff5c7139f82c5d1dfba7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 30 Jan 2019 00:05:32 +0700 Subject: [PATCH 056/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ChangeLog b/ChangeLog index d94fe36ec..8f5343b23 100644 --- 
a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +version <unreleased> + +Core +* [postprocessor/ffmpeg] Do not copy Apple TV chapter tracks while embedding + subtitles (#19024, #19042) +* [postprocessor/ffmpeg] Disable "Last message repeated" messages (#19025) + +Extractors +* [yourporn] Fix extraction and extract duration (#18815, #18852, #19061) +* [drtv] Improve extraction (#19039) + + Add support for EncryptedUri videos + + Extract more metadata + * Fix subtitles extraction ++ [fox] Add support for locked videos using cookies (#19060) +* [fox] Fix extraction for free videos (#19060) ++ [zattoo] Add support for tv.salt.ch (#19059) + + version 2019.01.27 Core From 1063b4c7073ce056f694b1690dd5d5a1a06fb347 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 30 Jan 2019 00:08:39 +0700 Subject: [PATCH 057/785] release 2019.01.30 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f529e3f4b..3944a4a38 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.27*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.27** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.30*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.30** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.27 +[debug] youtube-dl version 2019.01.30 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 8f5343b23..745fffeaa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.01.30 Core * [postprocessor/ffmpeg] Do not copy Apple TV chapter tracks while embedding diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 6377bf815..2918520c3 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -546,6 +546,7 @@ - **MyVisionTV** - **n-tv.de** - **natgeo:video** + - **NationalGeographicTV** - **Naver** - **NBA** - **NBC** @@ -776,6 +777,7 @@ - **safari:api** - **safari:course**: safaribooksonline.com online courses - **SAKTV** + - **SaltTV** - **Sapo**: SAPO Vídeos - **savefrom.net** - **SBS**: sbs.com.au diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ec89cfc64..97818d0c7 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.27' +__version__ = '2019.01.30' From ce52c7c111602f41d7f9c498f2915fd255ba2eab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 
30 Jan 2019 06:15:23 +0700 Subject: [PATCH 058/785] [postprocessor/ffmpeg] Fix avconv processing broken in #19025 (closes #19067) --- youtube_dl/postprocessor/ffmpeg.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 88b9ae9be..5bcb00ac0 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -217,11 +217,13 @@ class FFmpegPostProcessor(PostProcessor): encodeArgument('-i'), encodeFilename(self._ffmpeg_filename_argument(path), True) ]) - cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] + - [encodeArgument('-loglevel'), encodeArgument('repeat+info')] + - files_cmd + - [encodeArgument(o) for o in opts] + - [encodeFilename(self._ffmpeg_filename_argument(out_path), True)]) + cmd = [encodeFilename(self.executable, True), encodeArgument('-y')] + # avconv does not have repeat option + if self.basename == 'ffmpeg': + cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')] + cmd += (files_cmd + + [encodeArgument(o) for o in opts] + + [encodeFilename(self._ffmpeg_filename_argument(out_path), True)]) if self._downloader.params.get('verbose', False): self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd)) From c2a0fe2ea7422c437a27c8fac57c7e865517354b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 30 Jan 2019 06:17:25 +0700 Subject: [PATCH 059/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 745fffeaa..e6de6ca03 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Core +* [postprocessor/ffmpeg] Fix avconv processing broken in #19025 (#19067) + + version 2019.01.30 Core From 7b0f9df23d9842ddb2a545a0ceaf594daa0e12ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 30 Jan 2019 
06:19:36 +0700 Subject: [PATCH 060/785] release 2019.01.30.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 3944a4a38..423a08e4d 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.30*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.30** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.30.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.30.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.30 +[debug] youtube-dl version 2019.01.30.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e6de6ca03..4872cd9fc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.01.30.1 Core * [postprocessor/ffmpeg] Fix avconv processing broken in #19025 (#19067) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 97818d0c7..be3bbdd73 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.30' +__version__ = '2019.01.30.1' From 645c4885cf38ecb244412dffda2760f4c0e72033 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 30 Jan 2019 14:43:44 +0100 Subject: [PATCH 061/785] [crackle] authorize media detail request(closes #16931) --- youtube_dl/extractor/crackle.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py index f73ef6b63..49bf3a4f9 100644 --- a/youtube_dl/extractor/crackle.py +++ b/youtube_dl/extractor/crackle.py @@ -1,7 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals, 
division +import hashlib +import hmac import re +import time from .common import InfoExtractor from ..compat import compat_HTTPError @@ -74,13 +77,16 @@ class CrackleIE(InfoExtractor): for country in countries: try: + # Authorization generation algorithm is reverse engineered from: + # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js + media_detail_url = 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country) + timestamp = time.strftime('%Y%m%d%H%M', time.gmtime()) + h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([media_detail_url, timestamp]).encode(), hashlib.sha1).hexdigest().upper() media = self._download_json( - 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s' - % (video_id, country), video_id, - 'Downloading media JSON as %s' % country, - 'Unable to download media JSON', query={ - 'disableProtocols': 'true', - 'format': 'json' + media_detail_url, video_id, 'Downloading media JSON as %s' % country, + 'Unable to download media JSON', headers={ + 'Accept': 'application/json', + 'Authorization': '|'.join([h, timestamp, '117', '1']), }) except ExtractorError as e: # 401 means geo restriction, trying next country From 15e832ff2a1bee42b299d3498439cf789c16fffa Mon Sep 17 00:00:00 2001 From: Batuhan's Unmaintained Account <batuhanosmantaskaya@gmail.com> Date: Wed, 30 Jan 2019 19:39:02 +0300 Subject: [PATCH 062/785] [openload] Add support for oload.info --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index b713e78b8..747aa298a 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -249,7 +249,7 @@ class OpenloadIE(InfoExtractor): (?:www\.)? 
(?: openload\.(?:co|io|link)| - oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club) + oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info) ) )/ (?:f|embed)/ @@ -337,6 +337,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.club/f/Nr1L-aZ2dbQ', 'only_matching': True, + }, { + 'url': 'https://oload.info/f/5NEAbI2BDSk', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From 9613e14a92429046b162145dfd40dee5795ca409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 31 Jan 2019 00:15:45 +0700 Subject: [PATCH 063/785] [openload] Add support for openload.pw and oload.pw (closes #18930) --- youtube_dl/extractor/openload.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 747aa298a..a2ae25272 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -248,8 +248,8 @@ class OpenloadIE(InfoExtractor): (?P<host> (?:www\.)? 
(?: - openload\.(?:co|io|link)| - oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info) + openload\.(?:co|io|link|pw)| + oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw) ) )/ (?:f|embed)/ @@ -340,6 +340,12 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.info/f/5NEAbI2BDSk', 'only_matching': True, + }, { + 'url': 'https://openload.pw/f/WyKgK8s94N0', + 'only_matching': True, + }, { + 'url': 'https://oload.pw/f/WyKgK8s94N0', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From 49fe4175ae165527a0b06b8d97cdc85d83041fef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Feb 2019 01:48:10 +0700 Subject: [PATCH 064/785] [drtv] Improve preference (closes #19079) --- youtube_dl/extractor/drtv.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index c5f211128..0c7e350f0 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -171,10 +171,13 @@ class DRTVIE(InfoExtractor): continue target = link.get('Target') format_id = target or '' - preference = None - if asset_target in ('SpokenSubtitles', 'SignLanguage'): + if asset_target in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'): preference = -1 format_id += '-%s' % asset_target + elif asset_target == 'Default': + preference = 1 + else: + preference = None if target == 'HDS': f4m_formats = self._extract_f4m_formats( uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43', From 3ef2da2d21061bd44df0b0a0d27e82a365209662 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Feb 2019 04:00:29 +0700 Subject: [PATCH 065/785] [soundcloud] Fix paged playlists extraction, add support for albums and update client id --- youtube_dl/extractor/soundcloud.py | 26 
+++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 81c81c8d5..030840fd8 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -34,7 +34,7 @@ class SoundcloudIE(InfoExtractor): (?:(?:(?:www\.|m\.)?soundcloud\.com/ (?!stations/track) (?P<uploader>[\w\d-]+)/ - (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) + (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) (?P<title>[\w\d-]+)/? (?P<token>[^?]+?)?(?:[?].*)?$) |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) @@ -157,7 +157,7 @@ class SoundcloudIE(InfoExtractor): }, ] - _CLIENT_ID = 'LvWovRaJZlWCHql0bISuum8Bd2KX79mb' + _CLIENT_ID = 'NmW1FlPaiL94ueEu7oziOWjYEzZzQDcK' @staticmethod def _extract_urls(webpage): @@ -368,7 +368,6 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE): class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): - _API_BASE = 'https://api.soundcloud.com' _API_V2_BASE = 'https://api-v2.soundcloud.com' def _extract_playlist(self, base_url, playlist_id, playlist_title): @@ -389,8 +388,12 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): next_href, playlist_id, 'Downloading track page %s' % (i + 1)) collection = response['collection'] - if not collection: - break + + if not isinstance(collection, list): + collection = [] + + # Empty collection may be returned, in this case we proceed + # straight to next_href def resolve_permalink_url(candidates): for cand in candidates: @@ -429,7 +432,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): (?:(?:www|m)\.)?soundcloud\.com/ (?P<user>[^/]+) (?:/ - (?P<rsrc>tracks|sets|reposts|likes|spotlight) + (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight) )? 
/?(?:[?#].*)?$ ''' @@ -476,13 +479,17 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): 'title': 'Grynpyret (Spotlight)', }, 'playlist_mincount': 1, + }, { + 'url': 'https://soundcloud.com/soft-cell-official/albums', + 'only_matching': True, }] _BASE_URL_MAP = { - 'all': '%s/profile/soundcloud:users:%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_BASE, + 'all': '%s/stream/users/%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'albums': '%s/users/%%s/albums' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - 'reposts': '%s/profile/soundcloud:users:%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'reposts': '%s/stream/users/%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, } @@ -490,6 +497,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): _TITLE_MAP = { 'all': 'All', 'tracks': 'Tracks', + 'albums': 'Albums', 'sets': 'Playlists', 'reposts': 'Reposts', 'likes': 'Likes', From b6423e6ca215e1583e013cf7b2c1faf8d3dcace7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Feb 2019 04:11:32 +0700 Subject: [PATCH 066/785] [soundcloud:user] Update tests --- youtube_dl/extractor/soundcloud.py | 48 ++++++++++++++++-------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 030840fd8..13463ae4f 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -438,40 +438,47 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): ''' IE_NAME = 'soundcloud:user' _TESTS = [{ - 'url': 
'https://soundcloud.com/the-akashic-chronicler', + 'url': 'https://soundcloud.com/soft-cell-official', 'info_dict': { - 'id': '114582580', - 'title': 'The Akashic Chronicler (All)', + 'id': '207965082', + 'title': 'Soft Cell (All)', }, - 'playlist_mincount': 74, + 'playlist_mincount': 28, }, { - 'url': 'https://soundcloud.com/the-akashic-chronicler/tracks', + 'url': 'https://soundcloud.com/soft-cell-official/tracks', 'info_dict': { - 'id': '114582580', - 'title': 'The Akashic Chronicler (Tracks)', + 'id': '207965082', + 'title': 'Soft Cell (Tracks)', }, - 'playlist_mincount': 37, + 'playlist_mincount': 27, }, { - 'url': 'https://soundcloud.com/the-akashic-chronicler/sets', + 'url': 'https://soundcloud.com/soft-cell-official/albums', 'info_dict': { - 'id': '114582580', - 'title': 'The Akashic Chronicler (Playlists)', + 'id': '207965082', + 'title': 'Soft Cell (Albums)', + }, + 'playlist_mincount': 1, + }, { + 'url': 'https://soundcloud.com/jcv246/sets', + 'info_dict': { + 'id': '12982173', + 'title': 'Jordi / cv (Playlists)', }, 'playlist_mincount': 2, }, { - 'url': 'https://soundcloud.com/the-akashic-chronicler/reposts', + 'url': 'https://soundcloud.com/jcv246/reposts', 'info_dict': { - 'id': '114582580', - 'title': 'The Akashic Chronicler (Reposts)', + 'id': '12982173', + 'title': 'Jordi / cv (Reposts)', }, - 'playlist_mincount': 7, + 'playlist_mincount': 6, }, { - 'url': 'https://soundcloud.com/the-akashic-chronicler/likes', + 'url': 'https://soundcloud.com/clalberg/likes', 'info_dict': { - 'id': '114582580', - 'title': 'The Akashic Chronicler (Likes)', + 'id': '11817582', + 'title': 'clalberg (Likes)', }, - 'playlist_mincount': 321, + 'playlist_mincount': 5, }, { 'url': 'https://soundcloud.com/grynpyret/spotlight', 'info_dict': { @@ -479,9 +486,6 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): 'title': 'Grynpyret (Spotlight)', }, 'playlist_mincount': 1, - }, { - 'url': 'https://soundcloud.com/soft-cell-official/albums', - 'only_matching': True, }] 
_BASE_URL_MAP = { From e9fef7ee4e666b60bc7a757391f16e2be76f6cbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Feb 2019 05:44:31 +0700 Subject: [PATCH 067/785] [YoutubeDL] Fallback to ie_key of matching extractor while making download archive id when no explicit ie_key is provided (#19022) --- youtube_dl/YoutubeDL.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 80ed8d7e5..c168415ce 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -2060,15 +2060,21 @@ class YoutubeDL(object): self.report_warning('Unable to remove downloaded original file') def _make_archive_id(self, info_dict): + video_id = info_dict.get('id') + if not video_id: + return # Future-proof against any change in case # and backwards compatibility with prior versions - extractor = info_dict.get('extractor_key') + extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist if extractor is None: - if 'id' in info_dict: - extractor = info_dict.get('ie_key') # key in a playlist - if extractor is None: - return None # Incomplete video information - return extractor.lower() + ' ' + info_dict['id'] + # Try to find matching extractor for the URL and take its ie_key + for ie in self._ies: + if ie.suitable(info_dict['url']): + extractor = ie.ie_key() + break + else: + return + return extractor.lower() + ' ' + video_id def in_download_archive(self, info_dict): fn = self.params.get('download_archive') @@ -2076,7 +2082,7 @@ class YoutubeDL(object): return False vid_id = self._make_archive_id(info_dict) - if vid_id is None: + if not vid_id: return False # Incomplete video information try: From b9bc1cff721b6f63e733c6ababeec45b92f1484b Mon Sep 17 00:00:00 2001 From: JChris246 <43832407+JChris246@users.noreply.github.com> Date: Fri, 1 Feb 2019 19:04:00 -0400 Subject: [PATCH 068/785] [drtuber] Extract duration --- 
youtube_dl/extractor/drtuber.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index 5c41c8022..2baea585b 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -4,7 +4,9 @@ import re from .common import InfoExtractor from ..utils import ( + int_or_none, NO_DEFAULT, + parse_duration, str_to_int, ) @@ -65,6 +67,9 @@ class DrTuberIE(InfoExtractor): }) self._sort_formats(formats) + duration = int_or_none(video_data.get('duration')) or parse_duration( + video_data.get('duration_format')) + title = self._html_search_regex( (r'<h1[^>]+class=["\']title[^>]+>([^<]+)', r'<title>([^<]+)\s*@\s+DrTuber', @@ -103,4 +108,5 @@ class DrTuberIE(InfoExtractor): 'comment_count': comment_count, 'categories': categories, 'age_limit': self._rta_search(webpage), + 'duration': duration, } From 6cc6e0c34d0f67747be7bac91690820f47b26acb Mon Sep 17 00:00:00 2001 From: Cory Hall <corydantehall@gmail.com> Date: Thu, 31 Jan 2019 20:51:37 -0500 Subject: [PATCH 069/785] [soundcloud:pagedplaylist] Add ie and title to entries (#19022) rel: https://github.com/rg3/youtube-dl/issues/19022 --- youtube_dl/extractor/soundcloud.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 13463ae4f..1c8d3c53b 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -395,18 +395,20 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): # Empty collection may be returned, in this case we proceed # straight to next_href - def resolve_permalink_url(candidates): - for cand in candidates: + def append_url_result(entries, item): + for cand in (item, item.get('track'), item.get('playlist')): if isinstance(cand, dict): permalink_url = cand.get('permalink_url') - entry_id = self._extract_id(cand) if permalink_url and permalink_url.startswith('http'): - return 
permalink_url, entry_id + return entries.append( + self.url_result( + permalink_url, + ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, + video_id=self._extract_id(cand), + video_title=cand.get('title'))) for e in collection: - permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist'))) - if permalink_url: - entries.append(self.url_result(permalink_url, video_id=entry_id)) + append_url_result(entries, e) next_href = response.get('next_href') if not next_href: From 7c5307f4c4e91ef6551d70cd844b93fbdc5c3cf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Feb 2019 23:40:06 +0700 Subject: [PATCH 070/785] [soundcloud:pagedplaylist] Improve (closes #19086) --- youtube_dl/extractor/soundcloud.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 1c8d3c53b..5536e7851 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -18,6 +18,7 @@ from ..utils import ( int_or_none, unified_strdate, update_url_query, + url_or_none, ) @@ -395,20 +396,23 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): # Empty collection may be returned, in this case we proceed # straight to next_href - def append_url_result(entries, item): - for cand in (item, item.get('track'), item.get('playlist')): - if isinstance(cand, dict): - permalink_url = cand.get('permalink_url') - if permalink_url and permalink_url.startswith('http'): - return entries.append( - self.url_result( - permalink_url, - ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, - video_id=self._extract_id(cand), - video_title=cand.get('title'))) + def resolve_entry(candidates): + for cand in candidates: + if not isinstance(cand, dict): + continue + permalink_url = url_or_none(cand.get('permalink_url')) + if not permalink_url: + continue + 
return self.url_result( + permalink_url, + ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, + video_id=self._extract_id(cand), + video_title=cand.get('title')) for e in collection: - append_url_result(entries, e) + entry = resolve_entry((e, e.get('track'), e.get('playlist'))) + if entry: + entries.append(entry) next_href = response.get('next_href') if not next_href: From 0efcb5a2fe0c3024d3e5affe74b3d0d416413ffa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 3 Feb 2019 00:33:45 +0700 Subject: [PATCH 071/785] [vporn] Remove extractor (closes #16276) Handled by generic extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/vporn.py | 123 ----------------------------- 2 files changed, 124 deletions(-) delete mode 100644 youtube_dl/extractor/vporn.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b40be42e6..693c16e49 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1362,7 +1362,6 @@ from .voxmedia import ( VoxMediaVolumeIE, VoxMediaIE, ) -from .vporn import VpornIE from .vrt import VRTIE from .vrak import VrakIE from .vrv import ( diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py deleted file mode 100644 index 858ac9e71..000000000 --- a/youtube_dl/extractor/vporn.py +++ /dev/null @@ -1,123 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - parse_duration, - str_to_int, - urljoin, -) - - -class VpornIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<display_id>[^/]+)/(?P<id>\d+)' - _TESTS = [ - { - 'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/', - 'md5': 'facf37c1b86546fa0208058546842c55', - 'info_dict': { - 'id': '497944', - 'display_id': 'violet-on-her-th-birthday', - 'ext': 'mp4', - 'title': 'Violet on her 19th birthday', 
- 'description': 'Violet dances in front of the camera which is sure to get you horny.', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'kileyGrope', - 'categories': ['Masturbation', 'Teen'], - 'duration': 393, - 'age_limit': 18, - 'view_count': int, - }, - 'skip': 'video removed', - }, - { - 'url': 'http://www.vporn.com/female/hana-shower/523564/', - 'md5': 'ced35a4656198a1664cf2cda1575a25f', - 'info_dict': { - 'id': '523564', - 'display_id': 'hana-shower', - 'ext': 'mp4', - 'title': 'Hana Shower', - 'description': 'Hana showers at the bathroom.', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Hmmmmm', - 'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female', '720p'], - 'duration': 588, - 'age_limit': 18, - 'view_count': int, - } - }, - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') - - webpage = self._download_webpage(url, display_id) - - errmsg = 'This video has been deleted due to Copyright Infringement or by the account owner!' 
- if errmsg in webpage: - raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True) - - title = self._html_search_regex( - r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip() - description = self._html_search_regex( - r'class="(?:descr|description_txt)">(.*?)</div>', - webpage, 'description', fatal=False) - thumbnail = urljoin('http://www.vporn.com', self._html_search_regex( - r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description', - default=None)) - - uploader = self._html_search_regex( - r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>', - webpage, 'uploader', fatal=False) - - categories = re.findall(r'<a href="/cat/[^"]+"[^>]*>([^<]+)</a>', webpage) - - duration = parse_duration(self._search_regex( - r'Runtime:\s*</span>\s*(\d+ min \d+ sec)', - webpage, 'duration', fatal=False)) - - view_count = str_to_int(self._search_regex( - r'class="views">([\d,\.]+) [Vv]iews<', - webpage, 'view count', fatal=False)) - comment_count = str_to_int(self._html_search_regex( - r"'Comments \(([\d,\.]+)\)'", - webpage, 'comment count', default=None)) - - formats = [] - - for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage): - video_url = video[1] - fmt = { - 'url': video_url, - 'format_id': video[0], - } - m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)_(?P<vbr>\d+)k\.mp4$', video_url) - if m: - fmt.update({ - 'width': int(m.group('width')), - 'height': int(m.group('height')), - 'vbr': int(m.group('vbr')), - }) - formats.append(fmt) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'categories': categories, - 'duration': duration, - 'view_count': view_count, - 'comment_count': comment_count, - 'age_limit': 18, - 'formats': formats, - } From eecf788b90fa4d49567c714f5a613fdd2b6e2507 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 3 Feb 
2019 09:10:09 +0100 Subject: [PATCH 072/785] [teachable] add support for courses.workitdaily.com (closes #18871) --- youtube_dl/extractor/teachable.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/teachable.py b/youtube_dl/extractor/teachable.py index 47ac95ee8..c1a9deafe 100644 --- a/youtube_dl/extractor/teachable.py +++ b/youtube_dl/extractor/teachable.py @@ -27,6 +27,7 @@ class TeachableBaseIE(InfoExtractor): 'market.saleshacker.com': 'saleshacker', 'learnability.org': 'learnability', 'edurila.com': 'edurila', + 'courses.workitdaily.com': 'workitdaily', } _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys())) From 07fbfef1c7e36b25dd7098be73ab76b87378a015 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 3 Feb 2019 12:10:41 +0100 Subject: [PATCH 073/785] [radiocanada] switch to the new media requests(closes #19115) --- youtube_dl/extractor/radiocanada.py | 133 ++++++++-------------------- 1 file changed, 39 insertions(+), 94 deletions(-) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 302f67d96..58e294892 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -5,14 +5,10 @@ import re from .common import InfoExtractor from ..utils import ( - xpath_text, - find_xpath_attr, determine_ext, + ExtractorError, int_or_none, unified_strdate, - xpath_element, - ExtractorError, - determine_protocol, unsmuggle_url, ) @@ -61,107 +57,53 @@ class RadioCanadaIE(InfoExtractor): 'only_matching': True, } ] + _GEO_COUNTRIES = ['CA'] + + def _call_api(self, path, video_id, app_code, query): + query.update({ + 'appCode': app_code, + 'idMedia': video_id, + 'output': 'json', + }) + return self._download_json( + 'https://services.radio-canada.ca/media/' + path, video_id, headers={ + 'Authorization': 'Client-Key 773aea60-0e80-41bb-9c7f-e6d7c3ad17fb' + }, query=query) def _real_extract(self, url): url, smuggled_data = 
unsmuggle_url(url, {}) app_code, video_id = re.match(self._VALID_URL, url).groups() - metadata = self._download_xml( - 'http://api.radio-canada.ca/metaMedia/v1/index.ashx', - video_id, note='Downloading metadata XML', query={ - 'appCode': app_code, - 'idMedia': video_id, - }) + metas = self._call_api('meta/v1/index.ashx', video_id, app_code, {})['Metas'] def get_meta(name): - el = find_xpath_attr(metadata, './/Meta', 'name', name) - return el.text if el is not None else None + for meta in metas: + if meta.get('name') == name: + text = meta.get('text') + if text: + return text # protectionType does not necessarily mean the video is DRM protected (see # https://github.com/rg3/youtube-dl/pull/18609). if get_meta('protectionType'): self.report_warning('This video is probably DRM protected.') - device_types = ['ipad'] - if not smuggled_data: - device_types.append('flash') - device_types.append('android') - - formats = [] - error = None - # TODO: extract f4m formats - # f4m formats can be extracted using flashhd device_type but they produce unplayable file - for device_type in device_types: - validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx' - query = { - 'appCode': app_code, - 'idMedia': video_id, - 'connectionType': 'broadband', - 'multibitrate': 'true', - 'deviceType': device_type, - } - if smuggled_data: - validation_url = 'https://services.radio-canada.ca/media/validation/v2/' - query.update(smuggled_data) - else: - query.update({ - # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction - 'paysJ391wsHjbOJwvCs26toz': 'CA', - 'bypasslock': 'NZt5K62gRqfc', - }) - v_data = self._download_xml(validation_url, video_id, note='Downloading %s XML' % device_type, query=query, fatal=False) - v_url = xpath_text(v_data, 'url') - if not v_url: - continue - if v_url == 'null': - error = xpath_text(v_data, 'message') - continue - ext = determine_ext(v_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - v_url, 
video_id, 'mp4', m3u8_id='hls', fatal=False)) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - v_url, video_id, f4m_id='hds', fatal=False)) - else: - ext = determine_ext(v_url) - bitrates = xpath_element(v_data, 'bitrates') - for url_e in bitrates.findall('url'): - tbr = int_or_none(url_e.get('bitrate')) - if not tbr: - continue - f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url) - protocol = determine_protocol({'url': f_url}) - f = { - 'format_id': '%s-%d' % (protocol, tbr), - 'url': f_url, - 'ext': 'flv' if protocol == 'rtmp' else ext, - 'protocol': protocol, - 'width': int_or_none(url_e.get('width')), - 'height': int_or_none(url_e.get('height')), - 'tbr': tbr, - } - mobj = re.match(r'(?P<url>rtmp://[^/]+/[^/]+)/(?P<playpath>[^?]+)(?P<auth>\?.+)', f_url) - if mobj: - f.update({ - 'url': mobj.group('url') + mobj.group('auth'), - 'play_path': mobj.group('playpath'), - }) - formats.append(f) - if protocol == 'rtsp': - base_url = self._search_regex( - r'rtsp://([^?]+)', f_url, 'base url', default=None) - if base_url: - base_url = 'http://' + base_url - formats.extend(self._extract_m3u8_formats( - base_url + '/playlist.m3u8', video_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False)) - formats.extend(self._extract_f4m_formats( - base_url + '/manifest.f4m', video_id, - f4m_id='hds', fatal=False)) - if not formats and error: + query = { + 'connectionType': 'hd', + 'deviceType': 'ipad', + 'multibitrate': 'true', + } + if smuggled_data: + query.update(smuggled_data) + v_data = self._call_api('validation/v2/', video_id, app_code, query) + v_url = v_data.get('url') + if not v_url: + error = v_data['message'] + if error == "Le contenu sélectionné n'est pas disponible dans votre pays": + raise self.raise_geo_restricted(error, self._GEO_COUNTRIES) raise ExtractorError( '%s said: %s' % (self.IE_NAME, error), expected=True) + formats = self._extract_m3u8_formats(v_url, video_id, 'mp4') self._sort_formats(formats) subtitles = {} @@ -189,8 +131,8 
@@ class RadioCanadaIE(InfoExtractor): class RadioCanadaAudioVideoIE(InfoExtractor): 'radiocanada:audiovideo' - _VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)' + _TESTS = [{ 'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam', 'info_dict': { 'id': '7527184', @@ -203,7 +145,10 @@ class RadioCanadaAudioVideoIE(InfoExtractor): # m3u8 download 'skip_download': True, }, - } + }, { + 'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam', + 'only_matching': True, + }] def _real_extract(self, url): return self.url_result('radiocanada:medianet:%s' % self._match_id(url)) From 70c3ee13671798d7e3b80ea9be863ef73bd08653 Mon Sep 17 00:00:00 2001 From: JChris246 <43832407+JChris246@users.noreply.github.com> Date: Mon, 4 Feb 2019 13:06:04 -0400 Subject: [PATCH 074/785] [pornhd] Extract like count --- youtube_dl/extractor/pornhd.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index b52879c7a..a079cd32a 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -23,6 +23,7 @@ class PornHdIE(InfoExtractor): 'description': 'md5:3748420395e03e31ac96857a8f125b2b', 'thumbnail': r're:^https?://.*\.jpg', 'view_count': int, + 'like_count': int, 'age_limit': 18, } }, { @@ -37,6 +38,7 @@ class PornHdIE(InfoExtractor): 'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294', 'thumbnail': r're:^https?://.*\.jpg', 'view_count': int, + 'like_count': int, 'age_limit': 18, }, 'skip': 'Not available anymore', @@ -85,6 +87,11 @@ class PornHdIE(InfoExtractor): r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage, 'thumbnail', fatal=False, group='url') + like_count = int_or_none(self._search_regex( + (r'(\d+)\s*</11[^>]+>(?: |\s)*\blikes', + r'class=["\']save-count["\'][^>]*>\s*(\d+)'), + webpage, 'like count', 
fatal=False)) + return { 'id': video_id, 'display_id': display_id, @@ -92,6 +99,7 @@ class PornHdIE(InfoExtractor): 'description': description, 'thumbnail': thumbnail, 'view_count': view_count, + 'like_count': like_count, 'formats': formats, 'age_limit': 18, } From 48fb963b2f9495922a4acf751608167cbc273693 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Feb 2019 00:07:37 +0700 Subject: [PATCH 075/785] [pornhd] Fix formats extraction --- youtube_dl/extractor/pornhd.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index a079cd32a..27d65d4b9 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -4,9 +4,11 @@ import re from .common import InfoExtractor from ..utils import ( + determine_ext, ExtractorError, int_or_none, js_to_json, + urljoin, ) @@ -14,7 +16,7 @@ class PornHdIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?' 
_TESTS = [{ 'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', - 'md5': 'c8b964b1f0a4b5f7f28ae3a5c9f86ad5', + 'md5': '87f1540746c1d32ec7a2305c12b96b25', 'info_dict': { 'id': '9864', 'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', @@ -67,12 +69,14 @@ class PornHdIE(InfoExtractor): formats = [] for format_id, video_url in sources.items(): + video_url = urljoin(url, video_url) if not video_url: continue height = int_or_none(self._search_regex( r'^(\d+)[pP]', format_id, 'height', default=None)) formats.append({ 'url': video_url, + 'ext': determine_ext(video_url, 'mp4'), 'format_id': format_id, 'height': height, }) From d2d970d07ec82f648b62bff8b15ac0b57d0d0496 Mon Sep 17 00:00:00 2001 From: JChris246 <chris.401@live.com> Date: Mon, 4 Feb 2019 13:33:54 -0400 Subject: [PATCH 076/785] [pornhub] Fix tags and categories extraction (closes #13720) --- youtube_dl/extractor/pornhub.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index be93d5d48..428324ef0 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -16,7 +16,6 @@ from .openload import PhantomJSwrapper from ..utils import ( ExtractorError, int_or_none, - js_to_json, orderedSet, remove_quotes, str_to_int, @@ -303,14 +302,17 @@ class PornHubIE(PornHubBaseIE): comment_count = self._extract_count( r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') - page_params = self._parse_json(self._search_regex( - r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})', - webpage, 'page parameters', group='data', default='{}'), - video_id, transform_source=js_to_json, fatal=False) - tags = categories = None - if page_params: - tags = page_params.get('tags', '').split(',') - categories = page_params.get('categories', '').split(',') + def _get_items(class_name): + div = 
self._search_regex( + r'<div class="' + class_name + '">([\S\s]+?)</div>', + webpage, class_name, default=None) + if div: + return [a for a in re.findall(r'<a href=[^>]+>([^<]+)', div)] + else: + return None + + categories = _get_items('categoriesWrapper') + tags = _get_items('tagsWrapper') return { 'id': video_id, From 5dda1edef93d94c9a49672f905df0c49c75c5739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Feb 2019 23:06:55 +0700 Subject: [PATCH 077/785] [pornhub] Improve and simplify (closes #19135) --- youtube_dl/extractor/pornhub.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 428324ef0..641083da7 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -302,17 +302,12 @@ class PornHubIE(PornHubBaseIE): comment_count = self._extract_count( r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') - def _get_items(class_name): + def extract_list(meta_key): div = self._search_regex( - r'<div class="' + class_name + '">([\S\s]+?)</div>', - webpage, class_name, default=None) + r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>' + % meta_key, webpage, meta_key, default=None) if div: - return [a for a in re.findall(r'<a href=[^>]+>([^<]+)', div)] - else: - return None - - categories = _get_items('categoriesWrapper') - tags = _get_items('tagsWrapper') + return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div) return { 'id': video_id, @@ -327,8 +322,8 @@ class PornHubIE(PornHubBaseIE): 'comment_count': comment_count, 'formats': formats, 'age_limit': 18, - 'tags': tags, - 'categories': categories, + 'tags': extract_list('tags'), + 'categories': extract_list('categories'), 'subtitles': subtitles, } From 8fecc7353df35f6cac305c04a4e203fb2bbb4827 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 6 Feb 2019 13:59:12 +0100 Subject: [PATCH 078/785] [toutv] 
fix authentication(closes #16398)(closes #18700) --- youtube_dl/extractor/radiocanada.py | 47 +++++++++++++++++--------- youtube_dl/extractor/toutv.py | 51 +++++++++-------------------- 2 files changed, 47 insertions(+), 51 deletions(-) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 58e294892..dd95f99f2 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -4,12 +4,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_HTTPError from ..utils import ( determine_ext, ExtractorError, int_or_none, unified_strdate, - unsmuggle_url, ) @@ -58,23 +58,35 @@ class RadioCanadaIE(InfoExtractor): } ] _GEO_COUNTRIES = ['CA'] + _access_token = None + _claims = None - def _call_api(self, path, video_id, app_code, query): + def _call_api(self, path, video_id=None, app_code=None, query=None): + if not query: + query = {} query.update({ - 'appCode': app_code, - 'idMedia': video_id, + 'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb', 'output': 'json', }) - return self._download_json( - 'https://services.radio-canada.ca/media/' + path, video_id, headers={ - 'Authorization': 'Client-Key 773aea60-0e80-41bb-9c7f-e6d7c3ad17fb' - }, query=query) + if video_id: + query.update({ + 'appCode': app_code, + 'idMedia': video_id, + }) + if self._access_token: + query['access_token'] = self._access_token + try: + return self._download_json( + 'https://services.radio-canada.ca/media/' + path, video_id, query=query) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 422): + data = self._parse_json(e.cause.read().decode(), None) + error = data.get('error_description') or data['errorMessage']['text'] + raise ExtractorError(error, expected=True) + raise - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - app_code, video_id = re.match(self._VALID_URL, url).groups() - - metas = 
self._call_api('meta/v1/index.ashx', video_id, app_code, {})['Metas'] + def _extract_info(self, app_code, video_id): + metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas'] def get_meta(name): for meta in metas: @@ -93,14 +105,16 @@ class RadioCanadaIE(InfoExtractor): 'deviceType': 'ipad', 'multibitrate': 'true', } - if smuggled_data: - query.update(smuggled_data) + if self._claims: + query['claims'] = self._claims v_data = self._call_api('validation/v2/', video_id, app_code, query) v_url = v_data.get('url') if not v_url: error = v_data['message'] if error == "Le contenu sélectionné n'est pas disponible dans votre pays": raise self.raise_geo_restricted(error, self._GEO_COUNTRIES) + if error == 'Le contenu sélectionné est disponible seulement en premium': + self.raise_login_required(error) raise ExtractorError( '%s said: %s' % (self.IE_NAME, error), expected=True) formats = self._extract_m3u8_formats(v_url, video_id, 'mp4') @@ -128,6 +142,9 @@ class RadioCanadaIE(InfoExtractor): 'formats': formats, } + def _real_extract(self, url): + return self._extract_info(*re.match(self._VALID_URL, url).groups()) + class RadioCanadaAudioVideoIE(InfoExtractor): 'radiocanada:audiovideo' diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 2e7876cc5..f1ab91cf2 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -3,22 +3,19 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor +from .radiocanada import RadioCanadaIE from ..utils import ( - int_or_none, - js_to_json, - urlencode_postdata, extract_attributes, - smuggle_url, + int_or_none, + merge_dicts, + urlencode_postdata, ) -class TouTvIE(InfoExtractor): +class TouTvIE(RadioCanadaIE): _NETRC_MACHINE = 'toutv' IE_NAME = 'tou.tv' _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)' - _access_token = None - _claims = None _TESTS = [{ 'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17', @@ 
-46,18 +43,14 @@ class TouTvIE(InfoExtractor): email, password = self._get_login_info() if email is None: return - state = 'http://ici.tou.tv/' - webpage = self._download_webpage(state, None, 'Downloading homepage') - toutvlogin = self._parse_json(self._search_regex( - r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json) - authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize' login_webpage = self._download_webpage( - authorize_url, None, 'Downloading login page', query={ - 'client_id': toutvlogin['clientId'], - 'redirect_uri': 'https://ici.tou.tv/login/loginCallback', + 'https://services.radio-canada.ca/auth/oauth/v2/authorize', + None, 'Downloading login page', query={ + 'client_id': '4dd36440-09d5-4468-8923-b6d91174ad36', + 'redirect_uri': 'https://ici.tou.tv/logincallback', 'response_type': 'token', - 'scope': 'media-drmt openid profile email id.write media-validation.read.privileged', - 'state': state, + 'scope': 'id.write media-validation.read', + 'state': '/', }) def extract_form_url_and_data(wp, default_form_url, form_spec_re=''): @@ -86,12 +79,7 @@ class TouTvIE(InfoExtractor): self._access_token = self._search_regex( r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', urlh.geturl(), 'access token') - self._claims = self._download_json( - 'https://services.radio-canada.ca/media/validation/v2/getClaims', - None, 'Extracting Claims', query={ - 'token': self._access_token, - 'access_token': self._access_token, - })['claims'] + self._claims = self._call_api('validation/v2/getClaims')['claims'] def _real_extract(self, url): path = self._match_id(url) @@ -102,19 +90,10 @@ class TouTvIE(InfoExtractor): self.report_warning('This video is probably DRM protected.', path) video_id = metadata['IdMedia'] details = metadata['Details'] - title = details['OriginalTitle'] - video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id) - if self._access_token and self._claims: - video_url = 
smuggle_url(video_url, { - 'access_token': self._access_token, - 'claims': self._claims, - }) - return { - '_type': 'url_transparent', - 'url': video_url, + return merge_dicts({ 'id': video_id, - 'title': title, + 'title': details.get('OriginalTitle'), 'thumbnail': details.get('ImageUrl'), 'duration': int_or_none(details.get('LengthInSeconds')), - } + }, self._extract_info(metadata.get('AppCode', 'toutv'), video_id)) From 241c5d7d384dcb01a62702274cfbead01f537145 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 6 Feb 2019 19:38:10 +0100 Subject: [PATCH 079/785] [trutv] fix extraction(closes #17336) --- youtube_dl/extractor/trutv.py | 84 +++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/trutv.py b/youtube_dl/extractor/trutv.py index 3a5782525..ce892c8c5 100644 --- a/youtube_dl/extractor/trutv.py +++ b/youtube_dl/extractor/trutv.py @@ -4,44 +4,72 @@ from __future__ import unicode_literals import re from .turner import TurnerBaseIE +from ..utils import ( + int_or_none, + parse_iso8601, +) class TruTVIE(TurnerBaseIE): - _VALID_URL = r'https?://(?:www\.)?trutv\.com(?:(?P<path>/shows/[^/]+/videos/[^/?#]+?)\.html|/full-episodes/[^/]+/(?P<id>\d+))' + _VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P<series_slug>[0-9A-Za-z-]+)/(?:videos/(?P<clip_slug>[0-9A-Za-z-]+)|(?P<id>\d+))' _TEST = { - 'url': 'http://www.trutv.com/shows/10-things/videos/you-wont-believe-these-sports-bets.html', - 'md5': '2cdc844f317579fed1a7251b087ff417', + 'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html', 'info_dict': { - 'id': '/shows/10-things/videos/you-wont-believe-these-sports-bets', + 'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1', 'ext': 'mp4', - 'title': 'You Won\'t Believe These Sports Bets', - 'description': 'Jamie Lee sits down with a bookie to discuss the bizarre world of illegal sports betting.', - 'upload_date': 
'20130305', - } + 'title': 'Sunlight-Activated Flower', + 'description': "A customer is stunned when he sees Michael's sunlight-activated flower.", + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, } def _real_extract(self, url): - path, video_id = re.match(self._VALID_URL, url).groups() - auth_required = False - if path: - data_src = 'http://www.trutv.com/video/cvp/v2/xml/content.xml?id=%s.xml' % path + series_slug, clip_slug, video_id = re.match(self._VALID_URL, url).groups() + + if video_id: + path = 'episode' + display_id = video_id else: - webpage = self._download_webpage(url, video_id) - video_id = self._search_regex( - r"TTV\.TVE\.episodeId\s*=\s*'([^']+)';", - webpage, 'video id', default=video_id) - auth_required = self._search_regex( - r'TTV\.TVE\.authRequired\s*=\s*(true|false);', - webpage, 'auth required', default='false') == 'true' - data_src = 'http://www.trutv.com/tveverywhere/services/cvpXML.do?titleId=' + video_id - return self._extract_cvp_info( - data_src, path, { - 'secure': { - 'media_src': 'http://androidhls-secure.cdn.turner.com/trutv/big', - 'tokenizer_src': 'http://www.trutv.com/tveverywhere/processors/services/token_ipadAdobe.do', - }, - }, { + path = 'series/clip' + display_id = clip_slug + + data = self._download_json( + 'https://api.trutv.com/v2/web/%s/%s/%s' % (path, series_slug, display_id), + display_id) + video_data = data['episode'] if video_id else data['info'] + media_id = video_data['mediaId'] + title = video_data['title'].strip() + + info = self._extract_ngtv_info( + media_id, {}, { 'url': url, 'site_name': 'truTV', - 'auth_required': auth_required, + 'auth_required': video_data.get('isAuthRequired'), }) + + thumbnails = [] + for image in video_data.get('images', []): + image_url = image.get('srcUrl') + if not image_url: + continue + thumbnails.append({ + 'url': image_url, + 'width': int_or_none(image.get('width')), + 'height': int_or_none(image.get('height')), + }) + + info.update({ + 'id': media_id, + 
'display_id': display_id, + 'title': title, + 'description': video_data.get('description'), + 'thumbnails': thumbnails, + 'timestamp': parse_iso8601(video_data.get('publicationDate')), + 'series': video_data.get('showTitle'), + 'season_number': int_or_none(video_data.get('seasonNum')), + 'episode_number': int_or_none(video_data.get('episodeNum')), + }) + return info From f06a1cabe8b3831b5f2ff3bc27f5e7336c597e92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 7 Feb 2019 23:57:58 +0700 Subject: [PATCH 080/785] [spankbang] Extend _VALID_URL --- youtube_dl/extractor/spankbang.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 67500b69c..e48cfff71 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -12,7 +12,7 @@ from ..utils import ( class SpankBangIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video' + _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)' _TESTS = [{ 'url': 'http://spankbang.com/3vvn/video/fantasy+solo', 'md5': '1cc433e1d6aa14bc376535b8679302f7', @@ -41,13 +41,22 @@ class SpankBangIE(InfoExtractor): # 4k 'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k', 'only_matching': True, + }, { + 'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/', + 'only_matching': True, + }, { + 'url': 'https://m.spankbang.com/3vvn/play', + 'only_matching': True, + }, { + 'url': 'https://spankbang.com/2y3td/embed/', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id, headers={ - 'Cookie': 'country=US' - }) + webpage = self._download_webpage( + url.replace('/%s/embed' % video_id, '/%s/video' % video_id), + video_id, headers={'Cookie': 'country=US'}) if 
re.search(r'<[^>]+\bid=["\']video_removed', webpage): raise ExtractorError( From 49bd993fd9adbcf6b5c11a7ec11c2b4a552e49c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 8 Feb 2019 00:09:50 +0700 Subject: [PATCH 081/785] [spankbang:playlist] Add extractor (closes #19145) --- youtube_dl/extractor/extractors.py | 5 ++++- youtube_dl/extractor/spankbang.py | 33 +++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 693c16e49..d7685cd87 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1058,7 +1058,10 @@ from .southpark import ( SouthParkEsIE, SouthParkNlIE ) -from .spankbang import SpankBangIE +from .spankbang import ( + SpankBangIE, + SpankBangPlaylistIE, +) from .spankwire import SpankwireIE from .spiegel import SpiegelIE, SpiegelArticleIE from .spiegeltv import SpiegeltvIE diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index e48cfff71..fbe6ef31a 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, + orderedSet, parse_duration, parse_resolution, str_to_int, @@ -12,7 +13,7 @@ from ..utils import ( class SpankBangIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)' + _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b' _TESTS = [{ 'url': 'http://spankbang.com/3vvn/video/fantasy+solo', 'md5': '1cc433e1d6aa14bc376535b8679302f7', @@ -103,3 +104,33 @@ class SpankBangIE(InfoExtractor): 'formats': formats, 'age_limit': age_limit, } + + +class SpankBangPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+' + _TEST = { + 'url': 
'https://spankbang.com/ug0k/playlist/big+ass+titties', + 'info_dict': { + 'id': 'ug0k', + 'title': 'Big Ass Titties', + }, + 'playlist_mincount': 50, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage( + url, playlist_id, headers={'Cookie': 'country=US; mobile=on'}) + + entries = [self.url_result( + 'https://spankbang.com/%s/video' % video_id, + ie=SpankBangIE.ie_key(), video_id=video_id) + for video_id in orderedSet(re.findall( + r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))] + + title = self._html_search_regex( + r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title', + fatal=False) + + return self.playlist_result(entries, playlist_id, title) From 22f5f5c6fcd2d7f0c9f1ff3019fe0b957b771f44 Mon Sep 17 00:00:00 2001 From: Ales Jirasek <schunkac@gmail.com> Date: Wed, 10 Oct 2018 23:47:21 +0200 Subject: [PATCH 082/785] [malltv] Add extractor (closes #18058) --- test/test_InfoExtractor.py | 2 ++ youtube_dl/extractor/common.py | 2 +- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/malltv.py | 58 ++++++++++++++++++++++++++++++ youtube_dl/utils.py | 2 +- 5 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 youtube_dl/extractor/malltv.py diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 75fa0bbb7..f0aa8466b 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -61,6 +61,7 @@ class TestInfoExtractor(unittest.TestCase): <meta content='Foo' property=og:foobar> <meta name="og:test1" content='foo > < bar'/> <meta name="og:test2" content="foo >//< bar"/> + <meta property=og-test3 content='Ill-formatted opengraph'/> ''' self.assertEqual(ie._og_search_title(html), 'Foo') self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') @@ -69,6 +70,7 @@ class TestInfoExtractor(unittest.TestCase): self.assertEqual(ie._og_search_property('foobar', html), 'Foo') self.assertEqual(ie._og_search_property('test1', html), 'foo > < 
bar') self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar') + self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph') self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar') self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True) self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c4ea2882f..c3b0586a0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1058,7 +1058,7 @@ class InfoExtractor(object): @staticmethod def _og_regexes(prop): content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))' - property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)' + property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)' % {'prop': re.escape(prop)}) template = r'<meta[^>]+?%s[^>]+?%s' return [ diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d7685cd87..f212b5116 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -619,6 +619,7 @@ from .mailru import ( MailRuMusicSearchIE, ) from .makertv import MakerTVIE +from .malltv import MallTVIE from .mangomolo import ( MangomoloVideoIE, MangomoloLiveIE, diff --git a/youtube_dl/extractor/malltv.py b/youtube_dl/extractor/malltv.py new file mode 100644 index 000000000..7e0876ecc --- /dev/null +++ b/youtube_dl/extractor/malltv.py @@ -0,0 +1,58 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from .common import InfoExtractor +from ..utils import parse_duration, merge_dicts + + +class MallTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?mall\.tv/(?:.+/)?(?P<id>.+)(?:\?.*$|$)' + _TESTS = [ + { + 'url': 
'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice', + 'md5': '9ced0de056534410837077e23bfba796', + 'info_dict': { + 'id': 't0zzt0', + 'ext': 'mp4', + 'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?', + 'description': 'Pokud někdo hospodaří s penězmi daňových poplatníků, pak logicky chceme vědět, jak s nimi nakládá. Objem dotací pro neziskovky roste, ale opravdu jsou tyto organizace „pijavice", jak o nich hovoří And', + 'upload_date': '20181007', + 'timestamp': 1538870400 + } + }, + { + 'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice', + 'md5': '9ced0de056534410837077e23bfba796', + 'only_matching': 1, + 'info_dict': { + 'id': 't0zzt0', + 'ext': 'mp4', + 'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?', + 'description': 'Pokud někdo hospodaří s penězmi daňových poplatníků, pak logicky chceme vědět, jak s nimi nakládá. 
Objem dotací pro neziskovky roste, ale opravdu jsou tyto organizace „pijavice", jak o nich hovoří And', + 'upload_date': '20181007', + 'timestamp': 1538870400 + } + } + ] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + src_id_regex = r'(?P<src><source src=([\"\'])?.+?/(?P<id>\w{6,}?)/index)(?P<after>\1?[^>]*?>)' + video_id = self._search_regex(src_id_regex, webpage, 'ID', + group='id') + info = self._search_json_ld(webpage, video_id, default={}) + html = re.sub(src_id_regex, r'\g<src>.m3u8\g<after>', webpage) + media = self._parse_html5_media_entries(url, html, video_id) + thumbnail = info.get('thumbnail', self._og_search_thumbnail(webpage)) + duration = parse_duration(info.get('duration')) + result = { + 'id': video_id, + 'title': info.get('title', self._og_search_title(webpage)), + 'description': self._og_search_description(webpage) + } + result.update({'thumbnail': thumbnail}) + result.update({'duration': duration}) + + return merge_dicts(media[0], info, result) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d0cb65814..f5a0bb4b0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -184,7 +184,7 @@ DATE_FORMATS_MONTH_FIRST.extend([ ]) PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)" -JSON_LD_RE = r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>' +JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>' def preferredencoding(): From 4de3cb883c61eeec56d8d271375a0624d481ad37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 8 Feb 2019 00:43:10 +0700 Subject: [PATCH 083/785] [malltv] Fix issues and simplify (closes #17856) --- youtube_dl/extractor/malltv.py | 85 ++++++++++++++++------------------ 1 file changed, 40 insertions(+), 45 deletions(-) diff --git a/youtube_dl/extractor/malltv.py b/youtube_dl/extractor/malltv.py index 
7e0876ecc..e13c2e11a 100644 --- a/youtube_dl/extractor/malltv.py +++ b/youtube_dl/extractor/malltv.py @@ -2,57 +2,52 @@ from __future__ import unicode_literals import re + from .common import InfoExtractor -from ..utils import parse_duration, merge_dicts +from ..utils import merge_dicts class MallTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?mall\.tv/(?:.+/)?(?P<id>.+)(?:\?.*$|$)' - _TESTS = [ - { - 'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice', - 'md5': '9ced0de056534410837077e23bfba796', - 'info_dict': { - 'id': 't0zzt0', - 'ext': 'mp4', - 'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?', - 'description': 'Pokud někdo hospodaří s penězmi daňových poplatníků, pak logicky chceme vědět, jak s nimi nakládá. Objem dotací pro neziskovky roste, ale opravdu jsou tyto organizace „pijavice", jak o nich hovoří And', - 'upload_date': '20181007', - 'timestamp': 1538870400 - } - }, - { - 'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice', - 'md5': '9ced0de056534410837077e23bfba796', - 'only_matching': 1, - 'info_dict': { - 'id': 't0zzt0', - 'ext': 'mp4', - 'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?', - 'description': 'Pokud někdo hospodaří s penězmi daňových poplatníků, pak logicky chceme vědět, jak s nimi nakládá. 
Objem dotací pro neziskovky roste, ale opravdu jsou tyto organizace „pijavice", jak o nich hovoří And', - 'upload_date': '20181007', - 'timestamp': 1538870400 - } + _VALID_URL = r'https?://(?:www\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice', + 'md5': '1c4a37f080e1f3023103a7b43458e518', + 'info_dict': { + 'id': 't0zzt0', + 'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice', + 'ext': 'mp4', + 'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?', + 'description': 'md5:25fc0ec42a72ba602b602c683fa29deb', + 'duration': 216, + 'timestamp': 1538870400, + 'upload_date': '20181007', + 'view_count': int, } - ] + }, { + 'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice', + 'only_matching': True, + }] def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - src_id_regex = r'(?P<src><source src=([\"\'])?.+?/(?P<id>\w{6,}?)/index)(?P<after>\1?[^>]*?>)' - video_id = self._search_regex(src_id_regex, webpage, 'ID', - group='id') - info = self._search_json_ld(webpage, video_id, default={}) - html = re.sub(src_id_regex, r'\g<src>.m3u8\g<after>', webpage) - media = self._parse_html5_media_entries(url, html, video_id) - thumbnail = info.get('thumbnail', self._og_search_thumbnail(webpage)) - duration = parse_duration(info.get('duration')) - result = { - 'id': video_id, - 'title': info.get('title', self._og_search_title(webpage)), - 'description': self._og_search_description(webpage) - } - result.update({'thumbnail': thumbnail}) - result.update({'duration': duration}) - return merge_dicts(media[0], info, result) + webpage = self._download_webpage( + url, display_id, headers=self.geo_verification_headers()) + + SOURCE_RE = 
r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b' + video_id = self._search_regex( + SOURCE_RE, webpage, 'video id', group='id') + + media = self._parse_html5_media_entries( + url, re.sub(SOURCE_RE, r'\1.m3u8', webpage), video_id, + m3u8_id='hls', m3u8_entry_protocol='m3u8_native')[0] + + info = self._search_json_ld(webpage, video_id, default={}) + + return merge_dicts(media, info, { + 'id': video_id, + 'display_id': display_id, + 'title': self._og_search_title(webpage, default=None) or display_id, + 'description': self._og_search_description(webpage, default=None), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), + }) From 1211bb6dace4773f67bbf46b8944317679573a1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 8 Feb 2019 01:08:48 +0700 Subject: [PATCH 084/785] [YoutubeDL] Improve _make_archive_id (closes #19149) --- youtube_dl/YoutubeDL.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c168415ce..bc9fc270c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -82,6 +82,7 @@ from .utils import ( sanitize_url, sanitized_Request, std_headers, + str_or_none, subtitles_filename, UnavailableVideoError, url_basename, @@ -2067,9 +2068,12 @@ class YoutubeDL(object): # and backwards compatibility with prior versions extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist if extractor is None: + url = str_or_none(info_dict.get('url')) + if not url: + return # Try to find matching extractor for the URL and take its ie_key for ie in self._ies: - if ie.suitable(info_dict['url']): + if ie.suitable(url): extractor = ie.ie_key() break else: From f1f5b47255a44e791a84b769c523127e0f047578 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 8 Feb 2019 01:10:12 +0700 Subject: [PATCH 085/785] [ChangeLog] Actualize [ci skip] --- 
ChangeLog | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4872cd9fc..b0fbde43d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,32 @@ +version <unreleased> + +Core +* [utils] Improve JSON-LD regular expression (#18058) +* [YoutubeDL] Fallback to ie_key of matching extractor while making + download archive id when no explicit ie_key is provided (#19022) + +Extractors ++ [malltv] Add support for mall.tv (#18058, #17856) ++ [spankbang:playlist] Add support for playlists (#19145) +* [spankbang] Extend URL regular expression +* [trutv] Fix extraction (#17336) +* [toutv] Fix authentication (#16398, #18700) +* [pornhub] Fix tags and categories extraction (#13720, #19135) +* [pornhd] Fix formats extraction ++ [pornhd] Extract like count (#19123, #19125) +* [radiocanada] Switch to the new media requests (#19115) ++ [teachable] Add support for courses.workitdaily.com (#18871) +- [vporn] Remove extractor (#16276) ++ [soundcloud:pagedplaylist] Add ie and title to entries (#19022, #19086) ++ [drtuber] Extract duration (#19078) +* [soundcloud] Fix paged playlists extraction, add support for albums and update client id +* [soundcloud] Update client id +* [drtv] Improve preference (#19079) ++ [openload] Add support for openload.pw and oload.pw (#18930) ++ [openload] Add support for oload.info (#19073) +* [crackle] Authorize media detail request (#16931) + + version 2019.01.30.1 Core From 04eacf54530e6a17129d3f1b90f759f9935f2b85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 8 Feb 2019 01:12:51 +0700 Subject: [PATCH 086/785] release 2019.02.08 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 423a08e4d..7128d998f 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ 
-6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.30.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.30.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.02.08*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.02.08** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.30.1 +[debug] youtube-dl version 2019.02.08 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index b0fbde43d..398528f76 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.02.08 Core * [utils] Improve JSON-LD regular expression (#18058) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2918520c3..32fe6b647 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -476,6 +476,7 @@ - **mailru:music**: 
Музыка@Mail.Ru - **mailru:music:search**: Музыка@Mail.Ru - **MakerTV** + - **MallTV** - **mangomolo:live** - **mangomolo:video** - **ManyVids** @@ -827,6 +828,7 @@ - **southpark.nl** - **southparkstudios.dk** - **SpankBang** + - **SpankBangPlaylist** - **Spankwire** - **Spiegel** - **Spiegel:Article**: Articles on spiegel.de @@ -1057,7 +1059,6 @@ - **Voot** - **VoxMedia** - **VoxMediaVolume** - - **Vporn** - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **Vrak** - **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be diff --git a/youtube_dl/version.py b/youtube_dl/version.py index be3bbdd73..4dc5a611e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.30.1' +__version__ = '2019.02.08' From 91effe22a091035bc5abace2fcf562a0db89090f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 8 Feb 2019 07:21:31 +0100 Subject: [PATCH 087/785] [linkedin:learning] extract chapter_number and chapter_id(closes #19162) --- youtube_dl/extractor/linkedin.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/linkedin.py b/youtube_dl/extractor/linkedin.py index 259fc4c5e..5a86b0064 100644 --- a/youtube_dl/extractor/linkedin.py +++ b/youtube_dl/extractor/linkedin.py @@ -34,12 +34,15 @@ class LinkedInLearningBaseIE(InfoExtractor): 'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value, }, query=query)['elements'][0] - def _get_video_id(self, urn, course_slug, video_slug): + def _get_urn_id(self, video_data): + urn = video_data.get('urn') if urn: mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn) if mobj: return mobj.group(1) - return '%s/%s' % (course_slug, video_slug) + + def _get_video_id(self, video_data, course_slug, video_slug): + return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug) def _real_initialize(self): email, password = self._get_login_info() @@ 
-123,7 +126,7 @@ class LinkedInLearningIE(LinkedInLearningBaseIE): self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr')) return { - 'id': self._get_video_id(video_data.get('urn'), course_slug, video_slug), + 'id': self._get_video_id(video_data, course_slug, video_slug), 'title': title, 'formats': formats, 'thumbnail': video_data.get('defaultThumbnail'), @@ -154,18 +157,21 @@ class LinkedInLearningCourseIE(LinkedInLearningBaseIE): course_data = self._call_api(course_slug, 'chapters,description,title') entries = [] - for chapter in course_data.get('chapters', []): + for chapter_number, chapter in enumerate(course_data.get('chapters', []), 1): chapter_title = chapter.get('title') + chapter_id = self._get_urn_id(chapter) for video in chapter.get('videos', []): video_slug = video.get('slug') if not video_slug: continue entries.append({ '_type': 'url_transparent', - 'id': self._get_video_id(video.get('urn'), course_slug, video_slug), + 'id': self._get_video_id(video, course_slug, video_slug), 'title': video.get('title'), 'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug), 'chapter': chapter_title, + 'chapter_number': chapter_number, + 'chapter_id': chapter_id, 'ie_key': LinkedInLearningIE.ie_key(), }) From e9dee7f1b26e1c23b011a5ad5433d4debc6b48ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 9 Feb 2019 23:49:37 +0700 Subject: [PATCH 088/785] [trunews] Add extractor (closes #19153) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/trunews.py | 75 ++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 youtube_dl/extractor/trunews.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f212b5116..3e1b63b4b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1171,6 +1171,7 @@ from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from 
.traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE +from .trunews import TruNewsIE from .trutv import TruTVIE from .tube8 import Tube8IE from .tubitv import TubiTvIE diff --git a/youtube_dl/extractor/trunews.py b/youtube_dl/extractor/trunews.py new file mode 100644 index 000000000..b0c7caabf --- /dev/null +++ b/youtube_dl/extractor/trunews.py @@ -0,0 +1,75 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + dict_get, + float_or_none, + int_or_none, + unified_timestamp, + update_url_query, + url_or_none, +) + + +class TruNewsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?trunews\.com/stream/(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'https://www.trunews.com/stream/will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech', + 'md5': 'a19c024c3906ff954fac9b96ce66bb08', + 'info_dict': { + 'id': '5c5a21e65d3c196e1c0020cc', + 'display_id': 'will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech', + 'ext': 'mp4', + 'title': "Will Democrats Stage a Circus During President Trump's State of the Union Speech?", + 'description': 'md5:c583b72147cc92cf21f56a31aff7a670', + 'duration': 3685, + 'timestamp': 1549411440, + 'upload_date': '20190206', + }, + 'add_ie': ['Zype'], + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + video = self._download_json( + 'https://api.zype.com/videos', display_id, query={ + 'app_key': 'PUVKp9WgGUb3-JUw6EqafLx8tFVP6VKZTWbUOR-HOm__g4fNDt1bCsm_LgYf_k9H', + 'per_page': 1, + 'active': 'true', + 'friendly_title': display_id, + })['response'][0] + + zype_id = video['_id'] + + thumbnails = [] + thumbnails_list = video.get('thumbnails') + if isinstance(thumbnails_list, list): + for thumbnail in thumbnails_list: + if not isinstance(thumbnail, dict): + continue + thumbnail_url = url_or_none(thumbnail.get('url')) + if not thumbnail_url: + continue + thumbnails.append({ + 'url': thumbnail_url, + 'width': 
int_or_none(thumbnail.get('width')), + 'height': int_or_none(thumbnail.get('height')), + }) + + return { + '_type': 'url_transparent', + 'url': update_url_query( + 'https://player.zype.com/embed/%s.js' % zype_id, + {'api_key': 'X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt'}), + 'ie_key': 'Zype', + 'id': zype_id, + 'display_id': display_id, + 'title': video.get('title'), + 'description': dict_get(video, ('description', 'ott_description', 'short_description')), + 'duration': int_or_none(video.get('duration')), + 'timestamp': unified_timestamp(video.get('published_at')), + 'average_rating': float_or_none(video.get('rating')), + 'view_count': int_or_none(video.get('request_count')), + 'thumbnails': thumbnails, + } From f516f44094d2244a805d8d0ac4d809fc6cd16782 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 10 Feb 2019 23:44:08 +0700 Subject: [PATCH 089/785] [soundcloud] Extract more metadata --- youtube_dl/extractor/soundcloud.py | 65 +++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 5536e7851..15da3496e 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -16,7 +16,8 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, - unified_strdate, + try_get, + unified_timestamp, update_url_query, url_or_none, ) @@ -51,12 +52,17 @@ class SoundcloudIE(InfoExtractor): 'info_dict': { 'id': '62986583', 'ext': 'mp3', - 'upload_date': '20121011', + 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d', 'uploader': 'E.T. 
ExTerrestrial Music', - 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', + 'timestamp': 1349920598, + 'upload_date': '20121011', 'duration': 143, 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, } }, # not streamable song @@ -68,9 +74,14 @@ class SoundcloudIE(InfoExtractor): 'title': 'Goldrushed', 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', 'uploader': 'The Royal Concept', + 'timestamp': 1337635207, 'upload_date': '20120521', - 'duration': 227, + 'duration': 30, 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, }, 'params': { # rtmp @@ -85,11 +96,16 @@ class SoundcloudIE(InfoExtractor): 'id': '123998367', 'ext': 'mp3', 'title': 'Youtube - Dl Test Video \'\' Ä↭', - 'uploader': 'jaimeMF', 'description': 'test chars: \"\'/\\ä↭', + 'uploader': 'jaimeMF', + 'timestamp': 1386604920, 'upload_date': '20131209', 'duration': 9, 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, }, }, # private link (alt format) @@ -100,11 +116,16 @@ class SoundcloudIE(InfoExtractor): 'id': '123998367', 'ext': 'mp3', 'title': 'Youtube - Dl Test Video \'\' Ä↭', - 'uploader': 'jaimeMF', 'description': 'test chars: \"\'/\\ä↭', + 'uploader': 'jaimeMF', + 'timestamp': 1386604920, 'upload_date': '20131209', 'duration': 9, 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, }, }, # downloadable song @@ -117,9 +138,14 @@ class SoundcloudIE(InfoExtractor): 'title': 'Bus Brakes', 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66', 'uploader': 'oddsamples', + 'timestamp': 1389232924, 'upload_date': '20140109', 'duration': 17, 'license': 'cc-by-sa', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 
'repost_count': int, }, }, # private link, downloadable format @@ -132,9 +158,14 @@ class SoundcloudIE(InfoExtractor): 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366', 'uploader': 'Ori Uplift Music', + 'timestamp': 1504206263, 'upload_date': '20170831', 'duration': 7449, 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, }, }, # no album art, use avatar pic for thumbnail @@ -147,10 +178,15 @@ class SoundcloudIE(InfoExtractor): 'title': 'Sideways (Prod. Mad Real)', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 'uploader': 'garyvee', + 'timestamp': 1488152409, 'upload_date': '20170226', 'duration': 207, 'thumbnail': r're:https?://.*\.jpg', 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, }, 'params': { 'skip_download': True, @@ -176,22 +212,33 @@ class SoundcloudIE(InfoExtractor): def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None): track_id = compat_str(info['id']) + title = info['title'] name = full_title or track_id if quiet: self.report_extraction(name) thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url') if isinstance(thumbnail, compat_str): thumbnail = thumbnail.replace('-large', '-t500x500') + username = try_get(info, lambda x: x['user']['username'], compat_str) + + def extract_count(key): + return int_or_none(info.get('%s_count' % key)) + result = { 'id': track_id, - 'uploader': info.get('user', {}).get('username'), - 'upload_date': unified_strdate(info.get('created_at')), - 'title': info['title'], + 'uploader': username, + 'timestamp': unified_timestamp(info.get('created_at')), + 'title': title, 'description': info.get('description'), 'thumbnail': thumbnail, 'duration': int_or_none(info.get('duration'), 1000), 'webpage_url': info.get('permalink_url'), 
'license': info.get('license'), + 'view_count': extract_count('playback'), + 'like_count': extract_count('favoritings'), + 'comment_count': extract_count('comment'), + 'repost_count': extract_count('reposts'), + 'genre': info.get('genre'), } formats = [] query = {'client_id': self._CLIENT_ID} From 4c0e0dc9dc13d53a334f75f7d7b9073f79a2dfc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 11 Feb 2019 00:49:51 +0700 Subject: [PATCH 090/785] [rutube:embed] Fix extraction and add support private videos (closes #19163) --- youtube_dl/extractor/rutube.py | 115 +++++++++++++++++++++------------ 1 file changed, 72 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 10ac8ed1f..8f54d5675 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -21,7 +21,17 @@ from ..utils import ( class RutubeBaseIE(InfoExtractor): - def _extract_video(self, video, video_id=None, require_title=True): + def _download_api_info(self, video_id, query=None): + if not query: + query = {} + query['format'] = 'json' + return self._download_json( + 'http://rutube.ru/api/video/%s/' % video_id, + video_id, 'Downloading video JSON', + 'Unable to download video JSON', query=query) + + @staticmethod + def _extract_info(video, video_id=None, require_title=True): title = video['title'] if require_title else video.get('title') age_limit = video.get('is_adult') @@ -32,7 +42,7 @@ class RutubeBaseIE(InfoExtractor): category = try_get(video, lambda x: x['category']['name']) return { - 'id': video.get('id') or video_id, + 'id': video.get('id') or video_id if video_id else video['id'], 'title': title, 'description': video.get('description'), 'thumbnail': video.get('thumbnail_url'), @@ -47,6 +57,42 @@ class RutubeBaseIE(InfoExtractor): 'is_live': bool_or_none(video.get('is_livestream')), } + def _download_and_extract_info(self, video_id, query=None): + return self._extract_info( + 
self._download_api_info(video_id, query=query), video_id) + + def _download_api_options(self, video_id, query=None): + if not query: + query = {} + query['format'] = 'json' + return self._download_json( + 'http://rutube.ru/api/play/options/%s/' % video_id, + video_id, 'Downloading options JSON', + 'Unable to download options JSON', + headers=self.geo_verification_headers(), query=query) + + def _extract_formats(self, options, video_id): + formats = [] + for format_id, format_url in options['video_balancer'].items(): + ext = determine_ext(format_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + format_url, video_id, f4m_id=format_id, fatal=False)) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + }) + self._sort_formats(formats) + return formats + + def _download_and_extract_formats(self, video_id, query=None): + return self._extract_formats( + self._download_api_options(video_id, query=query), video_id) + class RutubeIE(RutubeBaseIE): IE_NAME = 'rutube' @@ -55,13 +101,13 @@ class RutubeIE(RutubeBaseIE): _TESTS = [{ 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', - 'md5': '79938ade01294ef7e27574890d0d3769', + 'md5': '1d24f180fac7a02f3900712e5a5764d6', 'info_dict': { 'id': '3eac3b4561676c17df9132a9a1e62e3e', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Раненный кенгуру забежал в аптеку', 'description': 'http://www.ntdtv.ru ', - 'duration': 80, + 'duration': 81, 'uploader': 'NTDRussian', 'uploader_id': '29790', 'timestamp': 1381943602, @@ -94,39 +140,12 @@ class RutubeIE(RutubeBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - - video = self._download_json( - 'http://rutube.ru/api/video/%s/?format=json' % video_id, - video_id, 'Downloading video JSON') - - info = self._extract_video(video, video_id) - - options = self._download_json( - 
'http://rutube.ru/api/play/options/%s/?format=json' % video_id, - video_id, 'Downloading options JSON', - headers=self.geo_verification_headers()) - - formats = [] - for format_id, format_url in options['video_balancer'].items(): - ext = determine_ext(format_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - format_url, video_id, f4m_id=format_id, fatal=False)) - else: - formats.append({ - 'url': format_url, - 'format_id': format_id, - }) - self._sort_formats(formats) - - info['formats'] = formats + info = self._download_and_extract_info(video_id) + info['formats'] = self._download_and_extract_formats(video_id) return info -class RutubeEmbedIE(InfoExtractor): +class RutubeEmbedIE(RutubeBaseIE): IE_NAME = 'rutube:embed' IE_DESC = 'Rutube embedded videos' _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)' @@ -135,7 +154,7 @@ class RutubeEmbedIE(InfoExtractor): 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', 'info_dict': { 'id': 'a10e53b86e8f349080f718582ce4c661', - 'ext': 'flv', + 'ext': 'mp4', 'timestamp': 1387830582, 'upload_date': '20131223', 'uploader_id': '297833', @@ -149,16 +168,26 @@ class RutubeEmbedIE(InfoExtractor): }, { 'url': 'http://rutube.ru/play/embed/8083783', 'only_matching': True, + }, { + # private video + 'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ', + 'only_matching': True, }] def _real_extract(self, url): embed_id = self._match_id(url) - webpage = self._download_webpage(url, embed_id) - - canonical_url = self._html_search_regex( - r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage, - 'Canonical URL') - return self.url_result(canonical_url, RutubeIE.ie_key()) + # Query may contain private videos token and should be passed to API + # requests (see #19163) + query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + 
options = self._download_api_options(embed_id, query) + video_id = options['effective_video'] + formats = self._extract_formats(options, video_id) + info = self._download_and_extract_info(video_id, query) + info.update({ + 'extractor_key': 'Rutube', + 'formats': formats, + }) + return info class RutubePlaylistBaseIE(RutubeBaseIE): @@ -181,7 +210,7 @@ class RutubePlaylistBaseIE(RutubeBaseIE): video_url = url_or_none(result.get('video_url')) if not video_url: continue - entry = self._extract_video(result, require_title=False) + entry = self._extract_info(result, require_title=False) entry.update({ '_type': 'url', 'url': video_url, From d777f3e81c5f9f6c7be39586703f1f076a2025a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 11 Feb 2019 04:39:23 +0700 Subject: [PATCH 091/785] [tvplayhome] Fix episode metadata extraction (closes #19190) --- youtube_dl/extractor/tvplay.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 8f1ff3b76..7c07b26bc 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -537,8 +537,9 @@ class TVPlayHomeIE(InfoExtractor): r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number', default=None)) episode = self._search_regex( - r'(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'episode', - default=None, group='value') + (r'\bepisode\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', + r'data-subtitle\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage, + 'episode', default=None, group='value') episode_number = int_or_none(self._search_regex( r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number', default=None)) From 7d8b89163c43dfec27dab5250183e52fce838389 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 11 Feb 2019 04:41:28 +0700 Subject: [PATCH 092/785] [tvplayhome] Fix video id extraction (closes #19190) --- youtube_dl/extractor/tvplay.py | 
5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 7c07b26bc..d82d48f94 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -493,10 +493,9 @@ class TVPlayHomeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_id = self._search_regex( - r'data-asset-id\s*=\s*["\'](\d{5,7})\b', webpage, 'video id', - default=None) + r'data-asset-id\s*=\s*["\'](\d{5,})\b', webpage, 'video id') - if video_id: + if len(video_id) < 8: return self.url_result( 'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id) From 985637cbbfc1a79091eb2f5ca4afec84f6616c75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 12 Feb 2019 00:13:50 +0700 Subject: [PATCH 093/785] [twitch] Add new source format detection approach (closes #19193) --- youtube_dl/extractor/twitch.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 401615683..8c87f6dd3 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -136,7 +136,12 @@ class TwitchBaseIE(InfoExtractor): source = next(f for f in formats if f['format_id'] == 'Source') source['preference'] = 10 except StopIteration: - pass # No Source stream present + for f in formats: + if '/chunked/' in f['url']: + f.update({ + 'source_preference': 10, + 'format_note': 'Source', + }) self._sort_formats(formats) From 6f5c1807f43b9dfd17fdc6932ae7ecb6c77fb1a0 Mon Sep 17 00:00:00 2001 From: bitraid <bitraid@protonmail.ch> Date: Tue, 12 Feb 2019 19:02:29 +0200 Subject: [PATCH 094/785] [imgur] Use video id as title fallback (closes #18590) --- youtube_dl/extractor/imgur.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index 0eb54db3f..a5ba03efa 100644 --- a/youtube_dl/extractor/imgur.py +++ 
b/youtube_dl/extractor/imgur.py @@ -27,6 +27,10 @@ class ImgurIE(InfoExtractor): }, { 'url': 'https://i.imgur.com/crGpqCV.mp4', 'only_matching': True, + }, { + # no title + 'url': 'https://i.imgur.com/jxBXAMC.gifv', + 'only_matching': True, }] def _real_extract(self, url): @@ -87,7 +91,7 @@ class ImgurIE(InfoExtractor): return { 'id': video_id, 'formats': formats, - 'title': self._og_search_title(webpage), + 'title': self._og_search_title(webpage, default=video_id), } From 7bee705d8f110f09d8e72b1c863ff197ccc1d4f1 Mon Sep 17 00:00:00 2001 From: yonaikerlol <lawlietrs7@gmail.com> Date: Thu, 14 Feb 2019 11:28:16 -0400 Subject: [PATCH 095/785] [openload] Add support for oload.live --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index a2ae25272..c1dcbb7eb 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -249,7 +249,7 @@ class OpenloadIE(InfoExtractor): (?:www\.)? 
(?: openload\.(?:co|io|link|pw)| - oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw) + oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live) ) )/ (?:f|embed)/ @@ -346,6 +346,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.pw/f/WyKgK8s94N0', 'only_matching': True, + }, { + 'url': 'https://oload.live/f/-Z58UZ-GR4M', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From 794c1b6e02591b04da931fa59745bc47bfae7492 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 14 Feb 2019 23:40:46 +0700 Subject: [PATCH 096/785] [vshare] Pass Referer to download request (closes #19205, closes #19221) --- youtube_dl/extractor/vshare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vshare.py b/youtube_dl/extractor/vshare.py index e4ec77889..c631ac1fa 100644 --- a/youtube_dl/extractor/vshare.py +++ b/youtube_dl/extractor/vshare.py @@ -48,7 +48,7 @@ class VShareIE(InfoExtractor): webpage = self._download_webpage( 'https://vshare.io/v/%s/width-650/height-430/1' % video_id, - video_id) + video_id, headers={'Referer': url}) title = self._html_search_regex( r'<title>([^<]+)</title>', webpage, 'title') From 2b2da3ba10cc325d00b665aae87f0fa8508bccdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 15 Feb 2019 23:56:29 +0700 Subject: [PATCH 097/785] [rai] Relax _VALID_URL (closes #19232) --- youtube_dl/extractor/rai.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 548a6553b..149153b8f 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -288,7 +288,7 @@ class RaiPlayPlaylistIE(InfoExtractor): class RaiIE(RaiBaseIE): - _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' %
RaiBaseIE._UUID_RE + _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE _TESTS = [{ # var uniquename = "ContentItem-..." # data-id="ContentItem-..." @@ -375,6 +375,9 @@ class RaiIE(RaiBaseIE): # Direct MMS URL 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html', 'only_matching': True, + }, { + 'url': 'https://www.rainews.it/tgr/marche/notiziari/video/2019/02/ContentItem-6ba945a2-889c-4a80-bdeb-8489c70a8db9.html', + 'only_matching': True, }] def _extract_from_content_id(self, content_id, url): From ba2e3730d125eab952eded3bb7749d479a2262d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 16 Feb 2019 22:45:53 +0700 Subject: [PATCH 098/785] [noovo] Fix extraction (closes #19230) --- youtube_dl/extractor/noovo.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/noovo.py b/youtube_dl/extractor/noovo.py index 974de3c3e..b40770d07 100644 --- a/youtube_dl/extractor/noovo.py +++ b/youtube_dl/extractor/noovo.py @@ -57,7 +57,8 @@ class NoovoIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - bc_url = BrightcoveNewIE._extract_url(self, webpage) + brightcove_id = self._search_regex( + r'data-video-id=["\'](\d+)', webpage, 'brightcove id') data = self._parse_json( self._search_regex( @@ -89,7 +90,10 @@ class NoovoIE(InfoExtractor): return { '_type': 'url_transparent', 'ie_key': BrightcoveNewIE.ie_key(), - 'url': smuggle_url(bc_url, {'geo_countries': ['CA']}), + 'url': smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + {'geo_countries': ['CA']}), + 'id': brightcove_id, 'title': title, 'description': description, 'series': series, From ae65c93a26f2b3cf806477a3ee891aa461b5c6b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 17 Feb 2019 00:58:13 +0700 Subject: [PATCH 099/785] [udemy] Update User-Agent and detect captcha (closes #14713, closes
#15839, closes #18126) --- youtube_dl/extractor/udemy.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 105826e9b..89a7f6ade 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -123,10 +123,22 @@ class UdemyIE(InfoExtractor): def _download_webpage_handle(self, *args, **kwargs): headers = kwargs.get('headers', {}).copy() - headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4' + headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36' kwargs['headers'] = headers - return super(UdemyIE, self)._download_webpage_handle( + ret = super(UdemyIE, self)._download_webpage_handle( *args, **compat_kwargs(kwargs)) + if not ret: + return ret + webpage, _ = ret + if any(p in webpage for p in ( + '>Please verify you are a human', + 'Access to this page has been denied because we believe you are using automation tools to browse the website', + '"_pxCaptcha"')): + raise ExtractorError( + 'Udemy asks you to solve a CAPTCHA. 
Login with browser, ' + 'solve CAPTCHA, then export cookies and pass cookie file to ' + 'youtube-dl with --cookies.', expected=True) + return ret def _download_json(self, url_or_request, *args, **kwargs): headers = { From d7d513891b7e63337218c5cb0bf743c8f7044381 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Feb 2019 01:05:01 +0700 Subject: [PATCH 100/785] [udemy] Extend _VALID_URLs (closes #14330, closes #15883) --- youtube_dl/extractor/udemy.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 89a7f6ade..ae8de9897 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -29,7 +29,7 @@ class UdemyIE(InfoExtractor): IE_NAME = 'udemy' _VALID_URL = r'''(?x) https?:// - www\.udemy\.com/ + (?:[^/]+\.)?udemy\.com/ (?: [^#]+\#/lecture/| lecture/view/?\?lectureId=| @@ -64,6 +64,9 @@ class UdemyIE(InfoExtractor): # only outputs rendition 'url': 'https://www.udemy.com/how-you-can-help-your-local-community-5-amazing-examples/learn/v4/t/lecture/3225750?start=0', 'only_matching': True, + }, { + 'url': 'https://wipro.udemy.com/java-tutorial/#/lecture/172757', + 'only_matching': True, }] def _extract_course_info(self, webpage, video_id): @@ -415,8 +418,14 @@ class UdemyIE(InfoExtractor): class UdemyCourseIE(UdemyIE): IE_NAME = 'udemy:course' - _VALID_URL = r'https?://(?:www\.)?udemy\.com/(?P[^/?#&]+)' - _TESTS = [] + _VALID_URL = r'https?://(?:[^/]+\.)?udemy\.com/(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.udemy.com/java-tutorial/', + 'only_matching': True, + }, { + 'url': 'https://wipro.udemy.com/java-tutorial/', + 'only_matching': True, + }] @classmethod def suitable(cls, url): From c9a0ea6e51eff28b9a383a47215870fd5875fc3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Feb 2019 05:00:16 +0700 Subject: [PATCH 101/785] [bilibili] Update keys (closes #19233) --- 
youtube_dl/extractor/bilibili.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 4d6b051fe..3746671d3 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -93,8 +93,8 @@ class BiliBiliIE(InfoExtractor): }] }] - _APP_KEY = '84956560bc028eb7' - _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e' + _APP_KEY = 'iVGUTjsxvpLeuDCf' + _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt' def _report_error(self, result): if 'message' in result: From 659e93fcf5c0480ac461cda412335cecf6a5595f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Feb 2019 07:12:10 +0700 Subject: [PATCH 102/785] [linuxacademy] Add extractor (closes #12207) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/linuxacademy.py | 174 +++++++++++++++++++++++++++ 2 files changed, 175 insertions(+) create mode 100644 youtube_dl/extractor/linuxacademy.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3e1b63b4b..c70452dcd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -593,6 +593,7 @@ from .linkedin import ( LinkedInLearningIE, LinkedInLearningCourseIE, ) +from .linuxacademy import LinuxAcademyIE from .litv import LiTVIE from .liveleak import ( LiveLeakIE, diff --git a/youtube_dl/extractor/linuxacademy.py b/youtube_dl/extractor/linuxacademy.py new file mode 100644 index 000000000..a78c6556e --- /dev/null +++ b/youtube_dl/extractor/linuxacademy.py @@ -0,0 +1,174 @@ +from __future__ import unicode_literals + +import json +import random +import re + +from .common import InfoExtractor +from ..compat import ( + compat_b64decode, + compat_HTTPError, + compat_str, +) +from ..utils import ( + ExtractorError, + orderedSet, + unescapeHTML, + urlencode_postdata, + urljoin, +) + + +class LinuxAcademyIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + 
(?:www\.)?linuxacademy\.com/cp/ + (?: + courses/lesson/course/(?P\d+)/lesson/(?P\d+)| + modules/view/id/(?P\d+) + ) + ''' + _TESTS = [{ + 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154', + 'info_dict': { + 'id': '1498-2', + 'ext': 'mp4', + 'title': "Introduction to the Practitioner's Brief", + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Requires Linux Academy account credentials', + }, { + 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2', + 'only_matching': True, + }, { + 'url': 'https://linuxacademy.com/cp/modules/view/id/154', + 'info_dict': { + 'id': '154', + 'title': 'AWS Certified Cloud Practitioner', + 'description': 'md5:039db7e60e4aac9cf43630e0a75fa834', + }, + 'playlist_count': 41, + 'skip': 'Requires Linux Academy account credentials', + }] + + _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize' + _ORIGIN_URL = 'https://linuxacademy.com' + _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx' + _NETRC_MACHINE = 'linuxacademy' + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + def random_string(): + return ''.join([ + random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~') + for _ in range(32)]) + + webpage, urlh = self._download_webpage_handle( + self._AUTHORIZE_URL, None, 'Downloading authorize page', query={ + 'client_id': self._CLIENT_ID, + 'response_type': 'token id_token', + 'redirect_uri': self._ORIGIN_URL, + 'scope': 'openid email user_impersonation profile', + 'audience': self._ORIGIN_URL, + 'state': random_string(), + 'nonce': random_string(), + }) + + login_data = self._parse_json( + self._search_regex( + r'atob\(\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, + 'login info', group='value'), None, + transform_source=lambda x: compat_b64decode(x).decode('utf-8') + )['extraParams'] + + login_data.update({ + 'client_id': self._CLIENT_ID, + 'redirect_uri': 
self._ORIGIN_URL, + 'tenant': 'lacausers', + 'connection': 'Username-Password-Authentication', + 'username': username, + 'password': password, + 'sso': 'true', + }) + + login_state_url = compat_str(urlh.geturl()) + + try: + login_page = self._download_webpage( + 'https://login.linuxacademy.com/usernamepassword/login', None, + 'Downloading login page', data=json.dumps(login_data).encode(), + headers={ + 'Content-Type': 'application/json', + 'Origin': 'https://login.linuxacademy.com', + 'Referer': login_state_url, + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + error = self._parse_json(e.cause.read(), None) + message = error.get('description') or error['code'] + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, message), expected=True) + raise + + callback_page, urlh = self._download_webpage_handle( + 'https://login.linuxacademy.com/login/callback', None, + 'Downloading callback page', + data=urlencode_postdata(self._hidden_inputs(login_page)), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + 'Origin': 'https://login.linuxacademy.com', + 'Referer': login_state_url, + }) + + access_token = self._search_regex( + r'access_token=([^=&]+)', compat_str(urlh.geturl()), + 'access token') + + self._download_webpage( + 'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s' + % access_token, None, 'Downloading token validation page') + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id') + item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id) + + webpage = self._download_webpage(url, item_id) + + # course path + if course_id: + entries = [ + self.url_result( + urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key()) + for lesson_url in orderedSet(re.findall( + r']+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)', + webpage))] + title = 
unescapeHTML(self._html_search_regex( + (r'class=["\']course-title["\'][^>]*>(?P[^<]+)', + r'var\s+title\s*=\s*(["\'])(?P(?:(?!\1).)+)\1'), + webpage, 'title', default=None, group='value')) + description = unescapeHTML(self._html_search_regex( + r'var\s+description\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'description', default=None, group='value')) + return self.playlist_result(entries, course_id, title, description) + + # single video path + info = self._extract_jwplayer_data( + webpage, item_id, require_title=False, m3u8_id='hls',) + title = self._search_regex( + (r'>Lecture\s*:\s*(?P[^<]+)', + r'lessonName\s*=\s*(["\'])(?P(?:(?!\1).)+)\1'), webpage, + 'title', group='value') + info.update({ + 'id': item_id, + 'title': title, + }) + return info From 3c9647372e78134777d201e157a5ef42345c9da2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Feb 2019 13:38:21 +0700 Subject: [PATCH 103/785] [tvp] Fix description extraction, make thumbnail optional and fix tests --- youtube_dl/extractor/tvp.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index 3954f0b93..f9bf600b0 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -19,12 +19,12 @@ class TVPIE(InfoExtractor): _TESTS = [{ 'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536', - 'md5': '8aa518c15e5cc32dfe8db400dc921fbb', + 'md5': 'a21eb0aa862f25414430f15fdfb9e76c', 'info_dict': { 'id': '194536', 'ext': 'mp4', - 'title': 'Czas honoru, I seria – odc. 13', - 'description': 'md5:381afa5bca72655fe94b05cfe82bf53d', + 'title': 'Czas honoru, odc. 13 – Władek', + 'description': 'md5:437f48b93558370b031740546b696e24', }, }, { 'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176', @@ -45,6 +45,7 @@ class TVPIE(InfoExtractor): 'title': 'Wiadomości, 28.09.2017, 19:30', 'description': 'Wydanie główne codziennego serwisu informacyjnego.' 
}, + 'skip': 'HTTP Error 404: Not Found', }, { 'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272', 'only_matching': True, @@ -75,8 +76,10 @@ class TVPIE(InfoExtractor): return { '_type': 'url_transparent', 'url': 'tvp:' + video_id, - 'description': self._og_search_description(webpage, default=None), - 'thumbnail': self._og_search_thumbnail(webpage), + 'description': self._og_search_description( + webpage, default=None) or self._html_search_meta( + 'description', webpage, default=None), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'ie_key': 'TVPEmbed', } @@ -87,6 +90,14 @@ class TVPEmbedIE(InfoExtractor): _VALID_URL = r'(?:tvp:|https?://[^/]+\.tvp\.(?:pl|info)/sess/tvplayer\.php\?.*?object_id=)(?P\d+)' _TESTS = [{ + 'url': 'tvp:194536', + 'md5': 'a21eb0aa862f25414430f15fdfb9e76c', + 'info_dict': { + 'id': '194536', + 'ext': 'mp4', + 'title': 'Czas honoru, odc. 13 – Władek', + }, + }, { 'url': 'http://www.tvp.pl/sess/tvplayer.php?object_id=22670268', 'md5': '8c9cd59d16edabf39331f93bf8a766c7', 'info_dict': { From 34568dc2967d227630ed9d7150deaa62a689b937 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Feb 2019 13:39:00 +0700 Subject: [PATCH 104/785] [tvp] Detect unavailable videos --- youtube_dl/extractor/tvp.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index f9bf600b0..536b580fc 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -98,6 +98,7 @@ class TVPEmbedIE(InfoExtractor): 'title': 'Czas honoru, odc. 
13 – Władek', }, }, { + # not available 'url': 'http://www.tvp.pl/sess/tvplayer.php?object_id=22670268', 'md5': '8c9cd59d16edabf39331f93bf8a766c7', 'info_dict': { @@ -105,6 +106,7 @@ class TVPEmbedIE(InfoExtractor): 'ext': 'mp4', 'title': 'Panorama, 07.12.2015, 15:40', }, + 'skip': 'Transmisja została zakończona lub materiał niedostępny', }, { 'url': 'tvp:22670268', 'only_matching': True, @@ -116,10 +118,13 @@ class TVPEmbedIE(InfoExtractor): webpage = self._download_webpage( 'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id) - error_massage = get_element_by_attribute('class', 'msg error', webpage) - if error_massage: + error = self._html_search_regex( + r'(?s)]+\bclass=["\']notAvailable__text["\'][^>]*>(.+?)

', + webpage, 'error', default=None) or clean_html( + get_element_by_attribute('class', 'msg error', webpage)) + if error: raise ExtractorError('%s said: %s' % ( - self.IE_NAME, clean_html(error_massage)), expected=True) + self.IE_NAME, clean_html(error)), expected=True) title = self._search_regex( r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P.+?)\1', From d93083789bf9c318b18d52ac132e9495345b9ebc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 17 Feb 2019 14:09:30 +0700 Subject: [PATCH 105/785] [tvp:series] Fix extraction --- youtube_dl/extractor/tvp.py | 67 ++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 38 deletions(-) diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index 536b580fc..05669a366 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -1,14 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals +import itertools import re from .common import InfoExtractor from ..utils import ( - determine_ext, clean_html, - get_element_by_attribute, + determine_ext, ExtractorError, + get_element_by_attribute, + orderedSet, ) @@ -198,46 +200,35 @@ class TVPEmbedIE(InfoExtractor): class TVPSeriesIE(InfoExtractor): IE_NAME = 'tvp:series' - _VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$' + _VALID_URL = r'https?://vod\.tvp\.pl/website/(?P<display_id>[^,]+),(?P<id>\d+)/video' _TESTS = [{ - 'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem', + 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312/video', 'info_dict': { - 'title': 'Ogniem i mieczem', - 'id': '4278026', + 'id': '38678312', }, - 'playlist_count': 4, - }, { - 'url': 'http://vod.tvp.pl/audycje/podroze/boso-przez-swiat', - 'info_dict': { - 'title': 'Boso przez świat', - 'id': '9329207', - }, - 'playlist_count': 86, + 'playlist_count': 115, }] + def _entries(self, url, display_id): + for page_num in itertools.count(1): + page = 
self._download_webpage( + url, display_id, 'Downloading page %d' % page_num, + query={'page': page_num}) + + video_ids = orderedSet(re.findall( + r'<a[^>]+\bhref=["\']/video/%s,[^,]+,(\d+)' % display_id, + page)) + + if not video_ids: + break + + for video_id in video_ids: + yield self.url_result( + 'tvp:%s' % video_id, ie=TVPEmbedIE.ie_key(), + video_id=video_id) + def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id, tries=5) - - title = self._html_search_regex( - r'(?s) id=[\'"]path[\'"]>(?:.*? / ){2}(.*?)</span>', webpage, 'series') - playlist_id = self._search_regex(r'nodeId:\s*(\d+)', webpage, 'playlist id') - playlist = self._download_webpage( - 'http://vod.tvp.pl/vod/seriesAjax?type=series&nodeId=%s&recommend' - 'edId=0&sort=&page=0&pageSize=10000' % playlist_id, display_id, tries=5, - note='Downloading playlist') - - videos_paths = re.findall( - '(?s)class="shortTitle">.*?href="(/[^"]+)', playlist) - entries = [ - self.url_result('http://vod.tvp.pl%s' % v_path, ie=TVPIE.ie_key()) - for v_path in videos_paths] - - return { - '_type': 'playlist', - 'id': playlist_id, - 'display_id': display_id, - 'title': title, - 'entries': entries, - } + mobj = re.match(self._VALID_URL, url) + display_id, playlist_id = mobj.group('display_id', 'id') + return self.playlist_result(self._entries(url, display_id), playlist_id) From 388cfbd3d8915ebb99714ac8e7ce4151edf96d8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 17 Feb 2019 14:27:00 +0700 Subject: [PATCH 106/785] [tvp:website] Improve support --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/tvp.py | 26 ++++++++++++++++++++++---- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c70452dcd..923dfe7f4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1218,7 +1218,7 @@ 
from .tvnow import ( from .tvp import ( TVPEmbedIE, TVPIE, - TVPSeriesIE, + TVPWebsiteIE, ) from .tvplay import ( TVPlayIE, diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index 05669a366..accff75b5 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -198,19 +198,36 @@ class TVPEmbedIE(InfoExtractor): } -class TVPSeriesIE(InfoExtractor): +class TVPWebsiteIE(InfoExtractor): IE_NAME = 'tvp:series' - _VALID_URL = r'https?://vod\.tvp\.pl/website/(?P<display_id>[^,]+),(?P<id>\d+)/video' + _VALID_URL = r'https?://vod\.tvp\.pl/website/(?P<display_id>[^,]+),(?P<id>\d+)' _TESTS = [{ + # series 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312/video', 'info_dict': { 'id': '38678312', }, 'playlist_count': 115, + }, { + # film + 'url': 'https://vod.tvp.pl/website/gloria,35139666', + 'info_dict': { + 'id': '36637049', + 'ext': 'mp4', + 'title': 'Gloria, Gloria', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['TVPEmbed'], + }, { + 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312', + 'only_matching': True, }] - def _entries(self, url, display_id): + def _entries(self, display_id, playlist_id): + url = 'https://vod.tvp.pl/website/%s,%s/video' % (display_id, playlist_id) for page_num in itertools.count(1): page = self._download_webpage( url, display_id, 'Downloading page %d' % page_num, @@ -231,4 +248,5 @@ class TVPSeriesIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) display_id, playlist_id = mobj.group('display_id', 'id') - return self.playlist_result(self._entries(url, display_id), playlist_id) + return self.playlist_result( + self._entries(display_id, playlist_id), playlist_id) From c76fc5b22a70f9ac24fe7e34c37aa8ef82e85c49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 18 Feb 2019 02:10:06 +0700 Subject: [PATCH 107/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 25 +++++++++++++++++++++++++ 1 file changed, 25 
insertions(+) diff --git a/ChangeLog b/ChangeLog index 398528f76..adbdf166d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,28 @@ +version <unreleased> + +Extractors +* [tvp:website] Fix and improve extraction ++ [tvp] Detect unavailable videos +* [tvp] Fix description extraction and make thumbnail optional ++ [linuxacademy] Add support for linuxacademy.com (#12207) +* [bilibili] Update keys (#19233) +* [udemy] Extend URL regular expressions (#14330, #15883) +* [udemy] Update User-Agent and detect captcha (#14713, #15839, #18126) +* [noovo] Fix extraction (#19230) +* [rai] Relax URL regular expression (#19232) ++ [vshare] Pass Referer to download request (#19205, #19221) ++ [openload] Add support for oload.live (#19222) +* [imgur] Use video id as title fallback (#18590) ++ [twitch] Add new source format detection approach (#19193) +* [tvplayhome] Fix video id extraction (#19190) +* [tvplayhome] Fix episode metadata extraction (#19190) +* [rutube:embed] Fix extraction (#19163) ++ [rutube:embed] Add support private videos (#19163) ++ [soundcloud] Extract more metadata ++ [trunews] Add support for trunews.com (#19153) ++ [linkedin:learning] Extract chapter_number and chapter_id (#19162) + + version 2019.02.08 Core From 77a842c8926625fe791ed36613f183bb195394cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 18 Feb 2019 02:11:11 +0700 Subject: [PATCH 108/785] release 2019.02.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7128d998f..ff626883d 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.02.08*. 
If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.02.08** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.02.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.02.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.02.08 +[debug] youtube-dl version 2019.02.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index adbdf166d..f9dd7928f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.02.18 Extractors * [tvp:website] Fix and improve extraction diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 32fe6b647..d8a8d7ede 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -458,6 +458,7 @@ - **LineTV** - **linkedin:learning** - **linkedin:learning:course** + - **LinuxAcademy** - **LiTV** - **LiveLeak** - **LiveLeakEmbed** @@ -915,6 +916,7 @@ - 
**ToypicsUser**: Toypics user profile - **TrailerAddict** (Currently broken) - **Trilulilu** + - **TruNews** - **TruTV** - **Tube8** - **TubiTv** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4dc5a611e..ea1d5a4a5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.02.08' +__version__ = '2019.02.18' From caf48f557a8f4f904c88346bcfc462069b8745bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 21 Feb 2019 05:59:07 +0700 Subject: [PATCH 109/785] [metacafe] Fix family filter bypass (closes #19287) --- youtube_dl/extractor/metacafe.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 28f59f63c..9e92416d1 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -1,12 +1,13 @@ from __future__ import unicode_literals +import json import re from .common import InfoExtractor from ..compat import ( compat_parse_qs, + compat_urllib_parse, compat_urllib_parse_unquote, - compat_urllib_parse_urlencode, ) from ..utils import ( determine_ext, @@ -144,7 +145,7 @@ class MetacafeIE(InfoExtractor): headers = { # Disable family filter - 'Cookie': 'user=%s; ' % compat_urllib_parse_urlencode({'ffilter': False}) + 'Cookie': 'user=%s; ' % compat_urllib_parse.quote(json.dumps({'ffilter': False})) } # AnyClip videos require the flashversion cookie so that we get the link From 37b239b3b66ea9e2a71bae41e9da6dba8ee5554c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 23 Feb 2019 00:43:29 +0700 Subject: [PATCH 110/785] [downloader/external] Fix infinite retries for curl (closes #19303) --- youtube_dl/downloader/external.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 
958d00aac..0b88bfd94 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -121,7 +121,11 @@ class CurlFD(ExternalFD): cmd += self._valueless_option('--silent', 'noprogress') cmd += self._valueless_option('--verbose', 'verbose') cmd += self._option('--limit-rate', 'ratelimit') - cmd += self._option('--retry', 'retries') + retry = self._option('--retry', 'retries') + if len(retry) == 2: + if retry[1] in ('inf', 'infinite'): + retry[1] = '2147483647' + cmd += retry cmd += self._option('--max-filesize', 'max_filesize') cmd += self._option('--interface', 'source_address') cmd += self._option('--proxy', 'proxy') From 8c80603f1adea843d96c0598b902106c7a3efb7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 23 Feb 2019 00:58:56 +0700 Subject: [PATCH 111/785] [downloader/external] Add support for rate limit and retries for wget --- youtube_dl/downloader/external.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 0b88bfd94..22e6093b3 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -164,6 +164,12 @@ class WgetFD(ExternalFD): cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] + cmd += self._option('--limit-rate', 'ratelimit') + retry = self._option('--tries', 'retries') + if len(retry) == 2: + if retry[1] in ('inf', 'infinite'): + retry[1] = '0' + cmd += retry cmd += self._option('--bind-address', 'source_address') cmd += self._option('--proxy', 'proxy') cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate') From f0228f56fb2441510aa966ba9298e388b209cde1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 24 Feb 2019 21:01:25 +0700 Subject: [PATCH 112/785] [bbccouk] Make subtitles non fatal (#19651) --- 
youtube_dl/extractor/bbc.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index eac9a5a46..13340ec64 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -1,8 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals -import re import itertools +import re +import xml from .common import InfoExtractor from ..utils import ( @@ -17,6 +18,7 @@ from ..utils import ( parse_iso8601, try_get, unescapeHTML, + url_or_none, urlencode_postdata, urljoin, ) @@ -310,7 +312,13 @@ class BBCCoUkIE(InfoExtractor): def _get_subtitles(self, media, programme_id): subtitles = {} for connection in self._extract_connections(media): - captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions') + cc_url = url_or_none(connection.get('href')) + if not cc_url: + continue + captions = self._download_xml( + cc_url, programme_id, 'Downloading captions', fatal=False) + if not isinstance(captions, xml.etree.ElementTree.Element): + continue lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en') subtitles[lang] = [ { From 55b8588f0e4dd9597b6da5c46d05b9dd1e9f5960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 24 Feb 2019 23:19:15 +0700 Subject: [PATCH 113/785] [servus] Fix extraction (closes #19297) --- youtube_dl/extractor/servus.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/servus.py b/youtube_dl/extractor/servus.py index 264e1dd8b..e579d42cf 100644 --- a/youtube_dl/extractor/servus.py +++ b/youtube_dl/extractor/servus.py @@ -1,31 +1,44 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor class ServusIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?servus\.com/(?:at|de)/p/[^/]+/(?P<id>AA-\w+|\d+-\d+)' + _VALID_URL = 
r'https?://(?:www\.)?servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)/(?P<id>[aA]{2}-\w+|\d+-\d+)' _TESTS = [{ 'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/', - 'md5': '046dee641cda1c4cabe13baef3be2c1c', + 'md5': '3e1dd16775aa8d5cbef23628cfffc1f4', 'info_dict': { 'id': 'AA-1T6VBU5PW1W12', 'ext': 'mp4', - 'title': 'Die Grünen aus Volkssicht', - 'description': 'md5:052b5da1cb2cd7d562ef1f19be5a5cba', - 'thumbnail': r're:^https?://.*\.jpg$', + 'title': 'Die Grünen aus Sicht des Volkes', + 'description': 'md5:1247204d85783afe3682644398ff2ec4', + 'thumbnail': r're:^https?://.*\.jpg', } }, { 'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/', 'only_matching': True, + }, { + 'url': 'https://www.servus.com/tv/videos/aa-1t6vbu5pw1w12/', + 'only_matching': True, + }, { + 'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/', + 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) + video_id = self._match_id(url).upper() webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) + title = self._search_regex( + (r'videoLabel\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', + r'<h\d+[^>]+\bclass=["\']heading--(?:one|two)["\'][^>]*>(?P<title>[^<]+)'), + webpage, 'title', default=None, + group='title') or self._og_search_title(webpage) + title = re.sub(r'\s*-\s*Servus TV\s*$', '', title) description = self._og_search_description(webpage) thumbnail = self._og_search_thumbnail(webpage) From db1c3a9d3f202cc6f3fd83a2a918869e7c0d147f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 27 Feb 2019 03:41:15 +0700 Subject: [PATCH 114/785] [periscope] Extract width and height (closes #20015) --- youtube_dl/extractor/periscope.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 
8afe541ec..b337a56c0 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( + int_or_none, parse_iso8601, unescapeHTML, ) @@ -75,6 +76,14 @@ class PeriscopeIE(PeriscopeBaseIE): 'url': broadcast[image], } for image in ('image_url', 'image_url_small') if broadcast.get(image)] + width = int_or_none(broadcast.get('width')) + height = int_or_none(broadcast.get('height')) + + def add_width_and_height(f): + for key, val in (('width', width), ('height', height)): + if not f.get(key): + f[key] = val + video_urls = set() formats = [] for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'): @@ -83,16 +92,21 @@ class PeriscopeIE(PeriscopeBaseIE): continue video_urls.add(video_url) if format_id != 'rtmp': - formats.extend(self._extract_m3u8_formats( + m3u8_formats = self._extract_m3u8_formats( video_url, token, 'mp4', entry_protocol='m3u8_native' if state in ('ended', 'timed_out') else 'm3u8', - m3u8_id=format_id, fatal=False)) + m3u8_id=format_id, fatal=False) + if len(m3u8_formats) == 1: + add_width_and_height(m3u8_formats[0]) + formats.extend(m3u8_formats) continue - formats.append({ + rtmp_format = { 'url': video_url, 'ext': 'flv' if format_id == 'rtmp' else 'mp4', - }) + } + add_width_and_height(rtmp_format) + formats.append(rtmp_format) self._sort_formats(formats) return { From 9d9a8676dc02101069cf5fa9862500d39352538c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 28 Feb 2019 23:26:52 +0700 Subject: [PATCH 115/785] [francetv:site] Extend video id regex (closes #20029, closes #20071) --- youtube_dl/extractor/francetv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 2ffe83a78..3c4ef08a8 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -271,7 +271,7 @@ class 
FranceTVSiteIE(FranceTVBaseInfoExtractor): catalogue = None video_id = self._search_regex( - r'data-main-video=(["\'])(?P<id>(?:(?!\1).)+)\1', + r'(?:data-main-video\s*=|videoId\s*:)\s*(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', default=None, group='id') if not video_id: From ff60ec8f029d12c119855ec82d7ce9ecda388651 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Mar 2019 00:47:18 +0700 Subject: [PATCH 116/785] [npo] Fix extraction (#20084) --- youtube_dl/extractor/npo.py | 120 +++++++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 5a427c396..857845d35 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -12,11 +12,16 @@ from ..utils import ( ExtractorError, fix_xml_ampersands, int_or_none, + merge_dicts, orderedSet, parse_duration, qualities, + str_or_none, strip_jsonp, unified_strdate, + unified_timestamp, + url_or_none, + urlencode_postdata, ) @@ -176,9 +181,118 @@ class NPOIE(NPOBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - return self._get_info(video_id) + try: + return self._get_info(url, video_id) + except ExtractorError: + return self._get_old_info(video_id) - def _get_info(self, video_id): + def _get_info(self, url, video_id): + token = self._download_json( + 'https://www.npostart.nl/api/token', video_id, + 'Downloading token', headers={ + 'Referer': url, + 'X-Requested-With': 'XMLHttpRequest', + })['token'] + + player = self._download_json( + 'https://www.npostart.nl/player/%s' % video_id, video_id, + 'Downloading player JSON', data=urlencode_postdata({ + 'autoplay': 0, + 'share': 1, + 'pageUrl': url, + 'hasAdConsent': 0, + '_token': token, + })) + + player_token = player['token'] + + format_urls = set() + formats = [] + for profile in ('hls', 'dash-widevine', 'dash-playready', 'smooth'): + streams = self._download_json( + 
'https://start-player.npo.nl/video/%s/streams' % video_id, + video_id, 'Downloading %s profile JSON' % profile, fatal=False, + query={ + 'profile': profile, + 'quality': 'npo', + 'tokenId': player_token, + 'streamType': 'broadcast', + }) + if not streams: + continue + stream = streams.get('stream') + if not isinstance(stream, dict): + continue + stream_url = url_or_none(stream.get('src')) + if not stream_url or stream_url in format_urls: + continue + format_urls.add(stream_url) + if stream.get('protection') is not None: + continue + stream_type = stream.get('type') + stream_ext = determine_ext(stream_url) + if stream_type == 'application/dash+xml' or stream_ext == 'mpd': + formats.extend(self._extract_mpd_formats( + stream_url, video_id, mpd_id='dash', fatal=False)) + elif stream_type == 'application/vnd.apple.mpegurl' or stream_ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + stream_url, video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + elif '.ism/Manifest' in stream_url: + formats.extend(self._extract_ism_formats( + stream_url, video_id, ism_id='mss', fatal=False)) + else: + formats.append({ + 'url': stream_url, + }) + + self._sort_formats(formats) + + info = { + 'id': video_id, + 'title': video_id, + 'formats': formats, + } + + embed_url = url_or_none(player.get('embedUrl')) + if embed_url: + webpage = self._download_webpage( + embed_url, video_id, 'Downloading embed page', fatal=False) + if webpage: + video = self._parse_json( + self._search_regex( + r'\bvideo\s*=\s*({.+?})\s*;', webpage, 'video', + default='{}'), video_id) + if video: + title = video.get('episodeTitle') + subtitles = {} + subtitles_list = video.get('subtitles') + if isinstance(subtitles_list, list): + for cc in subtitles_list: + cc_url = url_or_none(cc.get('src')) + if not cc_url: + continue + lang = str_or_none(cc.get('language')) or 'nl' + subtitles.setdefault(lang, []).append({ + 'url': cc_url, + }) + return merge_dicts({ + 'title': title, + 
'description': video.get('description'), + 'thumbnail': url_or_none( + video.get('still_image_url') or video.get('orig_image_url')), + 'duration': int_or_none(video.get('duration')), + 'timestamp': unified_timestamp(video.get('broadcastDate')), + 'creator': video.get('channel'), + 'series': video.get('title'), + 'episode': title, + 'episode_number': int_or_none(video.get('episodeNumber')), + 'subtitles': subtitles, + }, info) + + return info + + def _get_old_info(self, video_id): metadata = self._download_json( 'http://e.omroep.nl/metadata/%s' % video_id, video_id, @@ -280,7 +394,7 @@ class NPOIE(NPOBaseIE): # JSON else: video_url = stream_info.get('url') - if not video_url or video_url in urls: + if not video_url or 'vodnotavailable.' in video_url or video_url in urls: continue urls.add(video_url) if determine_ext(video_url) == 'm3u8': From 333f617b1207cb53efaa5e2f7af174cfa87deee1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Mar 2019 01:02:36 +0700 Subject: [PATCH 117/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index f9dd7928f..f717f99a8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version <unreleased> + +Core ++ [downloader/external] Add support for rate limit and retries for wget +* [downloader/external] Fix infinite retries for curl (#19303) + +Extractors +* [npo] Fix extraction (#20084) +* [francetv:site] Extend video id regex (#20029, #20071) ++ [periscope] Extract width and height (#20015) +* [servus] Fix extraction (#19297) +* [bbccouk] Make subtitles non fatal (#19651) +* [metacafe] Fix family filter bypass (#19287) + + version 2019.02.18 Extractors From 04c33bdfb3cd73e71bf0788f02998cab30cf1da2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Mar 2019 01:03:51 +0700 Subject: [PATCH 118/785] release 2019.03.01 --- .github/ISSUE_TEMPLATE.md | 6 +++--- 
ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index ff626883d..71a500f04 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.02.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.02.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.01** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.02.18 +[debug] youtube-dl version 2019.03.01 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index f717f99a8..018a30641 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 
2019.03.01 Core + [downloader/external] Add support for rate limit and retries for wget diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ea1d5a4a5..42ba37f15 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.02.18' +__version__ = '2019.03.01' From 06242d44fe261999e2424d9ecb00f20ff30ccb9b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 1 Mar 2019 08:14:34 +0100 Subject: [PATCH 119/785] [vimeo] add support for Vimeo Pro portfolio protected videos(closes #20070) --- youtube_dl/extractor/vimeo.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 6215b3258..6f32ea6f1 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -502,7 +502,11 @@ class VimeoIE(VimeoBaseInfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') orig_url = url - if mobj.group('pro') or mobj.group('player'): + if mobj.group('pro'): + # some videos require portfolio_id to be present in player url + # https://github.com/rg3/youtube-dl/issues/20070 + url = self._extract_url(url, self._download_webpage(url, video_id)) + elif mobj.group('player'): url = 'https://player.vimeo.com/video/' + video_id elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): url = 'https://vimeo.com/' + video_id From c5b02efe20cff1612104fd731c7f02cbbce4f5f3 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 1 Mar 2019 15:08:11 +0100 Subject: [PATCH 120/785] [sixplay] handle videos with empty assets(closes #20016) --- youtube_dl/extractor/sixplay.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/sixplay.py b/youtube_dl/extractor/sixplay.py index 0c4f865ef..35bc9fa50 100644 --- a/youtube_dl/extractor/sixplay.py +++ b/youtube_dl/extractor/sixplay.py @@ -61,7 +61,8 @@ class 
SixPlayIE(InfoExtractor): quality_key = qualities(['lq', 'sd', 'hq', 'hd']) formats = [] subtitles = {} - for asset in clip_data['assets']: + assets = clip_data.get('assets') or [] + for asset in assets: asset_url = asset.get('full_physical_path') protocol = asset.get('protocol') if not asset_url or protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264' or asset_url in urls: From 398e1e21d6cbf6eb1e8e7e84de4fad30b7d59613 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 1 Mar 2019 15:34:05 +0100 Subject: [PATCH 121/785] [espn] extend _VALID_URL regex(closes #20013) --- youtube_dl/extractor/espn.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 127c69b2e..8cc9bd165 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -29,7 +29,8 @@ class ESPNIE(OnceIE): (?: .*?\?.*?\bid=| /_/id/ - ) + )| + [^/]+/video/ ) )| (?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/ @@ -94,6 +95,9 @@ class ESPNIE(OnceIE): }, { 'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets', 'only_matching': True, + }, { + 'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings', + 'only_matching': True, }] def _real_extract(self, url): From dca0e0040ae97b2fc0cd54d5e819a5a278937350 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 2 Mar 2019 08:01:42 +0100 Subject: [PATCH 122/785] Revert "use older login method(closes #11572)" This reverts commit cc6a960e134614f8af2a42dcd8bf146d63638a3c. 
--- youtube_dl/extractor/crunchyroll.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 5e2cbe41d..ce2e2d3ba 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -56,17 +56,6 @@ class CrunchyrollBaseIE(InfoExtractor): if username is None: return - self._download_webpage( - 'https://www.crunchyroll.com/?a=formhandler', - None, 'Logging in', 'Wrong login info', - data=urlencode_postdata({ - 'formname': 'RpcApiUser_Login', - 'next_url': 'https://www.crunchyroll.com/acct/membership', - 'name': username, - 'password': password, - })) - - ''' login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') @@ -110,7 +99,6 @@ class CrunchyrollBaseIE(InfoExtractor): raise ExtractorError('Unable to login: %s' % error, expected=True) raise ExtractorError('Unable to log in') - ''' def _real_initialize(self): self._login() From a8f83f0c56e81b871a46c18fa9ebc6643370fa48 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 2 Mar 2019 08:25:47 +0100 Subject: [PATCH 123/785] [crunchyroll] fix is_logged check --- youtube_dl/extractor/crunchyroll.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index ce2e2d3ba..fd1e7afad 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -60,7 +60,7 @@ class CrunchyrollBaseIE(InfoExtractor): self._LOGIN_URL, None, 'Downloading login page') def is_logged(webpage): - return '<title>Redirecting' in webpage + return 'href="/logout"' in webpage # Already logged in if is_logged(login_page): From 7465e0aee2301c3e86fe38d6e0ef5ad01c16ec79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 3 Mar 2019 06:25:45 +0700 Subject: [PATCH 124/785] [spankbang] Fix extraction (closes #20023) --- 
youtube_dl/extractor/spankbang.py | 45 +++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index fbe6ef31a..f11d728ca 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -9,6 +9,8 @@ from ..utils import ( parse_duration, parse_resolution, str_to_int, + url_or_none, + urlencode_postdata, ) @@ -64,16 +66,49 @@ class SpankBangIE(InfoExtractor): 'Video %s is not available' % video_id, expected=True) formats = [] - for mobj in re.finditer( - r'stream_url_(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2', - webpage): - format_id, format_url = mobj.group('id', 'url') + + def extract_format(format_id, format_url): + f_url = url_or_none(format_url) + if not f_url: + return f = parse_resolution(format_id) f.update({ - 'url': format_url, + 'url': f_url, 'format_id': format_id, }) formats.append(f) + + STREAM_URL_PREFIX = 'stream_url_' + + for mobj in re.finditer( + r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2' + % STREAM_URL_PREFIX, webpage): + extract_format(mobj.group('id', 'url')) + + if not formats: + stream_key = self._search_regex( + r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', + webpage, 'stream key', group='value') + + sb_csrf_session = self._get_cookies( + 'https://spankbang.com')['sb_csrf_session'].value + + stream = self._download_json( + 'https://spankbang.com/api/videos/stream', video_id, + 'Downloading stream JSON', data=urlencode_postdata({ + 'id': stream_key, + 'data': 0, + 'sb_csrf_session': sb_csrf_session, + }), headers={ + 'Referer': url, + 'X-CSRFToken': sb_csrf_session, + }) + + for format_id, format_url in stream.items(): + if format_id.startswith(STREAM_URL_PREFIX): + extract_format( + format_id[len(STREAM_URL_PREFIX):], format_url) + self._sort_formats(formats) title = self._html_search_regex( From 7aeb788e564d397face83b580362189753edd9dd Mon Sep 17 00:00:00 2001 From: 
cclauss <cclauss@me.com> Date: Sun, 3 Mar 2019 02:16:48 +0100 Subject: [PATCH 125/785] [travis] Remove sudo: false Travis now recommends removing `sudo: false` from configuration: https://blog.travis-ci.com/2018-11-19-required-linux-infrastructure-migration. --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 79287ccf6..82e81d078 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,6 @@ python: - "3.6" - "pypy" - "pypy3" -sudo: false env: - YTDL_TEST_SET=core - YTDL_TEST_SET=download From 8ae113ca9df0abd790e3391cd529bac42fce304f Mon Sep 17 00:00:00 2001 From: dimqua <dimqua@users.noreply.github.com> Date: Sun, 3 Mar 2019 04:19:36 +0300 Subject: [PATCH 126/785] [youtube] Add more invidious instances See [Invidious-Instances](https://github.com/omarroth/invidious/wiki/Invidious-Instances) for the reference. --- youtube_dl/extractor/youtube.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c8bf98b58..457e2acea 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -352,6 +352,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?yourepeat\.com/| tube\.majestyc\.net/| (?:www\.)?invidio\.us/| + (?:www\.)?invidious\.snopyta\.org/| + (?:www\.)?invidious\.kabi\.tk/| + (?:www\.)?vid\.wxzm\.sx/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? 
# handle anchor (#/) redirect urls (?: # the various things that can precede the ID: From 0a5baf9c210df9f492ae48dd8fdae90561c971bd Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 3 Mar 2019 06:18:15 +0100 Subject: [PATCH 127/785] [libsyn] improve extraction(closes #20229) --- youtube_dl/extractor/libsyn.py | 64 +++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/libsyn.py b/youtube_dl/extractor/libsyn.py index f7311f483..2cf444258 100644 --- a/youtube_dl/extractor/libsyn.py +++ b/youtube_dl/extractor/libsyn.py @@ -1,12 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -import json import re from .common import InfoExtractor from ..utils import ( + clean_html, + get_element_by_class, parse_duration, + strip_or_none, unified_strdate, ) @@ -21,7 +23,9 @@ class LibsynIE(InfoExtractor): 'id': '6385796', 'ext': 'mp3', 'title': "Champion Minded - Developing a Growth Mindset", - 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.', + # description fetched using another request: + # http://html5-player.libsyn.com/embed/getitemdetails?item_id=6385796 + # 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.', 'upload_date': '20180320', 'thumbnail': 're:^https?://.*', }, @@ -38,22 +42,36 @@ class LibsynIE(InfoExtractor): }] def _real_extract(self, url): - m = re.match(self._VALID_URL, url) - video_id = m.group('id') - url = m.group('mainurl') + url, video_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage(url, video_id) - podcast_title = self._search_regex( - r'<h3>([^<]+)</h3>', webpage, 'podcast title', default=None) - if podcast_title: - podcast_title = podcast_title.strip() - episode_title = self._search_regex( - r'(?:<div class="episode-title">|<h4>)([^<]+)</', 
webpage, 'episode title') - if episode_title: - episode_title = episode_title.strip() + data = self._parse_json(self._search_regex( + r'var\s+playlistItem\s*=\s*({.+?});', + webpage, 'JSON data block'), video_id) + + episode_title = data.get('item_title') or get_element_by_class('episode-title', webpage) + if not episode_title: + self._search_regex( + [r'data-title="([^"]+)"', r'<title>(.+?)</title>'], + webpage, 'episode title') + episode_title = episode_title.strip() + + podcast_title = strip_or_none(clean_html(self._search_regex( + r'<h3>([^<]+)</h3>', webpage, 'podcast title', + default=None) or get_element_by_class('podcast-title', webpage))) title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title + formats = [] + for k, format_id in (('media_url_libsyn', 'libsyn'), ('media_url', 'main'), ('download_link', 'download')): + f_url = data.get(k) + if not f_url: + continue + formats.append({ + 'url': f_url, + 'format_id': format_id, + }) + description = self._html_search_regex( r'<p\s+id="info_text_body">(.+?)</p>', webpage, 'description', default=None) @@ -61,27 +79,15 @@ class LibsynIE(InfoExtractor): # Strip non-breaking and normal spaces description = description.replace('\u00A0', ' ').strip() release_date = unified_strdate(self._search_regex( - r'
<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False)) - - data_json = self._search_regex(r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage, 'JSON data block') - data = json.loads(data_json) - - formats = [{ - 'url': data['media_url'], - 'format_id': 'main', - }, { - 'url': data['media_url_libsyn'], - 'format_id': 'libsyn', - }] - thumbnail = data.get('thumbnail_url') - duration = parse_duration(data.get('duration')) + r'<div class="release_date">Released: ([^<]+)<', + webpage, 'release date', default=None) or data.get('release_date')) return { 'id': video_id, 'title': title, 'description': description, - 'thumbnail': thumbnail, + 'thumbnail': data.get('thumbnail_url'), 'upload_date': release_date, - 'duration': duration, + 'duration': parse_duration(data.get('duration')), 'formats': formats, } From e7e62441cdde6dca6211c073be73677f195a0dff Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 3 Mar 2019 13:23:59 +0100 Subject: [PATCH 128/785] [utils] strip #HttpOnly_ prefix from cookies files (#20219) --- test/test_YoutubeDLCookieJar.py | 10 ++++++++++ test/testdata/cookies/httponly_cookies.txt | 6 ++++++ youtube_dl/utils.py | 18 +++++++++++++++++- 3 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 test/testdata/cookies/httponly_cookies.txt diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py index 6a8243590..f959798de 100644 --- a/test/test_YoutubeDLCookieJar.py +++ b/test/test_YoutubeDLCookieJar.py @@ -29,6 +29,16 @@ class TestYoutubeDLCookieJar(unittest.TestCase): tf.close() os.remove(tf.name) + def test_strip_httponly_prefix(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt') + cookiejar.load(ignore_discard=True, ignore_expires=True) + + def assert_cookie_has_value(key): + self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE') + + assert_cookie_has_value('HTTPONLY_COOKIE') + assert_cookie_has_value('JS_ACCESSIBLE_COOKIE') + if __name__ == '__main__': unittest.main() diff --git a/test/testdata/cookies/httponly_cookies.txt b/test/testdata/cookies/httponly_cookies.txt new file mode 100644 index 000000000..c46541d6b --- /dev/null +++ b/test/testdata/cookies/httponly_cookies.txt @@ -0,0 +1,6 @@ +# Netscape HTTP Cookie File +# http://curl.haxx.se/rfc/cookie_spec.html +# This is a generated file! Do not edit. 
+ +#HttpOnly_www.foobar.foobar FALSE / TRUE 2147483647 HTTPONLY_COOKIE HTTPONLY_COOKIE_VALUE +www.foobar.foobar FALSE / TRUE 2147483647 JS_ACCESSIBLE_COOKIE JS_ACCESSIBLE_COOKIE_VALUE diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f5a0bb4b0..a71eda85d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1141,6 +1141,8 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): + _HTTPONLY_PREFIX = '#HttpOnly_' + def save(self, filename=None, ignore_discard=False, ignore_expires=False): # Store session cookies with `expires` set to 0 instead of an empty # string @@ -1150,7 +1152,21 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires) def load(self, filename=None, ignore_discard=False, ignore_expires=False): - compat_cookiejar.MozillaCookieJar.load(self, filename, ignore_discard, ignore_expires) + """Load cookies from a file.""" + if filename is None: + if self.filename is not None: + filename = self.filename + else: + raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) + + cf = io.StringIO() + with open(filename) as f: + for line in f: + if line.startswith(self._HTTPONLY_PREFIX): + line = line[len(self._HTTPONLY_PREFIX):] + cf.write(compat_str(line)) + cf.seek(0) + self._really_load(cf, filename, ignore_discard, ignore_expires) # Session cookies are denoted by either `expires` field set to # an empty string or 0. MozillaCookieJar only recognizes the former # (see [1]). 
So we need force the latter to be recognized as session From 39c780fdec2c62135f37e3565efedf7dcad605ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Mar 2019 00:37:39 +0700 Subject: [PATCH 129/785] [extractor/common] Return MPD manifest as format's url meta field (#20242) For symmetry with other segmented media --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c3b0586a0..1fa8048b8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2120,7 +2120,7 @@ class InfoExtractor(object): bandwidth = int_or_none(representation_attrib.get('bandwidth')) f = { 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, - 'url': base_url, + 'url': mpd_url, 'manifest_url': mpd_url, 'ext': mimetype2ext(mime_type), 'width': int_or_none(representation_attrib.get('width')), From c790e93ab5db5f318fb094b8a45f9160cdf4bd9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Mar 2019 00:39:15 +0700 Subject: [PATCH 130/785] [extractor/common] Clarify url and manifest_url meta fields --- youtube_dl/extractor/common.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1fa8048b8..641e50f3c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -102,10 +102,20 @@ class InfoExtractor(object): from worst to best quality. Potential fields: - * url Mandatory. The URL of the video file + * url The mandatory URL representing the media: + for plain file media - HTTP URL of this file, + for RTMP - RTMP URL, + for HLS - URL of the M3U8 media playlist, + for HDS - URL of the F4M manifest, + for DASH - URL of the MPD manifest, + for MSS - URL of the ISM manifest. 
* manifest_url The URL of the manifest file in case of - fragmented media (DASH, hls, hds) + fragmented media: + for HLS - URL of the M3U8 master playlist, + for HDS - URL of the F4M manifest, + for DASH - URL of the MPD manifest, + for MSS - URL of the ISM manifest. * ext Will be calculated from URL if missing * format A human-readable description of the format ("mp4 container with h264/opus"). From 5dcd630dca9b75ec2ca920ae7799252e0e0bb599 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 4 Mar 2019 22:26:32 +0100 Subject: [PATCH 131/785] [paramountnetwork] fix mgid extraction(closes #20241) --- youtube_dl/extractor/spike.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 6090e0066..21b93a5b3 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -46,8 +46,12 @@ class ParamountNetworkIE(MTVServicesInfoExtractor): _GEO_COUNTRIES = ['US'] def _extract_mgid(self, webpage): - cs = self._parse_json(self._search_regex( + root_data = self._parse_json(self._search_regex( r'window\.__DATA__\s*=\s*({.+})', - webpage, 'data'), None)['children'] - c = next(c for c in cs if c.get('type') == 'VideoPlayer') + webpage, 'data'), None) + + def find_sub_data(data, data_type): + return next(c for c in data['children'] if c.get('type') == data_type) + + c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer') return c['props']['media']['video']['config']['uri'] From d9eb580a796ef6c9a248fdd8896ccf85349c35eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Mar 2019 23:45:40 +0700 Subject: [PATCH 132/785] [extractor/common] Do not fail on invalid data while parsing F4M manifest in non fatal mode --- youtube_dl/extractor/common.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 641e50f3c..55ce1a888 100644 --- a/youtube_dl/extractor/common.py 
+++ b/youtube_dl/extractor/common.py @@ -13,6 +13,7 @@ import socket import sys import time import math +import xml from ..compat import ( compat_cookiejar, @@ -1464,6 +1465,9 @@ class InfoExtractor(object): def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), fatal=True, m3u8_id=None): + if not isinstance(manifest, xml.etree.ElementTree.Element) and not fatal: + return [] + # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') if akamai_pv is not None and ';' in akamai_pv.text: From c17eb5b4b06cfa2c8bffb378b0a5c84d4c5a6834 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Mar 2019 23:54:25 +0700 Subject: [PATCH 133/785] [rai] Improve extraction (closes #20253) --- youtube_dl/extractor/rai.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 149153b8f..207a6c247 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -74,11 +74,11 @@ class RaiBaseIE(InfoExtractor): if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'): continue - if ext == 'm3u8': + if ext == 'm3u8' or 'format=m3u8' in media_url or platform == 'mon': formats.extend(self._extract_m3u8_formats( media_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - elif ext == 'f4m': + elif ext == 'f4m' or platform == 'flash': manifest_url = update_url_query( media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'), {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'}) From bb6f112d9d57d7c6260de132cad604c1c05bc5a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Mar 2019 23:57:39 +0700 Subject: [PATCH 134/785] [npo] Improve ISM extraction --- youtube_dl/extractor/npo.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 857845d35..ad62f8ec6 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -238,7 +238,7 @@ class NPOIE(NPOBaseIE): formats.extend(self._extract_m3u8_formats( stream_url, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) - elif '.ism/Manifest' in stream_url: + elif re.search(r'\.isml?/Manifest', stream_url): formats.extend(self._extract_ism_formats( stream_url, video_id, ism_id='mss', fatal=False)) else: From e5ada4f3ad771d4cf3f533efb2597a3f1618ce75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 6 Mar 2019 00:33:08 +0700 Subject: [PATCH 135/785] [extractor/common] Fallback url to base URL for DASH formats --- youtube_dl/extractor/common.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 55ce1a888..a17f7cbc4 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -108,7 +108,10 @@ class InfoExtractor(object): for RTMP - RTMP URL, for HLS - URL of the M3U8 media playlist, for HDS - URL of the F4M manifest, - for DASH - URL of the MPD manifest, + for DASH - URL of the MPD manifest or + base URL representing the media + if MPD manifest is parsed from + a string, for MSS - URL of the ISM manifest. 
* manifest_url The URL of the manifest file in case of @@ -2134,7 +2137,8 @@ class InfoExtractor(object): bandwidth = int_or_none(representation_attrib.get('bandwidth')) f = { 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, - 'url': mpd_url, + # NB: mpd_url may be empty when MPD manifest is parsed from a string + 'url': mpd_url or base_url, 'manifest_url': mpd_url, 'ext': mimetype2ext(mime_type), 'width': int_or_none(representation_attrib.get('width')), From 399f76870d7dc72631e7da1f54a46ed8a039c838 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 6 Mar 2019 01:18:52 +0700 Subject: [PATCH 136/785] [compat] Introduce compat_etree_Element --- test/test_compat.py | 7 +++++++ youtube_dl/compat.py | 10 ++++++++++ 2 files changed, 17 insertions(+) diff --git a/test/test_compat.py b/test/test_compat.py index 51fe6aa0b..4822260ac 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -13,6 +13,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.compat import ( compat_getenv, compat_setenv, + compat_etree_Element, compat_etree_fromstring, compat_expanduser, compat_shlex_split, @@ -90,6 +91,12 @@ class TestCompat(unittest.TestCase): self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag']) self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文']) + def test_compat_etree_Element(self): + try: + compat_etree_Element.text + except AttributeError: + self.fail('compat_etree_Element is not a type') + def test_compat_etree_fromstring(self): xml = ''' diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 7b770340f..b2fe62f12 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2508,6 +2508,15 @@ class _TreeBuilder(etree.TreeBuilder): pass +try: + # xml.etree.ElementTree.Element is a method in Python <=2.6 and + # the following will crash with: + # TypeError: isinstance() arg 2 must be a class, 
type, or tuple of classes and types + isinstance(None, xml.etree.ElementTree.Element) + from xml.etree.ElementTree import Element as compat_etree_Element +except TypeError: # Python <=2.6 + from xml.etree.ElementTree import _ElementInterface as compat_etree_Element + if sys.version_info[0] >= 3: def compat_etree_fromstring(text): return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) @@ -2969,6 +2978,7 @@ __all__ = [ 'compat_cookiejar', 'compat_cookies', 'compat_ctypes_WINFUNCTYPE', + 'compat_etree_Element', 'compat_etree_fromstring', 'compat_etree_register_namespace', 'compat_expanduser', From ee0ba927aac067dec533a618540e43ed3deebaba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 6 Mar 2019 01:21:57 +0700 Subject: [PATCH 137/785] Use compat_etree_Element --- youtube_dl/extractor/bbc.py | 4 ++-- youtube_dl/extractor/common.py | 8 ++++---- youtube_dl/extractor/crunchyroll.py | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 13340ec64..d479d2577 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import itertools import re -import xml from .common import InfoExtractor from ..utils import ( @@ -23,6 +22,7 @@ from ..utils import ( urljoin, ) from ..compat import ( + compat_etree_Element, compat_HTTPError, compat_urlparse, ) @@ -317,7 +317,7 @@ class BBCCoUkIE(InfoExtractor): continue captions = self._download_xml( cc_url, programme_id, 'Downloading captions', fatal=False) - if not isinstance(captions, xml.etree.ElementTree.Element): + if not isinstance(captions, compat_etree_Element): continue lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en') subtitles[lang] = [ diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a17f7cbc4..4839edbf7 100644 --- a/youtube_dl/extractor/common.py +++ 
b/youtube_dl/extractor/common.py @@ -13,11 +13,11 @@ import socket import sys import time import math -import xml from ..compat import ( compat_cookiejar, compat_cookies, + compat_etree_Element, compat_etree_fromstring, compat_getpass, compat_integer_types, @@ -802,7 +802,7 @@ class InfoExtractor(object): fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): """ - Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle). + Return a tuple (xml as an compat_etree_Element, URL handle). See _download_webpage docstring for arguments specification. """ @@ -823,7 +823,7 @@ class InfoExtractor(object): transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): """ - Return the xml as an xml.etree.ElementTree.Element. + Return the xml as an compat_etree_Element. See _download_webpage docstring for arguments specification. """ @@ -1468,7 +1468,7 @@ class InfoExtractor(object): def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), fatal=True, m3u8_id=None): - if not isinstance(manifest, xml.etree.ElementTree.Element) and not fatal: + if not isinstance(manifest, compat_etree_Element) and not fatal: return [] # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index fd1e7afad..5948154f8 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import re import json -import xml.etree.ElementTree as etree import zlib from hashlib import sha1 @@ -12,6 +11,7 @@ from .common import InfoExtractor from .vrv import VRVIE from ..compat import ( compat_b64decode, + compat_etree_Element, compat_etree_fromstring, compat_urllib_parse_urlencode, compat_urllib_request, @@ -390,7 +390,7 @@ 
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'Downloading subtitles for ' + sub_name, data={ 'subtitle_script_id': sub_id, }) - if not isinstance(sub_doc, etree.Element): + if not isinstance(sub_doc, compat_etree_Element): continue sid = sub_doc.get('id') iv = xpath_text(sub_doc, 'iv', 'subtitle iv') @@ -507,7 +507,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_quality': stream_quality, 'current_page': url, }) - if isinstance(streamdata, etree.Element): + if isinstance(streamdata, compat_etree_Element): stream_info = streamdata.find('./{default}preload/stream_info') if stream_info is not None: stream_infos.append(stream_info) @@ -518,7 +518,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_format': stream_format, 'video_encode_quality': stream_quality, }) - if isinstance(stream_info, etree.Element): + if isinstance(stream_info, compat_etree_Element): stream_infos.append(stream_info) for stream_info in stream_infos: video_encode_id = xpath_text(stream_info, './video_encode_id') @@ -593,7 +593,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text season = episode = episode_number = duration = thumbnail = None - if isinstance(metadata, etree.Element): + if isinstance(metadata, compat_etree_Element): season = xpath_text(metadata, 'series_title') episode = xpath_text(metadata, 'episode_title') episode_number = int_or_none(xpath_text(metadata, 'episode_number')) From a551768acfd177e425f518c43a2992a50a2ff69f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 6 Mar 2019 01:27:22 +0700 Subject: [PATCH 138/785] [facebook] Improve uploader extraction (closes #20250) --- youtube_dl/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 74954049d..789dd79d5 100644 --- 
a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -424,7 +424,7 @@ class FacebookIE(InfoExtractor): uploader = clean_html(get_element_by_id( 'fbPhotoPageAuthorName', webpage)) or self._search_regex( r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', - fatal=False) or self._og_search_title(webpage, fatal=False) + default=None) or self._og_search_title(webpage, fatal=False) timestamp = int_or_none(self._search_regex( r'<abbr[^>]+data-utime=["\'](\d+)', webpage, 'timestamp', default=None)) From 97157c692c94e3853a6ad1b8a220f064815b6957 Mon Sep 17 00:00:00 2001 From: yonaikerlol Date: Tue, 5 Mar 2019 14:34:34 -0400 Subject: [PATCH 139/785] [openload] Add support for oload.space --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index c1dcbb7eb..bae7c7ee7 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -249,7 +249,7 @@ class OpenloadIE(InfoExtractor): (?:www\.)? 
(?: openload\.(?:co|io|link|pw)| - oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live) + oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live|space) ) )/ (?:f|embed)/ @@ -349,6 +349,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.live/f/-Z58UZ-GR4M', 'only_matching': True, + }, { + 'url': 'https://oload.space/f/IY4eZSst3u8/', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From d347b52b63282b3276815fd03fc63a1bc8b82cf5 Mon Sep 17 00:00:00 2001 From: 0x9fff00 <0x9fff00+git@protonmail.ch> Date: Tue, 5 Mar 2019 20:11:32 +0100 Subject: [PATCH 140/785] [urplay] Extract timestamp (#20235) --- youtube_dl/extractor/urplay.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/urplay.py b/youtube_dl/extractor/urplay.py index 8e6fd4731..6030b7cb5 100644 --- a/youtube_dl/extractor/urplay.py +++ b/youtube_dl/extractor/urplay.py @@ -2,18 +2,31 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import unified_timestamp class URPlayIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P<id>[0-9]+)' _TESTS = [{ - 'url': 'http://urplay.se/program/190031-tripp-trapp-trad-sovkudde', - 'md5': 'ad5f0de86f16ca4c8062cd103959a9eb', + 'url': 'https://urplay.se/program/203704-ur-samtiden-livet-universum-och-rymdens-markliga-musik-om-vetenskap-kritiskt-tankande-och-motstand', + 'md5': 'ff5b0c89928f8083c74bbd5099c9292d', + 'info_dict': { + 'id': '203704', + 'ext': 'mp4', + 'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd', + 'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a', + 'timestamp': 1513512768, + 'upload_date': '20171217', + }, + }, { + 'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde', 
'info_dict': { 'id': '190031', 'ext': 'mp4', 'title': 'Tripp, Trapp, Träd : Sovkudde', 'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1', + 'timestamp': 1440093600, + 'upload_date': '20150820', }, }, { 'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden', @@ -51,6 +64,7 @@ class URPlayIE(InfoExtractor): 'title': urplayer_data['title'], 'description': self._og_search_description(webpage), 'thumbnail': urplayer_data.get('image'), + 'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'), webpage, 'timestamp')), 'series': urplayer_data.get('series_title'), 'subtitles': subtitles, 'formats': formats, From fca9baf0da9720bac25d160924204395930191fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 6 Mar 2019 02:45:33 +0700 Subject: [PATCH 141/785] [test] Fix test_compat_etree_Element --- test/test_compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_compat.py b/test/test_compat.py index 4822260ac..86ff389fd 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -93,7 +93,7 @@ class TestCompat(unittest.TestCase): def test_compat_etree_Element(self): try: - compat_etree_Element.text + compat_etree_Element.items except AttributeError: self.fail('compat_etree_Element is not a type') From 829685b88a0c7610a874b980bc25b308c4f34590 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 6 Mar 2019 09:20:27 +0100 Subject: [PATCH 142/785] [toutv] fix authentication(closes #20261) --- youtube_dl/extractor/toutv.py | 53 +++++++++-------------------------- 1 file changed, 14 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index f1ab91cf2..124ca064c 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -1,14 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals -import re +import json from .radiocanada import RadioCanadaIE from ..utils import ( - 
extract_attributes, int_or_none, merge_dicts, - urlencode_postdata, ) @@ -38,47 +36,24 @@ class TouTvIE(RadioCanadaIE): 'url': 'https://ici.tou.tv/l-age-adulte/S01C501', 'only_matching': True, }] + _CLIENT_KEY = '4dd36440-09d5-4468-8923-b6d91174ad36' def _real_initialize(self): email, password = self._get_login_info() if email is None: return - login_webpage = self._download_webpage( - 'https://services.radio-canada.ca/auth/oauth/v2/authorize', - None, 'Downloading login page', query={ - 'client_id': '4dd36440-09d5-4468-8923-b6d91174ad36', - 'redirect_uri': 'https://ici.tou.tv/logincallback', - 'response_type': 'token', - 'scope': 'id.write media-validation.read', - 'state': '/', - }) - - def extract_form_url_and_data(wp, default_form_url, form_spec_re=''): - form, form_elem = re.search( - r'(?s)((<form[^>]+?%s[^>]*?>).+?</form>)' % form_spec_re, wp).groups() - form_data = self._hidden_inputs(form) - form_url = extract_attributes(form_elem).get('action') or default_form_url - return form_url, form_data - - post_url, form_data = extract_form_url_and_data( - login_webpage, - 'https://services.radio-canada.ca/auth/oauth/v2/authorize/login', - r'(?:id|name)="Form-login"') - form_data.update({ - 'login-email': email, - 'login-password': password, - }) - consent_webpage = self._download_webpage( - post_url, None, 'Logging in', data=urlencode_postdata(form_data)) - post_url, form_data = extract_form_url_and_data( - consent_webpage, - 'https://services.radio-canada.ca/auth/oauth/v2/authorize/consent') - _, urlh = self._download_webpage_handle( - post_url, None, 'Following Redirection', - data=urlencode_postdata(form_data)) - self._access_token = self._search_regex( - r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', - urlh.geturl(), 'access token') + self._access_token = self._download_json( + 'https://services.radio-canada.ca/toutv/profiling/accounts/login', + None, 'Logging in', data=json.dumps({ + 'ClientId': self._CLIENT_KEY, + 'ClientSecret': 
'34026772-244b-49b6-8b06-317b30ac9a20', + 'Email': email, + 'Password': password, + 'Scope': 'id.write media-validation.read', + }).encode(), headers={ + 'Authorization': 'client-key ' + self._CLIENT_KEY, + 'Content-Type': 'application/json;charset=utf-8', + })['access_token'] self._claims = self._call_api('validation/v2/getClaims')['claims'] def _real_extract(self, url): From 7b6e76087080eac54e14cdead4e3bc0225c654b5 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 6 Mar 2019 09:28:14 +0100 Subject: [PATCH 143/785] [toutv] detect invalid login error --- youtube_dl/extractor/toutv.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 124ca064c..25e1fd46d 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -4,7 +4,9 @@ from __future__ import unicode_literals import json from .radiocanada import RadioCanadaIE +from ..compat import compat_HTTPError from ..utils import ( + ExtractorError, int_or_none, merge_dicts, ) @@ -42,18 +44,24 @@ class TouTvIE(RadioCanadaIE): email, password = self._get_login_info() if email is None: return - self._access_token = self._download_json( - 'https://services.radio-canada.ca/toutv/profiling/accounts/login', - None, 'Logging in', data=json.dumps({ - 'ClientId': self._CLIENT_KEY, - 'ClientSecret': '34026772-244b-49b6-8b06-317b30ac9a20', - 'Email': email, - 'Password': password, - 'Scope': 'id.write media-validation.read', - }).encode(), headers={ - 'Authorization': 'client-key ' + self._CLIENT_KEY, - 'Content-Type': 'application/json;charset=utf-8', - })['access_token'] + try: + self._access_token = self._download_json( + 'https://services.radio-canada.ca/toutv/profiling/accounts/login', + None, 'Logging in', data=json.dumps({ + 'ClientId': self._CLIENT_KEY, + 'ClientSecret': '34026772-244b-49b6-8b06-317b30ac9a20', + 'Email': email, + 'Password': password, + 'Scope': 'id.write 
media-validation.read', + }).encode(), headers={ + 'Authorization': 'client-key ' + self._CLIENT_KEY, + 'Content-Type': 'application/json;charset=utf-8', + })['access_token'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + error = self._parse_json(e.cause.read().decode(), None)['Message'] + raise ExtractorError(error, expected=True) + raise self._claims = self._call_api('validation/v2/getClaims')['claims'] def _real_extract(self, url): From 9d74ea6d36696396392974e94a40dce1e5a881a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 Mar 2019 23:26:59 +0700 Subject: [PATCH 144/785] [francetv:site] Relax video id regex and update test (closes #20268) --- youtube_dl/extractor/francetv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 3c4ef08a8..6101fb6bd 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -215,7 +215,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): _TESTS = [{ 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html', 'info_dict': { - 'id': '162311093', + 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', 'ext': 'mp4', 'title': '13h15, le dimanche... 
- Les mystères de Jésus', 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42', @@ -271,7 +271,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): catalogue = None video_id = self._search_regex( - r'(?:data-main-video\s*=|videoId\s*:)\s*(["\'])(?P(?:(?!\1).)+)\1', + r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video id', default=None, group='id') if not video_id: From bba35695eb4ab9cc70624583375ba3d15b4e6cc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Mar 2019 02:52:08 +0700 Subject: [PATCH 145/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/ChangeLog b/ChangeLog index 018a30641..272191a01 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,34 @@ +version + +Core +* [extractor/common] Use compat_etree_Element ++ [compat] Introduce compat_etree_Element +* [extractor/common] Fallback url to base URL for DASH formats +* [extractor/common] Do not fail on invalid data while parsing F4M manifest + in non fatal mode +* [extractor/common] Return MPD manifest as format's url meta field (#20242) +* [utils] Strip #HttpOnly_ prefix from cookies files (#20219) + +Extractors +* [francetv:site] Relax video id regular expression (#20268) +* [toutv] Detect invalid login error +* [toutv] Fix authentication (#20261) ++ [urplay] Extract timestamp (#20235) ++ [openload] Add support for oload.space (#20246) +* [facebook] Improve uploader extraction (#20250) +* [bbc] Use compat_etree_Element +* [crunchyroll] Use compat_etree_Element +* [npo] Improve ISM extraction +* [rai] Improve extraction (#20253) +* [paramountnetwork] Fix mgid extraction (#20241) +* [libsyn] Improve extraction (#20229) ++ [youtube] Add more invidious instances to URL regular expression (#20228) +* [spankbang] Fix extraction (#20023) +* [espn] Extend URL regular expression (#20013) +* [sixplay] Handle videos with empty assets (#20016) ++ [vimeo] Add support 
for Vimeo Pro portfolio protected videos (#20070) + + version 2019.03.01 Core From 10734553feef497e2810a23bbe62a0b3d630e78d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Mar 2019 02:53:18 +0700 Subject: [PATCH 146/785] release 2019.03.09 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 71a500f04..5f97e2cbe 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.09** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.03.01 +[debug] youtube-dl version 2019.03.09 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 272191a01..eda94ad33 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.03.09 Core * [extractor/common] Use compat_etree_Element diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 42ba37f15..f72fee57f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.03.01' +__version__ = '2019.03.09' From 0d08bcdb70008f0d500afbd19059b3c0971a4776 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 10 Mar 2019 09:37:28 +0100 Subject: [PATCH 147/785] [fox] detect geo restriction and authentication errors(#20208) --- youtube_dl/extractor/fox.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index 0ffceeb7c..f30d3cba8 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -6,10 +6,12 @@ import uuid from .adobepass import AdobePassIE from ..compat import ( + compat_HTTPError, compat_str, 
compat_urllib_parse_unquote, ) from ..utils import ( + ExtractorError, int_or_none, parse_age_limit, parse_duration, @@ -48,6 +50,7 @@ class FOXIE(AdobePassIE): 'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/', 'only_matching': True, }] + _GEO_BYPASS = False _HOME_PAGE_URL = 'https://www.fox.com/' _API_KEY = 'abdcbed02c124d393b39e818a4312055' _access_token = None @@ -58,9 +61,22 @@ class FOXIE(AdobePassIE): } if self._access_token: headers['Authorization'] = 'Bearer ' + self._access_token - return self._download_json( - 'https://api2.fox.com/v2.0/' + path, - video_id, data=data, headers=headers) + try: + return self._download_json( + 'https://api2.fox.com/v2.0/' + path, + video_id, data=data, headers=headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.status == 403: + entitlement_issues = self._parse_json( + e.cause.read().decode(), video_id)['entitlementIssues'] + for e in entitlement_issues: + if e.get('errorCode') == 1005: + raise ExtractorError( + 'This video is only available via cable service provider ' + 'subscription. 
You may want to use --cookies.', expected=True) + messages = ', '.join([e['message'] for e in entitlement_issues]) + raise ExtractorError(messages, expected=True) + raise def _real_initialize(self): if not self._access_token: @@ -81,7 +97,15 @@ class FOXIE(AdobePassIE): title = video['name'] release_url = video['url'] - m3u8_url = self._download_json(release_url, video_id)['playURL'] + try: + m3u8_url = self._download_json(release_url, video_id)['playURL'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.status == 403: + error = self._parse_json(e.cause.read().decode(), video_id) + if error.get('exception') == 'GeoLocationBlocked': + self.raise_geo_restricted(countries=['US']) + raise ExtractorError(error['description'], expected=True) + raise formats = self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') From 276550371313dbfe7d94ceb294bd1284c1e7c404 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 10 Mar 2019 15:03:32 +0100 Subject: [PATCH 148/785] [vimeo:review] improve config url extraction and extract original format(closes #20305) --- youtube_dl/extractor/vimeo.py | 64 +++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 6f32ea6f1..e3ec550f0 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -195,6 +195,32 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'subtitles': subtitles, } + def _extract_original_format(self, url, video_id): + download_data = self._download_json( + url, video_id, fatal=False, + query={'action': 'load_download_config'}, + headers={'X-Requested-With': 'XMLHttpRequest'}) + if download_data: + source_file = download_data.get('source_file') + if isinstance(source_file, dict): + download_url = source_file.get('download_url') + if download_url and not source_file.get('is_cold') and not 
source_file.get('is_defrosting'): + source_name = source_file.get('public_name', 'Original') + if self._is_valid_url(download_url, video_id, '%s video' % source_name): + ext = (try_get( + source_file, lambda x: x['extension'], + compat_str) or determine_ext( + download_url, None) or 'mp4').lower() + return { + 'url': download_url, + 'ext': ext, + 'width': int_or_none(source_file.get('width')), + 'height': int_or_none(source_file.get('height')), + 'filesize': parse_filesize(source_file.get('size')), + 'format_id': source_name, + 'preference': 1, + } + class VimeoIE(VimeoBaseInfoExtractor): """Information extractor for vimeo.com.""" @@ -659,29 +685,11 @@ class VimeoIE(VimeoBaseInfoExtractor): comment_count = None formats = [] - download_request = sanitized_Request('https://vimeo.com/%s?action=load_download_config' % video_id, headers={ - 'X-Requested-With': 'XMLHttpRequest'}) - download_data = self._download_json(download_request, video_id, fatal=False) - if download_data: - source_file = download_data.get('source_file') - if isinstance(source_file, dict): - download_url = source_file.get('download_url') - if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'): - source_name = source_file.get('public_name', 'Original') - if self._is_valid_url(download_url, video_id, '%s video' % source_name): - ext = (try_get( - source_file, lambda x: x['extension'], - compat_str) or determine_ext( - download_url, None) or 'mp4').lower() - formats.append({ - 'url': download_url, - 'ext': ext, - 'width': int_or_none(source_file.get('width')), - 'height': int_or_none(source_file.get('height')), - 'filesize': parse_filesize(source_file.get('size')), - 'format_id': source_name, - 'preference': 1, - }) + + source_format = self._extract_original_format( + 'https://vimeo.com/' + video_id, video_id) + if source_format: + formats.append(source_format) info_dict_config = self._parse_config(config, video_id) formats.extend(info_dict_config['formats']) @@ 
-940,7 +948,7 @@ class VimeoGroupsIE(VimeoAlbumIE): class VimeoReviewIE(VimeoBaseInfoExtractor): IE_NAME = 'vimeo:review' IE_DESC = 'Review pages on vimeo' - _VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P[^/]+)' + _VALID_URL = r'(?Phttps://vimeo\.com/[^/]+/review/(?P[^/]+)/[0-9a-f]{10})' _TESTS = [{ 'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d', 'md5': 'c507a72f780cacc12b2248bb4006d253', @@ -992,7 +1000,8 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): data = self._parse_json(self._search_regex( r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data', default=NO_DEFAULT if video_password_verified else '{}'), video_id) - config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl') + config = data.get('vimeo_esi', {}).get('config', {}) + config_url = config.get('configUrl') or try_get(config, lambda x: x['clipData']['configUrl']) if config_url is None: self._verify_video_password(webpage_url, video_id, webpage) config_url = self._get_config_url( @@ -1000,10 +1009,13 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): return config_url def _real_extract(self, url): - video_id = self._match_id(url) + page_url, video_id = re.match(self._VALID_URL, url).groups() config_url = self._get_config_url(url, video_id) config = self._download_json(config_url, video_id) info_dict = self._parse_config(config, video_id) + source_format = self._extract_original_format(page_url, video_id) + if source_format: + info_dict['formats'].append(source_format) self._vimeo_sort_formats(info_dict['formats']) info_dict['id'] = video_id return info_dict From 067aa17edf5a46a8cbc4d6b90864eddf051fa2bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Mar 2019 19:14:41 +0700 Subject: [PATCH 149/785] Start moving to ytdl-org --- .github/ISSUE_TEMPLATE.md | 10 +++--- .github/ISSUE_TEMPLATE_tmpl.md | 10 +++--- .github/PULL_REQUEST_TEMPLATE.md | 4 +-- CONTRIBUTING.md | 20 +++++------ README.md | 42 +++++++++++------------ 
devscripts/buildserver.py | 2 +- devscripts/create-github-release.py | 4 +-- devscripts/gh-pages/update-feed.py | 4 +-- devscripts/release.sh | 2 +- devscripts/show-downloads-statistics.py | 2 +- setup.py | 2 +- test/test_InfoExtractor.py | 16 ++++----- test/test_YoutubeDL.py | 6 ++-- test/test_all_urls.py | 6 ++-- youtube-dl.plugin.zsh | 2 +- youtube_dl/YoutubeDL.py | 16 ++++----- youtube_dl/__init__.py | 2 +- youtube_dl/compat.py | 6 ++-- youtube_dl/downloader/external.py | 4 +-- youtube_dl/downloader/f4m.py | 6 ++-- youtube_dl/downloader/hls.py | 4 +-- youtube_dl/downloader/http.py | 2 +- youtube_dl/extractor/arkena.py | 2 +- youtube_dl/extractor/bambuser.py | 4 +-- youtube_dl/extractor/bbc.py | 2 +- youtube_dl/extractor/brightcove.py | 6 ++-- youtube_dl/extractor/ceskatelevize.py | 2 +- youtube_dl/extractor/common.py | 20 +++++------ youtube_dl/extractor/commonmistakes.py | 2 +- youtube_dl/extractor/crunchyroll.py | 4 +-- youtube_dl/extractor/dailymotion.py | 4 +-- youtube_dl/extractor/dreisat.py | 2 +- youtube_dl/extractor/francetv.py | 2 +- youtube_dl/extractor/generic.py | 10 +++--- youtube_dl/extractor/googledrive.py | 2 +- youtube_dl/extractor/kuwo.py | 2 +- youtube_dl/extractor/liveleak.py | 6 ++-- youtube_dl/extractor/msn.py | 2 +- youtube_dl/extractor/nhl.py | 2 +- youtube_dl/extractor/noco.py | 2 +- youtube_dl/extractor/once.py | 2 +- youtube_dl/extractor/pbs.py | 6 ++-- youtube_dl/extractor/pluralsight.py | 6 ++-- youtube_dl/extractor/pornhub.py | 2 +- youtube_dl/extractor/prosiebensat1.py | 4 +-- youtube_dl/extractor/radiocanada.py | 2 +- youtube_dl/extractor/rtlnl.py | 2 +- youtube_dl/extractor/theplatform.py | 4 +-- youtube_dl/extractor/toutv.py | 2 +- youtube_dl/extractor/udemy.py | 2 +- youtube_dl/extractor/ustream.py | 2 +- youtube_dl/extractor/veehd.py | 2 +- youtube_dl/extractor/vevo.py | 2 +- youtube_dl/extractor/vimeo.py | 4 +-- youtube_dl/extractor/vk.py | 2 +- youtube_dl/extractor/vlive.py | 2 +- youtube_dl/extractor/yandexmusic.py | 4 +-- 
youtube_dl/extractor/youtube.py | 44 ++++++++++++------------- youtube_dl/options.py | 2 +- youtube_dl/update.py | 2 +- youtube_dl/utils.py | 18 +++++----- 61 files changed, 182 insertions(+), 182 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 5f97e2cbe..911e912a4 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,12 +6,12 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.09*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.09** ### Before submitting an *issue* make sure you have: -- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections -- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones +- [ ] At least skimmed through the [README](https://github.com/ytdl-org/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/ytdl-org/youtube-dl#faq) and [BUGS](https://github.com/ytdl-org/youtube-dl#bugs) sections +- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones - [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser ### What is the purpose of your 
*issue*? @@ -51,11 +51,11 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl - Single video: https://youtu.be/BaW_jenozKc - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc -Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/rg3/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights. +Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights. --- ### Description of your *issue*, suggested solution and other information -Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. +Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. If work on your *issue* requires account credentials please provide them or explain how one can obtain them. diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md index 8edbd5a0f..8b7e73417 100644 --- a/.github/ISSUE_TEMPLATE_tmpl.md +++ b/.github/ISSUE_TEMPLATE_tmpl.md @@ -6,12 +6,12 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. 
If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. - [ ] I've **verified** and **I assure** that I'm running youtube-dl **%(version)s** ### Before submitting an *issue* make sure you have: -- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections -- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones +- [ ] At least skimmed through the [README](https://github.com/ytdl-org/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/ytdl-org/youtube-dl#faq) and [BUGS](https://github.com/ytdl-org/youtube-dl#bugs) sections +- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones - [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser ### What is the purpose of your *issue*? @@ -51,11 +51,11 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl - Single video: https://youtu.be/BaW_jenozKc - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc -Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/rg3/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. 
In order for site support request to be accepted all provided example URLs should not violate any copyrights. +Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights. --- ### Description of your *issue*, suggested solution and other information -Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. +Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. If work on your *issue* requires account credentials please provide them or explain how one can obtain them. 
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index ba4ca7553..e69b907d8 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -7,8 +7,8 @@ --- ### Before submitting a *pull request* make sure you have: -- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections -- [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests +- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/ytdl-org/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/ytdl-org/youtube-dl#youtube-dl-coding-conventions) sections +- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests - [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) ### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6c1739860..cd9ccbe96 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -42,11 +42,11 @@ Before reporting any issue, type `youtube-dl -U`. This should report that you're ### Is the issue already documented? -Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/rg3/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". 
While some issues may be old, a new post into them often spurs rapid activity. +Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity. ### Why are existing options not enough? -Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. +Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. ### Is there enough context in your bug report? @@ -70,7 +70,7 @@ It may sound strange, but some bug reports we receive are completely unrelated t # DEVELOPER INSTRUCTIONS -Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution. +Most users do not need to build youtube-dl and can [download the builds](https://ytdl-org.github.io/youtube-dl/download.html) or get them from their distribution. To run youtube-dl as a developer, you don't need to build anything either. 
Simply execute @@ -98,7 +98,7 @@ If you want to add support for a new site, first of all **make sure** this site After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`): -1. [Fork this repository](https://github.com/rg3/youtube-dl/fork) +1. [Fork this repository](https://github.com/ytdl-org/youtube-dl/fork) 2. Check out the source code with: git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git @@ -150,9 +150,9 @@ After you have ensured this site is distributing its content legally, you can fo # TODO more properties (see youtube_dl/extractor/common.py) } ``` -5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). +5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. -7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want. +7. 
Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want. 8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart): $ flake8 youtube_dl/extractor/yourextractor.py @@ -177,7 +177,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou ### Mandatory and optional metafields -For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl: +For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl: - `id` (media identifier) - `title` (media title) @@ -185,7 +185,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. 
Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. -[Any field](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. +[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. #### Example @@ -341,7 +341,7 @@ Incorrect: ### Use convenience conversion and parsing functions -Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. +Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. Use `url_or_none` for safe URL processing. @@ -349,7 +349,7 @@ Use `try_get` for safe metadata extraction from parsed JSON. 
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. -Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. +Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. #### More examples diff --git a/README.md b/README.md index c1572f771..e476045b2 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![Build Status](https://travis-ci.org/rg3/youtube-dl.svg?branch=master)](https://travis-ci.org/rg3/youtube-dl) +[![Build Status](https://travis-ci.org/ytdl-org/youtube-dl.svg?branch=master)](https://travis-ci.org/ytdl-org/youtube-dl) youtube-dl - download videos from youtube.com or other video platforms @@ -43,7 +43,7 @@ Or with [MacPorts](https://www.macports.org/): sudo port install youtube-dl -Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html). +Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://ytdl-org.github.io/youtube-dl/download.html). # DESCRIPTION **youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. 
It is released to the public domain, which means you can modify it, redistribute it or use it however you like. @@ -685,7 +685,7 @@ You can merge the video and audio of two formats into a single file using `-f . Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)). +Bugs and suggestions should be reported at: . Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)). **Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this: ``` @@ -1342,11 +1342,11 @@ Before reporting any issue, type `youtube-dl -U`. This should report that you're ### Is the issue already documented? -Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/rg3/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity. +Make sure that someone has not already opened the issue you're trying to open. 
Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity. ### Why are existing options not enough? -Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. +Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. ### Is there enough context in your bug report? 
diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py index 1344b4d87..4a4295ba9 100644 --- a/devscripts/buildserver.py +++ b/devscripts/buildserver.py @@ -322,7 +322,7 @@ class GITBuilder(GITInfoBuilder): class YoutubeDLBuilder(object): - authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile'] + authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile', 'ytdl-org'] def __init__(self, **kwargs): if self.repoName != 'youtube-dl': diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py index 30716ad8e..428111b3f 100644 --- a/devscripts/create-github-release.py +++ b/devscripts/create-github-release.py @@ -27,8 +27,8 @@ from youtube_dl.utils import ( class GitHubReleaser(object): - _API_URL = 'https://api.github.com/repos/rg3/youtube-dl/releases' - _UPLOADS_URL = 'https://uploads.github.com/repos/rg3/youtube-dl/releases/%s/assets?name=%s' + _API_URL = 'https://api.github.com/repos/ytdl-org/youtube-dl/releases' + _UPLOADS_URL = 'https://uploads.github.com/repos/ytdl-org/youtube-dl/releases/%s/assets?name=%s' _NETRC_MACHINE = 'github.com' def __init__(self, debuglevel=0): diff --git a/devscripts/gh-pages/update-feed.py b/devscripts/gh-pages/update-feed.py index e93eb60fb..506a62377 100755 --- a/devscripts/gh-pages/update-feed.py +++ b/devscripts/gh-pages/update-feed.py @@ -10,7 +10,7 @@ import textwrap atom_template = textwrap.dedent("""\ - + youtube-dl releases https://yt-dl.org/feed/youtube-dl-updates-feed @TIMESTAMP@ @@ -21,7 +21,7 @@ entry_template = textwrap.dedent(""" https://yt-dl.org/feed/youtube-dl-updates-feed/youtube-dl-@VERSION@ New version @VERSION@ - +
Downloads available at https://yt-dl.org/downloads/@VERSION@/ diff --git a/devscripts/release.sh b/devscripts/release.sh index 4db5def5d..4c413bf6d 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -96,7 +96,7 @@ git push origin "$version" REV=$(git rev-parse HEAD) make youtube-dl youtube-dl.tar.gz read -p "VM running? (y/n) " -n 1 -wget "http://$buildserver/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe +wget "http://$buildserver/build/ytdl-org/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe mkdir -p "build/$version" mv youtube-dl youtube-dl.exe "build/$version" mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz" diff --git a/devscripts/show-downloads-statistics.py b/devscripts/show-downloads-statistics.py index e25d28411..6c8d1cc2d 100644 --- a/devscripts/show-downloads-statistics.py +++ b/devscripts/show-downloads-statistics.py @@ -24,7 +24,7 @@ total_bytes = 0 for page in itertools.count(1): releases = json.loads(compat_urllib_request.urlopen( - 'https://api.github.com/repos/rg3/youtube-dl/releases?page=%s' % page + 'https://api.github.com/repos/ytdl-org/youtube-dl/releases?page=%s' % page ).read().decode('utf-8')) if not releases: diff --git a/setup.py b/setup.py index dfb669ad2..af68b485e 100644 --- a/setup.py +++ b/setup.py @@ -104,7 +104,7 @@ setup( version=__version__, description=DESCRIPTION, long_description=LONG_DESCRIPTION, - url='https://github.com/rg3/youtube-dl', + url='https://github.com/ytdl-org/youtube-dl', author='Ricardo Garcia', author_email='ytdl@yt-dl.org', maintainer='Sergey M.', diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index f0aa8466b..da6cd39b6 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -201,7 +201,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ def test_parse_m3u8_formats(self): _TEST_CASES = [ ( - # https://github.com/rg3/youtube-dl/issues/11507 + # 
https://github.com/ytdl-org/youtube-dl/issues/11507 # http://pluzz.francetv.fr/videos/le_ministere.html 'pluzz_francetv_11507', 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais', @@ -263,7 +263,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ }] ), ( - # https://github.com/rg3/youtube-dl/issues/11995 + # https://github.com/ytdl-org/youtube-dl/issues/11995 # http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor 'teamcoco_11995', 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8', @@ -337,7 +337,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ }] ), ( - # https://github.com/rg3/youtube-dl/issues/12211 + # https://github.com/ytdl-org/youtube-dl/issues/12211 # http://video.toggle.sg/en/series/whoopie-s-world/ep3/478601 'toggle_mobile_12211', 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8', @@ -501,7 +501,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ }] ), ( - # https://github.com/rg3/youtube-dl/issues/18923 + # https://github.com/ytdl-org/youtube-dl/issues/18923 # https://www.ted.com/talks/boris_hesser_a_grassroots_healthcare_revolution_in_africa 'ted_18923', 'http://hls.ted.com/talks/31241.m3u8', @@ -570,9 +570,9 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ def test_parse_mpd_formats(self): _TEST_CASES = [ ( - # https://github.com/rg3/youtube-dl/issues/13919 + # https://github.com/ytdl-org/youtube-dl/issues/13919 # Also tests duplicate representation ids, see - # https://github.com/rg3/youtube-dl/issues/15111 + # https://github.com/ytdl-org/youtube-dl/issues/15111 'float_duration', 
'http://unknown/manifest.mpd', [{ @@ -652,7 +652,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'height': 1080, }] ), ( - # https://github.com/rg3/youtube-dl/pull/14844 + # https://github.com/ytdl-org/youtube-dl/pull/14844 'urls_only', 'http://unknown/manifest.mpd', [{ @@ -748,7 +748,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ def test_parse_f4m_formats(self): _TEST_CASES = [ ( - # https://github.com/rg3/youtube-dl/issues/14660 + # https://github.com/ytdl-org/youtube-dl/issues/14660 'custom_base_url', 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m', [{ diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 1d7452744..ce9666171 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -411,7 +411,7 @@ class TestFormatSelection(unittest.TestCase): # For extractors with incomplete formats (all formats are audio-only or # video-only) best and worst should fallback to corresponding best/worst # video-only or audio-only formats (as per - # https://github.com/rg3/youtube-dl/pull/5556) + # https://github.com/ytdl-org/youtube-dl/pull/5556) formats = [ {'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL}, {'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL}, @@ -442,7 +442,7 @@ class TestFormatSelection(unittest.TestCase): self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) def test_format_selection_issue_10083(self): - # See https://github.com/rg3/youtube-dl/issues/10083 + # See https://github.com/ytdl-org/youtube-dl/issues/10083 formats = [ {'format_id': 'regular', 'height': 360, 'url': TEST_URL}, {'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL}, @@ -853,7 +853,7 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(result, [2, 3, 4]) def test_urlopen_no_file_protocol(self): - # see 
https://github.com/rg3/youtube-dl/issues/8227 + # see https://github.com/ytdl-org/youtube-dl/issues/8227 ydl = YDL() self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd') diff --git a/test/test_all_urls.py b/test/test_all_urls.py index cd1cd4b24..465ce0050 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -110,7 +110,7 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user']) self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review']) - # https://github.com/rg3/youtube-dl/issues/1930 + # https://github.com/ytdl-org/youtube-dl/issues/1930 def test_soundcloud_not_matching_sets(self): self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set']) @@ -119,12 +119,12 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr']) def test_pbs(self): - # https://github.com/rg3/youtube-dl/issues/2350 + # https://github.com/ytdl-org/youtube-dl/issues/2350 self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs']) self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs']) def test_yahoo_https(self): - # https://github.com/rg3/youtube-dl/issues/2701 + # https://github.com/ytdl-org/youtube-dl/issues/2701 self.assertMatch( 'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', ['Yahoo']) diff --git a/youtube-dl.plugin.zsh b/youtube-dl.plugin.zsh index 4edab5214..17ab1341a 100644 --- a/youtube-dl.plugin.zsh +++ b/youtube-dl.plugin.zsh @@ -7,7 +7,7 @@ # https://github.com/zsh-users/antigen # Install youtube-dl: -# antigen bundle rg3/youtube-dl +# antigen bundle ytdl-org/youtube-dl # Bundles installed by antigen are available for use immediately. 
# Update youtube-dl (and all other antigen bundles): diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index bc9fc270c..3b92acd97 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -889,7 +889,7 @@ class YoutubeDL(object): # url_transparent. In such cases outer metadata (from ie_result) # should be propagated to inner one (info). For this to happen # _type of info should be overridden with url_transparent. This - # fixes issue from https://github.com/rg3/youtube-dl/pull/11163. + # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163. if new_result.get('_type') == 'url': new_result['_type'] = 'url_transparent' @@ -1606,7 +1606,7 @@ class YoutubeDL(object): # by extractor are incomplete or not (i.e. whether extractor provides only # video-only or audio-only formats) for proper formats selection for # extractors with such incomplete formats (see - # https://github.com/rg3/youtube-dl/pull/5556). + # https://github.com/ytdl-org/youtube-dl/pull/5556). # Since formats may be filtered during format selection and may not match # the original formats the results may be incorrect. Thus original formats # or pre-calculated metrics should be passed to format selection routines @@ -1614,7 +1614,7 @@ class YoutubeDL(object): # We will pass a context object containing all necessary additional data # instead of just formats. # This fixes incorrect format selection issue (see - # https://github.com/rg3/youtube-dl/issues/10083). + # https://github.com/ytdl-org/youtube-dl/issues/10083). 
incomplete_formats = ( # All formats are video-only or all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or @@ -1810,7 +1810,7 @@ class YoutubeDL(object): if sub_info.get('data') is not None: try: # Use newline='' to prevent conversion of newline characters - # See https://github.com/rg3/youtube-dl/issues/10268 + # See https://github.com/ytdl-org/youtube-dl/issues/10268 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile: subfile.write(sub_info['data']) except (OSError, IOError): @@ -2229,7 +2229,7 @@ class YoutubeDL(object): return if type('') is not compat_str: - # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326) + # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326) self.report_warning( 'Your Python is broken! Update to a newer and supported version') @@ -2323,7 +2323,7 @@ class YoutubeDL(object): proxies = {'http': opts_proxy, 'https': opts_proxy} else: proxies = compat_urllib_request.getproxies() - # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) + # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805) if 'http' in proxies and 'https' not in proxies: proxies['https'] = proxies['http'] proxy_handler = PerRequestProxyHandler(proxies) @@ -2336,7 +2336,7 @@ class YoutubeDL(object): # When passing our own FileHandler instance, build_opener won't add the # default FileHandler and allows us to disable the file protocol, which # can be used for malicious purposes (see - # https://github.com/rg3/youtube-dl/issues/8227) + # https://github.com/ytdl-org/youtube-dl/issues/8227) file_handler = compat_urllib_request.FileHandler() def file_open(*args, **kwargs): @@ -2348,7 +2348,7 @@ class YoutubeDL(object): # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play - # (See 
https://github.com/rg3/youtube-dl/issues/1309 for details) + # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details) opener.addheaders = [] self._opener = opener diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index ba435ea42..94788d936 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -48,7 +48,7 @@ from .YoutubeDL import YoutubeDL def _real_main(argv=None): # Compatibility fixes for Windows if sys.platform == 'win32': - # https://github.com/rg3/youtube-dl/issues/820 + # https://github.com/ytdl-org/youtube-dl/issues/820 codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None) workaround_optparse_bug9161() diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index b2fe62f12..7992a23ca 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2364,7 +2364,7 @@ except ImportError: # Python 2 # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus # implementations from cpython 3.4.3's stdlib. Python 2's version - # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244) + # is apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244) def compat_urllib_parse_unquote_to_bytes(string): """unquote_to_bytes('abc%20def') -> b'abc def'.""" @@ -2828,7 +2828,7 @@ else: compat_socket_create_connection = socket.create_connection -# Fix https://github.com/rg3/youtube-dl/issues/4223 +# Fix https://github.com/ytdl-org/youtube-dl/issues/4223 # See http://bugs.python.org/issue9161 for what is broken def workaround_optparse_bug9161(): op = optparse.OptionParser() @@ -2953,7 +2953,7 @@ if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, # PyPy2 prior to version 5.4.0 expects byte strings as Windows function # names, see the original PyPy issue [1] and the youtube-dl one [2]. # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name - # 2. 
https://github.com/rg3/youtube-dl/pull/4392 + # 2. https://github.com/ytdl-org/youtube-dl/pull/4392 def compat_ctypes_WINFUNCTYPE(*args, **kwargs): real = ctypes.WINFUNCTYPE(*args, **kwargs) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 22e6093b3..5f73f7f0f 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -239,7 +239,7 @@ class FFmpegFD(ExternalFD): # setting -seekable prevents ffmpeg from guessing if the server # supports seeking(by adding the header `Range: bytes=0-`), which # can cause problems in some cases - # https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127 + # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127 # http://trac.ffmpeg.org/ticket/6125#comment:10 args += ['-seekable', '1' if seekable else '0'] @@ -334,7 +334,7 @@ class FFmpegFD(ExternalFD): # mp4 file couldn't be played, but if we ask ffmpeg to quit it # produces a file that is playable (this is mostly useful for live # streams). Note that Windows is not affected and produces playable - # files (see https://github.com/rg3/youtube-dl/issues/8300). + # files (see https://github.com/ytdl-org/youtube-dl/issues/8300). if sys.platform != 'win32': proc.communicate(b'q') raise diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 15e71be9a..9b15a0e15 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -324,8 +324,8 @@ class F4mFD(FragmentFD): urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.geturl() # Some manifests may be malformed, e.g. 
prosiebensat1 generated manifests - # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244 - # and https://github.com/rg3/youtube-dl/issues/7823) + # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244 + # and https://github.com/ytdl-org/youtube-dl/issues/7823) manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip() doc = compat_etree_fromstring(manifest) @@ -409,7 +409,7 @@ class F4mFD(FragmentFD): # In tests, segments may be truncated, and thus # FlvReader may not be able to parse the whole # chunk. If so, write the segment as is - # See https://github.com/rg3/youtube-dl/issues/9214 + # See https://github.com/ytdl-org/youtube-dl/issues/9214 dest_stream.write(down_data) break raise diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 4def8e2d5..419e73576 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -152,8 +152,8 @@ class HlsFD(FragmentFD): except compat_urllib_error.HTTPError as err: # Unavailable (possibly temporary) fragments may be served. # First we try to retry then either skip or abort. - # See https://github.com/rg3/youtube-dl/issues/10165, - # https://github.com/rg3/youtube-dl/issues/10448). + # See https://github.com/ytdl-org/youtube-dl/issues/10165, + # https://github.com/ytdl-org/youtube-dl/issues/10448). count += 1 if count <= fragment_retries: self.report_retry_fragment(err, frag_index, count, fragment_retries) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 5b1e96013..08670ee3c 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -111,7 +111,7 @@ class HttpFD(FileDownloader): # to match the value of requested Range HTTP header. 
This is due to a webservers # that don't support resuming and serve a whole file with no Content-Range # set in response despite of requested Range (see - # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799) + # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799) if has_range: content_range = ctx.data.headers.get('Content-Range') if content_range: diff --git a/youtube_dl/extractor/arkena.py b/youtube_dl/extractor/arkena.py index 4495ddbb0..854f58767 100644 --- a/youtube_dl/extractor/arkena.py +++ b/youtube_dl/extractor/arkena.py @@ -103,7 +103,7 @@ class ArkenaIE(InfoExtractor): f_url, video_id, mpd_id=kind, fatal=False)) elif kind == 'silverlight': # TODO: process when ism is supported (see - # https://github.com/rg3/youtube-dl/issues/8118) + # https://github.com/ytdl-org/youtube-dl/issues/8118) continue else: tbr = float_or_none(f.get('Bitrate'), 1000) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index 34f1b3d83..4400ff9c1 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -23,7 +23,7 @@ class BambuserIE(InfoExtractor): _TEST = { 'url': 'http://bambuser.com/v/4050584', - # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388 + # MD5 seems to be flaky, see https://travis-ci.org/ytdl-org/youtube-dl/jobs/14051016#L388 # 'md5': 'fba8f7693e48fd4e8641b3fd5539a641', 'info_dict': { 'id': '4050584', @@ -38,7 +38,7 @@ class BambuserIE(InfoExtractor): }, 'params': { # It doesn't respect the 'Range' header, it would download the whole video - # caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59 + # caused the travis builds to fail: https://travis-ci.org/ytdl-org/youtube-dl/jobs/14493845#L59 'skip_download': True, }, } diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index d479d2577..e76507951 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ 
-208,7 +208,7 @@ class BBCCoUkIE(InfoExtractor): }, 'skip': 'Now it\'s really geo-restricted', }, { - # compact player (https://github.com/rg3/youtube-dl/issues/8147) + # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147) 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player', 'info_dict': { 'id': 'p028bfkj', diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 465ae396e..c0345e2c3 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -126,7 +126,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'playlist_mincount': 7, }, { - # playlist with 'playlistTab' (https://github.com/rg3/youtube-dl/issues/9965) + # playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965) 'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg', 'info_dict': { 'id': '1522758701001', @@ -155,10 +155,10 @@ class BrightcoveLegacyIE(InfoExtractor): {params} """ - # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553 + # Fix up some stupid HTML, see https://github.com/ytdl-org/youtube-dl/issues/1553 object_str = re.sub(r'(', lambda m: m.group(1) + '/>', object_str) - # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608 + # Fix up some stupid XML, see https://github.com/ytdl-org/youtube-dl/issues/1608 object_str = object_str.replace('<--', ' %s' % ( - srt_subtitles_timecode(start), - srt_subtitles_timecode(end)), - text, - os.linesep, - )) + alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0) + ssa += os.linesep + 'Dialogue:Marked=0,%s,%s,Default,%s%s' % ( + self._ass_subtitles_timecode(start), + self._ass_subtitles_timecode(end), + '{\\a%d}' % alignment if alignment != 2 else '', + text.replace('\n', '\\N').replace('', '{\\i1}').replace('', '{\\i0}')) if sub_lang == 'vostf': 
sub_lang = 'fr' @@ -91,8 +106,8 @@ class ADNIE(InfoExtractor): 'ext': 'json', 'data': json.dumps(sub), }, { - 'ext': 'srt', - 'data': srt, + 'ext': 'ssa', + 'data': ssa, }]) return subtitles @@ -100,7 +115,15 @@ class ADNIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) player_config = self._parse_json(self._search_regex( - r'playerConfig\s*=\s*({.+});', webpage, 'player config'), video_id) + r'playerConfig\s*=\s*({.+});', webpage, + 'player config', default='{}'), video_id, fatal=False) + if not player_config: + config_url = urljoin(self._BASE_URL, self._search_regex( + r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"', + webpage, 'config url')) + player_config = self._download_json( + config_url, video_id, + 'Downloading player config JSON metadata')['player'] video_info = {} video_info_str = self._search_regex( @@ -129,12 +152,15 @@ class ADNIE(InfoExtractor): encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n)) authorization = base64.b64encode(encrypted_message).decode() links_data = self._download_json( - urljoin(self._BASE_URL, links_url), video_id, headers={ + urljoin(self._BASE_URL, links_url), video_id, + 'Downloading links JSON metadata', headers={ 'Authorization': 'Bearer ' + authorization, }) links = links_data.get('links') or {} metas = metas or links_data.get('meta') or {} - sub_path = (sub_path or links_data.get('subtitles')) + '&token=' + token + sub_path = sub_path or links_data.get('subtitles') or \ + 'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id + sub_path += '&token=' + token error = links_data.get('error') title = metas.get('title') or video_info['title'] @@ -142,9 +168,11 @@ class ADNIE(InfoExtractor): for format_id, qualities in links.items(): if not isinstance(qualities, dict): continue - for load_balancer_url in qualities.values(): + for quality, load_balancer_url in qualities.items(): load_balancer_data = 
self._download_json( - load_balancer_url, video_id, fatal=False) or {} + load_balancer_url, video_id, + 'Downloading %s %s JSON metadata' % (format_id, quality), + fatal=False) or {} m3u8_url = load_balancer_data.get('location') if not m3u8_url: continue From 2bbde1d09afb2225fb7bd245bcf77a0715a58f29 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 4 Apr 2019 17:59:20 +0100 Subject: [PATCH 205/785] [adn] fix subtitle compatibility with ffmpeg --- youtube_dl/extractor/adn.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py index 2eb4d1dc7..1e04a55a6 100644 --- a/youtube_dl/extractor/adn.py +++ b/youtube_dl/extractor/adn.py @@ -81,10 +81,10 @@ class ADNIE(InfoExtractor): ssa = '''[Script Info] ScriptType:V4.00 [V4 Styles] -Format:Name,Fontname,Fontsize,PrimaryColour,Bold,BorderStyle,Outline,Alignment,MarginL,MarginR,MarginV -Style:Default,Arial,18,16777215,-1,1,1,2,20,20,20 +Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,TertiaryColour,BackColour,Bold,Italic,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,AlphaLevel,Encoding +Style: Default,Arial,18,16777215,16777215,16777215,0,-1,0,1,1,0,2,20,20,20,0,0 [Events] -Format:Marked,Start,End,Style,Text''' +Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' for current in sub: start, end, text, line_align, position_align = ( float_or_none(current.get('startTime')), @@ -94,7 +94,7 @@ Format:Marked,Start,End,Style,Text''' if start is None or end is None or text is None: continue alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0) - ssa += os.linesep + 'Dialogue:Marked=0,%s,%s,Default,%s%s' % ( + ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % ( self._ass_subtitles_timecode(start), self._ass_subtitles_timecode(end), '{\\a%d}' % alignment if alignment != 2 else '', From 69e6efac1669da68c0746419657160311cde2671 Mon Sep 17 
00:00:00 2001 From: Remita Amine Date: Fri, 5 Apr 2019 08:26:04 +0100 Subject: [PATCH 206/785] [teamcoco] fix extraction and add suport for subdomains(closes #17099)(closes #20339) --- youtube_dl/extractor/teamcoco.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 73469cc5d..7640cf00a 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -16,7 +16,7 @@ from ..utils import ( class TeamcocoIE(TurnerBaseIE): - _VALID_URL = r'https?://teamcoco\.com/(?P([^/]+/)*[^/?#]+)' + _VALID_URL = r'https?://(?:\w+\.)?teamcoco\.com/(?P([^/]+/)*[^/?#]+)' _TESTS = [ { 'url': 'http://teamcoco.com/video/mary-kay-remote', @@ -79,15 +79,20 @@ class TeamcocoIE(TurnerBaseIE): }, { 'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv', 'only_matching': True, + }, { + 'url': 'https://conan25.teamcoco.com/video/ice-cube-kevin-hart-conan-share-lyft', + 'only_matching': True, } ] def _graphql_call(self, query_template, object_type, object_id): find_object = 'find' + object_type return self._download_json( - 'http://teamcoco.com/graphql/', object_id, data=json.dumps({ + 'https://teamcoco.com/graphql', object_id, data=json.dumps({ 'query': query_template % (find_object, object_id) - }))['data'][find_object] + }).encode(), headers={ + 'Content-Type': 'application/json', + })['data'][find_object] def _real_extract(self, url): display_id = self._match_id(url) @@ -145,7 +150,12 @@ class TeamcocoIE(TurnerBaseIE): 'accessTokenType': 'jws', })) else: - video_sources = self._graphql_call('''{ + d = self._download_json( + 'https://teamcoco.com/_truman/d/' + video_id, + video_id, fatal=False) or {} + video_sources = d.get('meta') or {} + if not video_sources: + video_sources = self._graphql_call('''{ %s(id: "%s") { src } From afb74964162eaee64c5c9b72990837daae945fec Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Apr 2019 
11:45:49 +0100 Subject: [PATCH 207/785] [adultswim] fix extraction(closes #18025) --- youtube_dl/extractor/adultswim.py | 194 ++++++++++++++++++------------ 1 file changed, 118 insertions(+), 76 deletions(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 88c96a950..8d1d9ac7d 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -1,13 +1,19 @@ # coding: utf-8 from __future__ import unicode_literals +import json import re from .turner import TurnerBaseIE from ..utils import ( + determine_ext, + float_or_none, int_or_none, + mimetype2ext, + parse_age_limit, + parse_iso8601, strip_or_none, - url_or_none, + try_get, ) @@ -21,8 +27,8 @@ class AdultSwimIE(TurnerBaseIE): 'ext': 'mp4', 'title': 'Rick and Morty - Pilot', 'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.', - 'timestamp': 1493267400, - 'upload_date': '20170427', + 'timestamp': 1543294800, + 'upload_date': '20181127', }, 'params': { # m3u8 download @@ -43,6 +49,7 @@ class AdultSwimIE(TurnerBaseIE): # m3u8 download 'skip_download': True, }, + 'skip': '404 Not Found', }, { 'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/', 'info_dict': { @@ -61,9 +68,9 @@ class AdultSwimIE(TurnerBaseIE): }, { 'url': 'http://www.adultswim.com/videos/attack-on-titan', 'info_dict': { - 'id': 'b7A69dzfRzuaXIECdxW8XQ', + 'id': 'attack-on-titan', 'title': 'Attack on Titan', - 'description': 'md5:6c8e003ea0777b47013e894767f5e114', + 'description': 'md5:41caa9416906d90711e31dc00cb7db7e', }, 'playlist_mincount': 12, }, { @@ -78,83 +85,118 @@ class AdultSwimIE(TurnerBaseIE): # m3u8 download 'skip_download': True, }, + 'skip': '404 Not Found', }] def _real_extract(self, url): show_path, episode_path = re.match(self._VALID_URL, url).groups() display_id = episode_path or show_path - webpage = self._download_webpage(url, display_id) - initial_data = 
self._parse_json(self._search_regex( - r'AS_INITIAL_DATA(?:__)?\s*=\s*({.+?});', - webpage, 'initial data'), display_id) - - is_stream = show_path == 'streams' - if is_stream: - if not episode_path: - episode_path = 'live-stream' - - video_data = next(stream for stream_path, stream in initial_data['streams'].items() if stream_path == episode_path) - video_id = video_data.get('stream') - - if not video_id: - entries = [] - for episode in video_data.get('archiveEpisodes', []): - episode_url = url_or_none(episode.get('url')) - if not episode_url: - continue - entries.append(self.url_result( - episode_url, 'AdultSwim', episode.get('id'))) - return self.playlist_result( - entries, video_data.get('id'), video_data.get('title'), - strip_or_none(video_data.get('description'))) + query = '''query { + getShowBySlug(slug:"%s") { + %%s + } +}''' % show_path + if episode_path: + query = query % '''title + getVideoBySlug(slug:"%s") { + _id + auth + description + duration + episodeNumber + launchDate + mediaID + seasonNumber + poster + title + tvRating + }''' % episode_path + ['getVideoBySlug'] else: - show_data = initial_data['show'] + query = query % '''metaDescription + title + videos(first:1000,sort:["episode_number"]) { + edges { + node { + _id + slug + } + } + }''' + show_data = self._download_json( + 'https://www.adultswim.com/api/search', display_id, + data=json.dumps({'query': query}).encode(), + headers={'Content-Type': 'application/json'})['data']['getShowBySlug'] + if episode_path: + video_data = show_data['getVideoBySlug'] + video_id = video_data['_id'] + episode_title = title = video_data['title'] + series = show_data.get('title') + if series: + title = '%s - %s' % (series, title) + info = { + 'id': video_id, + 'title': title, + 'description': strip_or_none(video_data.get('description')), + 'duration': float_or_none(video_data.get('duration')), + 'formats': [], + 'subtitles': {}, + 'age_limit': parse_age_limit(video_data.get('tvRating')), + 'thumbnail': 
video_data.get('poster'), + 'timestamp': parse_iso8601(video_data.get('launchDate')), + 'series': series, + 'season_number': int_or_none(video_data.get('seasonNumber')), + 'episode': episode_title, + 'episode_number': int_or_none(video_data.get('episodeNumber')), + } - if not episode_path: - entries = [] - for video in show_data.get('videos', []): - slug = video.get('slug') - if not slug: + auth = video_data.get('auth') + media_id = video_data.get('mediaID') + if media_id: + info.update(self._extract_ngtv_info(media_id, { + # CDN_TOKEN_APP_ID from: + # https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js + 'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE', + }, { + 'url': url, + 'site_name': 'AdultSwim', + 'auth_required': auth, + })) + + if not auth: + extract_data = self._download_json( + 'https://www.adultswim.com/api/shows/v1/videos/' + video_id, + video_id, query={'fields': 'stream'}, fatal=False) or {} + assets = try_get(extract_data, lambda x: x['data']['video']['stream']['assets'], list) or [] + for asset in assets: + asset_url = asset.get('url') + if not asset_url: continue - entries.append(self.url_result( - 'http://adultswim.com/videos/%s/%s' % (show_path, slug), - 'AdultSwim', video.get('id'))) - return self.playlist_result( - entries, show_data.get('id'), show_data.get('title'), - strip_or_none(show_data.get('metadata', {}).get('description'))) + ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type'))) + if ext == 'm3u8': + info['formats'].extend(self._extract_m3u8_formats( + asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + elif ext == 'f4m': + continue + # info['formats'].extend(self._extract_f4m_formats( + # asset_url, video_id, f4m_id='hds', fatal=False)) + elif ext in ('scc', 'ttml', 'vtt'): + 
info['subtitles'].setdefault('en', []).append({ + 'url': asset_url, + }) + self._sort_formats(info['formats']) - video_data = show_data['sluggedVideo'] - video_id = video_data['id'] - - info = self._extract_cvp_info( - 'http://www.adultswim.com/videos/api/v0/assets?platform=desktop&id=' + video_id, - video_id, { - 'secure': { - 'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big', - 'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do', - }, - }, { - 'url': url, - 'site_name': 'AdultSwim', - 'auth_required': video_data.get('auth'), - }) - - info.update({ - 'id': video_id, - 'display_id': display_id, - 'description': info.get('description') or strip_or_none(video_data.get('description')), - }) - if not is_stream: - info.update({ - 'duration': info.get('duration') or int_or_none(video_data.get('duration')), - 'timestamp': info.get('timestamp') or int_or_none(video_data.get('launch_date')), - 'season_number': info.get('season_number') or int_or_none(video_data.get('season_number')), - 'episode': info['title'], - 'episode_number': info.get('episode_number') or int_or_none(video_data.get('episode_number')), - }) - - info['series'] = video_data.get('collection_title') or info.get('series') - if info['series'] and info['series'] != info['title']: - info['title'] = '%s - %s' % (info['series'], info['title']) - - return info + return info + else: + entries = [] + for edge in show_data.get('videos', {}).get('edges', []): + video = edge.get('node') or {} + slug = video.get('slug') + if not slug: + continue + entries.append(self.url_result( + 'http://adultswim.com/videos/%s/%s' % (show_path, slug), + 'AdultSwim', video.get('_id'))) + return self.playlist_result( + entries, show_path, show_data.get('title'), + strip_or_none(show_data.get('metaDescription'))) From 19041a38773b1022480d50587da7db4a0e6fa869 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Apr 2019 16:18:57 +0100 Subject: [PATCH 208/785] [youtube] 
extract srv[1-3] subtitle formats(#20566) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 886fc1591..132572c88 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -484,7 +484,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # RTMP (unnamed) '_rtmp': {'protocol': 'rtmp'}, } - _SUBTITLE_FORMATS = ('ttml', 'vtt') + _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') _GEO_BYPASS = False From a7978f8e2acc8c34989b7f289a16180e71902193 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Apr 2019 18:08:43 +0100 Subject: [PATCH 209/785] [hbo] fix extraction and extract subtitles(closes #14629)(closes #13709) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/hbo.py | 102 +++++++++++------------------ 2 files changed, 39 insertions(+), 68 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 254dbc946..c1c5d1953 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -441,10 +441,7 @@ from .goshgay import GoshgayIE from .gputechconf import GPUTechConfIE from .groupon import GrouponIE from .hark import HarkIE -from .hbo import ( - HBOIE, - HBOEpisodeIE, -) +from .hbo import HBOIE from .hearthisat import HearThisAtIE from .heise import HeiseIE from .hellporno import HellPornoIE diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py index 859ad5429..44440233d 100644 --- a/youtube_dl/extractor/hbo.py +++ b/youtube_dl/extractor/hbo.py @@ -4,16 +4,28 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( xpath_text, xpath_element, int_or_none, parse_duration, + urljoin, ) -class HBOBaseIE(InfoExtractor): +class HBOIE(InfoExtractor): + IE_NAME = 'hbo' + _VALID_URL = 
r'https?://(?:www\.)?hbo\.com/(?:video|embed)(?:/[^/]+)*/(?P[^/?#]+)' + _TEST = { + 'url': 'https://www.hbo.com/video/game-of-thrones/seasons/season-8/videos/trailer', + 'md5': '8126210656f433c452a21367f9ad85b3', + 'info_dict': { + 'id': '22113301', + 'ext': 'mp4', + 'title': 'Game of Thrones - Trailer', + }, + 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'], + } _FORMATS_INFO = { 'pro7': { 'width': 1280, @@ -53,10 +65,17 @@ class HBOBaseIE(InfoExtractor): }, } - def _extract_from_id(self, video_id): - video_data = self._download_xml( - 'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id) - title = xpath_text(video_data, 'title', 'title', True) + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + location_path = self._parse_json(self._html_search_regex( + r'data-state="({.+?})"', webpage, 'state'), display_id)['video']['locationUrl'] + video_data = self._download_xml(urljoin(url, location_path), display_id) + video_id = xpath_text(video_data, 'id', fatal=True) + episode_title = title = xpath_text(video_data, 'title', fatal=True) + series = xpath_text(video_data, 'program') + if series: + title = '%s - %s' % (series, title) formats = [] for source in xpath_element(video_data, 'videos', 'sources', True): @@ -128,68 +147,23 @@ class HBOBaseIE(InfoExtractor): 'width': width, }) + subtitles = None + caption_url = xpath_text(video_data, 'captionUrl') + if caption_url: + subtitles = { + 'en': [{ + 'url': caption_url, + 'ext': 'ttml' + }], + } + return { 'id': video_id, 'title': title, 'duration': parse_duration(xpath_text(video_data, 'duration/tv14')), + 'series': series, + 'episode': episode_title, 'formats': formats, 'thumbnails': thumbnails, + 'subtitles': subtitles, } - - -class HBOIE(HBOBaseIE): - IE_NAME = 'hbo' - _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P[0-9]+)' - _TEST = { - 'url': 
'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839', - 'md5': '2c6a6bc1222c7e91cb3334dad1746e5a', - 'info_dict': { - 'id': '1437839', - 'ext': 'mp4', - 'title': 'Ep. 64 Clip: Encryption', - 'thumbnail': r're:https?://.*\.jpg$', - 'duration': 1072, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - return self._extract_from_id(video_id) - - -class HBOEpisodeIE(HBOBaseIE): - IE_NAME = 'hbo:episode' - _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?P(?!video)(?:(?:[^/]+/)+video|watch-free-episodes)/(?P[0-9a-z-]+))(?:\.html)?' - - _TESTS = [{ - 'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true', - 'md5': '61ead79b9c0dfa8d3d4b07ef4ac556fb', - 'info_dict': { - 'id': '1439518', - 'display_id': 'ep-52-inside-the-episode', - 'ext': 'mp4', - 'title': 'Ep. 52: Inside the Episode', - 'thumbnail': r're:https?://.*\.jpg$', - 'duration': 240, - }, - }, { - 'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true', - 'only_matching': True, - }, { - 'url': 'http://www.hbo.com/watch-free-episodes/last-week-tonight-with-john-oliver', - 'only_matching': True, - }] - - def _real_extract(self, url): - path, display_id = re.match(self._VALID_URL, url).groups() - - content = self._download_json( - 'http://www.hbo.com/api/content/' + path, display_id)['content'] - - video_id = compat_str((content.get('parsed', {}).get( - 'common:FullBleedVideo', {}) or content['selectedEpisode'])['videoId']) - - info_dict = self._extract_from_id(video_id) - info_dict['display_id'] = display_id - - return info_dict From 4810655cd65f9bdde1ad240adf7334828243fb0a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Apr 2019 19:35:35 +0100 Subject: [PATCH 210/785] [bfi:player] Add new extractor(#19235) --- youtube_dl/extractor/bfi.py | 37 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 38 insertions(+) create mode 
100644 youtube_dl/extractor/bfi.py diff --git a/youtube_dl/extractor/bfi.py b/youtube_dl/extractor/bfi.py new file mode 100644 index 000000000..60c8944b5 --- /dev/null +++ b/youtube_dl/extractor/bfi.py @@ -0,0 +1,37 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import extract_attributes + + +class BFIPlayerIE(InfoExtractor): + IE_NAME = 'bfi:player' + _VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P[\w-]+)-online' + _TEST = { + 'url': 'https://player.bfi.org.uk/free/film/watch-computer-doctor-1974-online', + 'md5': 'e8783ebd8e061ec4bc6e9501ed547de8', + 'info_dict': { + 'id': 'htNnhlZjE60C9VySkQEIBtU-cNV1Xx63', + 'ext': 'mp4', + 'title': 'Computer Doctor', + 'description': 'md5:fb6c240d40c4dbe40428bdd62f78203b', + }, + 'skip': 'BFI Player films cannot be played outside of the UK', + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + entries = [] + for player_el in re.findall(r'(?s)<[^>]+class="player"[^>]*>', webpage): + player_attr = extract_attributes(player_el) + ooyala_id = player_attr.get('data-video-id') + if not ooyala_id: + continue + entries.append(self.url_result( + 'ooyala:' + ooyala_id, 'Ooyala', + ooyala_id, player_attr.get('data-label'))) + return self.playlist_result(entries) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c1c5d1953..01119f2bb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -107,6 +107,7 @@ from .behindkink import BehindKinkIE from .bellmedia import BellMediaIE from .beatport import BeatportIE from .bet import BetIE +from .bfi import BFIPlayerIE from .bigflix import BigflixIE from .bild import BildIE from .bilibili import ( From 9f182c23ba74f76ff716940b2a0568f428d6c2b9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 6 Apr 2019 09:22:25 +0100 Subject: [PATCH 211/785] [vrv] add 
basic support for individual movie links(#19229) --- youtube_dl/extractor/vrv.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index 6c060ae76..c11da97de 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -150,9 +150,10 @@ class VRVIE(VRVBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - episode_path = self._get_cms_resource( - 'cms:/episodes/' + video_id, video_id) - video_data = self._call_cms(episode_path, video_id, 'video') + object_data = self._call_cms(self._get_cms_resource( + 'cms:/objects/' + video_id, video_id), video_id, 'object')['items'][0] + resource_path = object_data['__links__']['resource']['href'] + video_data = self._call_cms(resource_path, video_id, 'video') title = video_data['title'] streams_path = video_data['__links__'].get('streams', {}).get('href') From b9aad6c427ea083143d1e53c0f087ad508bd706a Mon Sep 17 00:00:00 2001 From: Jan Friesse Date: Sat, 9 Feb 2019 10:15:53 +0100 Subject: [PATCH 212/785] [dvtv] Fix extraction (closes #18514) --- youtube_dl/extractor/dvtv.py | 120 +++++++++++++++++++++-------------- 1 file changed, 73 insertions(+), 47 deletions(-) diff --git a/youtube_dl/extractor/dvtv.py b/youtube_dl/extractor/dvtv.py index 20996962a..bc68d07d1 100644 --- a/youtube_dl/extractor/dvtv.py +++ b/youtube_dl/extractor/dvtv.py @@ -10,7 +10,9 @@ from ..utils import ( int_or_none, js_to_json, mimetype2ext, + try_get, unescapeHTML, + parse_iso8601, ) @@ -28,6 +30,8 @@ class DVTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně', 'duration': 1484, + 'upload_date': '20141217', + 'timestamp': 1418792400, } }, { 'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/', @@ -84,6 +88,8 @@ class DVTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Zeman si jen léčí mindráky, Sobotku nenávidí a 
Babiš se mu teď hodí, tvrdí Kmenta', 'duration': 1103, + 'upload_date': '20170511', + 'timestamp': 1494514200, }, 'params': { 'skip_download': True, @@ -91,43 +97,62 @@ class DVTVIE(InfoExtractor): }, { 'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/', 'only_matching': True, + }, { + # Test live stream video (liveStarter) parsing + 'url': 'https://video.aktualne.cz/dvtv/zive-mistryne-sveta-eva-samkova-po-navratu-ze-sampionatu/r~182654c2288811e990fd0cc47ab5f122/', + 'md5': '2e552e483f2414851ca50467054f9d5d', + 'info_dict': { + 'id': '8d116360288011e98c840cc47ab5f122', + 'ext': 'mp4', + 'title': 'Živě: Mistryně světa Eva Samková po návratu ze šampionátu', + 'upload_date': '20190204', + 'timestamp': 1549289591, + }, + 'params': { + # Video content is no longer available + 'skip_download': True, + }, }] - def _parse_video_metadata(self, js, video_id, live_js=None): + def _parse_video_metadata(self, js, video_id, timestamp): + data = self._parse_json(js, video_id, transform_source=js_to_json) - if live_js: - data.update(self._parse_json( - live_js, video_id, transform_source=js_to_json)) + + live_starter = try_get(data, lambda x: x['plugins']['liveStarter'], dict) + if live_starter: + data.update(live_starter) title = unescapeHTML(data['title']) formats = [] - for video in data['sources']: - video_url = video.get('file') - if not video_url: - continue - video_type = video.get('type') - ext = determine_ext(video_url, mimetype2ext(video_type)) - if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif video_type == 'application/dash+xml' or ext == 'mpd': - formats.extend(self._extract_mpd_formats( - video_url, video_id, mpd_id='dash', fatal=False)) - else: - label = video.get('label') - height = self._search_regex( - r'^(\d+)[pP]', label or '', 
'height', default=None) - format_id = ['http'] - for f in (ext, label): - if f: - format_id.append(f) - formats.append({ - 'url': video_url, - 'format_id': '-'.join(format_id), - 'height': int_or_none(height), - }) + + for tracks in data.get('tracks', {}).values(): + for video in tracks: + video_url = video.get('src') + if not video_url: + continue + video_type = video.get('type') + ext = determine_ext(video_url, mimetype2ext(video_type)) + if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif video_type == 'application/dash+xml' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + video_url, video_id, mpd_id='dash', fatal=False)) + else: + label = video.get('label') + height = self._search_regex( + r'^(\d+)[pP]', label or '', 'height', default=None) + format_id = ['http'] + for f in (ext, label): + if f: + format_id.append(f) + formats.append({ + 'url': video_url, + 'format_id': '-'.join(format_id), + 'height': int_or_none(height), + }) self._sort_formats(formats) return { @@ -136,7 +161,7 @@ class DVTVIE(InfoExtractor): 'description': data.get('description'), 'thumbnail': data.get('image'), 'duration': int_or_none(data.get('duration')), - 'timestamp': int_or_none(data.get('pubtime')), + 'timestamp': int_or_none(timestamp), 'formats': formats } @@ -145,32 +170,33 @@ class DVTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - # live content - live_item = self._search_regex( - r'(?s)embedData[0-9a-f]{32}\.asset\.liveStarter\s*=\s*(\{.+?\});', - webpage, 'video', default=None) - - # single video - item = self._search_regex( - r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});', - webpage, 'video', default=None) - - if item: - return self._parse_video_metadata(item, video_id, live_item) + timestamp = parse_iso8601(self._html_search_meta( + 'article:published_time', webpage, 
'published time', default=None)) # playlist items = re.findall( - r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);", + r"(?s)playlist\.push\(({.+?})\);", webpage) - if not items: - items = re.findall(r'(?s)var\s+asset\s*=\s*({.+?});\n', webpage) if items: return { '_type': 'playlist', 'id': video_id, 'title': self._og_search_title(webpage), - 'entries': [self._parse_video_metadata(i, video_id) for i in items] + 'entries': [self._parse_video_metadata(i, video_id, timestamp) for i in items] } + # single video + item = self._search_regex( + r'(?s)BBXPlayer.setup\((.+?)\);', + webpage, 'video', default=None) + + if item: + # remove function calls (ex. htmldeentitize) + # TODO this should be fixed in a general way in the js_to_json + item = re.sub(r'\w+?\((.+)\)', r'\1', item) + + if item: + return self._parse_video_metadata(item, video_id, timestamp) + raise ExtractorError('Could not find neither video nor playlist') From 19591facea9abe965a734224bea33948bb57b5f4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 6 Apr 2019 16:29:30 +0100 Subject: [PATCH 213/785] [dvtv] remove unnecessary comments and spaces --- youtube_dl/extractor/dvtv.py | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/dvtv.py b/youtube_dl/extractor/dvtv.py index bc68d07d1..de7f6d670 100644 --- a/youtube_dl/extractor/dvtv.py +++ b/youtube_dl/extractor/dvtv.py @@ -19,9 +19,7 @@ from ..utils import ( class DVTVIE(InfoExtractor): IE_NAME = 'dvtv' IE_DESC = 'http://video.aktualne.cz/' - _VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P[0-9a-f]{32})' - _TESTS = [{ 'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/', 'md5': '67cb83e4a955d36e1b5d31993134a0c2', @@ -36,7 +34,7 @@ class DVTVIE(InfoExtractor): }, { 'url': 
'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/', 'info_dict': { - 'title': r're:^DVTV 16\. 12\. 2014: útok Talibanu, boj o kliniku, uprchlíci', + 'title': r'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci', 'id': '973eb3bc854e11e498be002590604f2e', }, 'playlist': [{ @@ -115,17 +113,14 @@ class DVTVIE(InfoExtractor): }] def _parse_video_metadata(self, js, video_id, timestamp): - data = self._parse_json(js, video_id, transform_source=js_to_json) + title = unescapeHTML(data['title']) live_starter = try_get(data, lambda x: x['plugins']['liveStarter'], dict) if live_starter: data.update(live_starter) - title = unescapeHTML(data['title']) - formats = [] - for tracks in data.get('tracks', {}).values(): for video in tracks: video_url = video.get('src') @@ -167,36 +162,23 @@ class DVTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - timestamp = parse_iso8601(self._html_search_meta( 'article:published_time', webpage, 'published time', default=None)) - # playlist - items = re.findall( - r"(?s)playlist\.push\(({.+?})\);", - webpage) - + items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage) if items: - return { - '_type': 'playlist', - 'id': video_id, - 'title': self._og_search_title(webpage), - 'entries': [self._parse_video_metadata(i, video_id, timestamp) for i in items] - } + return self.playlist_result( + [self._parse_video_metadata(i, video_id, timestamp) for i in items], + video_id, self._html_search_meta('twitter:title', webpage)) - # single video item = self._search_regex( - r'(?s)BBXPlayer.setup\((.+?)\);', + r'(?s)BBXPlayer\.setup\((.+?)\);', webpage, 'video', default=None) - if item: # remove function calls (ex. 
htmldeentitize) # TODO this should be fixed in a general way in the js_to_json item = re.sub(r'\w+?\((.+)\)', r'\1', item) - - if item: return self._parse_video_metadata(item, video_id, timestamp) raise ExtractorError('Could not find neither video nor playlist') From c701472fc99c90f6ef4fdf0f0169a342f2e9e892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Apr 2019 02:15:25 +0700 Subject: [PATCH 214/785] [platzi] Add extractor (closes #20562) --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/platzi.py | 217 +++++++++++++++++++++++++++++ 2 files changed, 221 insertions(+) create mode 100644 youtube_dl/extractor/platzi.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 01119f2bb..41ab36213 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -868,6 +868,10 @@ from .picarto import ( from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pladform import PladformIE +from .platzi import ( + PlatziIE, + PlatziCourseIE, +) from .playfm import PlayFMIE from .playplustv import PlayPlusTVIE from .plays import PlaysTVIE diff --git a/youtube_dl/extractor/platzi.py b/youtube_dl/extractor/platzi.py new file mode 100644 index 000000000..557b2b5ad --- /dev/null +++ b/youtube_dl/extractor/platzi.py @@ -0,0 +1,217 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_b64decode, + compat_str, +) +from ..utils import ( + clean_html, + ExtractorError, + int_or_none, + str_or_none, + try_get, + url_or_none, + urlencode_postdata, + urljoin, +) + + +class PlatziIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + platzi\.com/clases| # es version + courses\.platzi\.com/classes # en version + )/[^/]+/(?P\d+)-[^/?\#&]+ + ''' + _LOGIN_URL = 'https://platzi.com/login/' + _NETRC_MACHINE = 'platzi' + + _TESTS = [{ + 'url': 
'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/', + 'md5': '8f56448241005b561c10f11a595b37e3', + 'info_dict': { + 'id': '12074', + 'ext': 'mp4', + 'title': 'Creando nuestra primera página', + 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc', + 'duration': 420, + }, + 'skip': 'Requires platzi account credentials', + }, { + 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/', + 'info_dict': { + 'id': '13430', + 'ext': 'mp4', + 'title': 'Background', + 'description': 'md5:49c83c09404b15e6e71defaf87f6b305', + 'duration': 360, + }, + 'skip': 'Requires platzi account credentials', + 'params': { + 'skip_download': True, + }, + }] + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'email': username, + 'password': password, + }) + + urlh = self._request_webpage( + self._LOGIN_URL, None, 'Logging in', + data=urlencode_postdata(login_form), + headers={'Referer': self._LOGIN_URL}) + + # login succeeded + if 'platzi.com/login' not in compat_str(urlh.geturl()): + return + + login_error = self._webpage_read_content( + urlh, self._LOGIN_URL, None, 'Downloading login error page') + + login = self._parse_json( + self._search_regex( + r'login\s*=\s*({.+?})(?:\s*;|\s*[^/?\#&]+) + ''' + _TESTS = [{ + 'url': 'https://platzi.com/clases/next-js/', + 'info_dict': { + 'id': '1311', + 'title': 'Curso de Next.js', + }, + 'playlist_count': 22, + }, { + 'url': 'https://courses.platzi.com/classes/communication-codestream/', + 'info_dict': { + 'id': '1367', + 'title': 'Codestream Course', + }, + 'playlist_count': 14, + }] + + @classmethod + def suitable(cls, url): + return False if PlatziIE.suitable(url) else super(PlatziCourseIE, cls).suitable(url) + + def 
_real_extract(self, url): + course_name = self._match_id(url) + + webpage = self._download_webpage(url, course_name) + + props = self._parse_json( + self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'), + course_name)['initialProps'] + + entries = [] + for chapter_num, chapter in enumerate(props['concepts'], 1): + if not isinstance(chapter, dict): + continue + materials = chapter.get('materials') + if not materials or not isinstance(materials, list): + continue + chapter_title = chapter.get('title') + chapter_id = str_or_none(chapter.get('id')) + for material in materials: + if not isinstance(material, dict): + continue + if material.get('material_type') != 'video': + continue + video_url = urljoin(url, material.get('url')) + if not video_url: + continue + entries.append({ + '_type': 'url_transparent', + 'url': video_url, + 'title': str_or_none(material.get('name')), + 'id': str_or_none(material.get('id')), + 'ie_key': PlatziIE.ie_key(), + 'chapter': chapter_title, + 'chapter_number': chapter_num, + 'chapter_id': chapter_id, + }) + + course_id = compat_str(try_get(props, lambda x: x['course']['id'])) + course_title = try_get(props, lambda x: x['course']['name'], compat_str) + + return self.playlist_result(entries, course_id, course_title) From 059cd768b97188fbbf3262696e6511f3aa1795e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Apr 2019 02:17:54 +0700 Subject: [PATCH 215/785] [vk] Remove unused import --- youtube_dl/extractor/vk.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 39376e39f..1072550f1 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -6,10 +6,7 @@ import re import sys from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) +from ..compat import compat_urlparse from ..utils import ( clean_html, ExtractorError, From f412970164c349bea81bfd9d95e56fec7d16c8a1 Mon Sep 17 
00:00:00 2001 From: Martin Michlmayr Date: Sun, 7 Apr 2019 02:28:31 +0700 Subject: [PATCH 216/785] [README.md] Fix lists formatting (closes #20558) Lists have to be separated from the previous paragraph by a blank line in certain variants of Markdown, otherwise they are not interpreted as lists. This change ensures that that the youtube-dl.1 man page, which is generated from README.md with the help of pandoc, is formatted correctly. --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index e476045b2..92c3a92a1 100644 --- a/README.md +++ b/README.md @@ -642,6 +642,7 @@ The simplest case is requesting a specific format, for example with `-f 22` you You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file. You can also use special names to select particular edge case formats: + - `best`: Select the best quality format represented by a single file with video and audio. - `worst`: Select the worst quality format represented by a single file with video and audio. - `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available. @@ -658,6 +659,7 @@ If you want to download several formats of the same video use a comma as a separ You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). 
The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals): + - `filesize`: The number of bytes, if known in advance - `width`: Width of the video, if known - `height`: Height of the video, if known @@ -668,6 +670,7 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, ` - `fps`: Frame rate Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and following string meta fields: + - `ext`: File extension - `acodec`: Name of the audio codec in use - `vcodec`: Name of the video codec in use From f4da80803619aea52bb116f6191f78e1bd77d2c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Apr 2019 02:58:40 +0700 Subject: [PATCH 217/785] [xvideos] Extract all thumbnails (closes #20432) --- youtube_dl/extractor/xvideos.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index ec2d913fc..166bcf443 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -57,10 +57,17 @@ class XVideosIE(InfoExtractor): webpage, 'title', default=None, group='title') or self._og_search_title(webpage) - thumbnail = self._search_regex( - (r'setThumbUrl\(\s*(["\'])(?P(?:(?!\1).)+)\1', - r'url_bigthumb=(?P.+?)&'), - webpage, 'thumbnail', fatal=False, group='thumbnail') + thumbnails = [] + for preference, thumbnail in enumerate(('', '169')): + thumbnail_url = self._search_regex( + r'setThumbUrl%s\(\s*(["\'])(?P(?:(?!\1).)+)\1' % thumbnail, + webpage, 'thumbnail', default=None, group='thumbnail') + if thumbnail_url: + thumbnails.append({ + 'url': thumbnail_url, + 'preference': preference, + }) + duration = int_or_none(self._og_search_property( 'duration', webpage, default=None)) or parse_duration( self._search_regex( @@ -98,6 +105,6 @@ class XVideosIE(InfoExtractor): 'formats': formats, 'title': title, 'duration': 
duration, - 'thumbnail': thumbnail, + 'thumbnails': thumbnails, 'age_limit': 18, } From 8410653f24ae73e4bc52c11bc0ec595e42567038 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Apr 2019 03:18:10 +0700 Subject: [PATCH 218/785] [ruutu] Add support for audio podcasts (closes #20473, closes #20545) --- youtube_dl/extractor/ruutu.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index f530f0083..f05401b36 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -59,6 +59,20 @@ class RuutuIE(InfoExtractor): 'url': 'http://www.ruutu.fi/video/3193728', 'only_matching': True, }, + { + # audio podcast + 'url': 'https://www.supla.fi/supla/3382410', + 'md5': 'b9d7155fed37b2ebf6021d74c4b8e908', + 'info_dict': { + 'id': '3382410', + 'ext': 'mp3', + 'title': 'Mikä ihmeen poltergeist?', + 'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 0, + }, + 'expected_warnings': ['HTTP Error 502: Bad Gateway'], + } ] def _real_extract(self, url): @@ -94,6 +108,12 @@ class RuutuIE(InfoExtractor): continue formats.extend(self._extract_mpd_formats( video_url, video_id, mpd_id='dash', fatal=False)) + elif ext == 'mp3' or child.tag == 'AudioMediaFile': + formats.append({ + 'format_id': 'audio', + 'url': video_url, + 'vcodec': 'none', + }) else: proto = compat_urllib_parse_urlparse(video_url).scheme if not child.tag.startswith('HTTP') and proto != 'rtmp': From aa5338118e77380c753b397c6f544fcfdef1cb22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Apr 2019 04:16:45 +0700 Subject: [PATCH 219/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/ChangeLog b/ChangeLog index 3a0b6634f..41adb4204 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,29 @@ +version + +Core ++ [downloader/external] Pass 
rtmp_conn to ffmpeg + +Extractors ++ [ruutu] Add support for audio podcasts (#20473, #20545) ++ [xvideos] Extract all thumbnails (#20432) ++ [platzi] Add support for platzi.com (#20562) +* [dvtv] Fix extraction (#18514, #19174) ++ [vrv] Add basic support for individual movie links (#19229) ++ [bfi:player] Add support for player.bfi.org.uk (#19235) +* [hbo] Fix extraction and extract subtitles (#14629, #13709) +* [youtube] Extract srv[1-3] subtitle formats (#20566) +* [adultswim] Fix extraction (#18025) +* [teamcoco] Fix extraction and add suport for subdomains (#17099, #20339) +* [adn] Fix subtitle compatibility with ffmpeg +* [adn] Fix extraction and add support for positioning styles (#20549) +* [vk] Use unique video id (#17848) +* [newstube] Fix extraction +* [rtl2] Actualize extraction ++ [adobeconnect] Add support for adobeconnect.com (#20283) ++ [gaia] Add support for authentication (#14605) ++ [mediasite] Add support for dashed ids and named catalogs (#20531) + + version 2019.04.01 Core From a46d9e5b41bcfdf23038fa9b36aec5d4ba9c6de0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Apr 2019 04:19:46 +0700 Subject: [PATCH 220/785] release 2019.04.07 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 6 +++++- youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index cc68279d0..5469c73cf 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.04.01*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.04.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.04.07*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.04.07** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/ytdl-org/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/ytdl-org/youtube-dl#faq) and [BUGS](https://github.com/ytdl-org/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.04.01 +[debug] youtube-dl version 2019.04.07 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 41adb4204..421f247fd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.04.07 Core + [downloader/external] Pass rtmp_conn to ffmpeg diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 12df319eb..df272c479 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -28,6 +28,7 @@ - **acast:channel** - **AddAnime** - **ADN**: Anime Digital Network + - **AdobeConnect** - **AdobeTV** - **AdobeTVChannel** - **AdobeTVShow** @@ -101,6 +102,7 @@ - **Bellator** - **BellMedia** - **Bet** + - **bfi:player** - **Bigflix** - **Bild**: Bild.de - **BiliBili** @@ -345,7 +347,6 @@ - **Groupon** - **Hark** - **hbo** - - 
**hbo:episode** - **HearThisAt** - **Heise** - **HellPorno** @@ -489,6 +490,7 @@ - **Mediaset** - **Mediasite** - **MediasiteCatalog** + - **MediasiteNamedCatalog** - **Medici** - **megaphone.fm**: megaphone.fm embedded players - **Meipai**: 美拍 @@ -671,6 +673,8 @@ - **Piksel** - **Pinkbike** - **Pladform** + - **Platzi** + - **PlatziCourse** - **play.fm** - **PlayPlusTV** - **PlaysTV** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index f872cd137..5c7d550f5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.04.01' +__version__ = '2019.04.07' From bf6fb8b9dc921a30df24d9789d5bbb0ac5b370b0 Mon Sep 17 00:00:00 2001 From: ealgase Date: Sat, 6 Apr 2019 23:38:40 -0400 Subject: [PATCH 221/785] [openload] add tests --- youtube_dl/extractor/openload.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 25b3bfdbd..130165b8c 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -350,6 +350,12 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.space/f/IY4eZSst3u8/', 'only_matching': True, + }, { + 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', + 'only_matching': True, + }, { + 'url': 'https://oladblock.xyz/f/b8NWEgkqNLI/', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From 9ed06812ec8da6f1364acd00261935c334994e62 Mon Sep 17 00:00:00 2001 From: ealgase Date: Sat, 6 Apr 2019 23:59:41 -0400 Subject: [PATCH 222/785] [streamango] add support for streamcherry.com --- youtube_dl/extractor/streamango.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/streamango.py b/youtube_dl/extractor/streamango.py index efb259f96..f1e17dd88 100644 --- a/youtube_dl/extractor/streamango.py +++ 
b/youtube_dl/extractor/streamango.py @@ -14,7 +14,7 @@ from ..utils import ( class StreamangoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net)/(?:f|embed)/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net|streamcherry\.com)/(?:f|embed)/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4', 'md5': 'e992787515a182f55e38fc97588d802a', @@ -41,6 +41,9 @@ class StreamangoIE(InfoExtractor): }, { 'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4', 'only_matching': True, + }, { + 'url': 'https://streamcherry.com/f/clapasobsptpkdfe/', + 'only_matching': True, }] def _real_extract(self, url): From d562cac9dc67bfa2306c6225c261390162527d9e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 7 Apr 2019 12:39:48 +0100 Subject: [PATCH 223/785] [stv:player] Add new extractor(closes #20586) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/stv.py | 94 ++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 youtube_dl/extractor/stv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 41ab36213..cc19af5c4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1093,6 +1093,7 @@ from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE from .streetvoice import StreetVoiceIE from .stretchinternet import StretchInternetIE +from .stv import STVPlayerIE from .sunporno import SunPornoIE from .svt import ( SVTIE, diff --git a/youtube_dl/extractor/stv.py b/youtube_dl/extractor/stv.py new file mode 100644 index 000000000..ccb074cd4 --- /dev/null +++ b/youtube_dl/extractor/stv.py @@ -0,0 +1,94 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_urlparse +) +from ..utils import ( + 
extract_attributes, + float_or_none, + int_or_none, + str_or_none, +) + + +class STVPlayerIE(InfoExtractor): + IE_NAME = 'stv:player' + _VALID_URL = r'https?://player\.stv\.tv/(?Pepisode|video)/(?P[a-z0-9]{4})' + _TEST = { + 'url': 'https://player.stv.tv/video/7srz/victoria/interview-with-the-cast-ahead-of-new-victoria/', + 'md5': '2ad867d4afd641fa14187596e0fbc91b', + 'info_dict': { + 'id': '6016487034001', + 'ext': 'mp4', + 'upload_date': '20190321', + 'title': 'Interview with the cast ahead of new Victoria', + 'description': 'Nell Hudson and Lily Travers tell us what to expect in the new season of Victoria.', + 'timestamp': 1553179628, + 'uploader_id': '1486976045', + }, + 'skip': 'this resource is unavailable outside of the UK', + } + _PUBLISHER_ID = '1486976045' + _PTYPE_MAP = { + 'episode': 'episodes', + 'video': 'shortform', + } + + def _real_extract(self, url): + ptype, video_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, video_id) + + qs = compat_parse_qs(compat_urllib_parse_urlparse(self._search_regex( + r'itemprop="embedURL"[^>]+href="([^"]+)', + webpage, 'embed URL', default=None)).query) + publisher_id = qs.get('publisherID', [None])[0] or self._PUBLISHER_ID + + player_attr = extract_attributes(self._search_regex( + r'(<[^>]+class="bcplayer"[^>]+>)', webpage, 'player', default=None)) or {} + + info = {} + duration = ref_id = series = video_id = None + api_ref_id = player_attr.get('data-player-api-refid') + if api_ref_id: + resp = self._download_json( + 'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], api_ref_id), + api_ref_id, fatal=False) + if resp: + result = resp.get('results') or {} + video = result.get('video') or {} + video_id = str_or_none(video.get('id')) + ref_id = video.get('guid') + duration = video.get('length') + programme = result.get('programme') or {} + series = programme.get('name') or programme.get('shortName') + subtitles = {} + _subtitles = result.get('_subtitles') or {} + for ext, 
sub_url in _subtitles.items(): + subtitles.setdefault('en', []).append({ + 'ext': 'vtt' if ext == 'webvtt' else ext, + 'url': sub_url, + }) + info.update({ + 'description': result.get('summary'), + 'subtitles': subtitles, + 'view_count': int_or_none(result.get('views')), + }) + if not video_id: + video_id = qs.get('videoId', [None])[0] or self._search_regex( + r' Date: Sun, 7 Apr 2019 21:05:50 +0700 Subject: [PATCH 224/785] [tiktok] Add support for new URL schema (closes #20573) --- youtube_dl/extractor/tiktok.py | 35 +++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/tiktok.py b/youtube_dl/extractor/tiktok.py index 083e9f36d..66088b9ab 100644 --- a/youtube_dl/extractor/tiktok.py +++ b/youtube_dl/extractor/tiktok.py @@ -65,8 +65,15 @@ class TikTokBaseIE(InfoExtractor): class TikTokIE(TikTokBaseIE): - _VALID_URL = r'https?://(?:m\.)?tiktok\.com/v/(?P\d+)' - _TEST = { + _VALID_URL = r'''(?x) + https?:// + (?: + (?:m\.)?tiktok\.com/v| + (?:www\.)?tiktok\.com/share/video + ) + /(?P\d+) + ''' + _TESTS = [{ 'url': 'https://m.tiktok.com/v/6606727368545406213.html', 'md5': 'd584b572e92fcd48888051f238022420', 'info_dict': { @@ -81,25 +88,39 @@ class TikTokIE(TikTokBaseIE): 'comment_count': int, 'repost_count': int, } - } + }, { + 'url': 'https://www.tiktok.com/share/video/6606727368545406213', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'https://m.tiktok.com/v/%s.html' % video_id, video_id) data = self._parse_json(self._search_regex( r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id) return self._extract_aweme(data) class TikTokUserIE(TikTokBaseIE): - _VALID_URL = r'https?://(?:m\.)?tiktok\.com/h5/share/usr/(?P\d+)' - _TEST = { + _VALID_URL = r'''(?x) + https?:// + (?: + (?:m\.)?tiktok\.com/h5/share/usr| + (?:www\.)?tiktok\.com/share/user + ) + /(?P\d+) + ''' + _TESTS = 
[{ 'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html', 'info_dict': { 'id': '188294915489964032', }, 'playlist_mincount': 24, - } + }, { + 'url': 'https://www.tiktok.com/share/user/188294915489964032', + 'only_matching': True, + }] def _real_extract(self, url): user_id = self._match_id(url) From 9045d28b5e3eba03908a82dcb868bd1649650ddb Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 7 Apr 2019 21:31:01 +0100 Subject: [PATCH 225/785] [aol] restrict url regex and improve format extraction --- youtube_dl/extractor/aol.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index cb9279193..dffa9733d 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -4,6 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_urlparse, +) from ..utils import ( ExtractorError, int_or_none, @@ -12,12 +16,12 @@ from ..utils import ( class AolIE(InfoExtractor): - IE_NAME = 'on.aol.com' - _VALID_URL = r'(?:aol-video:|https?://(?:(?:www|on)\.)?aol\.com/(?:[^/]+/)*(?:[^/?#&]+-)?)(?P[^/?#&]+)' + IE_NAME = 'aol.com' + _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.com/video/(?:[^/]+/)*)(?P[0-9a-f]+)' _TESTS = [{ # video with 5min ID - 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', + 'url': 'https://www.aol.com/video/view/u-s--official-warns-of-largest-ever-irs-phone-scam/518167793/', 'md5': '18ef68f48740e86ae94b98da815eec42', 'info_dict': { 'id': '518167793', @@ -34,7 +38,7 @@ class AolIE(InfoExtractor): } }, { # video with vidible ID - 'url': 'http://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', + 'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', 'info_dict': { 'id': 
'5707d6b8e4b090497b04f706', 'ext': 'mp4', @@ -49,17 +53,17 @@ class AolIE(InfoExtractor): 'skip_download': True, } }, { - 'url': 'http://on.aol.com/partners/abc-551438d309eab105804dbfe8/sneak-peek-was-haley-really-framed-570eaebee4b0448640a5c944', + 'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/', 'only_matching': True, }, { - 'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763', - 'only_matching': True, - }, { - 'url': 'http://on.aol.com/video/519442220', + 'url': 'https://www.aol.com/video/view/donald-trump-spokeswoman-tones-down-megyn-kelly-attacks/519442220/', 'only_matching': True, }, { 'url': 'aol-video:5707d6b8e4b090497b04f706', 'only_matching': True, + }, { + 'url': 'https://www.aol.com/video/playlist/PL8245/5ca79d19d21f1a04035db606/', + 'only_matching': True, }] def _real_extract(self, url): @@ -73,7 +77,7 @@ class AolIE(InfoExtractor): video_data = response['data'] formats = [] - m3u8_url = video_data.get('videoMasterPlaylist') + m3u8_url = url_or_none(video_data.get('videoMasterPlaylist')) if m3u8_url: formats.extend(self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) @@ -96,6 +100,12 @@ class AolIE(InfoExtractor): 'width': int(mobj.group(1)), 'height': int(mobj.group(2)), }) + else: + qs = compat_parse_qs(compat_urllib_parse_urlparse(video_url).query) + f.update({ + 'width': int_or_none(qs.get('w', [None])[0]), + 'height': int_or_none(qs.get('h', [None])[0]), + }) formats.append(f) self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) From 9c017253e83ac3dfd363566ccbb9fc63f4e2ac07 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 8 Apr 2019 16:34:03 +0100 Subject: [PATCH 226/785] [jwplatfom] do not match manifest URLs(#20596) --- youtube_dl/extractor/jwplatform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/jwplatform.py 
b/youtube_dl/extractor/jwplatform.py index d19a6a774..647b905f1 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class JWPlatformIE(InfoExtractor): - _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video|manifest)s|jw6|v2/media)/|jwplatform:)(?P[a-zA-Z0-9]{8})' + _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video)s|jw6|v2/media)/|jwplatform:)(?P[a-zA-Z0-9]{8})' _TESTS = [{ 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js', 'md5': 'fa8899fa601eb7c83a64e9d568bdf325', From 5ca3459828cc0d752f02dab3e9c02cca85185cbb Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 9 Apr 2019 11:20:26 +0100 Subject: [PATCH 227/785] [kaltura] sanitize embed URLs --- youtube_dl/extractor/kaltura.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index fdf7f5bbc..79162f665 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -145,6 +145,8 @@ class KalturaIE(InfoExtractor): ) if mobj: embed_info = mobj.groupdict() + for k, v in embed_info.items(): + embed_info[k] = v.strip() url = 'kaltura:%(partner_id)s:%(id)s' % embed_info escaped_pid = re.escape(embed_info['partner_id']) service_url = re.search( From 4bc12b8f819cd0a393e5800d4dc2ecf24401e199 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 9 Apr 2019 11:21:46 +0100 Subject: [PATCH 228/785] [dispeak] improve mp4 bitrate extraction --- youtube_dl/extractor/dispeak.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dispeak.py b/youtube_dl/extractor/dispeak.py index c05f601e2..c345e0274 100644 --- a/youtube_dl/extractor/dispeak.py +++ b/youtube_dl/extractor/dispeak.py @@ -58,10 +58,17 @@ class DigitallySpeakingIE(InfoExtractor): stream_name = xpath_text(a_format, 
'streamName', fatal=True) video_path = re.match(r'mp4\:(?P.*)', stream_name).group('path') url = video_root + video_path - vbr = xpath_text(a_format, 'bitrate') + bitrate = xpath_text(a_format, 'bitrate') + tbr = int_or_none(bitrate) + vbr = int_or_none(self._search_regex( + r'-(\d+)\.mp4', video_path, 'vbr', default=None)) + abr = tbr - vbr if tbr and vbr else None video_formats.append({ + 'format_id': bitrate, 'url': url, - 'vbr': int_or_none(vbr), + 'tbr': tbr, + 'vbr': vbr, + 'abr': abr, }) return video_formats From 118f7add3b9690884edb4dc887995f3815243c78 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 9 Apr 2019 11:23:47 +0100 Subject: [PATCH 229/785] [gdc] add support for kaltura embeds and update tests(closes #20575) --- youtube_dl/extractor/gdcvault.py | 96 ++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index 8806dc48a..2f555c1d4 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -3,22 +3,24 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from .kaltura import KalturaIE from ..utils import ( HEADRequest, sanitized_Request, + smuggle_url, urlencode_postdata, ) class GDCVaultIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P\d+)/(?P(\w|-)+)?' + _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P\d+)(?:/(?P[\w-]+))?' 
_NETRC_MACHINE = 'gdcvault' _TESTS = [ { 'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple', 'md5': '7ce8388f544c88b7ac11c7ab1b593704', 'info_dict': { - 'id': '1019721', + 'id': '201311826596_AWNY', 'display_id': 'Doki-Doki-Universe-Sweet-Simple', 'ext': 'mp4', 'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)' @@ -27,7 +29,7 @@ class GDCVaultIE(InfoExtractor): { 'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of', 'info_dict': { - 'id': '1015683', + 'id': '201203272_1330951438328RSXR', 'display_id': 'Embracing-the-Dark-Art-of', 'ext': 'flv', 'title': 'Embracing the Dark Art of Mathematical Modeling in AI' @@ -56,7 +58,7 @@ class GDCVaultIE(InfoExtractor): 'url': 'http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface', 'md5': 'a8efb6c31ed06ca8739294960b2dbabd', 'info_dict': { - 'id': '1023460', + 'id': '840376_BQRC', 'ext': 'mp4', 'display_id': 'Tenacious-Design-and-The-Interface', 'title': 'Tenacious Design and The Interface of \'Destiny\'', @@ -66,26 +68,38 @@ class GDCVaultIE(InfoExtractor): # Multiple audios 'url': 'http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC', 'info_dict': { - 'id': '1014631', - 'ext': 'flv', + 'id': '12396_1299111843500GMPX', + 'ext': 'mp4', 'title': 'How to Create a Good Game - From My Experience of Designing Pac-Man', }, - 'params': { - 'skip_download': True, # Requires rtmpdump - 'format': 'jp', # The japanese audio - } + # 'params': { + # 'skip_download': True, # Requires rtmpdump + # 'format': 'jp', # The japanese audio + # } }, { # gdc-player.html 'url': 'http://www.gdcvault.com/play/1435/An-American-engine-in-Tokyo', 'info_dict': { - 'id': '1435', + 'id': '9350_1238021887562UHXB', 'display_id': 'An-American-engine-in-Tokyo', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'An American Engine in Tokyo:/nThe collaboration of Epic Games and Square Enix/nFor THE LAST REMINANT', }, + }, + { + # Kaltura Embed + 'url': 
'https://www.gdcvault.com/play/1026180/Mastering-the-Apex-of-Scaling', + 'info_dict': { + 'id': '0_h1fg8j3p', + 'ext': 'mp4', + 'title': 'Mastering the Apex of Scaling Game Servers (Presented by Multiplay)', + 'timestamp': 1554401811, + 'upload_date': '20190404', + 'uploader_id': 'joe@blazestreaming.com', + }, 'params': { - 'skip_download': True, # Requires rtmpdump + 'format': 'mp4-408', }, }, ] @@ -114,10 +128,8 @@ class GDCVaultIE(InfoExtractor): return start_page def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - video_id = mobj.group('id') - display_id = mobj.group('name') or video_id + video_id, name = re.match(self._VALID_URL, url).groups() + display_id = name or video_id webpage_url = 'http://www.gdcvault.com/play/' + video_id start_page = self._download_webpage(webpage_url, display_id) @@ -127,12 +139,12 @@ class GDCVaultIE(InfoExtractor): start_page, 'url', default=None) if direct_url: title = self._html_search_regex( - r'Session Name\s*(.*?)', + r'Session Name:?\s*(.*?)', start_page, 'title') video_url = 'http://www.gdcvault.com' + direct_url # resolve the url so that we can detect the correct extension - head = self._request_webpage(HEADRequest(video_url), video_id) - video_url = head.geturl() + video_url = self._request_webpage( + HEADRequest(video_url), video_id).geturl() return { 'id': video_id, @@ -141,34 +153,36 @@ class GDCVaultIE(InfoExtractor): 'title': title, } - PLAYER_REGEX = r'' - xml_root = self._html_search_regex( - PLAYER_REGEX, start_page, 'xml root', default=None) - if xml_root is None: - # Probably need to authenticate - login_res = self._login(webpage_url, display_id) - if login_res is None: - self.report_warning('Could not login.') - else: - start_page = login_res - # Grab the url from the authenticated page - xml_root = self._html_search_regex( - PLAYER_REGEX, start_page, 'xml root') + xml_root = self._html_search_regex( + PLAYER_REGEX, start_page, 'xml root', default=None) + if xml_root is None: + # Probably 
need to authenticate + login_res = self._login(webpage_url, display_id) + if login_res is None: + self.report_warning('Could not login.') + else: + start_page = login_res + # Grab the url from the authenticated page + xml_root = self._html_search_regex( + PLAYER_REGEX, start_page, 'xml root') - xml_name = self._html_search_regex( - r'', + r'', + r'', webpage, 'embed url')) if VKIE.suitable(embed_url): return self.url_result(embed_url, VKIE.ie_key(), video_id) From 427cc215310804127b55744fcc3664ede38a4a0d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 10 Jun 2019 15:17:26 +0100 Subject: [PATCH 369/785] [biqle] remove unnecessary regex group --- youtube_dl/extractor/biqle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/biqle.py b/youtube_dl/extractor/biqle.py index c5c374845..af21e3ee5 100644 --- a/youtube_dl/extractor/biqle.py +++ b/youtube_dl/extractor/biqle.py @@ -42,7 +42,7 @@ class BIQLEIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) embed_url = self._proto_relative_url(self._search_regex( - r'', + r'', webpage, 'embed url')) if VKIE.suitable(embed_url): return self.url_result(embed_url, VKIE.ie_key(), video_id) From 8361e7f93489f226542517216b2127ff170ca996 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 12 Jun 2019 21:41:46 +0100 Subject: [PATCH 370/785] [toutv] update client key(closes #21370) --- youtube_dl/extractor/toutv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 00f58a087..44b022fca 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -38,7 +38,7 @@ class TouTvIE(RadioCanadaIE): 'url': 'https://ici.tou.tv/l-age-adulte/S01C501', 'only_matching': True, }] - _CLIENT_KEY = '4dd36440-09d5-4468-8923-b6d91174ad36' + _CLIENT_KEY = '90505c8d-9c34-4f34-8da1-3a85bdc6d4f4' def _real_initialize(self): email, password = self._get_login_info() 
From 28cc2241e44ff0c0704cfffaca6d47d377041aa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 14 Jun 2019 01:56:17 +0700 Subject: [PATCH 371/785] [utils] Restrict parse_codecs and add theora as known vcodec (#21381) --- test/test_utils.py | 9 +++++++++ youtube_dl/utils.py | 11 +++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 71980b3fc..659c6ece5 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -822,6 +822,15 @@ class TestUtil(unittest.TestCase): 'vcodec': 'av01.0.05M.08', 'acodec': 'none', }) + self.assertEqual(parse_codecs('theora, vorbis'), { + 'vcodec': 'theora', + 'acodec': 'vorbis', + }) + self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), { + 'vcodec': 'unknownvcodec', + 'acodec': 'unknownacodec', + }) + self.assertEqual(parse_codecs('unknown'), {}) def test_escape_rfc3986(self): reserved = "!*'();:@&=+$,/?#[]" diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index ead9bd862..a1f586b80 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2529,7 +2529,7 @@ def parse_codecs(codecs_str): vcodec, acodec = None, None for full_codec in splited_codecs: codec = full_codec.split('.')[0] - if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01'): + if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'): if not vcodec: vcodec = full_codec elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): @@ -2540,13 +2540,8 @@ def parse_codecs(codecs_str): if not vcodec and not acodec: if len(splited_codecs) == 2: return { - 'vcodec': vcodec, - 'acodec': acodec, - } - elif len(splited_codecs) == 1: - return { - 'vcodec': 'none', - 'acodec': vcodec, + 'vcodec': splited_codecs[0], + 'acodec': splited_codecs[1], } else: return { From 
b85eae0f057a0afdf1da9d6034c19327c8de33cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 14 Jun 2019 01:59:05 +0700 Subject: [PATCH 372/785] [youtube] Hardcode codec metadata for av01 video only formats (closes #21381) --- youtube_dl/extractor/youtube.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 963c73a2d..7b630b191 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -500,6 +500,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # RTMP (unnamed) '_rtmp': {'protocol': 'rtmp'}, + + # av01 video only formats sometimes served with "unknown" codecs + '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, + '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, + '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, + '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, } _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') From 7c24a58bdb60af80137beac85c8804c70194a455 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 16 Jun 2019 06:32:17 +0100 Subject: [PATCH 373/785] [sixplay] add support for rtlmost.hu(#21405) --- youtube_dl/extractor/sixplay.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/sixplay.py b/youtube_dl/extractor/sixplay.py index 2a72af11b..7ec66ecf3 100644 --- a/youtube_dl/extractor/sixplay.py +++ b/youtube_dl/extractor/sixplay.py @@ -19,7 +19,7 @@ from ..utils import ( class SixPlayIE(InfoExtractor): IE_NAME = '6play' - _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P6play\.fr|rtlplay\.be|play\.rtl\.hr)/.+?-c_)(?P[0-9]+)' + _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P6play\.fr|rtlplay\.be|play\.rtl\.hr|rtlmost\.hu)/.+?-c_)(?P[0-9]+)' _TESTS = [{ 'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051', 'md5': '31fcd112637baa0c2ab92c4fcd8baf27', @@ -35,6 +35,9 @@ class SixPlayIE(InfoExtractor): }, 
{ 'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989', 'only_matching': True, + }, { + 'url': 'https://www.rtlmost.hu/megtorve-p_14167/megtorve-6-resz-c_12397787', + 'only_matching': True, }] def _real_extract(self, url): @@ -43,6 +46,7 @@ class SixPlayIE(InfoExtractor): '6play.fr': ('6play', 'm6web'), 'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'), 'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'), + 'rtlmost.hu': ('rtlhu_rtl_most', 'rtlhu'), }.get(domain, ('6play', 'm6web')) data = self._download_json( From c40714cdee0ce3de1a5f6e17a61d3ee4c610ae63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 20 Jun 2019 00:57:58 +0700 Subject: [PATCH 374/785] [youtube] Make --write-annotations non fatal (closes #21452) --- youtube_dl/extractor/youtube.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 7b630b191..1010c8616 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1581,8 +1581,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return video_id def _extract_annotations(self, video_id): - url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id - return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') + return self._download_webpage( + 'https://www.youtube.com/annotations_invideo', video_id, + note='Downloading annotations', + errnote='Unable to download video annotations', fatal=False, + query={ + 'features': 1, + 'legacy': 1, + 'video_id': video_id, + }) @staticmethod def _extract_chapters(description, duration): From abefc03f517e9208b9d0c35e7e683941a40bb152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Jun 2019 22:58:42 +0700 Subject: [PATCH 375/785] [youtube] Update signature function patterns (closes #21469, closes #21476) --- 
youtube_dl/extractor/youtube.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1010c8616..83b6ac134 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1312,11 +1312,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _parse_sig_js(self, jscode): funcname = self._search_regex( - (r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', + (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + # Obsolete patterns + r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P[a-zA-Z0-9$]+)\(', - r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\('), + r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P[a-zA-Z0-9$]+)\(', + r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', + r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') jsi = JSInterpreter(jscode) From bc6438c092be6ca63843a349eee1db2b5d398d34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Jun 2019 23:01:09 +0700 Subject: [PATCH 376/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index 
c4d485ff1..10394a3b6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version + +Core +* [utils] Restrict parse_codecs and add theora as known vcodec (#21381) + +Extractors +* [youtube] Update signature function patterns (#21469, #21476) +* [youtube] Make --write-annotations non fatal (#21452) ++ [sixplay] Add support for rtlmost.hu (#21405) +* [youtube] Hardcode codec metadata for av01 video only formats (#21381) +* [toutv] Update client key (#21370) ++ [biqle] Add support for new embed domain +* [cbs] Improve DRM protected videos detection (#21339) + + version 2019.06.08 Core From 9842d29d660b1ffe7873823542085879ba9d86a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Jun 2019 23:04:09 +0700 Subject: [PATCH 377/785] release 2019.06.21 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 104ad598c..7a2b16827 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: 
[u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.08 + [debug] youtube-dl version 2019.06.21 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index f711af040..d6180e672 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index ae865a8b0..7cb981abf 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 8246b570e..802fa2313 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm 
running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.08 + [debug] youtube-dl version 2019.06.21 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 292c2e697..5153864a1 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 10394a3b6..2d9988da3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.06.21 Core * [utils] Restrict parse_codecs and add theora as known vcodec (#21381) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6aa666bc9..33474a452 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.06.08' +__version__ = '2019.06.21' From 4681441d2faf54615962029c7240601e339281bb Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 00:07:26 +0700 Subject: [PATCH 378/785] [crunchyroll:playlist] Fix and relax title extraction (closes #21291, closes #21443) --- youtube_dl/extractor/crunchyroll.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 588c3c71b..75b56ee42 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -661,9 +661,8 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): webpage = self._download_webpage( self._add_skip_wall(url), show_id, headers=self.geo_verification_headers()) - title = self._html_search_regex( - r'(?s)]*>\s*(.*?)', - webpage, 'title') + title = self._html_search_meta('name', webpage, default=None) + episode_paths = re.findall( r'(?s)
  • ]+>.*? Date: Sat, 22 Jun 2019 00:15:52 +0700 Subject: [PATCH 379/785] [crunchyroll] Move Accept-Language workaround to video extractor since it causes playlists not to list any videos --- youtube_dl/extractor/crunchyroll.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 75b56ee42..85a9a577f 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -103,19 +103,6 @@ class CrunchyrollBaseIE(InfoExtractor): def _real_initialize(self): self._login() - def _download_webpage(self, url_or_request, *args, **kwargs): - request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request) - else sanitized_Request(url_or_request)) - # Accept-Language must be set explicitly to accept any language to avoid issues - # similar to https://github.com/ytdl-org/youtube-dl/issues/6797. - # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction - # should be imposed or not (from what I can see it just takes the first language - # ignoring the priority and requires it to correspond the IP). By the way this causes - # Crunchyroll to not work in georestriction cases in some browsers that don't place - # the locale lang first in header. However allowing any language seems to workaround the issue. 
- request.add_header('Accept-Language', '*') - return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs) - @staticmethod def _add_skip_wall(url): parsed_url = compat_urlparse.urlparse(url) @@ -269,6 +256,19 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): '1080': ('80', '108'), } + def _download_webpage(self, url_or_request, *args, **kwargs): + request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request) + else sanitized_Request(url_or_request)) + # Accept-Language must be set explicitly to accept any language to avoid issues + # similar to https://github.com/ytdl-org/youtube-dl/issues/6797. + # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction + # should be imposed or not (from what I can see it just takes the first language + # ignoring the priority and requires it to correspond the IP). By the way this causes + # Crunchyroll to not work in georestriction cases in some browsers that don't place + # the locale lang first in header. However allowing any language seems to workaround the issue. 
+ request.add_header('Accept-Language', '*') + return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs) + def _decrypt_subtitles(self, data, iv, id): data = bytes_to_intlist(compat_b64decode(data)) iv = bytes_to_intlist(compat_b64decode(iv)) From 9c2aaac2685b34143ed770d5e0c7f3906ab1107d Mon Sep 17 00:00:00 2001 From: Emmanuel Froissart Date: Wed, 12 Jun 2019 13:55:07 +0200 Subject: [PATCH 380/785] [tf1] Fix wat id extraction (closes #21365) --- youtube_dl/extractor/tf1.py | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 903f47380..091350848 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -3,6 +3,8 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import js_to_json + class TF1IE(InfoExtractor): """TF1 uses the wat.tv player.""" @@ -43,12 +45,40 @@ class TF1IE(InfoExtractor): }, { 'url': 'http://www.tf1.fr/hd1/documentaire/videos/mylene-farmer-d-une-icone.html', 'only_matching': True, + }, { + 'url': 'https://www.tf1.fr/tmc/quotidien-avec-yann-barthes/videos/quotidien-premiere-partie-11-juin-2019.html', + 'info_dict': { + 'id': '13641379', + 'ext': 'mp4', + 'title': 'md5:f392bc52245dc5ad43771650c96fb620', + 'description': 'md5:44bc54f0a21322f5b91d68e76a544eae', + 'upload_date': '20190611', + }, + 'params': { + # Sometimes wat serves the whole file with the --test option + 'skip_download': True, + }, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - wat_id = self._html_search_regex( - r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P\d{8})\1', - webpage, 'wat id', group='id') + vids_data_string = self._html_search_regex( + r'', + webpage, 'videos data string', group='vids_data_string', default=None) + wat_id = None + if vids_data_string is not None: + vids_data = self._parse_json( + 
vids_data_string, video_id, + transform_source=js_to_json) + video_data = [v for v in vids_data.values() + if 'slug' in v and v['slug'] == video_id] + if len(video_data) > 0 and 'streamId' in video_data[0]: + wat_id = video_data[0]['streamId'] + if wat_id is None: + wat_id = self._html_search_regex( + [r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P\d{8})\1', + r'(["\']?)streamId\1\s*:\s*(["\']?)(?P\d+)\2' + ], + webpage, 'wat id', group='id') return self.url_result('wat:%s' % wat_id, 'Wat') From 1c11204056566c2983f0a837897d882581880f41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 00:37:49 +0700 Subject: [PATCH 381/785] [tf1] Improve extraction and fix issues (closes #21372) --- youtube_dl/extractor/tf1.py | 42 ++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 091350848..55e2a0721 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -2,8 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor - -from ..utils import js_to_json +from ..compat import compat_str class TF1IE(InfoExtractor): @@ -62,23 +61,32 @@ class TF1IE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - vids_data_string = self._html_search_regex( - r'', - webpage, 'videos data string', group='vids_data_string', default=None) + wat_id = None - if vids_data_string is not None: - vids_data = self._parse_json( - vids_data_string, video_id, - transform_source=js_to_json) - video_data = [v for v in vids_data.values() - if 'slug' in v and v['slug'] == video_id] - if len(video_data) > 0 and 'streamId' in video_data[0]: - wat_id = video_data[0]['streamId'] - if wat_id is None: + + data = self._parse_json( + self._search_regex( + r'__APOLLO_STATE__\s*=\s*({.+?})\s*(?:;|)', webpage, + 'data', default='{}'), video_id, 
fatal=False) + + if data: + try: + wat_id = next( + video.get('streamId') + for key, video in data.items() + if isinstance(video, dict) + and video.get('slug') == video_id) + if not isinstance(wat_id, compat_str) or not wat_id.isdigit(): + wat_id = None + except StopIteration: + pass + + if not wat_id: wat_id = self._html_search_regex( - [r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P\d{8})\1', - r'(["\']?)streamId\1\s*:\s*(["\']?)(?P\d+)\2' - ], + (r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P\d{8})\1', + r'(["\']?)streamId\1\s*:\s*(["\']?)(?P\d+)\2'), webpage, 'wat id', group='id') + return self.url_result('wat:%s' % wat_id, 'Wat') From 31ce6e996666e7512990da01ef58785933dcb2be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 02:22:07 +0700 Subject: [PATCH 382/785] [youtube] Add another signature function pattern --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 83b6ac134..b570d5bae 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1314,6 +1314,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): funcname = self._search_regex( (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + r'(?P[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # Obsolete patterns r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P[a-zA-Z0-9$]+)\(', From 21b08463a777a79876721e49d3d07a19bc3fe05e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 05:34:46 +0700 Subject: [PATCH 383/785] [pornhub] Rework extractors (closes #11922, closes #16078, closes #17454, closes #17936) --- youtube_dl/extractor/pornhub.py | 157 +++++++++++++++++++++++++++----- 1 file changed, 132 insertions(+), 25 
deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index cb59d526f..72c351d56 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -387,17 +387,81 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE): }] -class PornHubUserVideosIE(PornHubPlaylistBaseIE): +class PornHubUserIE(PornHubPlaylistBaseIE): + _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P[^/?#&]+))' + _TESTS = [{ + 'url': 'https://www.pornhub.com/model/zoe_ph', + 'playlist_mincount': 118, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious', + 'info_dict': { + 'id': 'liz-vicious', + }, + 'playlist_mincount': 118, + }, { + 'url': 'https://www.pornhub.com/users/russianveet69', + 'playlist_mincount': 0, + }, { + 'url': 'https://www.pornhub.com/channels/povd', + 'playlist_mincount': 0, + }] + + @classmethod + def suitable(cls, url): + return (False + if PornHubUserVideosIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) + else super(PornHubUserIE, cls).suitable(url)) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + user_id = mobj.group('id') + return self.url_result( + '%s/videos' % mobj.group('url'), ie=PornHubUserVideosIE.ie_key(), + video_id=user_id) + + +class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + user_id = mobj.group('id') + + page_url = self._make_page_url(url) + + entries = [] + for page_num in itertools.count(1): + try: + webpage = self._download_webpage( + page_url, user_id, 'Downloading page %d' % page_num, + query={'page': page_num}) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: + break + raise + page_entries = self._extract_entries(webpage, host) + if not page_entries: + break + entries.extend(page_entries) + if not self._has_more(webpage): + break + + return 
self.playlist_result(orderedSet(entries), user_id) + + +class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): _VALID_URL = r'https?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos' _TESTS = [{ - 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', - 'info_dict': { - 'id': 'zoe_ph', - }, - 'playlist_mincount': 171, + 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', + 'only_matching': True, }, { 'url': 'http://www.pornhub.com/users/rushandlia/videos', 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos', + 'info_dict': { + 'id': 'jenny-blighe', + }, + 'playlist_mincount': 149, }, { # default sorting as Top Rated Videos 'url': 'https://www.pornhub.com/channels/povd/videos', @@ -426,26 +490,69 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE): }, { 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', 'only_matching': True, + }, { + # Most Viewed Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv', + 'only_matching': True, + }, { + # Top Rated Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr', + 'only_matching': True, + }, { + # Longest Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg', + 'only_matching': True, + }, { + # Newest Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/upload', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly', + 'only_matching': True, }] - def _real_extract(self, url): + @classmethod + def suitable(cls, url): + return (False + if PornHubUserVideosUploadIE.suitable(url) + else super(PornHubUserVideosIE, cls).suitable(url)) + + def _make_page_url(self, url): + return url + + 
@staticmethod + def _has_more(webpage): + return re.search( + r'''(?x) + ]+\bclass=["\']page_next| + ]+\brel=["\']next| + ]+\bid=["\']moreDataBtn + ''', webpage) is not None + + +class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): + _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos/upload)' + _TESTS = [{ + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', + 'info_dict': { + 'id': 'jenny-blighe', + }, + 'playlist_mincount': 129, + }, { + 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', + 'only_matching': True, + }] + + def _make_page_url(self, url): mobj = re.match(self._VALID_URL, url) - host = mobj.group('host') - user_id = mobj.group('id') + return '%s/ajax' % mobj.group('url') - entries = [] - for page_num in itertools.count(1): - try: - webpage = self._download_webpage( - url, user_id, 'Downloading page %d' % page_num, - query={'page': page_num}) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: - break - raise - page_entries = self._extract_entries(webpage, host) - if not page_entries: - break - entries.extend(page_entries) - - return self.playlist_result(entries, user_id) + @staticmethod + def _has_more(webpage): + return True From 1f7a563ab0efd0745ea66c354255844a9bd36c84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 06:01:43 +0700 Subject: [PATCH 384/785] [pornhub] Add support for downloading single pages and search pages (closes #15570) --- youtube_dl/extractor/pornhub.py | 39 +++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 72c351d56..7de585604 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -409,14 +409,14 @@ class PornHubUserIE(PornHubPlaylistBaseIE): @classmethod def suitable(cls, url): return (False - if 
PornHubUserVideosIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) + if PornHubPagedVideosIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) else super(PornHubUserIE, cls).suitable(url)) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user_id = mobj.group('id') return self.url_result( - '%s/videos' % mobj.group('url'), ie=PornHubUserVideosIE.ie_key(), + '%s/videos' % mobj.group('url'), ie=PornHubPagedVideosIE.ie_key(), video_id=user_id) @@ -426,10 +426,13 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): host = mobj.group('host') user_id = mobj.group('id') + page = int_or_none(self._search_regex( + r'\bpage=(\d+)', url, 'page', default=None)) + page_url = self._make_page_url(url) entries = [] - for page_num in itertools.count(1): + for page_num in (page, ) if page is not None else itertools.count(1): try: webpage = self._download_webpage( page_url, user_id, 'Downloading page %d' % page_num, @@ -448,10 +451,17 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): return self.playlist_result(orderedSet(entries), user_id) -class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos' +class PornHubPagedVideosIE(PornHubPagedPlaylistBaseIE): + _VALID_URL = r'''(?x) + https?:// + (?:[^/]+\.)?(?Ppornhub\.(?:com|net))/ + (?: + (?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos| + video/search + ) + ''' _TESTS = [{ - 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', + 'url': 'https://www.pornhub.com/model/zoe_ph/videos', 'only_matching': True, }, { 'url': 'http://www.pornhub.com/users/rushandlia/videos', @@ -462,6 +472,12 @@ class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): 'id': 'jenny-blighe', }, 'playlist_mincount': 149, + }, { + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3', + 'info_dict': { + 'id': 'jenny-blighe', + }, + 'playlist_mincount': 40, }, { # default 
sorting as Top Rated Videos 'url': 'https://www.pornhub.com/channels/povd/videos', @@ -484,12 +500,6 @@ class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): }, { 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/model/jayndrea/videos/upload', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', - 'only_matching': True, }, { # Most Viewed Videos 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv', @@ -506,9 +516,6 @@ class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): # Newest Videos 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm', 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/upload', - 'only_matching': True, }, { 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid', 'only_matching': True, @@ -521,7 +528,7 @@ class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): def suitable(cls, url): return (False if PornHubUserVideosUploadIE.suitable(url) - else super(PornHubUserVideosIE, cls).suitable(url)) + else super(PornHubPagedVideosIE, cls).suitable(url)) def _make_page_url(self, url): return url From 9634de178d35c5cd767b183c2be82b14bef84209 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 08:37:07 +0700 Subject: [PATCH 385/785] [pornhub] Add support for more paged video sources --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/pornhub.py | 101 +++++++++++++++++------------ 2 files changed, 62 insertions(+), 44 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b1ed8a4b2..9cd7d3ac4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -892,8 +892,9 @@ from .porncom import PornComIE from .pornhd import PornHdIE from .pornhub import ( PornHubIE, - PornHubPlaylistIE, - PornHubUserVideosIE, + PornHubUserIE, + 
PornHubPagedVideoListIE, + PornHubUserVideosUploadIE, ) from .pornotube import PornotubeIE from .pornovoisines import PornoVoisinesIE diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 7de585604..11b8cfcf7 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -372,23 +372,8 @@ class PornHubPlaylistBaseIE(PornHubBaseIE): entries, playlist_id, title, playlist.get('description')) -class PornHubPlaylistIE(PornHubPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/playlist/(?P\d+)' - _TESTS = [{ - 'url': 'http://www.pornhub.com/playlist/4667351', - 'info_dict': { - 'id': '4667351', - 'title': 'Nataly Hot', - }, - 'playlist_mincount': 2, - }, { - 'url': 'https://de.pornhub.com/playlist/4667351', - 'only_matching': True, - }] - - class PornHubUserIE(PornHubPlaylistBaseIE): - _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P[^/?#&]+))' + _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P[^/?#&]+))(?:[?#&]|/(?!videos)|$)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph', 'playlist_mincount': 118, @@ -400,23 +385,20 @@ class PornHubUserIE(PornHubPlaylistBaseIE): 'playlist_mincount': 118, }, { 'url': 'https://www.pornhub.com/users/russianveet69', - 'playlist_mincount': 0, + 'only_matching': True, }, { 'url': 'https://www.pornhub.com/channels/povd', - 'playlist_mincount': 0, + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/model/zoe_ph?abc=1', + 'only_matching': True, }] - @classmethod - def suitable(cls, url): - return (False - if PornHubPagedVideosIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) - else super(PornHubUserIE, cls).suitable(url)) - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user_id = mobj.group('id') return self.url_result( - '%s/videos' % mobj.group('url'), ie=PornHubPagedVideosIE.ie_key(), + '%s/videos' % 
mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id) @@ -424,7 +406,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) host = mobj.group('host') - user_id = mobj.group('id') + item_id = mobj.group('id') page = int_or_none(self._search_regex( r'\bpage=(\d+)', url, 'page', default=None)) @@ -435,7 +417,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): for page_num in (page, ) if page is not None else itertools.count(1): try: webpage = self._download_webpage( - page_url, user_id, 'Downloading page %d' % page_num, + page_url, item_id, 'Downloading page %d' % page_num, query={'page': page_num}) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: @@ -448,18 +430,11 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): if not self._has_more(webpage): break - return self.playlist_result(orderedSet(entries), user_id) + return self.playlist_result(orderedSet(entries), item_id) -class PornHubPagedVideosIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'''(?x) - https?:// - (?:[^/]+\.)?(?Ppornhub\.(?:com|net))/ - (?: - (?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos| - video/search - ) - ''' +class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): + _VALID_URL = r'https?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?P(?:[^/]+/)*[^/?#&]+)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph/videos', 'only_matching': True, @@ -469,20 +444,20 @@ class PornHubPagedVideosIE(PornHubPagedPlaylistBaseIE): }, { 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos', 'info_dict': { - 'id': 'jenny-blighe', + 'id': 'pornstar/jenny-blighe/videos', }, 'playlist_mincount': 149, }, { 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3', 'info_dict': { - 'id': 'jenny-blighe', + 'id': 'pornstar/jenny-blighe/videos', }, 'playlist_mincount': 40, }, { # default sorting as Top Rated Videos 'url': 
'https://www.pornhub.com/channels/povd/videos', 'info_dict': { - 'id': 'povd', + 'id': 'channels/povd/videos', }, 'playlist_mincount': 293, }, { @@ -522,13 +497,55 @@ class PornHubPagedVideosIE(PornHubPagedPlaylistBaseIE): }, { 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly', 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video?page=3', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video/search?search=123', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/categories/teen', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/categories/teen?page=3', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/hd', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/hd?page=3', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/described-video', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/described-video?page=2', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/playlist/44121572', + 'info_dict': { + 'id': 'playlist/44121572', + }, + 'playlist_mincount': 132, + }, { + 'url': 'https://www.pornhub.com/playlist/4667351', + 'only_matching': True, + }, { + 'url': 'https://de.pornhub.com/playlist/4667351', + 'only_matching': True, }] @classmethod def suitable(cls, url): return (False - if PornHubUserVideosUploadIE.suitable(url) - else super(PornHubPagedVideosIE, cls).suitable(url)) + if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) + else super(PornHubPagedVideoListIE, cls).suitable(url)) def _make_page_url(self, url): return url From 091c9b43164f6f3b31f5f911c88a4aeaa0358429 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Jun 2019 02:13:46 +0700 Subject: [PATCH 
386/785] [vimeo:likes] Implement extrator in terms of channel extractor This allows to obtain videos' ids before extraction (#21493) --- youtube_dl/extractor/vimeo.py | 50 +++++------------------------------ 1 file changed, 7 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index a41178bab..aeee7df8f 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -16,7 +16,6 @@ from ..utils import ( determine_ext, ExtractorError, js_to_json, - InAdvancePagedList, int_or_none, merge_dicts, NO_DEFAULT, @@ -1065,7 +1064,7 @@ class VimeoWatchLaterIE(VimeoChannelIE): return self._extract_videos('watchlater', 'https://vimeo.com/watchlater') -class VimeoLikesIE(InfoExtractor): +class VimeoLikesIE(VimeoChannelIE): _VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P[^/]+)/likes/?(?:$|[?#]|sort:)' IE_NAME = 'vimeo:likes' IE_DESC = 'Vimeo user likes' @@ -1073,55 +1072,20 @@ class VimeoLikesIE(InfoExtractor): 'url': 'https://vimeo.com/user755559/likes/', 'playlist_mincount': 293, 'info_dict': { - 'id': 'user755559_likes', - 'description': 'See all the videos urza likes', - 'title': 'Videos urza likes', + 'id': 'user755559', + 'title': 'urza’s Likes', }, }, { 'url': 'https://vimeo.com/stormlapse/likes', 'only_matching': True, }] + def _page_url(self, base_url, pagenum): + return '%s/page:%d/' % (base_url, pagenum) + def _real_extract(self, url): user_id = self._match_id(url) - webpage = self._download_webpage(url, user_id) - page_count = self._int( - self._search_regex( - r'''(?x)
  • - .*?
  • \s* - ''', webpage, 'page count', default=1), - 'page count', fatal=True) - PAGE_SIZE = 12 - title = self._html_search_regex( - r'(?s)

    (.+?)

    ', webpage, 'title', fatal=False) - description = self._html_search_meta('description', webpage) - - def _get_page(idx): - page_url = 'https://vimeo.com/%s/likes/page:%d/sort:date' % ( - user_id, idx + 1) - webpage = self._download_webpage( - page_url, user_id, - note='Downloading page %d/%d' % (idx + 1, page_count)) - video_list = self._search_regex( - r'(?s)
      ]*>(.*?)
    ', - webpage, 'video content') - paths = re.findall( - r']*>\s* Date: Sun, 23 Jun 2019 02:16:09 +0700 Subject: [PATCH 387/785] [vimeo:channel,group] Make title extraction no fatal --- youtube_dl/extractor/vimeo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index aeee7df8f..b5b44a79a 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -813,7 +813,8 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): return '%s/videos/page:%d/' % (base_url, pagenum) def _extract_list_title(self, webpage): - return self._TITLE or self._html_search_regex(self._TITLE_RE, webpage, 'list title') + return self._TITLE or self._html_search_regex( + self._TITLE_RE, webpage, 'list title', fatal=False) def _login_list_password(self, page_url, list_id, webpage): login_form = self._search_regex( @@ -954,7 +955,7 @@ class VimeoGroupsIE(VimeoAlbumIE): }] def _extract_list_title(self, webpage): - return self._og_search_title(webpage) + return self._og_search_title(webpage, fatal=False) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 695720ebe81166b3ee249eb3916e3c7819ef57a8 Mon Sep 17 00:00:00 2001 From: smed79 <1873139+smed79@users.noreply.github.com> Date: Sat, 22 Jun 2019 22:31:43 +0100 Subject: [PATCH 388/785] [openload] Add support for oload.life (#21495) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 32d546e4e..b2918dc85 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -244,7 +244,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' + _DOMAINS = 
r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' _VALID_URL = r'''(?x) https?:// (?P @@ -363,6 +363,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.website/embed/drTBl1aOTvk/', 'only_matching': True, + }, { + 'url': 'https://oload.life/embed/oOzZjNPw9Dc/', + 'only_matching': True, }, { 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', 'only_matching': True, From 3031b7c4ed3a446dc83123ce34780f4db56ad4ef Mon Sep 17 00:00:00 2001 From: Kyle <40903431+kylepw@users.noreply.github.com> Date: Sun, 23 Jun 2019 19:04:05 +0900 Subject: [PATCH 389/785] [brightcove:new] Add support for playlists (#21331) --- youtube_dl/extractor/brightcove.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index c0345e2c3..58ec5c979 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -483,7 +483,7 @@ class BrightcoveLegacyIE(InfoExtractor): class BrightcoveNewIE(AdobePassIE): IE_NAME = 'brightcove:new' - _VALID_URL = r'https?://players\.brightcove\.net/(?P\d+)/(?P[^/]+)_(?P[^/]+)/index\.html\?.*videoId=(?P\d+|ref:[^&]+)' + _VALID_URL = r'https?://players\.brightcove\.net/(?P\d+)/(?P[^/]+)_(?P[^/]+)/index\.html\?.*(?Pvideo|playlist)Id=(?P\d+|ref:[^&]+)' _TESTS = [{ 'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001', 'md5': 'c8100925723840d4b0d243f7025703be', @@ -516,6 +516,21 @@ class BrightcoveNewIE(AdobePassIE): # m3u8 download 'skip_download': True, } + }, { + # playlist stream + 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001', + 'info_dict': { + 'id': '5718313430001', + 'title': 'No Audio Playlist', + }, + 'playlist_count': 7, + 'params': 
{ + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001', + 'only_matching': True, }, { # ref: prefixed video id 'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442', @@ -715,7 +730,7 @@ class BrightcoveNewIE(AdobePassIE): 'ip_blocks': smuggled_data.get('geo_ip_blocks'), }) - account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups() + account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage( 'http://players.brightcove.net/%s/%s_%s/index.min.js' @@ -736,7 +751,7 @@ class BrightcoveNewIE(AdobePassIE): r'policyKey\s*:\s*(["\'])(?P.+?)\1', webpage, 'policy key', group='pk') - api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id) + api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) headers = { 'Accept': 'application/json;pk=%s' % policy_key, } @@ -771,5 +786,12 @@ class BrightcoveNewIE(AdobePassIE): 'tveToken': tve_token, }) + if content_type == 'playlist': + return self.playlist_result( + [self._parse_brightcove_metadata(vid, vid.get('id'), headers) + for vid in json_data.get('videos', []) if vid.get('id')], + json_data.get('id'), json_data.get('name'), + json_data.get('description')) + return self._parse_brightcove_metadata( json_data, video_id, headers=headers) From 27cef8885de4ffaa33f96973df3c50b62504bd49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 24 Jun 2019 23:01:43 +0700 Subject: [PATCH 390/785] [beeg] Add support for api/v6 v2 URLs (closes #21511) --- youtube_dl/extractor/beeg.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index 
192f11ea6..c15a0ac8f 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -1,7 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_urlparse, +) from ..utils import ( int_or_none, unified_timestamp, @@ -11,6 +14,7 @@ from ..utils import ( class BeegIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P\d+)' _TESTS = [{ + # api/v6 v1 'url': 'http://beeg.com/5416503', 'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820', 'info_dict': { @@ -24,6 +28,10 @@ class BeegIE(InfoExtractor): 'tags': list, 'age_limit': 18, } + }, { + # api/v6 v2 + 'url': 'https://beeg.com/1941093077?t=911-1391', + 'only_matching': True, }, { 'url': 'https://beeg.porn/video/5416503', 'only_matching': True, @@ -41,11 +49,22 @@ class BeegIE(InfoExtractor): r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version', default='1546225636701') + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + t = qs.get('t', [''])[0].split('-') + if len(t) > 1: + query = { + 'v': 2, + 's': t[0], + 'e': t[1], + } + else: + query = {'v': 1} + for api_path in ('', 'api.'): video = self._download_json( 'https://%sbeeg.com/api/v6/%s/video/%s' % (api_path, beeg_version, video_id), video_id, - fatal=api_path == 'api.') + fatal=api_path == 'api.', query=query) if video: break From 1d83e9bd4b2dbc854f6f8b7f4baa14602a288c9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 25 Jun 2019 00:12:31 +0700 Subject: [PATCH 391/785] [nfb] Remove extractor (closes #21518) Covered by generic extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/nfb.py | 112 ----------------------------- 2 files changed, 113 deletions(-) delete mode 100644 youtube_dl/extractor/nfb.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9cd7d3ac4..530474f3f 100644 --- 
a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -745,7 +745,6 @@ from .nexx import ( NexxIE, NexxEmbedIE, ) -from .nfb import NFBIE from .nfl import NFLIE from .nhk import NhkVodIE from .nhl import NHLIE diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py deleted file mode 100644 index adcc636bc..000000000 --- a/youtube_dl/extractor/nfb.py +++ /dev/null @@ -1,112 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - clean_html, - determine_ext, - int_or_none, - qualities, - urlencode_postdata, - xpath_text, -) - - -class NFBIE(InfoExtractor): - IE_NAME = 'nfb' - IE_DESC = 'National Film Board of Canada' - _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P[\da-z_-]+)' - - _TEST = { - 'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', - 'info_dict': { - 'id': 'qallunaat_why_white_people_are_funny', - 'ext': 'flv', - 'title': 'Qallunaat! Why White People Are Funny ', - 'description': 'md5:6b8e32dde3abf91e58857b174916620c', - 'duration': 3128, - 'creator': 'Mark Sandiford', - 'uploader': 'Mark Sandiford', - }, - 'params': { - # rtmp download - 'skip_download': True, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - config = self._download_xml( - 'https://www.nfb.ca/film/%s/player_config' % video_id, - video_id, 'Downloading player config XML', - data=urlencode_postdata({'getConfig': 'true'}), - headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - 'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf' - }) - - title, description, thumbnail, duration, uploader, author = [None] * 6 - thumbnails, formats = [[]] * 2 - subtitles = {} - - for media in config.findall('./player/stream/media'): - if media.get('type') == 'posterImage': - quality_key = qualities(('low', 'high')) - thumbnails = [] - for asset in media.findall('assets/asset'): - asset_url = xpath_text(asset, 'default/url', 
default=None) - if not asset_url: - continue - quality = asset.get('quality') - thumbnails.append({ - 'url': asset_url, - 'id': quality, - 'preference': quality_key(quality), - }) - elif media.get('type') == 'video': - title = xpath_text(media, 'title', fatal=True) - for asset in media.findall('assets/asset'): - quality = asset.get('quality') - height = int_or_none(self._search_regex( - r'^(\d+)[pP]$', quality or '', 'height', default=None)) - for node in asset: - streamer = xpath_text(node, 'streamerURI', default=None) - if not streamer: - continue - play_path = xpath_text(node, 'url', default=None) - if not play_path: - continue - formats.append({ - 'url': streamer, - 'app': streamer.split('/', 3)[3], - 'play_path': play_path, - 'rtmp_live': False, - 'ext': 'flv', - 'format_id': '%s-%s' % (node.tag, quality) if quality else node.tag, - 'height': height, - }) - self._sort_formats(formats) - description = clean_html(xpath_text(media, 'description')) - uploader = xpath_text(media, 'author') - duration = int_or_none(media.get('duration')) - for subtitle in media.findall('./subtitles/subtitle'): - subtitle_url = xpath_text(subtitle, 'url', default=None) - if not subtitle_url: - continue - lang = xpath_text(subtitle, 'lang', default='en') - subtitles.setdefault(lang, []).append({ - 'url': subtitle_url, - 'ext': (subtitle.get('format') or determine_ext(subtitle_url)).lower(), - }) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnails': thumbnails, - 'duration': duration, - 'creator': uploader, - 'uploader': uploader, - 'formats': formats, - 'subtitles': subtitles, - } From 509bcec37ba26a8c7bc263cf8067495ec7cf120a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 27 Jun 2019 12:06:09 +0100 Subject: [PATCH 392/785] [fusion] fix extraction(closes #17775)(closes #21269) --- youtube_dl/extractor/fusion.py | 69 +++++++++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 10 deletions(-) diff --git 
a/youtube_dl/extractor/fusion.py b/youtube_dl/extractor/fusion.py index 25e284d46..a3f44b812 100644 --- a/youtube_dl/extractor/fusion.py +++ b/youtube_dl/extractor/fusion.py @@ -1,35 +1,84 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .ooyala import OoyalaIE +from ..utils import ( + determine_ext, + int_or_none, + mimetype2ext, + parse_iso8601, +) class FusionIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/video/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/(?:video/|show/.+?\bvideo=)(?P\d+)' _TESTS = [{ 'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/', 'info_dict': { - 'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P', + 'id': '3145868', 'ext': 'mp4', 'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs', 'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7', 'duration': 140.0, + 'timestamp': 1442589635, + 'uploader': 'UNIVISON', + 'upload_date': '20150918', }, 'params': { 'skip_download': True, }, - 'add_ie': ['Ooyala'], + 'add_ie': ['Anvato'], }, { 'url': 'http://fusion.tv/video/201781', 'only_matching': True, + }, { + 'url': 'https://fusion.tv/show/food-exposed-with-nelufar-hedayat/?ancla=full-episodes&video=588644', + 'only_matching': True, }] def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + video_id = self._match_id(url) + video = self._download_json( + 'https://platform.fusion.net/wp-json/fusiondotnet/v1/video/' + video_id, video_id) - ooyala_code = self._search_regex( - r'data-ooyala-id=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'ooyala code', group='code') + info = { + 'id': video_id, + 'title': video['title'], + 'description': video.get('excerpt'), + 'timestamp': parse_iso8601(video.get('published')), + 'series': video.get('show'), + } - return OoyalaIE._build_url_result(ooyala_code) + formats = [] + src = video.get('src') or 
{} + for f_id, f in src.items(): + for q_id, q in f.items(): + q_url = q.get('url') + if not q_url: + continue + ext = determine_ext(q_url, mimetype2ext(q.get('type'))) + if ext == 'smil': + formats.extend(self._extract_smil_formats(q_url, video_id, fatal=False)) + elif f_id == 'm3u8-variant' or (ext == 'm3u8' and q_id == 'Variant'): + formats.extend(self._extract_m3u8_formats( + q_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'format_id': '-'.join([f_id, q_id]), + 'url': q_url, + 'width': int_or_none(q.get('width')), + 'height': int_or_none(q.get('height')), + 'tbr': int_or_none(self._search_regex(r'_(\d+)\.m(?:p4|3u8)', q_url, 'bitrate')), + 'ext': 'mp4' if ext == 'm3u8' else ext, + 'protocol': 'm3u8_native' if ext == 'm3u8' else 'https', + }) + if formats: + self._sort_formats(formats) + info['formats'] = formats + else: + info.update({ + '_type': 'url', + 'url': 'anvato:uni:' + video['video_ids']['anvato'], + 'ie_key': 'Anvato', + }) + + return info From f5629946608861097b6ce5095efb9a9e8ac7f056 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 27 Jun 2019 22:18:10 +0700 Subject: [PATCH 393/785] [drtv] Relax _VALID_URL --- youtube_dl/extractor/drtv.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 0c7e350f0..218f10209 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -24,7 +24,7 @@ from ..utils import ( class DRTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' + _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' _GEO_BYPASS = False _GEO_COUNTRIES = ['DK'] IE_NAME = 'drtv' @@ -80,6 +80,9 @@ class DRTVIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 
'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9', + 'only_matching': True, }] def _real_extract(self, url): From 6625bf200d08baf64764e99caa48b4fb3a48ff8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 27 Jun 2019 17:24:46 +0200 Subject: [PATCH 394/785] [mixer:vod] Relax _VALID_URL (closes #21531) (#21536) --- youtube_dl/extractor/beampro.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py index 2eaec1ab4..e264a145f 100644 --- a/youtube_dl/extractor/beampro.py +++ b/youtube_dl/extractor/beampro.py @@ -99,8 +99,8 @@ class BeamProLiveIE(BeamProBaseIE): class BeamProVodIE(BeamProBaseIE): IE_NAME = 'Mixer:vod' - _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P\d+)' - _TEST = { + _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P\w+)' + _TESTS = [{ 'url': 'https://mixer.com/willow8714?vod=2259830', 'md5': 'b2431e6e8347dc92ebafb565d368b76b', 'info_dict': { @@ -119,7 +119,10 @@ class BeamProVodIE(BeamProBaseIE): 'params': { 'skip_download': True, }, - } + }, { + 'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw', + 'only_matching': True, + }] @staticmethod def _extract_format(vod, vod_type): From 4f71473ef186c0797596e96755e86df80f357a65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 27 Jun 2019 22:59:30 +0700 Subject: [PATCH 395/785] [go] Add support for disneynow.com (closes #21528) --- youtube_dl/extractor/go.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 206d89e82..5916f9a8f 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -36,7 +36,7 @@ class GoIE(AdobePassIE): 'resource_id': 'DisneyXD', } } - _VALID_URL = r'https?://(?:(?P%s)\.)?go\.com/(?:(?:[^/]+/)*(?Pvdka\w+)|(?:[^/]+/)*(?P[^/?#]+))'\ + 
_VALID_URL = r'https?://(?:(?:(?P%s)\.)?go|disneynow)\.com/(?:(?:[^/]+/)*(?Pvdka\w+)|(?:[^/]+/)*(?P[^/?#]+))'\ % '|'.join(list(_SITE_INFO.keys()) + ['disneynow']) _TESTS = [{ 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', @@ -71,6 +71,9 @@ class GoIE(AdobePassIE): # brand 008 'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013', 'only_matching': True, + }, { + 'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013', + 'only_matching': True, }] def _extract_videos(self, brand, video_id='-1', show_id='-1'): @@ -89,7 +92,7 @@ class GoIE(AdobePassIE): # There may be inner quotes, e.g. data-video-id="'VDKA3609139'" # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', - default=None) + default=video_id) if not site_info: brand = self._search_regex( (r'data-brand=\s*["\']\s*(\d+)', From 232331c0d2f446af760403ed5a0439cdc3deb112 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 27 Jun 2019 23:55:15 +0700 Subject: [PATCH 396/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/ChangeLog b/ChangeLog index 2d9988da3..985d14a28 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,26 @@ +version + +Extractors ++ [go] Add support for disneynow.com (#21528) +* [mixer:vod] Relax URL regular expression (#21531, #21536) +* [drtv] Relax URL regular expression +* [fusion] Fix extraction (#17775, #21269) +- [nfb] Remove extractor (#21518) ++ [beeg] Add support for api/v6 v2 URLs (#21511) ++ [brightcove:new] Add support for playlists (#21331) ++ [openload] Add support for oload.life (#21495) +* [vimeo:channel,group] Make title extraction non fatal +* [vimeo:likes] Implement extrator in terms of channel extractor (#21493) ++ [pornhub] Add support for more paged video sources ++ [pornhub] Add support for 
downloading single pages and search pages (#15570) +* [pornhub] Rework extractors (#11922, #16078, #17454, #17936) ++ [youtube] Add another signature function pattern +* [tf1] Fix extraction (#21365, #21372) +* [crunchyroll] Move Accept-Language workaround to video extractor since + it causes playlists not to list any videos +* [crunchyroll:playlist] Fix and relax title extraction (#21291, #21443) + + version 2019.06.21 Core From 8c8cae91ece9841567aa48095245f92ae8f4b295 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 27 Jun 2019 23:57:33 +0700 Subject: [PATCH 397/785] release 2019.06.27 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 6 +++--- youtube_dl/version.py | 2 +- 8 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 7a2b16827..d7c15e85a 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] 
youtube-dl version 2019.06.21 + [debug] youtube-dl version 2019.06.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index d6180e672..741862590 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 7cb981abf..4fb035ea4 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 802fa2313..73ed62012 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've checked 
that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.21 + [debug] youtube-dl version 2019.06.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 5153864a1..a9d3653e2 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 985d14a28..4ae3d6c7c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.06.27 Extractors + [go] Add support for disneynow.com (#21528) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index bfd15b4dc..55ae43144 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -581,7 +581,6 @@ - **NextTV**: 壹電視 - **Nexx** - **NexxEmbed** - - **nfb**: National Film Board of Canada - **nfl.com** - **NhkVod** - **nhl.com** @@ -692,8 +691,9 @@ - **PornerBros** - **PornHd** - **PornHub**: PornHub and Thumbzilla - - **PornHubPlaylist** - - **PornHubUserVideos** + - **PornHubPagedVideoList** + - 
**PornHubUser** + - **PornHubUserVideosUpload** - **Pornotube** - **PornoVoisines** - **PornoXO** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 33474a452..01896873d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.06.21' +__version__ = '2019.06.27' From f7a147e3b63a3165c425c56ee19e66f86900128c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 29 Jun 2019 00:32:43 +0700 Subject: [PATCH 398/785] [utils] Introduce random_user_agent and use as default User-Agent (closes #21546) --- youtube_dl/extractor/openload.py | 1595 +----------------------------- youtube_dl/utils.py | 1586 ++++++++++++++++++++++++++++- 2 files changed, 1590 insertions(+), 1591 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index b2918dc85..237b0d8fb 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import json import os -import random import re import subprocess import tempfile @@ -380,1595 +379,15 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }] - _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' - _CHROME_VERSIONS = ( - '74.0.3729.129', - '76.0.3780.3', - '76.0.3780.2', - '74.0.3729.128', - '76.0.3780.1', - '76.0.3780.0', - '75.0.3770.15', - '74.0.3729.127', - '74.0.3729.126', - '76.0.3779.1', - '76.0.3779.0', - '75.0.3770.14', - '74.0.3729.125', - '76.0.3778.1', - '76.0.3778.0', - '75.0.3770.13', - '74.0.3729.124', - '74.0.3729.123', - '73.0.3683.121', - '76.0.3777.1', - '76.0.3777.0', - '75.0.3770.12', - '74.0.3729.122', - '76.0.3776.4', - '75.0.3770.11', - '74.0.3729.121', - '76.0.3776.3', - '76.0.3776.2', - '73.0.3683.120', - '74.0.3729.120', - '74.0.3729.119', - '74.0.3729.118', - '76.0.3776.1', - '76.0.3776.0', - '76.0.3775.5', - 
'75.0.3770.10', - '74.0.3729.117', - '76.0.3775.4', - '76.0.3775.3', - '74.0.3729.116', - '75.0.3770.9', - '76.0.3775.2', - '76.0.3775.1', - '76.0.3775.0', - '75.0.3770.8', - '74.0.3729.115', - '74.0.3729.114', - '76.0.3774.1', - '76.0.3774.0', - '75.0.3770.7', - '74.0.3729.113', - '74.0.3729.112', - '74.0.3729.111', - '76.0.3773.1', - '76.0.3773.0', - '75.0.3770.6', - '74.0.3729.110', - '74.0.3729.109', - '76.0.3772.1', - '76.0.3772.0', - '75.0.3770.5', - '74.0.3729.108', - '74.0.3729.107', - '76.0.3771.1', - '76.0.3771.0', - '75.0.3770.4', - '74.0.3729.106', - '74.0.3729.105', - '75.0.3770.3', - '74.0.3729.104', - '74.0.3729.103', - '74.0.3729.102', - '75.0.3770.2', - '74.0.3729.101', - '75.0.3770.1', - '75.0.3770.0', - '74.0.3729.100', - '75.0.3769.5', - '75.0.3769.4', - '74.0.3729.99', - '75.0.3769.3', - '75.0.3769.2', - '75.0.3768.6', - '74.0.3729.98', - '75.0.3769.1', - '75.0.3769.0', - '74.0.3729.97', - '73.0.3683.119', - '73.0.3683.118', - '74.0.3729.96', - '75.0.3768.5', - '75.0.3768.4', - '75.0.3768.3', - '75.0.3768.2', - '74.0.3729.95', - '74.0.3729.94', - '75.0.3768.1', - '75.0.3768.0', - '74.0.3729.93', - '74.0.3729.92', - '73.0.3683.117', - '74.0.3729.91', - '75.0.3766.3', - '74.0.3729.90', - '75.0.3767.2', - '75.0.3767.1', - '75.0.3767.0', - '74.0.3729.89', - '73.0.3683.116', - '75.0.3766.2', - '74.0.3729.88', - '75.0.3766.1', - '75.0.3766.0', - '74.0.3729.87', - '73.0.3683.115', - '74.0.3729.86', - '75.0.3765.1', - '75.0.3765.0', - '74.0.3729.85', - '73.0.3683.114', - '74.0.3729.84', - '75.0.3764.1', - '75.0.3764.0', - '74.0.3729.83', - '73.0.3683.113', - '75.0.3763.2', - '75.0.3761.4', - '74.0.3729.82', - '75.0.3763.1', - '75.0.3763.0', - '74.0.3729.81', - '73.0.3683.112', - '75.0.3762.1', - '75.0.3762.0', - '74.0.3729.80', - '75.0.3761.3', - '74.0.3729.79', - '73.0.3683.111', - '75.0.3761.2', - '74.0.3729.78', - '74.0.3729.77', - '75.0.3761.1', - '75.0.3761.0', - '73.0.3683.110', - '74.0.3729.76', - '74.0.3729.75', - '75.0.3760.0', - 
'74.0.3729.74', - '75.0.3759.8', - '75.0.3759.7', - '75.0.3759.6', - '74.0.3729.73', - '75.0.3759.5', - '74.0.3729.72', - '73.0.3683.109', - '75.0.3759.4', - '75.0.3759.3', - '74.0.3729.71', - '75.0.3759.2', - '74.0.3729.70', - '73.0.3683.108', - '74.0.3729.69', - '75.0.3759.1', - '75.0.3759.0', - '74.0.3729.68', - '73.0.3683.107', - '74.0.3729.67', - '75.0.3758.1', - '75.0.3758.0', - '74.0.3729.66', - '73.0.3683.106', - '74.0.3729.65', - '75.0.3757.1', - '75.0.3757.0', - '74.0.3729.64', - '73.0.3683.105', - '74.0.3729.63', - '75.0.3756.1', - '75.0.3756.0', - '74.0.3729.62', - '73.0.3683.104', - '75.0.3755.3', - '75.0.3755.2', - '73.0.3683.103', - '75.0.3755.1', - '75.0.3755.0', - '74.0.3729.61', - '73.0.3683.102', - '74.0.3729.60', - '75.0.3754.2', - '74.0.3729.59', - '75.0.3753.4', - '74.0.3729.58', - '75.0.3754.1', - '75.0.3754.0', - '74.0.3729.57', - '73.0.3683.101', - '75.0.3753.3', - '75.0.3752.2', - '75.0.3753.2', - '74.0.3729.56', - '75.0.3753.1', - '75.0.3753.0', - '74.0.3729.55', - '73.0.3683.100', - '74.0.3729.54', - '75.0.3752.1', - '75.0.3752.0', - '74.0.3729.53', - '73.0.3683.99', - '74.0.3729.52', - '75.0.3751.1', - '75.0.3751.0', - '74.0.3729.51', - '73.0.3683.98', - '74.0.3729.50', - '75.0.3750.0', - '74.0.3729.49', - '74.0.3729.48', - '74.0.3729.47', - '75.0.3749.3', - '74.0.3729.46', - '73.0.3683.97', - '75.0.3749.2', - '74.0.3729.45', - '75.0.3749.1', - '75.0.3749.0', - '74.0.3729.44', - '73.0.3683.96', - '74.0.3729.43', - '74.0.3729.42', - '75.0.3748.1', - '75.0.3748.0', - '74.0.3729.41', - '75.0.3747.1', - '73.0.3683.95', - '75.0.3746.4', - '74.0.3729.40', - '74.0.3729.39', - '75.0.3747.0', - '75.0.3746.3', - '75.0.3746.2', - '74.0.3729.38', - '75.0.3746.1', - '75.0.3746.0', - '74.0.3729.37', - '73.0.3683.94', - '75.0.3745.5', - '75.0.3745.4', - '75.0.3745.3', - '75.0.3745.2', - '74.0.3729.36', - '75.0.3745.1', - '75.0.3745.0', - '75.0.3744.2', - '74.0.3729.35', - '73.0.3683.93', - '74.0.3729.34', - '75.0.3744.1', - '75.0.3744.0', - 
'74.0.3729.33', - '73.0.3683.92', - '74.0.3729.32', - '74.0.3729.31', - '73.0.3683.91', - '75.0.3741.2', - '75.0.3740.5', - '74.0.3729.30', - '75.0.3741.1', - '75.0.3741.0', - '74.0.3729.29', - '75.0.3740.4', - '73.0.3683.90', - '74.0.3729.28', - '75.0.3740.3', - '73.0.3683.89', - '75.0.3740.2', - '74.0.3729.27', - '75.0.3740.1', - '75.0.3740.0', - '74.0.3729.26', - '73.0.3683.88', - '73.0.3683.87', - '74.0.3729.25', - '75.0.3739.1', - '75.0.3739.0', - '73.0.3683.86', - '74.0.3729.24', - '73.0.3683.85', - '75.0.3738.4', - '75.0.3738.3', - '75.0.3738.2', - '75.0.3738.1', - '75.0.3738.0', - '74.0.3729.23', - '73.0.3683.84', - '74.0.3729.22', - '74.0.3729.21', - '75.0.3737.1', - '75.0.3737.0', - '74.0.3729.20', - '73.0.3683.83', - '74.0.3729.19', - '75.0.3736.1', - '75.0.3736.0', - '74.0.3729.18', - '73.0.3683.82', - '74.0.3729.17', - '75.0.3735.1', - '75.0.3735.0', - '74.0.3729.16', - '73.0.3683.81', - '75.0.3734.1', - '75.0.3734.0', - '74.0.3729.15', - '73.0.3683.80', - '74.0.3729.14', - '75.0.3733.1', - '75.0.3733.0', - '75.0.3732.1', - '74.0.3729.13', - '74.0.3729.12', - '73.0.3683.79', - '74.0.3729.11', - '75.0.3732.0', - '74.0.3729.10', - '73.0.3683.78', - '74.0.3729.9', - '74.0.3729.8', - '74.0.3729.7', - '75.0.3731.3', - '75.0.3731.2', - '75.0.3731.0', - '74.0.3729.6', - '73.0.3683.77', - '73.0.3683.76', - '75.0.3730.5', - '75.0.3730.4', - '73.0.3683.75', - '74.0.3729.5', - '73.0.3683.74', - '75.0.3730.3', - '75.0.3730.2', - '74.0.3729.4', - '73.0.3683.73', - '73.0.3683.72', - '75.0.3730.1', - '75.0.3730.0', - '74.0.3729.3', - '73.0.3683.71', - '74.0.3729.2', - '73.0.3683.70', - '74.0.3729.1', - '74.0.3729.0', - '74.0.3726.4', - '73.0.3683.69', - '74.0.3726.3', - '74.0.3728.0', - '74.0.3726.2', - '73.0.3683.68', - '74.0.3726.1', - '74.0.3726.0', - '74.0.3725.4', - '73.0.3683.67', - '73.0.3683.66', - '74.0.3725.3', - '74.0.3725.2', - '74.0.3725.1', - '74.0.3724.8', - '74.0.3725.0', - '73.0.3683.65', - '74.0.3724.7', - '74.0.3724.6', - '74.0.3724.5', - 
'74.0.3724.4', - '74.0.3724.3', - '74.0.3724.2', - '74.0.3724.1', - '74.0.3724.0', - '73.0.3683.64', - '74.0.3723.1', - '74.0.3723.0', - '73.0.3683.63', - '74.0.3722.1', - '74.0.3722.0', - '73.0.3683.62', - '74.0.3718.9', - '74.0.3702.3', - '74.0.3721.3', - '74.0.3721.2', - '74.0.3721.1', - '74.0.3721.0', - '74.0.3720.6', - '73.0.3683.61', - '72.0.3626.122', - '73.0.3683.60', - '74.0.3720.5', - '72.0.3626.121', - '74.0.3718.8', - '74.0.3720.4', - '74.0.3720.3', - '74.0.3718.7', - '74.0.3720.2', - '74.0.3720.1', - '74.0.3720.0', - '74.0.3718.6', - '74.0.3719.5', - '73.0.3683.59', - '74.0.3718.5', - '74.0.3718.4', - '74.0.3719.4', - '74.0.3719.3', - '74.0.3719.2', - '74.0.3719.1', - '73.0.3683.58', - '74.0.3719.0', - '73.0.3683.57', - '73.0.3683.56', - '74.0.3718.3', - '73.0.3683.55', - '74.0.3718.2', - '74.0.3718.1', - '74.0.3718.0', - '73.0.3683.54', - '74.0.3717.2', - '73.0.3683.53', - '74.0.3717.1', - '74.0.3717.0', - '73.0.3683.52', - '74.0.3716.1', - '74.0.3716.0', - '73.0.3683.51', - '74.0.3715.1', - '74.0.3715.0', - '73.0.3683.50', - '74.0.3711.2', - '74.0.3714.2', - '74.0.3713.3', - '74.0.3714.1', - '74.0.3714.0', - '73.0.3683.49', - '74.0.3713.1', - '74.0.3713.0', - '72.0.3626.120', - '73.0.3683.48', - '74.0.3712.2', - '74.0.3712.1', - '74.0.3712.0', - '73.0.3683.47', - '72.0.3626.119', - '73.0.3683.46', - '74.0.3710.2', - '72.0.3626.118', - '74.0.3711.1', - '74.0.3711.0', - '73.0.3683.45', - '72.0.3626.117', - '74.0.3710.1', - '74.0.3710.0', - '73.0.3683.44', - '72.0.3626.116', - '74.0.3709.1', - '74.0.3709.0', - '74.0.3704.9', - '73.0.3683.43', - '72.0.3626.115', - '74.0.3704.8', - '74.0.3704.7', - '74.0.3708.0', - '74.0.3706.7', - '74.0.3704.6', - '73.0.3683.42', - '72.0.3626.114', - '74.0.3706.6', - '72.0.3626.113', - '74.0.3704.5', - '74.0.3706.5', - '74.0.3706.4', - '74.0.3706.3', - '74.0.3706.2', - '74.0.3706.1', - '74.0.3706.0', - '73.0.3683.41', - '72.0.3626.112', - '74.0.3705.1', - '74.0.3705.0', - '73.0.3683.40', - '72.0.3626.111', - 
'73.0.3683.39', - '74.0.3704.4', - '73.0.3683.38', - '74.0.3704.3', - '74.0.3704.2', - '74.0.3704.1', - '74.0.3704.0', - '73.0.3683.37', - '72.0.3626.110', - '72.0.3626.109', - '74.0.3703.3', - '74.0.3703.2', - '73.0.3683.36', - '74.0.3703.1', - '74.0.3703.0', - '73.0.3683.35', - '72.0.3626.108', - '74.0.3702.2', - '74.0.3699.3', - '74.0.3702.1', - '74.0.3702.0', - '73.0.3683.34', - '72.0.3626.107', - '73.0.3683.33', - '74.0.3701.1', - '74.0.3701.0', - '73.0.3683.32', - '73.0.3683.31', - '72.0.3626.105', - '74.0.3700.1', - '74.0.3700.0', - '73.0.3683.29', - '72.0.3626.103', - '74.0.3699.2', - '74.0.3699.1', - '74.0.3699.0', - '73.0.3683.28', - '72.0.3626.102', - '73.0.3683.27', - '73.0.3683.26', - '74.0.3698.0', - '74.0.3696.2', - '72.0.3626.101', - '73.0.3683.25', - '74.0.3696.1', - '74.0.3696.0', - '74.0.3694.8', - '72.0.3626.100', - '74.0.3694.7', - '74.0.3694.6', - '74.0.3694.5', - '74.0.3694.4', - '72.0.3626.99', - '72.0.3626.98', - '74.0.3694.3', - '73.0.3683.24', - '72.0.3626.97', - '72.0.3626.96', - '72.0.3626.95', - '73.0.3683.23', - '72.0.3626.94', - '73.0.3683.22', - '73.0.3683.21', - '72.0.3626.93', - '74.0.3694.2', - '72.0.3626.92', - '74.0.3694.1', - '74.0.3694.0', - '74.0.3693.6', - '73.0.3683.20', - '72.0.3626.91', - '74.0.3693.5', - '74.0.3693.4', - '74.0.3693.3', - '74.0.3693.2', - '73.0.3683.19', - '74.0.3693.1', - '74.0.3693.0', - '73.0.3683.18', - '72.0.3626.90', - '74.0.3692.1', - '74.0.3692.0', - '73.0.3683.17', - '72.0.3626.89', - '74.0.3687.3', - '74.0.3691.1', - '74.0.3691.0', - '73.0.3683.16', - '72.0.3626.88', - '72.0.3626.87', - '73.0.3683.15', - '74.0.3690.1', - '74.0.3690.0', - '73.0.3683.14', - '72.0.3626.86', - '73.0.3683.13', - '73.0.3683.12', - '74.0.3689.1', - '74.0.3689.0', - '73.0.3683.11', - '72.0.3626.85', - '73.0.3683.10', - '72.0.3626.84', - '73.0.3683.9', - '74.0.3688.1', - '74.0.3688.0', - '73.0.3683.8', - '72.0.3626.83', - '74.0.3687.2', - '74.0.3687.1', - '74.0.3687.0', - '73.0.3683.7', - '72.0.3626.82', - 
'74.0.3686.4', - '72.0.3626.81', - '74.0.3686.3', - '74.0.3686.2', - '74.0.3686.1', - '74.0.3686.0', - '73.0.3683.6', - '72.0.3626.80', - '74.0.3685.1', - '74.0.3685.0', - '73.0.3683.5', - '72.0.3626.79', - '74.0.3684.1', - '74.0.3684.0', - '73.0.3683.4', - '72.0.3626.78', - '72.0.3626.77', - '73.0.3683.3', - '73.0.3683.2', - '72.0.3626.76', - '73.0.3683.1', - '73.0.3683.0', - '72.0.3626.75', - '71.0.3578.141', - '73.0.3682.1', - '73.0.3682.0', - '72.0.3626.74', - '71.0.3578.140', - '73.0.3681.4', - '73.0.3681.3', - '73.0.3681.2', - '73.0.3681.1', - '73.0.3681.0', - '72.0.3626.73', - '71.0.3578.139', - '72.0.3626.72', - '72.0.3626.71', - '73.0.3680.1', - '73.0.3680.0', - '72.0.3626.70', - '71.0.3578.138', - '73.0.3678.2', - '73.0.3679.1', - '73.0.3679.0', - '72.0.3626.69', - '71.0.3578.137', - '73.0.3678.1', - '73.0.3678.0', - '71.0.3578.136', - '73.0.3677.1', - '73.0.3677.0', - '72.0.3626.68', - '72.0.3626.67', - '71.0.3578.135', - '73.0.3676.1', - '73.0.3676.0', - '73.0.3674.2', - '72.0.3626.66', - '71.0.3578.134', - '73.0.3674.1', - '73.0.3674.0', - '72.0.3626.65', - '71.0.3578.133', - '73.0.3673.2', - '73.0.3673.1', - '73.0.3673.0', - '72.0.3626.64', - '71.0.3578.132', - '72.0.3626.63', - '72.0.3626.62', - '72.0.3626.61', - '72.0.3626.60', - '73.0.3672.1', - '73.0.3672.0', - '72.0.3626.59', - '71.0.3578.131', - '73.0.3671.3', - '73.0.3671.2', - '73.0.3671.1', - '73.0.3671.0', - '72.0.3626.58', - '71.0.3578.130', - '73.0.3670.1', - '73.0.3670.0', - '72.0.3626.57', - '71.0.3578.129', - '73.0.3669.1', - '73.0.3669.0', - '72.0.3626.56', - '71.0.3578.128', - '73.0.3668.2', - '73.0.3668.1', - '73.0.3668.0', - '72.0.3626.55', - '71.0.3578.127', - '73.0.3667.2', - '73.0.3667.1', - '73.0.3667.0', - '72.0.3626.54', - '71.0.3578.126', - '73.0.3666.1', - '73.0.3666.0', - '72.0.3626.53', - '71.0.3578.125', - '73.0.3665.4', - '73.0.3665.3', - '72.0.3626.52', - '73.0.3665.2', - '73.0.3664.4', - '73.0.3665.1', - '73.0.3665.0', - '72.0.3626.51', - '71.0.3578.124', - 
'72.0.3626.50', - '73.0.3664.3', - '73.0.3664.2', - '73.0.3664.1', - '73.0.3664.0', - '73.0.3663.2', - '72.0.3626.49', - '71.0.3578.123', - '73.0.3663.1', - '73.0.3663.0', - '72.0.3626.48', - '71.0.3578.122', - '73.0.3662.1', - '73.0.3662.0', - '72.0.3626.47', - '71.0.3578.121', - '73.0.3661.1', - '72.0.3626.46', - '73.0.3661.0', - '72.0.3626.45', - '71.0.3578.120', - '73.0.3660.2', - '73.0.3660.1', - '73.0.3660.0', - '72.0.3626.44', - '71.0.3578.119', - '73.0.3659.1', - '73.0.3659.0', - '72.0.3626.43', - '71.0.3578.118', - '73.0.3658.1', - '73.0.3658.0', - '72.0.3626.42', - '71.0.3578.117', - '73.0.3657.1', - '73.0.3657.0', - '72.0.3626.41', - '71.0.3578.116', - '73.0.3656.1', - '73.0.3656.0', - '72.0.3626.40', - '71.0.3578.115', - '73.0.3655.1', - '73.0.3655.0', - '72.0.3626.39', - '71.0.3578.114', - '73.0.3654.1', - '73.0.3654.0', - '72.0.3626.38', - '71.0.3578.113', - '73.0.3653.1', - '73.0.3653.0', - '72.0.3626.37', - '71.0.3578.112', - '73.0.3652.1', - '73.0.3652.0', - '72.0.3626.36', - '71.0.3578.111', - '73.0.3651.1', - '73.0.3651.0', - '72.0.3626.35', - '71.0.3578.110', - '73.0.3650.1', - '73.0.3650.0', - '72.0.3626.34', - '71.0.3578.109', - '73.0.3649.1', - '73.0.3649.0', - '72.0.3626.33', - '71.0.3578.108', - '73.0.3648.2', - '73.0.3648.1', - '73.0.3648.0', - '72.0.3626.32', - '71.0.3578.107', - '73.0.3647.2', - '73.0.3647.1', - '73.0.3647.0', - '72.0.3626.31', - '71.0.3578.106', - '73.0.3635.3', - '73.0.3646.2', - '73.0.3646.1', - '73.0.3646.0', - '72.0.3626.30', - '71.0.3578.105', - '72.0.3626.29', - '73.0.3645.2', - '73.0.3645.1', - '73.0.3645.0', - '72.0.3626.28', - '71.0.3578.104', - '72.0.3626.27', - '72.0.3626.26', - '72.0.3626.25', - '72.0.3626.24', - '73.0.3644.0', - '73.0.3643.2', - '72.0.3626.23', - '71.0.3578.103', - '73.0.3643.1', - '73.0.3643.0', - '72.0.3626.22', - '71.0.3578.102', - '73.0.3642.1', - '73.0.3642.0', - '72.0.3626.21', - '71.0.3578.101', - '73.0.3641.1', - '73.0.3641.0', - '72.0.3626.20', - '71.0.3578.100', - '72.0.3626.19', 
- '73.0.3640.1', - '73.0.3640.0', - '72.0.3626.18', - '73.0.3639.1', - '71.0.3578.99', - '73.0.3639.0', - '72.0.3626.17', - '73.0.3638.2', - '72.0.3626.16', - '73.0.3638.1', - '73.0.3638.0', - '72.0.3626.15', - '71.0.3578.98', - '73.0.3635.2', - '71.0.3578.97', - '73.0.3637.1', - '73.0.3637.0', - '72.0.3626.14', - '71.0.3578.96', - '71.0.3578.95', - '72.0.3626.13', - '71.0.3578.94', - '73.0.3636.2', - '71.0.3578.93', - '73.0.3636.1', - '73.0.3636.0', - '72.0.3626.12', - '71.0.3578.92', - '73.0.3635.1', - '73.0.3635.0', - '72.0.3626.11', - '71.0.3578.91', - '73.0.3634.2', - '73.0.3634.1', - '73.0.3634.0', - '72.0.3626.10', - '71.0.3578.90', - '71.0.3578.89', - '73.0.3633.2', - '73.0.3633.1', - '73.0.3633.0', - '72.0.3610.4', - '72.0.3626.9', - '71.0.3578.88', - '73.0.3632.5', - '73.0.3632.4', - '73.0.3632.3', - '73.0.3632.2', - '73.0.3632.1', - '73.0.3632.0', - '72.0.3626.8', - '71.0.3578.87', - '73.0.3631.2', - '73.0.3631.1', - '73.0.3631.0', - '72.0.3626.7', - '71.0.3578.86', - '72.0.3626.6', - '73.0.3630.1', - '73.0.3630.0', - '72.0.3626.5', - '71.0.3578.85', - '72.0.3626.4', - '73.0.3628.3', - '73.0.3628.2', - '73.0.3629.1', - '73.0.3629.0', - '72.0.3626.3', - '71.0.3578.84', - '73.0.3628.1', - '73.0.3628.0', - '71.0.3578.83', - '73.0.3627.1', - '73.0.3627.0', - '72.0.3626.2', - '71.0.3578.82', - '71.0.3578.81', - '71.0.3578.80', - '72.0.3626.1', - '72.0.3626.0', - '71.0.3578.79', - '70.0.3538.124', - '71.0.3578.78', - '72.0.3623.4', - '72.0.3625.2', - '72.0.3625.1', - '72.0.3625.0', - '71.0.3578.77', - '70.0.3538.123', - '72.0.3624.4', - '72.0.3624.3', - '72.0.3624.2', - '71.0.3578.76', - '72.0.3624.1', - '72.0.3624.0', - '72.0.3623.3', - '71.0.3578.75', - '70.0.3538.122', - '71.0.3578.74', - '72.0.3623.2', - '72.0.3610.3', - '72.0.3623.1', - '72.0.3623.0', - '72.0.3622.3', - '72.0.3622.2', - '71.0.3578.73', - '70.0.3538.121', - '72.0.3622.1', - '72.0.3622.0', - '71.0.3578.72', - '70.0.3538.120', - '72.0.3621.1', - '72.0.3621.0', - '71.0.3578.71', - 
'70.0.3538.119', - '72.0.3620.1', - '72.0.3620.0', - '71.0.3578.70', - '70.0.3538.118', - '71.0.3578.69', - '72.0.3619.1', - '72.0.3619.0', - '71.0.3578.68', - '70.0.3538.117', - '71.0.3578.67', - '72.0.3618.1', - '72.0.3618.0', - '71.0.3578.66', - '70.0.3538.116', - '72.0.3617.1', - '72.0.3617.0', - '71.0.3578.65', - '70.0.3538.115', - '72.0.3602.3', - '71.0.3578.64', - '72.0.3616.1', - '72.0.3616.0', - '71.0.3578.63', - '70.0.3538.114', - '71.0.3578.62', - '72.0.3615.1', - '72.0.3615.0', - '71.0.3578.61', - '70.0.3538.113', - '72.0.3614.1', - '72.0.3614.0', - '71.0.3578.60', - '70.0.3538.112', - '72.0.3613.1', - '72.0.3613.0', - '71.0.3578.59', - '70.0.3538.111', - '72.0.3612.2', - '72.0.3612.1', - '72.0.3612.0', - '70.0.3538.110', - '71.0.3578.58', - '70.0.3538.109', - '72.0.3611.2', - '72.0.3611.1', - '72.0.3611.0', - '71.0.3578.57', - '70.0.3538.108', - '72.0.3610.2', - '71.0.3578.56', - '71.0.3578.55', - '72.0.3610.1', - '72.0.3610.0', - '71.0.3578.54', - '70.0.3538.107', - '71.0.3578.53', - '72.0.3609.3', - '71.0.3578.52', - '72.0.3609.2', - '71.0.3578.51', - '72.0.3608.5', - '72.0.3609.1', - '72.0.3609.0', - '71.0.3578.50', - '70.0.3538.106', - '72.0.3608.4', - '72.0.3608.3', - '72.0.3608.2', - '71.0.3578.49', - '72.0.3608.1', - '72.0.3608.0', - '70.0.3538.105', - '71.0.3578.48', - '72.0.3607.1', - '72.0.3607.0', - '71.0.3578.47', - '70.0.3538.104', - '72.0.3606.2', - '72.0.3606.1', - '72.0.3606.0', - '71.0.3578.46', - '70.0.3538.103', - '70.0.3538.102', - '72.0.3605.3', - '72.0.3605.2', - '72.0.3605.1', - '72.0.3605.0', - '71.0.3578.45', - '70.0.3538.101', - '71.0.3578.44', - '71.0.3578.43', - '70.0.3538.100', - '70.0.3538.99', - '71.0.3578.42', - '72.0.3604.1', - '72.0.3604.0', - '71.0.3578.41', - '70.0.3538.98', - '71.0.3578.40', - '72.0.3603.2', - '72.0.3603.1', - '72.0.3603.0', - '71.0.3578.39', - '70.0.3538.97', - '72.0.3602.2', - '71.0.3578.38', - '71.0.3578.37', - '72.0.3602.1', - '72.0.3602.0', - '71.0.3578.36', - '70.0.3538.96', - '72.0.3601.1', - 
'72.0.3601.0', - '71.0.3578.35', - '70.0.3538.95', - '72.0.3600.1', - '72.0.3600.0', - '71.0.3578.34', - '70.0.3538.94', - '72.0.3599.3', - '72.0.3599.2', - '72.0.3599.1', - '72.0.3599.0', - '71.0.3578.33', - '70.0.3538.93', - '72.0.3598.1', - '72.0.3598.0', - '71.0.3578.32', - '70.0.3538.87', - '72.0.3597.1', - '72.0.3597.0', - '72.0.3596.2', - '71.0.3578.31', - '70.0.3538.86', - '71.0.3578.30', - '71.0.3578.29', - '72.0.3596.1', - '72.0.3596.0', - '71.0.3578.28', - '70.0.3538.85', - '72.0.3595.2', - '72.0.3591.3', - '72.0.3595.1', - '72.0.3595.0', - '71.0.3578.27', - '70.0.3538.84', - '72.0.3594.1', - '72.0.3594.0', - '71.0.3578.26', - '70.0.3538.83', - '72.0.3593.2', - '72.0.3593.1', - '72.0.3593.0', - '71.0.3578.25', - '70.0.3538.82', - '72.0.3589.3', - '72.0.3592.2', - '72.0.3592.1', - '72.0.3592.0', - '71.0.3578.24', - '72.0.3589.2', - '70.0.3538.81', - '70.0.3538.80', - '72.0.3591.2', - '72.0.3591.1', - '72.0.3591.0', - '71.0.3578.23', - '70.0.3538.79', - '71.0.3578.22', - '72.0.3590.1', - '72.0.3590.0', - '71.0.3578.21', - '70.0.3538.78', - '70.0.3538.77', - '72.0.3589.1', - '72.0.3589.0', - '71.0.3578.20', - '70.0.3538.76', - '71.0.3578.19', - '70.0.3538.75', - '72.0.3588.1', - '72.0.3588.0', - '71.0.3578.18', - '70.0.3538.74', - '72.0.3586.2', - '72.0.3587.0', - '71.0.3578.17', - '70.0.3538.73', - '72.0.3586.1', - '72.0.3586.0', - '71.0.3578.16', - '70.0.3538.72', - '72.0.3585.1', - '72.0.3585.0', - '71.0.3578.15', - '70.0.3538.71', - '71.0.3578.14', - '72.0.3584.1', - '72.0.3584.0', - '71.0.3578.13', - '70.0.3538.70', - '72.0.3583.2', - '71.0.3578.12', - '72.0.3583.1', - '72.0.3583.0', - '71.0.3578.11', - '70.0.3538.69', - '71.0.3578.10', - '72.0.3582.0', - '72.0.3581.4', - '71.0.3578.9', - '70.0.3538.67', - '72.0.3581.3', - '72.0.3581.2', - '72.0.3581.1', - '72.0.3581.0', - '71.0.3578.8', - '70.0.3538.66', - '72.0.3580.1', - '72.0.3580.0', - '71.0.3578.7', - '70.0.3538.65', - '71.0.3578.6', - '72.0.3579.1', - '72.0.3579.0', - '71.0.3578.5', - 
'70.0.3538.64', - '71.0.3578.4', - '71.0.3578.3', - '71.0.3578.2', - '71.0.3578.1', - '71.0.3578.0', - '70.0.3538.63', - '69.0.3497.128', - '70.0.3538.62', - '70.0.3538.61', - '70.0.3538.60', - '70.0.3538.59', - '71.0.3577.1', - '71.0.3577.0', - '70.0.3538.58', - '69.0.3497.127', - '71.0.3576.2', - '71.0.3576.1', - '71.0.3576.0', - '70.0.3538.57', - '70.0.3538.56', - '71.0.3575.2', - '70.0.3538.55', - '69.0.3497.126', - '70.0.3538.54', - '71.0.3575.1', - '71.0.3575.0', - '71.0.3574.1', - '71.0.3574.0', - '70.0.3538.53', - '69.0.3497.125', - '70.0.3538.52', - '71.0.3573.1', - '71.0.3573.0', - '70.0.3538.51', - '69.0.3497.124', - '71.0.3572.1', - '71.0.3572.0', - '70.0.3538.50', - '69.0.3497.123', - '71.0.3571.2', - '70.0.3538.49', - '69.0.3497.122', - '71.0.3571.1', - '71.0.3571.0', - '70.0.3538.48', - '69.0.3497.121', - '71.0.3570.1', - '71.0.3570.0', - '70.0.3538.47', - '69.0.3497.120', - '71.0.3568.2', - '71.0.3569.1', - '71.0.3569.0', - '70.0.3538.46', - '69.0.3497.119', - '70.0.3538.45', - '71.0.3568.1', - '71.0.3568.0', - '70.0.3538.44', - '69.0.3497.118', - '70.0.3538.43', - '70.0.3538.42', - '71.0.3567.1', - '71.0.3567.0', - '70.0.3538.41', - '69.0.3497.117', - '71.0.3566.1', - '71.0.3566.0', - '70.0.3538.40', - '69.0.3497.116', - '71.0.3565.1', - '71.0.3565.0', - '70.0.3538.39', - '69.0.3497.115', - '71.0.3564.1', - '71.0.3564.0', - '70.0.3538.38', - '69.0.3497.114', - '71.0.3563.0', - '71.0.3562.2', - '70.0.3538.37', - '69.0.3497.113', - '70.0.3538.36', - '70.0.3538.35', - '71.0.3562.1', - '71.0.3562.0', - '70.0.3538.34', - '69.0.3497.112', - '70.0.3538.33', - '71.0.3561.1', - '71.0.3561.0', - '70.0.3538.32', - '69.0.3497.111', - '71.0.3559.6', - '71.0.3560.1', - '71.0.3560.0', - '71.0.3559.5', - '71.0.3559.4', - '70.0.3538.31', - '69.0.3497.110', - '71.0.3559.3', - '70.0.3538.30', - '69.0.3497.109', - '71.0.3559.2', - '71.0.3559.1', - '71.0.3559.0', - '70.0.3538.29', - '69.0.3497.108', - '71.0.3558.2', - '71.0.3558.1', - '71.0.3558.0', - '70.0.3538.28', - 
'69.0.3497.107', - '71.0.3557.2', - '71.0.3557.1', - '71.0.3557.0', - '70.0.3538.27', - '69.0.3497.106', - '71.0.3554.4', - '70.0.3538.26', - '71.0.3556.1', - '71.0.3556.0', - '70.0.3538.25', - '71.0.3554.3', - '69.0.3497.105', - '71.0.3554.2', - '70.0.3538.24', - '69.0.3497.104', - '71.0.3555.2', - '70.0.3538.23', - '71.0.3555.1', - '71.0.3555.0', - '70.0.3538.22', - '69.0.3497.103', - '71.0.3554.1', - '71.0.3554.0', - '70.0.3538.21', - '69.0.3497.102', - '71.0.3553.3', - '70.0.3538.20', - '69.0.3497.101', - '71.0.3553.2', - '69.0.3497.100', - '71.0.3553.1', - '71.0.3553.0', - '70.0.3538.19', - '69.0.3497.99', - '69.0.3497.98', - '69.0.3497.97', - '71.0.3552.6', - '71.0.3552.5', - '71.0.3552.4', - '71.0.3552.3', - '71.0.3552.2', - '71.0.3552.1', - '71.0.3552.0', - '70.0.3538.18', - '69.0.3497.96', - '71.0.3551.3', - '71.0.3551.2', - '71.0.3551.1', - '71.0.3551.0', - '70.0.3538.17', - '69.0.3497.95', - '71.0.3550.3', - '71.0.3550.2', - '71.0.3550.1', - '71.0.3550.0', - '70.0.3538.16', - '69.0.3497.94', - '71.0.3549.1', - '71.0.3549.0', - '70.0.3538.15', - '69.0.3497.93', - '69.0.3497.92', - '71.0.3548.1', - '71.0.3548.0', - '70.0.3538.14', - '69.0.3497.91', - '71.0.3547.1', - '71.0.3547.0', - '70.0.3538.13', - '69.0.3497.90', - '71.0.3546.2', - '69.0.3497.89', - '71.0.3546.1', - '71.0.3546.0', - '70.0.3538.12', - '69.0.3497.88', - '71.0.3545.4', - '71.0.3545.3', - '71.0.3545.2', - '71.0.3545.1', - '71.0.3545.0', - '70.0.3538.11', - '69.0.3497.87', - '71.0.3544.5', - '71.0.3544.4', - '71.0.3544.3', - '71.0.3544.2', - '71.0.3544.1', - '71.0.3544.0', - '69.0.3497.86', - '70.0.3538.10', - '69.0.3497.85', - '70.0.3538.9', - '69.0.3497.84', - '71.0.3543.4', - '70.0.3538.8', - '71.0.3543.3', - '71.0.3543.2', - '71.0.3543.1', - '71.0.3543.0', - '70.0.3538.7', - '69.0.3497.83', - '71.0.3542.2', - '71.0.3542.1', - '71.0.3542.0', - '70.0.3538.6', - '69.0.3497.82', - '69.0.3497.81', - '71.0.3541.1', - '71.0.3541.0', - '70.0.3538.5', - '69.0.3497.80', - '71.0.3540.1', - 
'71.0.3540.0', - '70.0.3538.4', - '69.0.3497.79', - '70.0.3538.3', - '71.0.3539.1', - '71.0.3539.0', - '69.0.3497.78', - '68.0.3440.134', - '69.0.3497.77', - '70.0.3538.2', - '70.0.3538.1', - '70.0.3538.0', - '69.0.3497.76', - '68.0.3440.133', - '69.0.3497.75', - '70.0.3537.2', - '70.0.3537.1', - '70.0.3537.0', - '69.0.3497.74', - '68.0.3440.132', - '70.0.3536.0', - '70.0.3535.5', - '70.0.3535.4', - '70.0.3535.3', - '69.0.3497.73', - '68.0.3440.131', - '70.0.3532.8', - '70.0.3532.7', - '69.0.3497.72', - '69.0.3497.71', - '70.0.3535.2', - '70.0.3535.1', - '70.0.3535.0', - '69.0.3497.70', - '68.0.3440.130', - '69.0.3497.69', - '68.0.3440.129', - '70.0.3534.4', - '70.0.3534.3', - '70.0.3534.2', - '70.0.3534.1', - '70.0.3534.0', - '69.0.3497.68', - '68.0.3440.128', - '70.0.3533.2', - '70.0.3533.1', - '70.0.3533.0', - '69.0.3497.67', - '68.0.3440.127', - '70.0.3532.6', - '70.0.3532.5', - '70.0.3532.4', - '69.0.3497.66', - '68.0.3440.126', - '70.0.3532.3', - '70.0.3532.2', - '70.0.3532.1', - '69.0.3497.60', - '69.0.3497.65', - '69.0.3497.64', - '70.0.3532.0', - '70.0.3531.0', - '70.0.3530.4', - '70.0.3530.3', - '70.0.3530.2', - '69.0.3497.58', - '68.0.3440.125', - '69.0.3497.57', - '69.0.3497.56', - '69.0.3497.55', - '69.0.3497.54', - '70.0.3530.1', - '70.0.3530.0', - '69.0.3497.53', - '68.0.3440.124', - '69.0.3497.52', - '70.0.3529.3', - '70.0.3529.2', - '70.0.3529.1', - '70.0.3529.0', - '69.0.3497.51', - '70.0.3528.4', - '68.0.3440.123', - '70.0.3528.3', - '70.0.3528.2', - '70.0.3528.1', - '70.0.3528.0', - '69.0.3497.50', - '68.0.3440.122', - '70.0.3527.1', - '70.0.3527.0', - '69.0.3497.49', - '68.0.3440.121', - '70.0.3526.1', - '70.0.3526.0', - '68.0.3440.120', - '69.0.3497.48', - '69.0.3497.47', - '68.0.3440.119', - '68.0.3440.118', - '70.0.3525.5', - '70.0.3525.4', - '70.0.3525.3', - '68.0.3440.117', - '69.0.3497.46', - '70.0.3525.2', - '70.0.3525.1', - '70.0.3525.0', - '69.0.3497.45', - '68.0.3440.116', - '70.0.3524.4', - '70.0.3524.3', - '69.0.3497.44', - 
'70.0.3524.2', - '70.0.3524.1', - '70.0.3524.0', - '70.0.3523.2', - '69.0.3497.43', - '68.0.3440.115', - '70.0.3505.9', - '69.0.3497.42', - '70.0.3505.8', - '70.0.3523.1', - '70.0.3523.0', - '69.0.3497.41', - '68.0.3440.114', - '70.0.3505.7', - '69.0.3497.40', - '70.0.3522.1', - '70.0.3522.0', - '70.0.3521.2', - '69.0.3497.39', - '68.0.3440.113', - '70.0.3505.6', - '70.0.3521.1', - '70.0.3521.0', - '69.0.3497.38', - '68.0.3440.112', - '70.0.3520.1', - '70.0.3520.0', - '69.0.3497.37', - '68.0.3440.111', - '70.0.3519.3', - '70.0.3519.2', - '70.0.3519.1', - '70.0.3519.0', - '69.0.3497.36', - '68.0.3440.110', - '70.0.3518.1', - '70.0.3518.0', - '69.0.3497.35', - '69.0.3497.34', - '68.0.3440.109', - '70.0.3517.1', - '70.0.3517.0', - '69.0.3497.33', - '68.0.3440.108', - '69.0.3497.32', - '70.0.3516.3', - '70.0.3516.2', - '70.0.3516.1', - '70.0.3516.0', - '69.0.3497.31', - '68.0.3440.107', - '70.0.3515.4', - '68.0.3440.106', - '70.0.3515.3', - '70.0.3515.2', - '70.0.3515.1', - '70.0.3515.0', - '69.0.3497.30', - '68.0.3440.105', - '68.0.3440.104', - '70.0.3514.2', - '70.0.3514.1', - '70.0.3514.0', - '69.0.3497.29', - '68.0.3440.103', - '70.0.3513.1', - '70.0.3513.0', - '69.0.3497.28', - ) - @classmethod def _extract_urls(cls, webpage): return re.findall( r']+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)' % (cls._DOMAINS, cls._EMBED_WORD), webpage) - def _extract_decrypted_page(self, page_url, webpage, video_id, headers): + def _extract_decrypted_page(self, page_url, webpage, video_id): phantom = PhantomJSwrapper(self, required_version='2.0') - webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers) + webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id) return webpage def _real_extract(self, url): @@ -1977,16 +396,13 @@ class OpenloadIE(InfoExtractor): video_id = mobj.group('id') url_pattern = 'https://%s/%%s/%s/' % (host, video_id) - headers = { - 'User-Agent': self._USER_AGENT_TPL % random.choice(self._CHROME_VERSIONS), - } for 
path in (self._EMBED_WORD, self._STREAM_WORD): page_url = url_pattern % path last = path == self._STREAM_WORD webpage = self._download_webpage( page_url, video_id, 'Downloading %s webpage' % path, - headers=headers, fatal=last) + fatal=last) if not webpage: continue if 'File not found' in webpage or 'deleted by the owner' in webpage: @@ -1995,7 +411,7 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True, video_id=video_id) break - webpage = self._extract_decrypted_page(page_url, webpage, video_id, headers) + webpage = self._extract_decrypted_page(page_url, webpage, video_id) for element_id in self._URL_IDS: decoded_id = get_element_by_id(element_id, webpage) if decoded_id: @@ -2026,7 +442,6 @@ class OpenloadIE(InfoExtractor): 'url': video_url, 'ext': determine_ext(title, None) or determine_ext(url, 'mp4'), 'subtitles': subtitles, - 'http_headers': headers, } @@ -2061,5 +476,5 @@ class VerystreamIE(OpenloadIE): 'only_matching': True, }] - def _extract_decrypted_page(self, page_url, webpage, video_id, headers): + def _extract_decrypted_page(self, page_url, webpage, video_id): return webpage # for Verystream, the webpage is already decrypted diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a1f586b80..798757241 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -81,8 +81,1592 @@ def register_socks_protocols(): # This is not clearly defined otherwise compiled_regex_type = type(re.compile('')) + +def random_user_agent(): + _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' + _CHROME_VERSIONS = ( + '74.0.3729.129', + '76.0.3780.3', + '76.0.3780.2', + '74.0.3729.128', + '76.0.3780.1', + '76.0.3780.0', + '75.0.3770.15', + '74.0.3729.127', + '74.0.3729.126', + '76.0.3779.1', + '76.0.3779.0', + '75.0.3770.14', + '74.0.3729.125', + '76.0.3778.1', + '76.0.3778.0', + '75.0.3770.13', + '74.0.3729.124', + '74.0.3729.123', + '73.0.3683.121', + 
'76.0.3777.1', + '76.0.3777.0', + '75.0.3770.12', + '74.0.3729.122', + '76.0.3776.4', + '75.0.3770.11', + '74.0.3729.121', + '76.0.3776.3', + '76.0.3776.2', + '73.0.3683.120', + '74.0.3729.120', + '74.0.3729.119', + '74.0.3729.118', + '76.0.3776.1', + '76.0.3776.0', + '76.0.3775.5', + '75.0.3770.10', + '74.0.3729.117', + '76.0.3775.4', + '76.0.3775.3', + '74.0.3729.116', + '75.0.3770.9', + '76.0.3775.2', + '76.0.3775.1', + '76.0.3775.0', + '75.0.3770.8', + '74.0.3729.115', + '74.0.3729.114', + '76.0.3774.1', + '76.0.3774.0', + '75.0.3770.7', + '74.0.3729.113', + '74.0.3729.112', + '74.0.3729.111', + '76.0.3773.1', + '76.0.3773.0', + '75.0.3770.6', + '74.0.3729.110', + '74.0.3729.109', + '76.0.3772.1', + '76.0.3772.0', + '75.0.3770.5', + '74.0.3729.108', + '74.0.3729.107', + '76.0.3771.1', + '76.0.3771.0', + '75.0.3770.4', + '74.0.3729.106', + '74.0.3729.105', + '75.0.3770.3', + '74.0.3729.104', + '74.0.3729.103', + '74.0.3729.102', + '75.0.3770.2', + '74.0.3729.101', + '75.0.3770.1', + '75.0.3770.0', + '74.0.3729.100', + '75.0.3769.5', + '75.0.3769.4', + '74.0.3729.99', + '75.0.3769.3', + '75.0.3769.2', + '75.0.3768.6', + '74.0.3729.98', + '75.0.3769.1', + '75.0.3769.0', + '74.0.3729.97', + '73.0.3683.119', + '73.0.3683.118', + '74.0.3729.96', + '75.0.3768.5', + '75.0.3768.4', + '75.0.3768.3', + '75.0.3768.2', + '74.0.3729.95', + '74.0.3729.94', + '75.0.3768.1', + '75.0.3768.0', + '74.0.3729.93', + '74.0.3729.92', + '73.0.3683.117', + '74.0.3729.91', + '75.0.3766.3', + '74.0.3729.90', + '75.0.3767.2', + '75.0.3767.1', + '75.0.3767.0', + '74.0.3729.89', + '73.0.3683.116', + '75.0.3766.2', + '74.0.3729.88', + '75.0.3766.1', + '75.0.3766.0', + '74.0.3729.87', + '73.0.3683.115', + '74.0.3729.86', + '75.0.3765.1', + '75.0.3765.0', + '74.0.3729.85', + '73.0.3683.114', + '74.0.3729.84', + '75.0.3764.1', + '75.0.3764.0', + '74.0.3729.83', + '73.0.3683.113', + '75.0.3763.2', + '75.0.3761.4', + '74.0.3729.82', + '75.0.3763.1', + '75.0.3763.0', + '74.0.3729.81', + 
'73.0.3683.112', + '75.0.3762.1', + '75.0.3762.0', + '74.0.3729.80', + '75.0.3761.3', + '74.0.3729.79', + '73.0.3683.111', + '75.0.3761.2', + '74.0.3729.78', + '74.0.3729.77', + '75.0.3761.1', + '75.0.3761.0', + '73.0.3683.110', + '74.0.3729.76', + '74.0.3729.75', + '75.0.3760.0', + '74.0.3729.74', + '75.0.3759.8', + '75.0.3759.7', + '75.0.3759.6', + '74.0.3729.73', + '75.0.3759.5', + '74.0.3729.72', + '73.0.3683.109', + '75.0.3759.4', + '75.0.3759.3', + '74.0.3729.71', + '75.0.3759.2', + '74.0.3729.70', + '73.0.3683.108', + '74.0.3729.69', + '75.0.3759.1', + '75.0.3759.0', + '74.0.3729.68', + '73.0.3683.107', + '74.0.3729.67', + '75.0.3758.1', + '75.0.3758.0', + '74.0.3729.66', + '73.0.3683.106', + '74.0.3729.65', + '75.0.3757.1', + '75.0.3757.0', + '74.0.3729.64', + '73.0.3683.105', + '74.0.3729.63', + '75.0.3756.1', + '75.0.3756.0', + '74.0.3729.62', + '73.0.3683.104', + '75.0.3755.3', + '75.0.3755.2', + '73.0.3683.103', + '75.0.3755.1', + '75.0.3755.0', + '74.0.3729.61', + '73.0.3683.102', + '74.0.3729.60', + '75.0.3754.2', + '74.0.3729.59', + '75.0.3753.4', + '74.0.3729.58', + '75.0.3754.1', + '75.0.3754.0', + '74.0.3729.57', + '73.0.3683.101', + '75.0.3753.3', + '75.0.3752.2', + '75.0.3753.2', + '74.0.3729.56', + '75.0.3753.1', + '75.0.3753.0', + '74.0.3729.55', + '73.0.3683.100', + '74.0.3729.54', + '75.0.3752.1', + '75.0.3752.0', + '74.0.3729.53', + '73.0.3683.99', + '74.0.3729.52', + '75.0.3751.1', + '75.0.3751.0', + '74.0.3729.51', + '73.0.3683.98', + '74.0.3729.50', + '75.0.3750.0', + '74.0.3729.49', + '74.0.3729.48', + '74.0.3729.47', + '75.0.3749.3', + '74.0.3729.46', + '73.0.3683.97', + '75.0.3749.2', + '74.0.3729.45', + '75.0.3749.1', + '75.0.3749.0', + '74.0.3729.44', + '73.0.3683.96', + '74.0.3729.43', + '74.0.3729.42', + '75.0.3748.1', + '75.0.3748.0', + '74.0.3729.41', + '75.0.3747.1', + '73.0.3683.95', + '75.0.3746.4', + '74.0.3729.40', + '74.0.3729.39', + '75.0.3747.0', + '75.0.3746.3', + '75.0.3746.2', + '74.0.3729.38', + '75.0.3746.1', + 
'75.0.3746.0', + '74.0.3729.37', + '73.0.3683.94', + '75.0.3745.5', + '75.0.3745.4', + '75.0.3745.3', + '75.0.3745.2', + '74.0.3729.36', + '75.0.3745.1', + '75.0.3745.0', + '75.0.3744.2', + '74.0.3729.35', + '73.0.3683.93', + '74.0.3729.34', + '75.0.3744.1', + '75.0.3744.0', + '74.0.3729.33', + '73.0.3683.92', + '74.0.3729.32', + '74.0.3729.31', + '73.0.3683.91', + '75.0.3741.2', + '75.0.3740.5', + '74.0.3729.30', + '75.0.3741.1', + '75.0.3741.0', + '74.0.3729.29', + '75.0.3740.4', + '73.0.3683.90', + '74.0.3729.28', + '75.0.3740.3', + '73.0.3683.89', + '75.0.3740.2', + '74.0.3729.27', + '75.0.3740.1', + '75.0.3740.0', + '74.0.3729.26', + '73.0.3683.88', + '73.0.3683.87', + '74.0.3729.25', + '75.0.3739.1', + '75.0.3739.0', + '73.0.3683.86', + '74.0.3729.24', + '73.0.3683.85', + '75.0.3738.4', + '75.0.3738.3', + '75.0.3738.2', + '75.0.3738.1', + '75.0.3738.0', + '74.0.3729.23', + '73.0.3683.84', + '74.0.3729.22', + '74.0.3729.21', + '75.0.3737.1', + '75.0.3737.0', + '74.0.3729.20', + '73.0.3683.83', + '74.0.3729.19', + '75.0.3736.1', + '75.0.3736.0', + '74.0.3729.18', + '73.0.3683.82', + '74.0.3729.17', + '75.0.3735.1', + '75.0.3735.0', + '74.0.3729.16', + '73.0.3683.81', + '75.0.3734.1', + '75.0.3734.0', + '74.0.3729.15', + '73.0.3683.80', + '74.0.3729.14', + '75.0.3733.1', + '75.0.3733.0', + '75.0.3732.1', + '74.0.3729.13', + '74.0.3729.12', + '73.0.3683.79', + '74.0.3729.11', + '75.0.3732.0', + '74.0.3729.10', + '73.0.3683.78', + '74.0.3729.9', + '74.0.3729.8', + '74.0.3729.7', + '75.0.3731.3', + '75.0.3731.2', + '75.0.3731.0', + '74.0.3729.6', + '73.0.3683.77', + '73.0.3683.76', + '75.0.3730.5', + '75.0.3730.4', + '73.0.3683.75', + '74.0.3729.5', + '73.0.3683.74', + '75.0.3730.3', + '75.0.3730.2', + '74.0.3729.4', + '73.0.3683.73', + '73.0.3683.72', + '75.0.3730.1', + '75.0.3730.0', + '74.0.3729.3', + '73.0.3683.71', + '74.0.3729.2', + '73.0.3683.70', + '74.0.3729.1', + '74.0.3729.0', + '74.0.3726.4', + '73.0.3683.69', + '74.0.3726.3', + '74.0.3728.0', + 
'74.0.3726.2', + '73.0.3683.68', + '74.0.3726.1', + '74.0.3726.0', + '74.0.3725.4', + '73.0.3683.67', + '73.0.3683.66', + '74.0.3725.3', + '74.0.3725.2', + '74.0.3725.1', + '74.0.3724.8', + '74.0.3725.0', + '73.0.3683.65', + '74.0.3724.7', + '74.0.3724.6', + '74.0.3724.5', + '74.0.3724.4', + '74.0.3724.3', + '74.0.3724.2', + '74.0.3724.1', + '74.0.3724.0', + '73.0.3683.64', + '74.0.3723.1', + '74.0.3723.0', + '73.0.3683.63', + '74.0.3722.1', + '74.0.3722.0', + '73.0.3683.62', + '74.0.3718.9', + '74.0.3702.3', + '74.0.3721.3', + '74.0.3721.2', + '74.0.3721.1', + '74.0.3721.0', + '74.0.3720.6', + '73.0.3683.61', + '72.0.3626.122', + '73.0.3683.60', + '74.0.3720.5', + '72.0.3626.121', + '74.0.3718.8', + '74.0.3720.4', + '74.0.3720.3', + '74.0.3718.7', + '74.0.3720.2', + '74.0.3720.1', + '74.0.3720.0', + '74.0.3718.6', + '74.0.3719.5', + '73.0.3683.59', + '74.0.3718.5', + '74.0.3718.4', + '74.0.3719.4', + '74.0.3719.3', + '74.0.3719.2', + '74.0.3719.1', + '73.0.3683.58', + '74.0.3719.0', + '73.0.3683.57', + '73.0.3683.56', + '74.0.3718.3', + '73.0.3683.55', + '74.0.3718.2', + '74.0.3718.1', + '74.0.3718.0', + '73.0.3683.54', + '74.0.3717.2', + '73.0.3683.53', + '74.0.3717.1', + '74.0.3717.0', + '73.0.3683.52', + '74.0.3716.1', + '74.0.3716.0', + '73.0.3683.51', + '74.0.3715.1', + '74.0.3715.0', + '73.0.3683.50', + '74.0.3711.2', + '74.0.3714.2', + '74.0.3713.3', + '74.0.3714.1', + '74.0.3714.0', + '73.0.3683.49', + '74.0.3713.1', + '74.0.3713.0', + '72.0.3626.120', + '73.0.3683.48', + '74.0.3712.2', + '74.0.3712.1', + '74.0.3712.0', + '73.0.3683.47', + '72.0.3626.119', + '73.0.3683.46', + '74.0.3710.2', + '72.0.3626.118', + '74.0.3711.1', + '74.0.3711.0', + '73.0.3683.45', + '72.0.3626.117', + '74.0.3710.1', + '74.0.3710.0', + '73.0.3683.44', + '72.0.3626.116', + '74.0.3709.1', + '74.0.3709.0', + '74.0.3704.9', + '73.0.3683.43', + '72.0.3626.115', + '74.0.3704.8', + '74.0.3704.7', + '74.0.3708.0', + '74.0.3706.7', + '74.0.3704.6', + '73.0.3683.42', + '72.0.3626.114', + 
'74.0.3706.6', + '72.0.3626.113', + '74.0.3704.5', + '74.0.3706.5', + '74.0.3706.4', + '74.0.3706.3', + '74.0.3706.2', + '74.0.3706.1', + '74.0.3706.0', + '73.0.3683.41', + '72.0.3626.112', + '74.0.3705.1', + '74.0.3705.0', + '73.0.3683.40', + '72.0.3626.111', + '73.0.3683.39', + '74.0.3704.4', + '73.0.3683.38', + '74.0.3704.3', + '74.0.3704.2', + '74.0.3704.1', + '74.0.3704.0', + '73.0.3683.37', + '72.0.3626.110', + '72.0.3626.109', + '74.0.3703.3', + '74.0.3703.2', + '73.0.3683.36', + '74.0.3703.1', + '74.0.3703.0', + '73.0.3683.35', + '72.0.3626.108', + '74.0.3702.2', + '74.0.3699.3', + '74.0.3702.1', + '74.0.3702.0', + '73.0.3683.34', + '72.0.3626.107', + '73.0.3683.33', + '74.0.3701.1', + '74.0.3701.0', + '73.0.3683.32', + '73.0.3683.31', + '72.0.3626.105', + '74.0.3700.1', + '74.0.3700.0', + '73.0.3683.29', + '72.0.3626.103', + '74.0.3699.2', + '74.0.3699.1', + '74.0.3699.0', + '73.0.3683.28', + '72.0.3626.102', + '73.0.3683.27', + '73.0.3683.26', + '74.0.3698.0', + '74.0.3696.2', + '72.0.3626.101', + '73.0.3683.25', + '74.0.3696.1', + '74.0.3696.0', + '74.0.3694.8', + '72.0.3626.100', + '74.0.3694.7', + '74.0.3694.6', + '74.0.3694.5', + '74.0.3694.4', + '72.0.3626.99', + '72.0.3626.98', + '74.0.3694.3', + '73.0.3683.24', + '72.0.3626.97', + '72.0.3626.96', + '72.0.3626.95', + '73.0.3683.23', + '72.0.3626.94', + '73.0.3683.22', + '73.0.3683.21', + '72.0.3626.93', + '74.0.3694.2', + '72.0.3626.92', + '74.0.3694.1', + '74.0.3694.0', + '74.0.3693.6', + '73.0.3683.20', + '72.0.3626.91', + '74.0.3693.5', + '74.0.3693.4', + '74.0.3693.3', + '74.0.3693.2', + '73.0.3683.19', + '74.0.3693.1', + '74.0.3693.0', + '73.0.3683.18', + '72.0.3626.90', + '74.0.3692.1', + '74.0.3692.0', + '73.0.3683.17', + '72.0.3626.89', + '74.0.3687.3', + '74.0.3691.1', + '74.0.3691.0', + '73.0.3683.16', + '72.0.3626.88', + '72.0.3626.87', + '73.0.3683.15', + '74.0.3690.1', + '74.0.3690.0', + '73.0.3683.14', + '72.0.3626.86', + '73.0.3683.13', + '73.0.3683.12', + '74.0.3689.1', + 
'74.0.3689.0', + '73.0.3683.11', + '72.0.3626.85', + '73.0.3683.10', + '72.0.3626.84', + '73.0.3683.9', + '74.0.3688.1', + '74.0.3688.0', + '73.0.3683.8', + '72.0.3626.83', + '74.0.3687.2', + '74.0.3687.1', + '74.0.3687.0', + '73.0.3683.7', + '72.0.3626.82', + '74.0.3686.4', + '72.0.3626.81', + '74.0.3686.3', + '74.0.3686.2', + '74.0.3686.1', + '74.0.3686.0', + '73.0.3683.6', + '72.0.3626.80', + '74.0.3685.1', + '74.0.3685.0', + '73.0.3683.5', + '72.0.3626.79', + '74.0.3684.1', + '74.0.3684.0', + '73.0.3683.4', + '72.0.3626.78', + '72.0.3626.77', + '73.0.3683.3', + '73.0.3683.2', + '72.0.3626.76', + '73.0.3683.1', + '73.0.3683.0', + '72.0.3626.75', + '71.0.3578.141', + '73.0.3682.1', + '73.0.3682.0', + '72.0.3626.74', + '71.0.3578.140', + '73.0.3681.4', + '73.0.3681.3', + '73.0.3681.2', + '73.0.3681.1', + '73.0.3681.0', + '72.0.3626.73', + '71.0.3578.139', + '72.0.3626.72', + '72.0.3626.71', + '73.0.3680.1', + '73.0.3680.0', + '72.0.3626.70', + '71.0.3578.138', + '73.0.3678.2', + '73.0.3679.1', + '73.0.3679.0', + '72.0.3626.69', + '71.0.3578.137', + '73.0.3678.1', + '73.0.3678.0', + '71.0.3578.136', + '73.0.3677.1', + '73.0.3677.0', + '72.0.3626.68', + '72.0.3626.67', + '71.0.3578.135', + '73.0.3676.1', + '73.0.3676.0', + '73.0.3674.2', + '72.0.3626.66', + '71.0.3578.134', + '73.0.3674.1', + '73.0.3674.0', + '72.0.3626.65', + '71.0.3578.133', + '73.0.3673.2', + '73.0.3673.1', + '73.0.3673.0', + '72.0.3626.64', + '71.0.3578.132', + '72.0.3626.63', + '72.0.3626.62', + '72.0.3626.61', + '72.0.3626.60', + '73.0.3672.1', + '73.0.3672.0', + '72.0.3626.59', + '71.0.3578.131', + '73.0.3671.3', + '73.0.3671.2', + '73.0.3671.1', + '73.0.3671.0', + '72.0.3626.58', + '71.0.3578.130', + '73.0.3670.1', + '73.0.3670.0', + '72.0.3626.57', + '71.0.3578.129', + '73.0.3669.1', + '73.0.3669.0', + '72.0.3626.56', + '71.0.3578.128', + '73.0.3668.2', + '73.0.3668.1', + '73.0.3668.0', + '72.0.3626.55', + '71.0.3578.127', + '73.0.3667.2', + '73.0.3667.1', + '73.0.3667.0', + '72.0.3626.54', 
+ '71.0.3578.126', + '73.0.3666.1', + '73.0.3666.0', + '72.0.3626.53', + '71.0.3578.125', + '73.0.3665.4', + '73.0.3665.3', + '72.0.3626.52', + '73.0.3665.2', + '73.0.3664.4', + '73.0.3665.1', + '73.0.3665.0', + '72.0.3626.51', + '71.0.3578.124', + '72.0.3626.50', + '73.0.3664.3', + '73.0.3664.2', + '73.0.3664.1', + '73.0.3664.0', + '73.0.3663.2', + '72.0.3626.49', + '71.0.3578.123', + '73.0.3663.1', + '73.0.3663.0', + '72.0.3626.48', + '71.0.3578.122', + '73.0.3662.1', + '73.0.3662.0', + '72.0.3626.47', + '71.0.3578.121', + '73.0.3661.1', + '72.0.3626.46', + '73.0.3661.0', + '72.0.3626.45', + '71.0.3578.120', + '73.0.3660.2', + '73.0.3660.1', + '73.0.3660.0', + '72.0.3626.44', + '71.0.3578.119', + '73.0.3659.1', + '73.0.3659.0', + '72.0.3626.43', + '71.0.3578.118', + '73.0.3658.1', + '73.0.3658.0', + '72.0.3626.42', + '71.0.3578.117', + '73.0.3657.1', + '73.0.3657.0', + '72.0.3626.41', + '71.0.3578.116', + '73.0.3656.1', + '73.0.3656.0', + '72.0.3626.40', + '71.0.3578.115', + '73.0.3655.1', + '73.0.3655.0', + '72.0.3626.39', + '71.0.3578.114', + '73.0.3654.1', + '73.0.3654.0', + '72.0.3626.38', + '71.0.3578.113', + '73.0.3653.1', + '73.0.3653.0', + '72.0.3626.37', + '71.0.3578.112', + '73.0.3652.1', + '73.0.3652.0', + '72.0.3626.36', + '71.0.3578.111', + '73.0.3651.1', + '73.0.3651.0', + '72.0.3626.35', + '71.0.3578.110', + '73.0.3650.1', + '73.0.3650.0', + '72.0.3626.34', + '71.0.3578.109', + '73.0.3649.1', + '73.0.3649.0', + '72.0.3626.33', + '71.0.3578.108', + '73.0.3648.2', + '73.0.3648.1', + '73.0.3648.0', + '72.0.3626.32', + '71.0.3578.107', + '73.0.3647.2', + '73.0.3647.1', + '73.0.3647.0', + '72.0.3626.31', + '71.0.3578.106', + '73.0.3635.3', + '73.0.3646.2', + '73.0.3646.1', + '73.0.3646.0', + '72.0.3626.30', + '71.0.3578.105', + '72.0.3626.29', + '73.0.3645.2', + '73.0.3645.1', + '73.0.3645.0', + '72.0.3626.28', + '71.0.3578.104', + '72.0.3626.27', + '72.0.3626.26', + '72.0.3626.25', + '72.0.3626.24', + '73.0.3644.0', + '73.0.3643.2', + '72.0.3626.23', + 
'71.0.3578.103', + '73.0.3643.1', + '73.0.3643.0', + '72.0.3626.22', + '71.0.3578.102', + '73.0.3642.1', + '73.0.3642.0', + '72.0.3626.21', + '71.0.3578.101', + '73.0.3641.1', + '73.0.3641.0', + '72.0.3626.20', + '71.0.3578.100', + '72.0.3626.19', + '73.0.3640.1', + '73.0.3640.0', + '72.0.3626.18', + '73.0.3639.1', + '71.0.3578.99', + '73.0.3639.0', + '72.0.3626.17', + '73.0.3638.2', + '72.0.3626.16', + '73.0.3638.1', + '73.0.3638.0', + '72.0.3626.15', + '71.0.3578.98', + '73.0.3635.2', + '71.0.3578.97', + '73.0.3637.1', + '73.0.3637.0', + '72.0.3626.14', + '71.0.3578.96', + '71.0.3578.95', + '72.0.3626.13', + '71.0.3578.94', + '73.0.3636.2', + '71.0.3578.93', + '73.0.3636.1', + '73.0.3636.0', + '72.0.3626.12', + '71.0.3578.92', + '73.0.3635.1', + '73.0.3635.0', + '72.0.3626.11', + '71.0.3578.91', + '73.0.3634.2', + '73.0.3634.1', + '73.0.3634.0', + '72.0.3626.10', + '71.0.3578.90', + '71.0.3578.89', + '73.0.3633.2', + '73.0.3633.1', + '73.0.3633.0', + '72.0.3610.4', + '72.0.3626.9', + '71.0.3578.88', + '73.0.3632.5', + '73.0.3632.4', + '73.0.3632.3', + '73.0.3632.2', + '73.0.3632.1', + '73.0.3632.0', + '72.0.3626.8', + '71.0.3578.87', + '73.0.3631.2', + '73.0.3631.1', + '73.0.3631.0', + '72.0.3626.7', + '71.0.3578.86', + '72.0.3626.6', + '73.0.3630.1', + '73.0.3630.0', + '72.0.3626.5', + '71.0.3578.85', + '72.0.3626.4', + '73.0.3628.3', + '73.0.3628.2', + '73.0.3629.1', + '73.0.3629.0', + '72.0.3626.3', + '71.0.3578.84', + '73.0.3628.1', + '73.0.3628.0', + '71.0.3578.83', + '73.0.3627.1', + '73.0.3627.0', + '72.0.3626.2', + '71.0.3578.82', + '71.0.3578.81', + '71.0.3578.80', + '72.0.3626.1', + '72.0.3626.0', + '71.0.3578.79', + '70.0.3538.124', + '71.0.3578.78', + '72.0.3623.4', + '72.0.3625.2', + '72.0.3625.1', + '72.0.3625.0', + '71.0.3578.77', + '70.0.3538.123', + '72.0.3624.4', + '72.0.3624.3', + '72.0.3624.2', + '71.0.3578.76', + '72.0.3624.1', + '72.0.3624.0', + '72.0.3623.3', + '71.0.3578.75', + '70.0.3538.122', + '71.0.3578.74', + '72.0.3623.2', + 
'72.0.3610.3', + '72.0.3623.1', + '72.0.3623.0', + '72.0.3622.3', + '72.0.3622.2', + '71.0.3578.73', + '70.0.3538.121', + '72.0.3622.1', + '72.0.3622.0', + '71.0.3578.72', + '70.0.3538.120', + '72.0.3621.1', + '72.0.3621.0', + '71.0.3578.71', + '70.0.3538.119', + '72.0.3620.1', + '72.0.3620.0', + '71.0.3578.70', + '70.0.3538.118', + '71.0.3578.69', + '72.0.3619.1', + '72.0.3619.0', + '71.0.3578.68', + '70.0.3538.117', + '71.0.3578.67', + '72.0.3618.1', + '72.0.3618.0', + '71.0.3578.66', + '70.0.3538.116', + '72.0.3617.1', + '72.0.3617.0', + '71.0.3578.65', + '70.0.3538.115', + '72.0.3602.3', + '71.0.3578.64', + '72.0.3616.1', + '72.0.3616.0', + '71.0.3578.63', + '70.0.3538.114', + '71.0.3578.62', + '72.0.3615.1', + '72.0.3615.0', + '71.0.3578.61', + '70.0.3538.113', + '72.0.3614.1', + '72.0.3614.0', + '71.0.3578.60', + '70.0.3538.112', + '72.0.3613.1', + '72.0.3613.0', + '71.0.3578.59', + '70.0.3538.111', + '72.0.3612.2', + '72.0.3612.1', + '72.0.3612.0', + '70.0.3538.110', + '71.0.3578.58', + '70.0.3538.109', + '72.0.3611.2', + '72.0.3611.1', + '72.0.3611.0', + '71.0.3578.57', + '70.0.3538.108', + '72.0.3610.2', + '71.0.3578.56', + '71.0.3578.55', + '72.0.3610.1', + '72.0.3610.0', + '71.0.3578.54', + '70.0.3538.107', + '71.0.3578.53', + '72.0.3609.3', + '71.0.3578.52', + '72.0.3609.2', + '71.0.3578.51', + '72.0.3608.5', + '72.0.3609.1', + '72.0.3609.0', + '71.0.3578.50', + '70.0.3538.106', + '72.0.3608.4', + '72.0.3608.3', + '72.0.3608.2', + '71.0.3578.49', + '72.0.3608.1', + '72.0.3608.0', + '70.0.3538.105', + '71.0.3578.48', + '72.0.3607.1', + '72.0.3607.0', + '71.0.3578.47', + '70.0.3538.104', + '72.0.3606.2', + '72.0.3606.1', + '72.0.3606.0', + '71.0.3578.46', + '70.0.3538.103', + '70.0.3538.102', + '72.0.3605.3', + '72.0.3605.2', + '72.0.3605.1', + '72.0.3605.0', + '71.0.3578.45', + '70.0.3538.101', + '71.0.3578.44', + '71.0.3578.43', + '70.0.3538.100', + '70.0.3538.99', + '71.0.3578.42', + '72.0.3604.1', + '72.0.3604.0', + '71.0.3578.41', + '70.0.3538.98', + 
'71.0.3578.40', + '72.0.3603.2', + '72.0.3603.1', + '72.0.3603.0', + '71.0.3578.39', + '70.0.3538.97', + '72.0.3602.2', + '71.0.3578.38', + '71.0.3578.37', + '72.0.3602.1', + '72.0.3602.0', + '71.0.3578.36', + '70.0.3538.96', + '72.0.3601.1', + '72.0.3601.0', + '71.0.3578.35', + '70.0.3538.95', + '72.0.3600.1', + '72.0.3600.0', + '71.0.3578.34', + '70.0.3538.94', + '72.0.3599.3', + '72.0.3599.2', + '72.0.3599.1', + '72.0.3599.0', + '71.0.3578.33', + '70.0.3538.93', + '72.0.3598.1', + '72.0.3598.0', + '71.0.3578.32', + '70.0.3538.87', + '72.0.3597.1', + '72.0.3597.0', + '72.0.3596.2', + '71.0.3578.31', + '70.0.3538.86', + '71.0.3578.30', + '71.0.3578.29', + '72.0.3596.1', + '72.0.3596.0', + '71.0.3578.28', + '70.0.3538.85', + '72.0.3595.2', + '72.0.3591.3', + '72.0.3595.1', + '72.0.3595.0', + '71.0.3578.27', + '70.0.3538.84', + '72.0.3594.1', + '72.0.3594.0', + '71.0.3578.26', + '70.0.3538.83', + '72.0.3593.2', + '72.0.3593.1', + '72.0.3593.0', + '71.0.3578.25', + '70.0.3538.82', + '72.0.3589.3', + '72.0.3592.2', + '72.0.3592.1', + '72.0.3592.0', + '71.0.3578.24', + '72.0.3589.2', + '70.0.3538.81', + '70.0.3538.80', + '72.0.3591.2', + '72.0.3591.1', + '72.0.3591.0', + '71.0.3578.23', + '70.0.3538.79', + '71.0.3578.22', + '72.0.3590.1', + '72.0.3590.0', + '71.0.3578.21', + '70.0.3538.78', + '70.0.3538.77', + '72.0.3589.1', + '72.0.3589.0', + '71.0.3578.20', + '70.0.3538.76', + '71.0.3578.19', + '70.0.3538.75', + '72.0.3588.1', + '72.0.3588.0', + '71.0.3578.18', + '70.0.3538.74', + '72.0.3586.2', + '72.0.3587.0', + '71.0.3578.17', + '70.0.3538.73', + '72.0.3586.1', + '72.0.3586.0', + '71.0.3578.16', + '70.0.3538.72', + '72.0.3585.1', + '72.0.3585.0', + '71.0.3578.15', + '70.0.3538.71', + '71.0.3578.14', + '72.0.3584.1', + '72.0.3584.0', + '71.0.3578.13', + '70.0.3538.70', + '72.0.3583.2', + '71.0.3578.12', + '72.0.3583.1', + '72.0.3583.0', + '71.0.3578.11', + '70.0.3538.69', + '71.0.3578.10', + '72.0.3582.0', + '72.0.3581.4', + '71.0.3578.9', + '70.0.3538.67', + 
'72.0.3581.3', + '72.0.3581.2', + '72.0.3581.1', + '72.0.3581.0', + '71.0.3578.8', + '70.0.3538.66', + '72.0.3580.1', + '72.0.3580.0', + '71.0.3578.7', + '70.0.3538.65', + '71.0.3578.6', + '72.0.3579.1', + '72.0.3579.0', + '71.0.3578.5', + '70.0.3538.64', + '71.0.3578.4', + '71.0.3578.3', + '71.0.3578.2', + '71.0.3578.1', + '71.0.3578.0', + '70.0.3538.63', + '69.0.3497.128', + '70.0.3538.62', + '70.0.3538.61', + '70.0.3538.60', + '70.0.3538.59', + '71.0.3577.1', + '71.0.3577.0', + '70.0.3538.58', + '69.0.3497.127', + '71.0.3576.2', + '71.0.3576.1', + '71.0.3576.0', + '70.0.3538.57', + '70.0.3538.56', + '71.0.3575.2', + '70.0.3538.55', + '69.0.3497.126', + '70.0.3538.54', + '71.0.3575.1', + '71.0.3575.0', + '71.0.3574.1', + '71.0.3574.0', + '70.0.3538.53', + '69.0.3497.125', + '70.0.3538.52', + '71.0.3573.1', + '71.0.3573.0', + '70.0.3538.51', + '69.0.3497.124', + '71.0.3572.1', + '71.0.3572.0', + '70.0.3538.50', + '69.0.3497.123', + '71.0.3571.2', + '70.0.3538.49', + '69.0.3497.122', + '71.0.3571.1', + '71.0.3571.0', + '70.0.3538.48', + '69.0.3497.121', + '71.0.3570.1', + '71.0.3570.0', + '70.0.3538.47', + '69.0.3497.120', + '71.0.3568.2', + '71.0.3569.1', + '71.0.3569.0', + '70.0.3538.46', + '69.0.3497.119', + '70.0.3538.45', + '71.0.3568.1', + '71.0.3568.0', + '70.0.3538.44', + '69.0.3497.118', + '70.0.3538.43', + '70.0.3538.42', + '71.0.3567.1', + '71.0.3567.0', + '70.0.3538.41', + '69.0.3497.117', + '71.0.3566.1', + '71.0.3566.0', + '70.0.3538.40', + '69.0.3497.116', + '71.0.3565.1', + '71.0.3565.0', + '70.0.3538.39', + '69.0.3497.115', + '71.0.3564.1', + '71.0.3564.0', + '70.0.3538.38', + '69.0.3497.114', + '71.0.3563.0', + '71.0.3562.2', + '70.0.3538.37', + '69.0.3497.113', + '70.0.3538.36', + '70.0.3538.35', + '71.0.3562.1', + '71.0.3562.0', + '70.0.3538.34', + '69.0.3497.112', + '70.0.3538.33', + '71.0.3561.1', + '71.0.3561.0', + '70.0.3538.32', + '69.0.3497.111', + '71.0.3559.6', + '71.0.3560.1', + '71.0.3560.0', + '71.0.3559.5', + '71.0.3559.4', + 
'70.0.3538.31', + '69.0.3497.110', + '71.0.3559.3', + '70.0.3538.30', + '69.0.3497.109', + '71.0.3559.2', + '71.0.3559.1', + '71.0.3559.0', + '70.0.3538.29', + '69.0.3497.108', + '71.0.3558.2', + '71.0.3558.1', + '71.0.3558.0', + '70.0.3538.28', + '69.0.3497.107', + '71.0.3557.2', + '71.0.3557.1', + '71.0.3557.0', + '70.0.3538.27', + '69.0.3497.106', + '71.0.3554.4', + '70.0.3538.26', + '71.0.3556.1', + '71.0.3556.0', + '70.0.3538.25', + '71.0.3554.3', + '69.0.3497.105', + '71.0.3554.2', + '70.0.3538.24', + '69.0.3497.104', + '71.0.3555.2', + '70.0.3538.23', + '71.0.3555.1', + '71.0.3555.0', + '70.0.3538.22', + '69.0.3497.103', + '71.0.3554.1', + '71.0.3554.0', + '70.0.3538.21', + '69.0.3497.102', + '71.0.3553.3', + '70.0.3538.20', + '69.0.3497.101', + '71.0.3553.2', + '69.0.3497.100', + '71.0.3553.1', + '71.0.3553.0', + '70.0.3538.19', + '69.0.3497.99', + '69.0.3497.98', + '69.0.3497.97', + '71.0.3552.6', + '71.0.3552.5', + '71.0.3552.4', + '71.0.3552.3', + '71.0.3552.2', + '71.0.3552.1', + '71.0.3552.0', + '70.0.3538.18', + '69.0.3497.96', + '71.0.3551.3', + '71.0.3551.2', + '71.0.3551.1', + '71.0.3551.0', + '70.0.3538.17', + '69.0.3497.95', + '71.0.3550.3', + '71.0.3550.2', + '71.0.3550.1', + '71.0.3550.0', + '70.0.3538.16', + '69.0.3497.94', + '71.0.3549.1', + '71.0.3549.0', + '70.0.3538.15', + '69.0.3497.93', + '69.0.3497.92', + '71.0.3548.1', + '71.0.3548.0', + '70.0.3538.14', + '69.0.3497.91', + '71.0.3547.1', + '71.0.3547.0', + '70.0.3538.13', + '69.0.3497.90', + '71.0.3546.2', + '69.0.3497.89', + '71.0.3546.1', + '71.0.3546.0', + '70.0.3538.12', + '69.0.3497.88', + '71.0.3545.4', + '71.0.3545.3', + '71.0.3545.2', + '71.0.3545.1', + '71.0.3545.0', + '70.0.3538.11', + '69.0.3497.87', + '71.0.3544.5', + '71.0.3544.4', + '71.0.3544.3', + '71.0.3544.2', + '71.0.3544.1', + '71.0.3544.0', + '69.0.3497.86', + '70.0.3538.10', + '69.0.3497.85', + '70.0.3538.9', + '69.0.3497.84', + '71.0.3543.4', + '70.0.3538.8', + '71.0.3543.3', + '71.0.3543.2', + '71.0.3543.1', + 
'71.0.3543.0', + '70.0.3538.7', + '69.0.3497.83', + '71.0.3542.2', + '71.0.3542.1', + '71.0.3542.0', + '70.0.3538.6', + '69.0.3497.82', + '69.0.3497.81', + '71.0.3541.1', + '71.0.3541.0', + '70.0.3538.5', + '69.0.3497.80', + '71.0.3540.1', + '71.0.3540.0', + '70.0.3538.4', + '69.0.3497.79', + '70.0.3538.3', + '71.0.3539.1', + '71.0.3539.0', + '69.0.3497.78', + '68.0.3440.134', + '69.0.3497.77', + '70.0.3538.2', + '70.0.3538.1', + '70.0.3538.0', + '69.0.3497.76', + '68.0.3440.133', + '69.0.3497.75', + '70.0.3537.2', + '70.0.3537.1', + '70.0.3537.0', + '69.0.3497.74', + '68.0.3440.132', + '70.0.3536.0', + '70.0.3535.5', + '70.0.3535.4', + '70.0.3535.3', + '69.0.3497.73', + '68.0.3440.131', + '70.0.3532.8', + '70.0.3532.7', + '69.0.3497.72', + '69.0.3497.71', + '70.0.3535.2', + '70.0.3535.1', + '70.0.3535.0', + '69.0.3497.70', + '68.0.3440.130', + '69.0.3497.69', + '68.0.3440.129', + '70.0.3534.4', + '70.0.3534.3', + '70.0.3534.2', + '70.0.3534.1', + '70.0.3534.0', + '69.0.3497.68', + '68.0.3440.128', + '70.0.3533.2', + '70.0.3533.1', + '70.0.3533.0', + '69.0.3497.67', + '68.0.3440.127', + '70.0.3532.6', + '70.0.3532.5', + '70.0.3532.4', + '69.0.3497.66', + '68.0.3440.126', + '70.0.3532.3', + '70.0.3532.2', + '70.0.3532.1', + '69.0.3497.60', + '69.0.3497.65', + '69.0.3497.64', + '70.0.3532.0', + '70.0.3531.0', + '70.0.3530.4', + '70.0.3530.3', + '70.0.3530.2', + '69.0.3497.58', + '68.0.3440.125', + '69.0.3497.57', + '69.0.3497.56', + '69.0.3497.55', + '69.0.3497.54', + '70.0.3530.1', + '70.0.3530.0', + '69.0.3497.53', + '68.0.3440.124', + '69.0.3497.52', + '70.0.3529.3', + '70.0.3529.2', + '70.0.3529.1', + '70.0.3529.0', + '69.0.3497.51', + '70.0.3528.4', + '68.0.3440.123', + '70.0.3528.3', + '70.0.3528.2', + '70.0.3528.1', + '70.0.3528.0', + '69.0.3497.50', + '68.0.3440.122', + '70.0.3527.1', + '70.0.3527.0', + '69.0.3497.49', + '68.0.3440.121', + '70.0.3526.1', + '70.0.3526.0', + '68.0.3440.120', + '69.0.3497.48', + '69.0.3497.47', + '68.0.3440.119', + 
'68.0.3440.118', + '70.0.3525.5', + '70.0.3525.4', + '70.0.3525.3', + '68.0.3440.117', + '69.0.3497.46', + '70.0.3525.2', + '70.0.3525.1', + '70.0.3525.0', + '69.0.3497.45', + '68.0.3440.116', + '70.0.3524.4', + '70.0.3524.3', + '69.0.3497.44', + '70.0.3524.2', + '70.0.3524.1', + '70.0.3524.0', + '70.0.3523.2', + '69.0.3497.43', + '68.0.3440.115', + '70.0.3505.9', + '69.0.3497.42', + '70.0.3505.8', + '70.0.3523.1', + '70.0.3523.0', + '69.0.3497.41', + '68.0.3440.114', + '70.0.3505.7', + '69.0.3497.40', + '70.0.3522.1', + '70.0.3522.0', + '70.0.3521.2', + '69.0.3497.39', + '68.0.3440.113', + '70.0.3505.6', + '70.0.3521.1', + '70.0.3521.0', + '69.0.3497.38', + '68.0.3440.112', + '70.0.3520.1', + '70.0.3520.0', + '69.0.3497.37', + '68.0.3440.111', + '70.0.3519.3', + '70.0.3519.2', + '70.0.3519.1', + '70.0.3519.0', + '69.0.3497.36', + '68.0.3440.110', + '70.0.3518.1', + '70.0.3518.0', + '69.0.3497.35', + '69.0.3497.34', + '68.0.3440.109', + '70.0.3517.1', + '70.0.3517.0', + '69.0.3497.33', + '68.0.3440.108', + '69.0.3497.32', + '70.0.3516.3', + '70.0.3516.2', + '70.0.3516.1', + '70.0.3516.0', + '69.0.3497.31', + '68.0.3440.107', + '70.0.3515.4', + '68.0.3440.106', + '70.0.3515.3', + '70.0.3515.2', + '70.0.3515.1', + '70.0.3515.0', + '69.0.3497.30', + '68.0.3440.105', + '68.0.3440.104', + '70.0.3514.2', + '70.0.3514.1', + '70.0.3514.0', + '69.0.3497.29', + '68.0.3440.103', + '70.0.3513.1', + '70.0.3513.0', + '69.0.3497.28', + ) + return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS) + + std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0', + 'User-Agent': random_user_agent(), 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', From c5606802474822887b75af7de23de6679264c0fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 29 Jun 2019 00:33:35 +0700 Subject: [PATCH 399/785] 
[soundcloud] Update client id --- youtube_dl/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 277c3c7b4..3a8626e02 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -221,7 +221,7 @@ class SoundcloudIE(InfoExtractor): } ] - _CLIENT_ID = 'FweeGBOOEOYJWLJN3oEyToGLKhmSz0I7' + _CLIENT_ID = 'BeGVhOrGmfboy1LtiHTQF6Ejpt9ULJCI' @staticmethod def _extract_urls(webpage): From 5e3da0d42b3d16465a95451276f021ecd0b7bd75 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 1 Jul 2019 08:37:21 +0100 Subject: [PATCH 400/785] [dailymotion] add support embed with DM.player js call --- youtube_dl/extractor/dailymotion.py | 12 +++++++++--- youtube_dl/extractor/generic.py | 17 +++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 1a2c1308a..3d3d78041 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -137,10 +137,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor): @staticmethod def _extract_urls(webpage): + urls = [] # Look for embedded Dailymotion player - matches = re.findall( - r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage) - return list(map(lambda m: unescapeHTML(m[1]), matches)) + # https://developer.dailymotion.com/player#player-parameters + for mobj in re.finditer( + r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage): + urls.append(unescapeHTML(mobj.group('url'))) + for mobj in re.finditer( + r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P[0-9a-zA-Z]+).+?}\s*\);', webpage): + 
urls.append('https://www.dailymotion.com/embed/video/' + mobj.group('id')) + return urls def _real_extract(self, url): video_id = self._match_id(url) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index eeb0d25f6..77e217460 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2104,6 +2104,23 @@ class GenericIE(InfoExtractor): }, 'expected_warnings': ['Failed to download MPD manifest'], }, + { + # DailyMotion embed with DM.player + 'url': 'https://www.beinsports.com/us/copa-del-rey/video/the-locker-room-valencia-beat-barca-in-copa/1203804', + 'info_dict': { + 'id': 'k6aKkGHd9FJs4mtJN39', + 'ext': 'mp4', + 'title': 'The Locker Room: Valencia Beat Barca In Copa del Rey Final', + 'description': 'This video is private.', + 'uploader_id': 'x1jf30l', + 'uploader': 'beIN SPORTS USA', + 'upload_date': '20190528', + 'timestamp': 1559062971, + }, + 'params': { + 'skip_download': True, + }, + }, # { # # TODO: find another test # # http://schema.org/VideoObject From 976e1ff7f9be76588f5e6d4a569a49694072e08b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 1 Jul 2019 12:05:18 +0100 Subject: [PATCH 401/785] [acast] add support for URLs with episode id(closes #21444) --- youtube_dl/extractor/acast.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index c4362be88..b17c792d2 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -7,6 +7,7 @@ import functools from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + clean_html, float_or_none, int_or_none, try_get, @@ -27,7 +28,7 @@ class ACastIE(InfoExtractor): ''' _TESTS = [{ 'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna', - 'md5': 'a02393c74f3bdb1801c3ec2695577ce0', + 'md5': '16d936099ec5ca2d5869e3a813ee8dc4', 'info_dict': { 'id': 
'2a92b283-1a75-4ad8-8396-499c641de0d9', 'ext': 'mp3', @@ -46,28 +47,37 @@ class ACastIE(InfoExtractor): }, { 'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22', 'only_matching': True, + }, { + 'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9', + 'only_matching': True, }] def _real_extract(self, url): channel, display_id = re.match(self._VALID_URL, url).groups() s = self._download_json( - 'https://play-api.acast.com/stitch/%s/%s' % (channel, display_id), - display_id)['result'] + 'https://feeder.acast.com/api/v1/shows/%s/episodes/%s' % (channel, display_id), + display_id) media_url = s['url'] + if re.search(r'[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}', display_id): + episode_url = s.get('episodeUrl') + if episode_url: + display_id = episode_url + else: + channel, display_id = re.match(self._VALID_URL, s['link']).groups() cast_data = self._download_json( 'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), display_id)['result'] e = cast_data['episode'] - title = e['name'] + title = e.get('name') or s['title'] return { 'id': compat_str(e['id']), 'display_id': display_id, 'url': media_url, 'title': title, - 'description': e.get('description') or e.get('summary'), + 'description': e.get('summary') or clean_html(e.get('description') or s.get('description')), 'thumbnail': e.get('image'), - 'timestamp': unified_timestamp(e.get('publishingDate')), - 'duration': float_or_none(s.get('duration') or e.get('duration')), + 'timestamp': unified_timestamp(e.get('publishingDate') or s.get('publishDate')), + 'duration': float_or_none(e.get('duration') or s.get('duration')), 'filesize': int_or_none(e.get('contentLength')), 'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str), 'series': try_get(cast_data, lambda x: x['show']['name'], compat_str), From 4e2491f066f81ee9e941c48a910982ec6ac286b5 Mon Sep 17 00:00:00 2001 From: xyssy <52385286+xyssy@users.noreply.github.com> Date: Mon, 
1 Jul 2019 12:05:51 -0500 Subject: [PATCH 402/785] [yourporn] Fix extraction (#21585) --- youtube_dl/extractor/yourporn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/yourporn.py index b1d1eb6b6..8a2d5f63b 100644 --- a/youtube_dl/extractor/yourporn.py +++ b/youtube_dl/extractor/yourporn.py @@ -37,7 +37,7 @@ class YourPornIE(InfoExtractor): self._search_regex( r'data-vnfo=(["\'])(?P{.+?})\1', webpage, 'data info', group='data'), - video_id)[video_id]).replace('/cdn/', '/cdn4/') + video_id)[video_id]).replace('/cdn/', '/cdn5/') title = (self._search_regex( r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title', From 918398092c5049a6edf940ebe3c2dd46916ee93c Mon Sep 17 00:00:00 2001 From: Fai <4016742+aicest@users.noreply.github.com> Date: Tue, 2 Jul 2019 01:10:55 +0800 Subject: [PATCH 403/785] [xiami] Update API base URL (#21575) --- youtube_dl/extractor/xiami.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py index 8333fb534..618da8382 100644 --- a/youtube_dl/extractor/xiami.py +++ b/youtube_dl/extractor/xiami.py @@ -7,7 +7,7 @@ from ..utils import int_or_none class XiamiBaseIE(InfoExtractor): - _API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id' + _API_BASE_URL = 'https://emumo.xiami.com/song/playlist/cat/json/id' def _download_webpage_handle(self, *args, **kwargs): webpage = super(XiamiBaseIE, self)._download_webpage_handle(*args, **kwargs) From 9baf69af450a90ead36af6d205cd0afc87b79253 Mon Sep 17 00:00:00 2001 From: smed79 <1873139+smed79@users.noreply.github.com> Date: Mon, 1 Jul 2019 18:11:38 +0100 Subject: [PATCH 404/785] [openload] Add support for oload.biz (#21574) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 237b0d8fb..11e92e471 100644 --- 
a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' + _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' _VALID_URL = r'''(?x) https?:// (?P @@ -365,6 +365,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.life/embed/oOzZjNPw9Dc/', 'only_matching': True, + }, { + 'url': 'https://oload.biz/f/bEk3Gp8ARr4/', + 'only_matching': True, }, { 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', 'only_matching': True, From d1e41164272a2993816548beebd0d5ef4effafe8 Mon Sep 17 00:00:00 2001 From: nyuszika7h Date: Mon, 1 Jul 2019 19:13:23 +0200 Subject: [PATCH 405/785] [vevo] Add support for embed.vevo.com URLs (#21565) --- youtube_dl/extractor/vevo.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 232e05816..4ea9f1b4b 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -34,6 +34,7 @@ class VevoIE(VevoBaseIE): (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?| https?://cache\.vevo\.com/m/html/embed\.html\?video=| https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| + https?://embed\.vevo\.com/.*?[?&]isrc=| vevo:) (?P[^&?#]+)''' @@ -144,6 +145,9 @@ class VevoIE(VevoBaseIE): # Geo-restricted to Netherlands/Germany 'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909', 'only_matching': True, + }, { + 'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=', + 'only_matching': True, }] _VERSIONS = { 
0: 'youtube', # only in AuthenticateVideo videoVersions From c8343f0a4331bd2f561fd67b9b272afb60147a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 2 Jul 2019 01:07:54 +0700 Subject: [PATCH 406/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4ae3d6c7c..9deeb884a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version + +Core ++ [utils] Introduce random_user_agent and use as default User-Agent (#21546) + +Extractors ++ [vevo] Add support for embed.vevo.com URLs (#21565) ++ [openload] Add support for oload.biz (#21574) +* [xiami] Update API base URL (#21575) +* [yourporn] Fix extraction (#21585) ++ [acast] Add support for URLs with episode id (#21444) ++ [dailymotion] Add support for DM.player embeds +* [soundcloud] Update client id + + version 2019.06.27 Extractors From 1335bf10f69b5d2c45b386d3faf71398b9662f9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 2 Jul 2019 01:09:59 +0700 Subject: [PATCH 407/785] release 2019.07.02 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index d7c15e85a..fb0d33b8f 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've 
checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.27 + [debug] youtube-dl version 2019.07.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 741862590..3c95565a6 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 4fb035ea4..7410776d7 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff 
--git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 73ed62012..cc52bcca6 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.27 + [debug] youtube-dl version 2019.07.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index a9d3653e2..bbd421b1a 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 9deeb884a..5ce78b07a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.07.02 Core + [utils] Introduce random_user_agent and use as default User-Agent (#21546) diff --git 
a/youtube_dl/version.py b/youtube_dl/version.py index 01896873d..78fe54326 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.06.27' +__version__ = '2019.07.02' From ff0f4cfeba73d17c74caa05b55da610d903ae4d3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 2 Jul 2019 22:07:01 +0100 Subject: [PATCH 408/785] [arte] clean extractor(closes #15583)(closes #21614) --- youtube_dl/extractor/arte.py | 330 +++-------------------------- youtube_dl/extractor/extractors.py | 9 - 2 files changed, 29 insertions(+), 310 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index ffc321821..2bd3bfe8a 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -4,17 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_str, - compat_urllib_parse_urlparse, -) +from ..compat import compat_str from ..utils import ( ExtractorError, - find_xpath_attr, - get_element_by_attribute, int_or_none, - NO_DEFAULT, qualities, try_get, unified_strdate, @@ -25,59 +18,7 @@ from ..utils import ( # add tests. 
-class ArteTvIE(InfoExtractor): - _VALID_URL = r'https?://videos\.arte\.tv/(?Pfr|de|en|es)/.*-(?P.*?)\.html' - IE_NAME = 'arte.tv' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - lang = mobj.group('lang') - video_id = mobj.group('id') - - ref_xml_url = url.replace('/videos/', '/do_delegate/videos/') - ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml') - ref_xml_doc = self._download_xml( - ref_xml_url, video_id, note='Downloading metadata') - config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang) - config_xml_url = config_node.attrib['ref'] - config = self._download_xml( - config_xml_url, video_id, note='Downloading configuration') - - formats = [{ - 'format_id': q.attrib['quality'], - # The playpath starts at 'mp4:', if we don't manually - # split the url, rtmpdump will incorrectly parse them - 'url': q.text.split('mp4:', 1)[0], - 'play_path': 'mp4:' + q.text.split('mp4:', 1)[1], - 'ext': 'flv', - 'quality': 2 if q.attrib['quality'] == 'hd' else 1, - } for q in config.findall('./urls/url')] - self._sort_formats(formats) - - title = config.find('.//name').text - thumbnail = config.find('.//firstThumbnailUrl').text - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - } - - class ArteTVBaseIE(InfoExtractor): - @classmethod - def _extract_url_info(cls, url): - mobj = re.match(cls._VALID_URL, url) - lang = mobj.group('lang') - query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) - if 'vid' in query: - video_id = query['vid'][0] - else: - # This is not a real id, it can be for example AJT for the news - # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal - video_id = mobj.group('id') - return video_id, lang - def _extract_from_json_url(self, json_url, video_id, lang, title=None): info = self._download_json(json_url, video_id) player_info = info['videoJsonPlayer'] @@ -108,13 +49,15 @@ class ArteTVBaseIE(InfoExtractor): 'upload_date': 
unified_strdate(upload_date_str), 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), } - qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ']) + qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ']) LANGS = { 'fr': 'F', 'de': 'A', 'en': 'E[ANG]', 'es': 'E[ESP]', + 'it': 'E[ITA]', + 'pl': 'E[POL]', } langcode = LANGS.get(lang, lang) @@ -126,8 +69,8 @@ class ArteTVBaseIE(InfoExtractor): l = re.escape(langcode) # Language preference from most to least priority - # Reference: section 5.6.3 of - # http://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-05.pdf + # Reference: section 6.8 of + # https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf PREFERENCES = ( # original version in requested language, without subtitles r'VO{0}$'.format(l), @@ -193,274 +136,59 @@ class ArteTVBaseIE(InfoExtractor): class ArteTVPlus7IE(ArteTVBaseIE): IE_NAME = 'arte.tv:+7' - _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/(?:[^/]+/)?(?Pfr|de|en|es)/(?:videos/)?(?:[^/]+/)*(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?Pfr|de|en|es|it|pl)/videos/(?P\d{6}-\d{3}-[AF])' _TESTS = [{ - 'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D', - 'only_matching': True, - }, { - 'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22', - 'only_matching': True, - }, { - 'url': 'http://www.arte.tv/de/videos/048696-000-A/der-kluge-bauch-unser-zweites-gehirn', - 'only_matching': True, + 'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', + 'info_dict': { + 'id': '088501-000-A', + 'ext': 'mp4', + 'title': 'Mexico: Stealing Petrol to Survive', + 'upload_date': '20190628', + }, }] - @classmethod - def suitable(cls, url): - return False if ArteTVPlaylistIE.suitable(url) else super(ArteTVPlus7IE, cls).suitable(url) - def _real_extract(self, url): - video_id, lang = self._extract_url_info(url) - webpage = self._download_webpage(url, 
video_id) - return self._extract_from_webpage(webpage, video_id, lang) - - def _extract_from_webpage(self, webpage, video_id, lang): - patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']') - ids = (video_id, '') - # some pages contain multiple videos (like - # http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D), - # so we first try to look for json URLs that contain the video id from - # the 'vid' parameter. - patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates] - json_url = self._html_search_regex( - patterns, webpage, 'json vp url', default=None) - if not json_url: - def find_iframe_url(webpage, default=NO_DEFAULT): - return self._html_search_regex( - r']+src=(["\'])(?P.+\bjson_url=.+?)\1', - webpage, 'iframe url', group='url', default=default) - - iframe_url = find_iframe_url(webpage, None) - if not iframe_url: - embed_url = self._html_search_regex( - r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None) - if embed_url: - player = self._download_json( - embed_url, video_id, 'Downloading player page') - iframe_url = find_iframe_url(player['html']) - # en and es URLs produce react-based pages with different layout (e.g. - # http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world) - if not iframe_url: - program = self._search_regex( - r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n', - webpage, 'program', default=None) - if program: - embed_html = self._parse_json(program, video_id) - if embed_html: - iframe_url = find_iframe_url(embed_html['embed_html']) - if iframe_url: - json_url = compat_parse_qs( - compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0] - if json_url: - title = self._search_regex( - r']+title=(["\'])(?P.+?)\1', - webpage, 'title', default=None, group='title') - return self._extract_from_json_url(json_url, video_id, lang, title=title) - # Different kind of embed URL (e.g. 
- # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium) - entries = [ - self.url_result(url) - for _, url in re.findall(r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', webpage)] - return self.playlist_result(entries) - - -# It also uses the arte_vp_url url from the webpage to extract the information -class ArteTVCreativeIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:creative' - _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://creative.arte.tv/fr/episode/osmosis-episode-1', - 'info_dict': { - 'id': '057405-001-A', - 'ext': 'mp4', - 'title': 'OSMOSIS - N\'AYEZ PLUS PEUR D\'AIMER (1)', - 'upload_date': '20150716', - }, - }, { - 'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion', - 'playlist_count': 11, - 'add_ie': ['Youtube'], - }, { - 'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde', - 'only_matching': True, - }] - - -class ArteTVInfoIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:info' - _VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere', - 'info_dict': { - 'id': '067528-000-A', - 'ext': 'mp4', - 'title': 'Service civique, un cache misère ?', - 'upload_date': '20160403', - }, - }] - - -class ArteTVFutureIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:future' - _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://future.arte.tv/fr/info-sciences/les-ecrevisses-aussi-sont-anxieuses', - 'info_dict': { - 'id': '050940-028-A', - 'ext': 'mp4', - 'title': 'Les écrevisses aussi peuvent être anxieuses', - 'upload_date': '20140902', - }, - }, { - 'url': 'http://future.arte.tv/fr/la-science-est-elle-responsable', - 'only_matching': True, - }] - - -class ArteTVDDCIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:ddc' - _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)' - - 
_TESTS = [] - - def _real_extract(self, url): - video_id, lang = self._extract_url_info(url) - if lang == 'folge': - lang = 'de' - elif lang == 'emission': - lang = 'fr' - webpage = self._download_webpage(url, video_id) - scriptElement = get_element_by_attribute('class', 'visu_video_block', webpage) - script_url = self._html_search_regex(r'src="(.*?)"', scriptElement, 'script url') - javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator') - json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url') - return self._extract_from_json_url(json_url, video_id, lang) - - -class ArteTVConcertIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:concert' - _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde', - 'md5': '9ea035b7bd69696b67aa2ccaaa218161', - 'info_dict': { - 'id': '186', - 'ext': 'mp4', - 'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"', - 'upload_date': '20140128', - 'description': 'md5:486eb08f991552ade77439fe6d82c305', - }, - }] - - -class ArteTVCinemaIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:cinema' - _VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)' - - _TESTS = [{ - 'url': 'http://cinema.arte.tv/fr/article/les-ailes-du-desir-de-julia-reck', - 'md5': 'a5b9dd5575a11d93daf0e3f404f45438', - 'info_dict': { - 'id': '062494-000-A', - 'ext': 'mp4', - 'title': 'Film lauréat du concours web - "Les ailes du désir" de Julia Reck', - 'upload_date': '20150807', - }, - }] - - -class ArteTVMagazineIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:magazine' - _VALID_URL = r'https?://(?:www\.)?arte\.tv/magazine/[^/]+/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - # Embedded via <iframe src="http://www.arte.tv/arte_vp/index.php?json_url=..." 
- 'url': 'http://www.arte.tv/magazine/trepalium/fr/entretien-avec-le-realisateur-vincent-lannoo-trepalium', - 'md5': '2a9369bcccf847d1c741e51416299f25', - 'info_dict': { - 'id': '065965-000-A', - 'ext': 'mp4', - 'title': 'Trepalium - Extrait Ep.01', - 'upload_date': '20160121', - }, - }, { - # Embedded via <iframe src="http://www.arte.tv/guide/fr/embed/054813-004-A/medium" - 'url': 'http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium', - 'md5': 'fedc64fc7a946110fe311634e79782ca', - 'info_dict': { - 'id': '054813-004_PLUS7-F', - 'ext': 'mp4', - 'title': 'Trepalium (4/6)', - 'description': 'md5:10057003c34d54e95350be4f9b05cb40', - 'upload_date': '20160218', - }, - }, { - 'url': 'http://www.arte.tv/magazine/metropolis/de/frank-woeste-german-paris-metropolis', - 'only_matching': True, - }] + lang, video_id = re.match(self._VALID_URL, url).groups() + return self._extract_from_json_url( + 'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id), + video_id, lang) class ArteTVEmbedIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:embed' _VALID_URL = r'''(?x) - http://www\.arte\.tv - /(?:playerv2/embed|arte_vp/index)\.php\?json_url= + https://www\.arte\.tv + /player/v3/index\.php\?json_url= (?P<json_url> - http://arte\.tv/papi/tvguide/videos/stream/player/ - (?P<lang>[^/]+)/(?P<id>[^/]+)[^&]* + https?://api\.arte\.tv/api/player/v1/config/ + (?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF]) ) ''' _TESTS = [] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - lang = mobj.group('lang') - json_url = mobj.group('json_url') + json_url, lang, video_id = re.match(self._VALID_URL, url).groups() return self._extract_from_json_url(json_url, video_id, lang) -class TheOperaPlatformIE(ArteTVPlus7IE): - IE_NAME = 'theoperaplatform' - _VALID_URL = r'https?://(?:www\.)?theoperaplatform\.eu/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://www.theoperaplatform.eu/de/opera/verdi-otello', - 'md5': 
'970655901fa2e82e04c00b955e9afe7b', - 'info_dict': { - 'id': '060338-009-A', - 'ext': 'mp4', - 'title': 'Verdi - OTELLO', - 'upload_date': '20160927', - }, - }] - - class ArteTVPlaylistIE(ArteTVBaseIE): IE_NAME = 'arte.tv:playlist' - _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)' + _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})' _TESTS = [{ - 'url': 'http://www.arte.tv/guide/de/plus7/?country=DE#collection/PL-013263/ARTETV', + 'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/', 'info_dict': { - 'id': 'PL-013263', - 'title': 'Areva & Uramin', - 'description': 'md5:a1dc0312ce357c262259139cfd48c9bf', + 'id': 'RC-016954', + 'title': 'Earn a Living', + 'description': 'md5:d322c55011514b3a7241f7fb80d494c2', }, 'playlist_mincount': 6, - }, { - 'url': 'http://www.arte.tv/guide/de/playlists?country=DE#collection/PL-013190/ARTETV', - 'only_matching': True, }] def _real_extract(self, url): - playlist_id, lang = self._extract_url_info(url) + lang, playlist_id = re.match(self._VALID_URL, url).groups() collection = self._download_json( 'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos' % (lang, playlist_id), playlist_id) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 530474f3f..02f17cf0d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -58,17 +58,8 @@ from .ard import ( ARDMediathekIE, ) from .arte import ( - ArteTvIE, ArteTVPlus7IE, - ArteTVCreativeIE, - ArteTVConcertIE, - ArteTVInfoIE, - ArteTVFutureIE, - ArteTVCinemaIE, - ArteTVDDCIE, - ArteTVMagazineIE, ArteTVEmbedIE, - TheOperaPlatformIE, ArteTVPlaylistIE, ) from .asiancrush import ( From e61ac1a09c215d9efb9a65ee798a6c1d6a0863cd Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 3 Jul 2019 13:31:47 +0100 Subject: [PATCH 409/785] [tvland] fix extraction(closes #21384) --- 
youtube_dl/extractor/tvland.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/tvland.py b/youtube_dl/extractor/tvland.py index 957cf1ea2..791144128 100644 --- a/youtube_dl/extractor/tvland.py +++ b/youtube_dl/extractor/tvland.py @@ -1,32 +1,35 @@ # coding: utf-8 from __future__ import unicode_literals -from .mtv import MTVServicesInfoExtractor +from .spike import ParamountNetworkIE -class TVLandIE(MTVServicesInfoExtractor): +class TVLandIE(ParamountNetworkIE): IE_NAME = 'tvland.com' _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)' _FEED_URL = 'http://www.tvland.com/feeds/mrss/' _TESTS = [{ # Geo-restricted. Without a proxy metadata are still there. With a # proxy it redirects to http://m.tvland.com/app/ - 'url': 'http://www.tvland.com/episodes/hqhps2/everybody-loves-raymond-the-invasion-ep-048', + 'url': 'https://www.tvland.com/episodes/s04pzf/everybody-loves-raymond-the-dog-season-1-ep-19', 'info_dict': { - 'description': 'md5:80973e81b916a324e05c14a3fb506d29', - 'title': 'The Invasion', + 'description': 'md5:84928e7a8ad6649371fbf5da5e1ad75a', + 'title': 'The Dog', }, - 'playlist': [], + 'playlist_mincount': 5, }, { - 'url': 'http://www.tvland.com/video-clips/zea2ev/younger-younger--hilary-duff---little-lies', + 'url': 'https://www.tvland.com/video-clips/4n87f2/younger-a-first-look-at-younger-season-6', 'md5': 'e2c6389401cf485df26c79c247b08713', 'info_dict': { - 'id': 'b8697515-4bbe-4e01-83d5-fa705ce5fa88', + 'id': '891f7d3c-5b5b-4753-b879-b7ba1a601757', 'ext': 'mp4', - 'title': 'Younger|December 28, 2015|2|NO-EPISODE#|Younger: Hilary Duff - Little Lies', - 'description': 'md5:7d192f56ca8d958645c83f0de8ef0269', - 'upload_date': '20151228', - 'timestamp': 1451289600, + 'title': 'Younger|April 30, 2019|6|NO-EPISODE#|A First Look at Younger Season 6', + 'description': 'md5:595ea74578d3a888ae878dfd1c7d4ab2', + 'upload_date': '20190430', + 
'timestamp': 1556658000, + }, + 'params': { + 'skip_download': True, }, }, { 'url': 'http://www.tvland.com/full-episodes/iu0hz6/younger-a-kiss-is-just-a-kiss-season-3-ep-301', From 313877c6a2b5ac8b880a9c47e8038ea0cdcf3deb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 3 Jul 2019 23:16:40 +0700 Subject: [PATCH 410/785] [vzaar] Fix videos with empty title (closes #21606) --- youtube_dl/extractor/vzaar.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py index 6000671c3..3336e6c15 100644 --- a/youtube_dl/extractor/vzaar.py +++ b/youtube_dl/extractor/vzaar.py @@ -32,6 +32,10 @@ class VzaarIE(InfoExtractor): 'ext': 'mp3', 'title': 'MP3', }, + }, { + # with null videoTitle + 'url': 'https://view.vzaar.com/20313539/download', + 'only_matching': True, }] @staticmethod @@ -45,7 +49,7 @@ class VzaarIE(InfoExtractor): video_data = self._download_json( 'http://view.vzaar.com/v2/%s/video' % video_id, video_id) - title = video_data['videoTitle'] + title = video_data.get('videoTitle') or video_id formats = [] From 2da4316e48475c344be862149f744c3a8a1ab2f1 Mon Sep 17 00:00:00 2001 From: David Caldwell <david+github@porkrind.org> Date: Wed, 3 Jul 2019 09:22:23 -0700 Subject: [PATCH 411/785] [twitch:vod] Actualize m3u8 URL (#21538, #21607) --- youtube_dl/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index dc5ff29c3..0500e33a6 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -317,7 +317,7 @@ class TwitchVodIE(TwitchItemBaseIE): 'Downloading %s access token' % self._ITEM_TYPE) formats = self._extract_m3u8_formats( - '%s/vod/%s?%s' % ( + '%s/vod/%s.m3u8?%s' % ( self._USHER_BASE, item_id, compat_urllib_parse_urlencode({ 'allow_source': 'true', From cdb7c7d147b19f79512d541465cb5be9a54c7950 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 4 Jul 2019 02:04:23 +0700 Subject: [PATCH 412/785] [ted] Restrict info regex (closes #21631) --- youtube_dl/extractor/ted.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 9b60cc462..db5a4f44e 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -133,7 +133,7 @@ class TEDIE(InfoExtractor): def _extract_info(self, webpage): info_json = self._search_regex( - r'(?s)q\(\s*"\w+.init"\s*,\s*({.+})\)\s*</script>', + r'(?s)q\(\s*"\w+.init"\s*,\s*({.+?})\)\s*</script>', webpage, 'info json') return json.loads(info_json) From 5ae9b8b3a3063c97730b79ea1dfd39bc19fd56c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 4 Jul 2019 03:57:11 +0700 Subject: [PATCH 413/785] [adobepass] Add support for AT&T U-verse (mso ATT) (closes #13938, closes #21016) --- youtube_dl/extractor/adobepass.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 1cf2dcbf3..38dca1b0a 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -25,6 +25,11 @@ MSO_INFO = { 'username_field': 'username', 'password_field': 'password', }, + 'ATT': { + 'name': 'AT&T U-verse', + 'username_field': 'userid', + 'password_field': 'password', + }, 'ATTOTT': { 'name': 'DIRECTV NOW', 'username_field': 'email', From a30c2f40550dd1ecc52c470db8ef77ea84bfe85b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 4 Jul 2019 04:01:30 +0700 Subject: [PATCH 414/785] [go] Add site info for disneynow (closes #21613) --- youtube_dl/extractor/go.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 5916f9a8f..03e48f4ea 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py 
@@ -34,9 +34,13 @@ class GoIE(AdobePassIE): 'watchdisneyxd': { 'brand': '009', 'resource_id': 'DisneyXD', + }, + 'disneynow': { + 'brand': '011', + 'resource_id': 'Disney', } } - _VALID_URL = r'https?://(?:(?:(?P<sub_domain>%s)\.)?go|disneynow)\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\ + _VALID_URL = r'https?://(?:(?:(?P<sub_domain>%s)\.)?go|(?P<sub_domain_2>disneynow))\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\ % '|'.join(list(_SITE_INFO.keys()) + ['disneynow']) _TESTS = [{ 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', @@ -83,7 +87,9 @@ class GoIE(AdobePassIE): display_id)['video'] def _real_extract(self, url): - sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups() + mobj = re.match(self._VALID_URL, url) + sub_domain = mobj.group('sub_domain') or mobj.group('sub_domain_2') + video_id, display_id = mobj.group('id', 'display_id') site_info = self._SITE_INFO.get(sub_domain, {}) brand = site_info.get('brand') if not video_id or not site_info: From c9fa84d88ef31c847d418223c0c6eb93651ccbec Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 4 Jul 2019 15:59:25 +0100 Subject: [PATCH 415/785] [lecturio] add support id based URLs(closes #21630) --- youtube_dl/extractor/lecturio.py | 113 +++++++++++++++++-------------- 1 file changed, 64 insertions(+), 49 deletions(-) diff --git a/youtube_dl/extractor/lecturio.py b/youtube_dl/extractor/lecturio.py index 24f78d928..6ed7da4ab 100644 --- a/youtube_dl/extractor/lecturio.py +++ b/youtube_dl/extractor/lecturio.py @@ -6,8 +6,8 @@ import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + clean_html, determine_ext, - extract_attributes, ExtractorError, float_or_none, int_or_none, @@ -19,6 +19,7 @@ from ..utils import ( class LecturioBaseIE(InfoExtractor): + _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/' _LOGIN_URL = 
'https://app.lecturio.com/en/login' _NETRC_MACHINE = 'lecturio' @@ -67,51 +68,56 @@ class LecturioIE(LecturioBaseIE): _VALID_URL = r'''(?x) https:// (?: - app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.lecture| - (?:www\.)?lecturio\.de/[^/]+/(?P<id_de>[^/?#&]+)\.vortrag + app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))| + (?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag ) ''' _TESTS = [{ 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos', - 'md5': 'f576a797a5b7a5e4e4bbdfc25a6a6870', + 'md5': '9a42cf1d8282a6311bf7211bbde26fde', 'info_dict': { 'id': '39634', 'ext': 'mp4', - 'title': 'Important Concepts and Terms – Introduction to Microbiology', + 'title': 'Important Concepts and Terms — Introduction to Microbiology', }, 'skip': 'Requires lecturio account credentials', }, { 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag', 'only_matching': True, + }, { + 'url': 'https://app.lecturio.com/#/lecture/c/6434/39634', + 'only_matching': True, }] _CC_LANGS = { + 'Arabic': 'ar', + 'Bulgarian': 'bg', 'German': 'de', 'English': 'en', 'Spanish': 'es', + 'Persian': 'fa', 'French': 'fr', + 'Japanese': 'ja', 'Polish': 'pl', + 'Pashto': 'ps', 'Russian': 'ru', } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('id') or mobj.group('id_de') - - webpage = self._download_webpage( - 'https://app.lecturio.com/en/lecture/%s/player.html' % display_id, - display_id) - - lecture_id = self._search_regex( - r'lecture_id\s*=\s*(?:L_)?(\d+)', webpage, 'lecture id') - - api_url = self._search_regex( - r'lectureDataLink\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, - 'api url', group='url') - - video = self._download_json(api_url, display_id) - + nt = mobj.group('nt') or mobj.group('nt_de') + lecture_id = mobj.group('id') + display_id = nt or lecture_id + api_path = 'lectures/' + lecture_id if lecture_id 
else 'lecture/' + nt + '.json' + video = self._download_json( + self._API_BASE_URL + api_path, display_id) title = video['title'].strip() + if not lecture_id: + pid = video.get('productId') or video.get('uid') + if pid: + spid = pid.split('_') + if spid and len(spid) == 2: + lecture_id = spid[1] formats = [] for format_ in video['content']['media']: @@ -129,24 +135,30 @@ class LecturioIE(LecturioBaseIE): continue label = str_or_none(format_.get('label')) filesize = int_or_none(format_.get('fileSize')) - formats.append({ + f = { 'url': file_url, 'format_id': label, 'filesize': float_or_none(filesize, invscale=1000) - }) + } + if label: + mobj = re.match(r'(\d+)p\s*\(([^)]+)\)', label) + if mobj: + f.update({ + 'format_id': mobj.group(2), + 'height': int(mobj.group(1)), + }) + formats.append(f) self._sort_formats(formats) subtitles = {} automatic_captions = {} - cc = self._parse_json( - self._search_regex( - r'subtitleUrls\s*:\s*({.+?})\s*,', webpage, 'subtitles', - default='{}'), display_id, fatal=False) - for cc_label, cc_url in cc.items(): - cc_url = url_or_none(cc_url) + captions = video.get('captions') or [] + for cc in captions: + cc_url = cc.get('url') if not cc_url: continue - lang = self._search_regex( + cc_label = cc.get('translatedCode') + lang = cc.get('languageCode') or self._search_regex( r'/([a-z]{2})_', cc_url, 'lang', default=cc_label.split()[0] if cc_label else 'en') original_lang = self._search_regex( @@ -160,7 +172,7 @@ class LecturioIE(LecturioBaseIE): }) return { - 'id': lecture_id, + 'id': lecture_id or nt, 'title': title, 'formats': formats, 'subtitles': subtitles, @@ -169,37 +181,40 @@ class LecturioIE(LecturioBaseIE): class LecturioCourseIE(LecturioBaseIE): - _VALID_URL = r'https://app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.course' - _TEST = { + _VALID_URL = r'https://app\.lecturio\.com/(?:[^/]+/(?P<nt>[^/?#&]+)\.course|(?:#/)?course/c/(?P<id>\d+))' + _TESTS = [{ 'url': 
'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/', 'info_dict': { 'id': 'microbiology-introduction', 'title': 'Microbiology: Introduction', + 'description': 'md5:13da8500c25880c6016ae1e6d78c386a', }, 'playlist_count': 45, 'skip': 'Requires lecturio account credentials', - } + }, { + 'url': 'https://app.lecturio.com/#/course/c/6434', + 'only_matching': True, + }] def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - + nt, course_id = re.match(self._VALID_URL, url).groups() + display_id = nt or course_id + api_path = 'courses/' + course_id if course_id else 'course/content/' + nt + '.json' + course = self._download_json( + self._API_BASE_URL + api_path, display_id) entries = [] - for mobj in re.finditer( - r'(?s)<[^>]+\bdata-url=(["\'])(?:(?!\1).)+\.lecture\b[^>]+>', - webpage): - params = extract_attributes(mobj.group(0)) - lecture_url = urljoin(url, params.get('data-url')) - lecture_id = params.get('data-id') + for lecture in course.get('lectures', []): + lecture_id = str_or_none(lecture.get('id')) + lecture_url = lecture.get('url') + if lecture_url: + lecture_url = urljoin(url, lecture_url) + else: + lecture_url = 'https://app.lecturio.com/#/lecture/c/%s/%s' % (course_id, lecture_id) entries.append(self.url_result( lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id)) - - title = self._search_regex( - r'<span[^>]+class=["\']content-title[^>]+>([^<]+)', webpage, - 'title', default=None) - - return self.playlist_result(entries, display_id, title) + return self.playlist_result( + entries, display_id, course.get('title'), + clean_html(course.get('description'))) class LecturioDeCourseIE(LecturioBaseIE): From d1850c1a975de37b28c39afdce2e5ea56dec032a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hendrik=20Schr=C3=B6ter?= <Rikorose@users.noreply.github.com> Date: Fri, 5 Jul 2019 15:47:32 +0000 Subject: [PATCH 416/785] [mixer:vod] Relax _VALID_URL (closes #21657) (#21658) --- 
youtube_dl/extractor/beampro.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py index e264a145f..86abdae00 100644 --- a/youtube_dl/extractor/beampro.py +++ b/youtube_dl/extractor/beampro.py @@ -99,7 +99,7 @@ class BeamProLiveIE(BeamProBaseIE): class BeamProVodIE(BeamProBaseIE): IE_NAME = 'Mixer:vod' - _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\w+)' + _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>[^?#&]+)' _TESTS = [{ 'url': 'https://mixer.com/willow8714?vod=2259830', 'md5': 'b2431e6e8347dc92ebafb565d368b76b', @@ -122,6 +122,9 @@ class BeamProVodIE(BeamProBaseIE): }, { 'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw', 'only_matching': True, + }, { + 'url': 'https://mixer.com/streamer?vod=Rh3LY0VAqkGpEQUe2pN-ig', + 'only_matching': True, }] @staticmethod From d18003a1419517cad49d4c5e8acb8255dd5422df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 Jul 2019 00:42:54 +0700 Subject: [PATCH 417/785] [peertube] Detect embed URLs in generic extraction (closes #21666) --- youtube_dl/extractor/peertube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index e03c3d1d3..b50543e32 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -168,7 +168,7 @@ class PeerTubeIE(InfoExtractor): @staticmethod def _extract_peertube_url(webpage, source_url): mobj = re.match( - r'https?://(?P<host>[^/]+)/videos/watch/(?P<id>%s)' + r'https?://(?P<host>[^/]+)/videos/(?:watch|embed)/(?P<id>%s)' % PeerTubeIE._UUID_RE, source_url) if mobj and any(p in webpage for p in ( '<title>PeerTube<', From a6389abfd7fec786ed07031cd7f3a42d02910de3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 Jul 2019 23:16:38 +0700 
Subject: [PATCH 418/785] [philharmoniedeparis] Relax _VALID_URL (closes #21672) --- youtube_dl/extractor/philharmoniedeparis.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/philharmoniedeparis.py b/youtube_dl/extractor/philharmoniedeparis.py index f723a2b3b..03da64b11 100644 --- a/youtube_dl/extractor/philharmoniedeparis.py +++ b/youtube_dl/extractor/philharmoniedeparis.py @@ -14,7 +14,7 @@ class PhilharmonieDeParisIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)| + live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|embed(?:app)?/|misc/Playlist\.ashx\?id=)| pad\.philharmoniedeparis\.fr/doc/CIMU/ ) (?P<id>\d+) @@ -40,6 +40,12 @@ class PhilharmonieDeParisIE(InfoExtractor): }, { 'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr', 'only_matching': True, + }, { + 'url': 'https://live.philharmoniedeparis.fr/embedapp/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR', + 'only_matching': True, + }, { + 'url': 'https://live.philharmoniedeparis.fr/embed/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR', + 'only_matching': True, }] _LIVE_URL = 'https://live.philharmoniedeparis.fr' From 25d71fb058368e1d48c4ad9496d91d33378649f6 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 9 Jul 2019 08:28:39 +0100 Subject: [PATCH 419/785] [packtpub] fix extraction(closes #21268) --- youtube_dl/extractor/packtpub.py | 111 ++++++++++++++----------------- 1 file changed, 51 insertions(+), 60 deletions(-) diff --git a/youtube_dl/extractor/packtpub.py b/youtube_dl/extractor/packtpub.py index 1324137df..3d39d1b27 100644 --- a/youtube_dl/extractor/packtpub.py +++ b/youtube_dl/extractor/packtpub.py @@ -5,26 +5,27 @@ import re from .common import InfoExtractor from ..compat import ( - compat_str, + # compat_str, compat_HTTPError, ) from 
..utils import ( clean_html, ExtractorError, - remove_end, + # remove_end, + str_or_none, strip_or_none, unified_timestamp, - urljoin, + # urljoin, ) class PacktPubBaseIE(InfoExtractor): - _PACKT_BASE = 'https://www.packtpub.com' - _MAPT_REST = '%s/mapt-rest' % _PACKT_BASE + # _PACKT_BASE = 'https://www.packtpub.com' + _STATIC_PRODUCTS_BASE = 'https://static.packt-cdn.com/products/' class PacktPubIE(PacktPubBaseIE): - _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)' + _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)(?:/(?P<display_id>[^/?&#]+))?' _TESTS = [{ 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro', @@ -50,9 +51,9 @@ class PacktPubIE(PacktPubBaseIE): return try: self._TOKEN = self._download_json( - self._MAPT_REST + '/users/tokens', None, + 'https://services.packtpub.com/auth-v1/users/tokens', None, 'Downloading Authorization Token', data=json.dumps({ - 'email': username, + 'username': username, 'password': password, }).encode())['data']['access'] except ExtractorError as e: @@ -61,54 +62,40 @@ class PacktPubIE(PacktPubBaseIE): raise ExtractorError(message, expected=True) raise - def _handle_error(self, response): - if response.get('status') != 'success': - raise ExtractorError( - '% said: %s' % (self.IE_NAME, response['message']), - expected=True) - - def _download_json(self, *args, **kwargs): - response = super(PacktPubIE, self)._download_json(*args, **kwargs) - self._handle_error(response) - return response - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - course_id, chapter_id, video_id = mobj.group( - 'course_id', 'chapter_id', 'id') + course_id, chapter_id, video_id, display_id = re.match(self._VALID_URL, url).groups() headers = {} if self._TOKEN: headers['Authorization'] = 'Bearer 
' + self._TOKEN - video = self._download_json( - '%s/users/me/products/%s/chapters/%s/sections/%s' - % (self._MAPT_REST, course_id, chapter_id, video_id), video_id, - 'Downloading JSON video', headers=headers)['data'] + try: + video_url = self._download_json( + 'https://services.packtpub.com/products-v1/products/%s/%s/%s' % (course_id, chapter_id, video_id), video_id, + 'Downloading JSON video', headers=headers)['data'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + self.raise_login_required('This video is locked') + raise - content = video.get('content') - if not content: - self.raise_login_required('This video is locked') + # TODO: find a better way to avoid duplicating course requests + # metadata = self._download_json( + # '%s/products/%s/chapters/%s/sections/%s/metadata' + # % (self._MAPT_REST, course_id, chapter_id, video_id), + # video_id)['data'] - video_url = content['file'] - - metadata = self._download_json( - '%s/products/%s/chapters/%s/sections/%s/metadata' - % (self._MAPT_REST, course_id, chapter_id, video_id), - video_id)['data'] - - title = metadata['pageTitle'] - course_title = metadata.get('title') - if course_title: - title = remove_end(title, ' - %s' % course_title) - timestamp = unified_timestamp(metadata.get('publicationDate')) - thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath')) + # title = metadata['pageTitle'] + # course_title = metadata.get('title') + # if course_title: + # title = remove_end(title, ' - %s' % course_title) + # timestamp = unified_timestamp(metadata.get('publicationDate')) + # thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath')) return { 'id': video_id, 'url': video_url, - 'title': title, - 'thumbnail': thumbnail, - 'timestamp': timestamp, + 'title': display_id or video_id, # title, + # 'thumbnail': thumbnail, + # 'timestamp': timestamp, } @@ -119,6 +106,7 @@ class PacktPubCourseIE(PacktPubBaseIE): 'info_dict': { 'id': '9781787122215', 'title': 
'Learn Nodejs by building 12 projects [Video]', + 'description': 'md5:489da8d953f416e51927b60a1c7db0aa', }, 'playlist_count': 90, }, { @@ -136,35 +124,38 @@ class PacktPubCourseIE(PacktPubBaseIE): url, course_id = mobj.group('url', 'id') course = self._download_json( - '%s/products/%s/metadata' % (self._MAPT_REST, course_id), - course_id)['data'] + self._STATIC_PRODUCTS_BASE + '%s/toc' % course_id, course_id) + metadata = self._download_json( + self._STATIC_PRODUCTS_BASE + '%s/summary' % course_id, + course_id, fatal=False) or {} entries = [] - for chapter_num, chapter in enumerate(course['tableOfContents'], 1): - if chapter.get('type') != 'chapter': - continue - children = chapter.get('children') - if not isinstance(children, list): + for chapter_num, chapter in enumerate(course['chapters'], 1): + chapter_id = str_or_none(chapter.get('id')) + sections = chapter.get('sections') + if not chapter_id or not isinstance(sections, list): continue chapter_info = { 'chapter': chapter.get('title'), 'chapter_number': chapter_num, - 'chapter_id': chapter.get('id'), + 'chapter_id': chapter_id, } - for section in children: - if section.get('type') != 'section': - continue - section_url = section.get('seoUrl') - if not isinstance(section_url, compat_str): + for section in sections: + section_id = str_or_none(section.get('id')) + if not section_id or section.get('contentType') != 'video': continue entry = { '_type': 'url_transparent', - 'url': urljoin(url + '/', section_url), + 'url': '/'.join([url, chapter_id, section_id]), 'title': strip_or_none(section.get('title')), 'description': clean_html(section.get('summary')), + 'thumbnail': metadata.get('coverImage'), + 'timestamp': unified_timestamp(metadata.get('publicationDate')), 'ie_key': PacktPubIE.ie_key(), } entry.update(chapter_info) entries.append(entry) - return self.playlist_result(entries, course_id, course.get('title')) + return self.playlist_result( + entries, course_id, metadata.get('title'), + 
clean_html(metadata.get('about'))) From c9b0564ac141bed3766fa65011274cb5c1c5bccb Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 9 Jul 2019 11:56:16 +0100 Subject: [PATCH 420/785] [packtpub] Relax lesson _VALID_URL regex(closes #21695) --- youtube_dl/extractor/packtpub.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/packtpub.py b/youtube_dl/extractor/packtpub.py index 3d39d1b27..11ad3b3b8 100644 --- a/youtube_dl/extractor/packtpub.py +++ b/youtube_dl/extractor/packtpub.py @@ -25,7 +25,7 @@ class PacktPubBaseIE(InfoExtractor): class PacktPubIE(PacktPubBaseIE): - _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)(?:/(?P<display_id>[^/?&#]+))?' + _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>[^/]+)/(?P<id>[^/]+)(?:/(?P<display_id>[^/?&#]+))?' 
_TESTS = [{ 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro', @@ -41,6 +41,9 @@ class PacktPubIE(PacktPubBaseIE): }, { 'url': 'https://subscription.packtpub.com/video/web_development/9781787122215/20528/20530/project-intro', 'only_matching': True, + }, { + 'url': 'https://subscription.packtpub.com/video/programming/9781838988906/p1/video1_1/business-card-project', + 'only_matching': True, }] _NETRC_MACHINE = 'packtpub' _TOKEN = None From 4b30282616c35b08308975b9d51614039b3f3d12 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 10 Jul 2019 13:54:49 +0100 Subject: [PATCH 421/785] [funk] fix extraction(closes #17915) --- youtube_dl/extractor/funk.py | 171 +++++------------------------------ 1 file changed, 23 insertions(+), 148 deletions(-) diff --git a/youtube_dl/extractor/funk.py b/youtube_dl/extractor/funk.py index 7e1af95e0..81d1949fd 100644 --- a/youtube_dl/extractor/funk.py +++ b/youtube_dl/extractor/funk.py @@ -1,89 +1,21 @@ # coding: utf-8 from __future__ import unicode_literals -import itertools import re from .common import InfoExtractor from .nexx import NexxIE -from ..compat import compat_str from ..utils import ( int_or_none, - try_get, + str_or_none, ) -class FunkBaseIE(InfoExtractor): - _HEADERS = { - 'Accept': '*/*', - 'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8', - 'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4', - } - _AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4' - - @staticmethod - def _make_headers(referer): - headers = FunkBaseIE._HEADERS.copy() - headers['Referer'] = referer - return headers - - def 
_make_url_result(self, video): - return { - '_type': 'url_transparent', - 'url': 'nexx:741:%s' % video['sourceId'], - 'ie_key': NexxIE.ie_key(), - 'id': video['sourceId'], - 'title': video.get('title'), - 'description': video.get('description'), - 'duration': int_or_none(video.get('duration')), - 'season_number': int_or_none(video.get('seasonNr')), - 'episode_number': int_or_none(video.get('episodeNr')), - } - - -class FunkMixIE(FunkBaseIE): - _VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)' +class FunkIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)' _TESTS = [{ - 'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten', - 'md5': '8edf617c2f2b7c9847dfda313f199009', - 'info_dict': { - 'id': '123748', - 'ext': 'mp4', - 'title': '"Die realste Kifferdoku aller Zeiten"', - 'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d', - 'timestamp': 1490274721, - 'upload_date': '20170323', - }, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - mix_id = mobj.group('id') - alias = mobj.group('alias') - - lists = self._download_json( - 'https://www.funk.net/api/v3.1/curation/curatedLists/', - mix_id, headers=self._make_headers(url), query={ - 'size': 100, - })['_embedded']['curatedListList'] - - metas = next( - l for l in lists - if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas'] - video = next( - meta['videoDataDelegate'] - for meta in metas - if try_get( - meta, lambda x: x['videoDataDelegate']['alias'], - compat_str) == alias) - - return self._make_url_result(video) - - -class FunkChannelIE(FunkBaseIE): - _VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2', + 'url': 
'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821', + 'md5': '8dd9d9ab59b4aa4173b3197f2ea48e81', 'info_dict': { 'id': '1155821', 'ext': 'mp4', @@ -92,83 +24,26 @@ class FunkChannelIE(FunkBaseIE): 'timestamp': 1514507395, 'upload_date': '20171229', }, - 'params': { - 'skip_download': True, - }, + }, { - # only available via byIdList API - 'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu', - 'info_dict': { - 'id': '205067', - 'ext': 'mp4', - 'title': 'Martin Sonneborn erklärt die EU', - 'description': 'md5:050f74626e4ed87edf4626d2024210c0', - 'timestamp': 1494424042, - 'upload_date': '20170510', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/', + 'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699', 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - channel_id = mobj.group('id') - alias = mobj.group('alias') - - headers = self._make_headers(url) - - video = None - - # Id-based channels are currently broken on their side: webplayer - # tries to process them via byChannelAlias endpoint and fails - # predictably. 
- for page_num in itertools.count(): - by_channel_alias = self._download_json( - 'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s' - % channel_id, - 'Downloading byChannelAlias JSON page %d' % (page_num + 1), - headers=headers, query={ - 'filterFsk': 'false', - 'sort': 'creationDate,desc', - 'size': 100, - 'page': page_num, - }, fatal=False) - if not by_channel_alias: - break - video_list = try_get( - by_channel_alias, lambda x: x['_embedded']['videoList'], list) - if not video_list: - break - try: - video = next(r for r in video_list if r.get('alias') == alias) - break - except StopIteration: - pass - if not try_get( - by_channel_alias, lambda x: x['_links']['next']): - break - - if not video: - by_id_list = self._download_json( - 'https://www.funk.net/api/v3.0/content/videos/byIdList', - channel_id, 'Downloading byIdList JSON', headers=headers, - query={ - 'ids': alias, - }, fatal=False) - if by_id_list: - video = try_get(by_id_list, lambda x: x['result'][0], dict) - - if not video: - results = self._download_json( - 'https://www.funk.net/api/v3.0/content/videos/filter', - channel_id, 'Downloading filter JSON', headers=headers, query={ - 'channelId': channel_id, - 'size': 100, - })['result'] - video = next(r for r in results if r.get('alias') == alias) - - return self._make_url_result(video) + display_id, nexx_id = re.match(self._VALID_URL, url).groups() + video = self._download_json( + 'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id) + return { + '_type': 'url_transparent', + 'url': 'nexx:741:' + nexx_id, + 'ie_key': NexxIE.ie_key(), + 'id': nexx_id, + 'title': video.get('title'), + 'description': video.get('description'), + 'duration': int_or_none(video.get('duration')), + 'channel_id': str_or_none(video.get('channelId')), + 'display_id': display_id, + 'tags': video.get('tags'), + 'thumbnail': video.get('imageUrlLandscape'), + } From 253289656f6c49f9bed7d043c9806ce4def4973f Mon Sep 17 00:00:00 2001 From: Remita Amine 
<remitamine@gmail.com> Date: Wed, 10 Jul 2019 13:57:43 +0100 Subject: [PATCH 422/785] [extractors] update funk.net import --- youtube_dl/extractor/extractors.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 02f17cf0d..68b9b9b25 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -395,10 +395,7 @@ from .frontendmasters import ( FrontendMastersCourseIE ) from .funimation import FunimationIE -from .funk import ( - FunkMixIE, - FunkChannelIE, -) +from .funk import FunkIE from .funnyordie import FunnyOrDieIE from .fusion import FusionIE from .fxnetworks import FXNetworksIE From cfe781d4faf910b8f687ce39480456d97f0946cf Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 10 Jul 2019 15:45:00 +0100 Subject: [PATCH 423/785] [gameinformer] fix extraction(closes #8895)(closes #15363)(closes #17206) --- youtube_dl/extractor/gameinformer.py | 34 ++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/gameinformer.py b/youtube_dl/extractor/gameinformer.py index a2920a793..f1b96c172 100644 --- a/youtube_dl/extractor/gameinformer.py +++ b/youtube_dl/extractor/gameinformer.py @@ -1,12 +1,19 @@ # coding: utf-8 from __future__ import unicode_literals +from .brightcove import BrightcoveNewIE from .common import InfoExtractor +from ..utils import ( + clean_html, + get_element_by_class, + get_element_by_id, +) class GameInformerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>.+)\.aspx' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>[^.?&#]+)' + _TESTS = [{ + # normal Brightcove embed code extracted with BrightcoveNewIE._extract_url 'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx', 'md5': '292f26da1ab4beb4c9099f1304d2b071', 'info_dict': { @@ 
-18,16 +25,25 @@ class GameInformerIE(InfoExtractor): 'upload_date': '20150928', 'uploader_id': '694940074001', }, - } + }, { + # Brightcove id inside unique element with field--name-field-brightcove-video-id class + 'url': 'https://www.gameinformer.com/video-feature/new-gameplay-today/2019/07/09/new-gameplay-today-streets-of-rogue', + 'info_dict': { + 'id': '6057111913001', + 'ext': 'mp4', + 'title': 'New Gameplay Today – Streets Of Rogue', + 'timestamp': 1562699001, + 'upload_date': '20190709', + 'uploader_id': '694940074001', + + }, + }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s' def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage( url, display_id, headers=self.geo_verification_headers()) - brightcove_id = self._search_regex( - [r'<[^>]+\bid=["\']bc_(\d+)', r"getVideo\('[^']+video_id=(\d+)"], - webpage, 'brightcove id') - return self.url_result( - self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', - brightcove_id) + brightcove_id = clean_html(get_element_by_class('field--name-field-brightcove-video-id', webpage) or get_element_by_id('video-source-content', webpage)) + brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id if brightcove_id else BrightcoveNewIE._extract_url(self, webpage) + return self.url_result(brightcove_url, 'BrightcoveNew', brightcove_id) From e4d53148f506cfcfab8559d86b40c72b7db87a6f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 10 Jul 2019 16:47:37 +0100 Subject: [PATCH 424/785] [funnyordie] move extraction to VoxMedia extractor and improve vox volume embed extraction(closes #16846) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/funnyordie.py | 162 ----------------------------- youtube_dl/extractor/voxmedia.py | 101 ++++++++++++------ 3 files changed, 67 insertions(+), 197 deletions(-) delete mode 100644 youtube_dl/extractor/funnyordie.py diff --git 
a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 68b9b9b25..555fadfaf 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -396,7 +396,6 @@ from .frontendmasters import ( ) from .funimation import FunimationIE from .funk import FunkIE -from .funnyordie import FunnyOrDieIE from .fusion import FusionIE from .fxnetworks import FXNetworksIE from .gaia import GaiaIE diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py deleted file mode 100644 index f85e7de14..000000000 --- a/youtube_dl/extractor/funnyordie.py +++ /dev/null @@ -1,162 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - float_or_none, - int_or_none, - unified_timestamp, -) - - -class FunnyOrDieIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|articles|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])' - _TESTS = [{ - 'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', - 'md5': 'bcd81e0c4f26189ee09be362ad6e6ba9', - 'info_dict': { - 'id': '0732f586d7', - 'ext': 'mp4', - 'title': 'Heart-Shaped Box: Literal Video Version', - 'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338', - 'thumbnail': r're:^http:.*\.jpg$', - 'uploader': 'DASjr', - 'timestamp': 1317904928, - 'upload_date': '20111006', - 'duration': 318.3, - }, - }, { - 'url': 'http://www.funnyordie.com/embed/e402820827', - 'info_dict': { - 'id': 'e402820827', - 'ext': 'mp4', - 'title': 'Please Use This Song (Jon Lajoie)', - 'description': 'Please use this to sell something. 
www.jonlajoie.com', - 'thumbnail': r're:^http:.*\.jpg$', - 'timestamp': 1398988800, - 'upload_date': '20140502', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://www.funnyordie.com/articles/ebf5e34fc8/10-hours-of-walking-in-nyc-as-a-man', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) - - links = re.findall(r'<source src="([^"]+/v)[^"]+\.([^"]+)" type=\'video', webpage) - if not links: - raise ExtractorError('No media links available for %s' % video_id) - - links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0) - - m3u8_url = self._search_regex( - r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8[^"\']*)\1', - webpage, 'm3u8 url', group='url') - - formats = [] - - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False) - source_formats = list(filter( - lambda f: f.get('vcodec') != 'none', m3u8_formats)) - - bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)] - bitrates.sort() - - if source_formats: - self._sort_formats(source_formats) - - for bitrate, f in zip(bitrates, source_formats or [{}] * len(bitrates)): - for path, ext in links: - ff = f.copy() - if ff: - if ext != 'mp4': - ff = dict( - [(k, v) for k, v in ff.items() - if k in ('height', 'width', 'format_id')]) - ff.update({ - 'format_id': ff['format_id'].replace('hls', ext), - 'ext': ext, - 'protocol': 'http', - }) - else: - ff.update({ - 'format_id': '%s-%d' % (ext, bitrate), - 'vbr': bitrate, - }) - ff['url'] = self._proto_relative_url( - '%s%d.%s' % (path, bitrate, ext)) - formats.append(ff) - self._check_formats(formats, video_id) - - formats.extend(m3u8_formats) - self._sort_formats( - formats, field_preference=('height', 'width', 'tbr', 'format_id')) - - subtitles = {} - for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" 
srclang="([^"]+)"', webpage): - subtitles[src_lang] = [{ - 'ext': src.split('/')[-1], - 'url': 'http://www.funnyordie.com%s' % src, - }] - - timestamp = unified_timestamp(self._html_search_meta( - 'uploadDate', webpage, 'timestamp', default=None)) - - uploader = self._html_search_regex( - r'<h\d[^>]+\bclass=["\']channel-preview-name[^>]+>(.+?)</h', - webpage, 'uploader', default=None) - - title, description, thumbnail, duration = [None] * 4 - - medium = self._parse_json( - self._search_regex( - r'jsonMedium\s*=\s*({.+?});', webpage, 'JSON medium', - default='{}'), - video_id, fatal=False) - if medium: - title = medium.get('title') - duration = float_or_none(medium.get('duration')) - if not timestamp: - timestamp = unified_timestamp(medium.get('publishDate')) - - post = self._parse_json( - self._search_regex( - r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details', - default='{}'), - video_id, fatal=False) - if post: - if not title: - title = post.get('name') - description = post.get('description') - thumbnail = post.get('picture') - - if not title: - title = self._og_search_title(webpage) - if not description: - description = self._og_search_description(webpage) - if not duration: - duration = int_or_none(self._html_search_meta( - ('video:duration', 'duration'), webpage, 'duration', default=False)) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'timestamp': timestamp, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles, - } diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py index c7a0a88fe..b318e15d4 100644 --- a/youtube_dl/extractor/voxmedia.py +++ b/youtube_dl/extractor/voxmedia.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals from .common import InfoExtractor from .once import OnceIE from ..compat import compat_urllib_parse_unquote -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + int_or_none, 
+) class VoxMediaVolumeIE(OnceIE): @@ -13,18 +16,43 @@ class VoxMediaVolumeIE(OnceIE): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_data = self._parse_json(self._search_regex( - r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', webpage, 'video data'), video_id) + + setup = self._parse_json(self._search_regex( + r'setup\s*=\s*({.+});', webpage, 'setup'), video_id) + video_data = setup.get('video') or {} + info = { + 'id': video_id, + 'title': video_data.get('title_short'), + 'description': video_data.get('description_long') or video_data.get('description_short'), + 'thumbnail': video_data.get('brightcove_thumbnail') + } + asset = setup.get('asset') or setup.get('params') or {} + + formats = [] + hls_url = asset.get('hls_url') + if hls_url: + formats.extend(self._extract_m3u8_formats( + hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + mp4_url = asset.get('mp4_url') + if mp4_url: + tbr = self._search_regex(r'-(\d+)k\.', mp4_url, 'bitrate', default=None) + format_id = 'http' + if tbr: + format_id += '-' + tbr + formats.append({ + 'format_id': format_id, + 'url': mp4_url, + 'tbr': int_or_none(tbr), + }) + if formats: + self._sort_formats(formats) + info['formats'] = formats + return info + for provider_video_type in ('ooyala', 'youtube', 'brightcove'): provider_video_id = video_data.get('%s_id' % provider_video_type) if not provider_video_id: continue - info = { - 'id': video_id, - 'title': video_data.get('title_short'), - 'description': video_data.get('description_long') or video_data.get('description_short'), - 'thumbnail': video_data.get('brightcove_thumbnail') - } if provider_video_type == 'brightcove': info['formats'] = self._extract_once_formats(provider_video_id) self._sort_formats(info['formats']) @@ -39,46 +67,49 @@ class VoxMediaVolumeIE(OnceIE): class VoxMediaIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com|recode\.net)/(?:[^/]+/)*(?P<id>[^/?]+)' + _VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked|funnyordie)\.com|recode\.net)/(?:[^/]+/)*(?P<id>[^/?]+)' _TESTS = [{ + # Volume embed, Youtube 'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of', 'info_dict': { - 'id': '11eXZobjrG8DCSTgrNjVinU-YmmdYjhe', + 'id': 'j4mLW6x17VM', 'ext': 'mp4', - 'title': 'Google\'s new material design direction', - 'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2', + 'title': 'Material world: how Google discovered what software is made of', + 'description': 'md5:dfc17e7715e3b542d66e33a109861382', + 'upload_date': '20190710', + 'uploader_id': 'TheVerge', + 'uploader': 'The Verge', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'add_ie': ['Ooyala'], + 'add_ie': ['Youtube'], }, { - # data-ooyala-id + # Volume embed, Youtube 'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet', - 'md5': 'd744484ff127884cd2ba09e3fa604e4b', + 'md5': '4c8f4a0937752b437c3ebc0ed24802b5', 'info_dict': { - 'id': 'RkZXU4cTphOCPDMZg5oEounJyoFI0g-B', + 'id': 'Gy8Md3Eky38', 'ext': 'mp4', 'title': 'The Nexus 6: hands-on with Google\'s phablet', - 'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af', + 'description': 'md5:d9f0216e5fb932dd2033d6db37ac3f1d', + 'uploader_id': 'TheVerge', + 'upload_date': '20141021', + 'uploader': 'The Verge', }, - 'add_ie': ['Ooyala'], - 'skip': 'Video Not Found', + 'add_ie': ['Youtube'], + 'skip': 'similar to the previous test', }, { - # volume embed + # Volume embed, Youtube 'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill', 'info_dict': { - 'id': 'wydzk3dDpmRz7PQoXRsTIX6XTkPjYL0b', + 'id': 'YCjDnX-Xzhg', 'ext': 'mp4', - 'title': 'The new frontier of LGBTQ civil rights, explained', - 'description': 
'md5:0dc58e94a465cbe91d02950f770eb93f', + 'title': "Mississippi's laws are so bad that its anti-LGBTQ law isn't needed to allow discrimination", + 'description': 'md5:fc1317922057de31cd74bce91eb1c66c', + 'uploader_id': 'voxdotcom', + 'upload_date': '20150915', + 'uploader': 'Vox', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'add_ie': ['Ooyala'], + 'add_ie': ['Youtube'], + 'skip': 'similar to the previous test', }, { # youtube embed 'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance', @@ -93,6 +124,7 @@ class VoxMediaIE(InfoExtractor): 'uploader': 'Vox', }, 'add_ie': ['Youtube'], + 'skip': 'Page no longer contain videos', }, { # SBN.VideoLinkset.entryGroup multiple ooyala embeds 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', @@ -118,10 +150,11 @@ class VoxMediaIE(InfoExtractor): 'description': 'md5:e02d56b026d51aa32c010676765a690d', }, }], + 'skip': 'Page no longer contain videos', }, { # volume embed, Brightcove Once 'url': 'https://www.recode.net/2014/6/17/11628066/post-post-pc-ceo-the-full-code-conference-video-of-microsofts-satya', - 'md5': '01571a896281f77dc06e084138987ea2', + 'md5': '2dbc77b8b0bff1894c2fce16eded637d', 'info_dict': { 'id': '1231c973d', 'ext': 'mp4', From 5fc0896168a9ff155475bfb0b7b66504c7077605 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 11 Jul 2019 23:37:09 +0700 Subject: [PATCH 425/785] [beeg] Add support for api/v6 v2 URLs without t argument (closes #21701) --- youtube_dl/extractor/beeg.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index c15a0ac8f..5788d13ba 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -32,6 +32,10 @@ class BeegIE(InfoExtractor): # api/v6 v2 'url': 'https://beeg.com/1941093077?t=911-1391', 'only_matching': True, + }, { + # 
api/v6 v2 w/o t + 'url': 'https://beeg.com/1277207756', + 'only_matching': True, }, { 'url': 'https://beeg.porn/video/5416503', 'only_matching': True, @@ -49,14 +53,17 @@ class BeegIE(InfoExtractor): r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version', default='1546225636701') - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - t = qs.get('t', [''])[0].split('-') - if len(t) > 1: + if len(video_id) >= 10: query = { 'v': 2, - 's': t[0], - 'e': t[1], } + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + t = qs.get('t', [''])[0].split('-') + if len(t) > 1: + query.update({ + 's': t[0], + 'e': t[1], + }) else: query = {'v': 1} From 4dcd4b7b163feddc07959ca34cbb29815b354c25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jul 2019 00:04:25 +0700 Subject: [PATCH 426/785] [mgtv] Pass Referer HTTP header for format URLs (closes #21726) --- youtube_dl/extractor/mgtv.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index 84137df50..7ae2e3c3b 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -79,6 +79,9 @@ class MGTVIE(InfoExtractor): 'ext': 'mp4', 'tbr': tbr, 'protocol': 'm3u8_native', + 'http_headers': { + 'Referer': url, + }, }) self._sort_formats(formats) From 7612406bf9de825e569b9d2d1faee3d82fd60f79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jul 2019 00:34:03 +0700 Subject: [PATCH 427/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5ce78b07a..ad99a64d6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,29 @@ +version <unreleased> + +Core ++ [adobepass] Add support for AT&T U-verse (mso ATT) (#13938, #21016) + +Extractors ++ [mgtv] Pass Referer HTTP header for format URLs (#21726) ++ [beeg] Add support for api/v6 v2 URLs 
without t argument (#21701) +* [voxmedia:volume] Improvevox embed extraction (#16846) +* [funnyordie] Move extraction to VoxMedia extractor (#16846) +* [gameinformer] Fix extraction (#8895, #15363, #17206) +* [funk] Fix extraction (#17915) +* [packtpub] Relax lesson URL regular expression (#21695) +* [packtpub] Fix extraction (#21268) +* [philharmoniedeparis] Relax URL regular expression (#21672) +* [peertube] Detect embed URLs in generic extraction (#21666) +* [mixer:vod] Relax URL regular expression (#21657, #21658) ++ [lecturio] Add support id based URLs (#21630) ++ [go] Add site info for disneynow (#21613) +* [ted] Restrict info regular expression (#21631) +* [twitch:vod] Actualize m3u8 URL (#21538, #21607) +* [vzaar] Fix videos with empty title (#21606) +* [tvland] Fix extraction (#21384) +* [arte] Clean extractor (#15583, #21614) + + version 2019.07.02 Core From 0d1f4af39dab36138a3809e31a8c09ee588e1b40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jul 2019 00:43:54 +0700 Subject: [PATCH 428/785] release 2019.07.12 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 13 +------------ youtube_dl/version.py | 2 +- 8 files changed, 15 insertions(+), 26 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index fb0d33b8f..fcfadeb1f 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. 
Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.12. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've verified that I'm running youtube-dl version **2019.07.12** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.02 + [debug] youtube-dl version 2019.07.12 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 3c95565a6..7e1a3d1c0 100644 --- 
a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.12. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've verified that I'm running youtube-dl version **2019.07.12** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 7410776d7..71782a104 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.12. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. 
- Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've verified that I'm running youtube-dl version **2019.07.12** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index cc52bcca6..6bcfde1f8 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.12. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've verified that I'm running youtube-dl version **2019.07.12** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.02 + [debug] youtube-dl version 2019.07.12 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index bbd421b1a..89d9c63aa 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.12. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've verified that I'm running youtube-dl version **2019.07.12** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index ad99a64d6..45cb2746e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.07.12 Core + [adobepass] Add support for AT&T U-verse (mso ATT) (#13938, #21016) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 55ae43144..4e664336d 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -58,16 +58,8 @@ - **ARD:mediathek** - **ARDBetaMediathek** - **Arkena** - - **arte.tv** - **arte.tv:+7** - - **arte.tv:cinema** - - **arte.tv:concert** - - **arte.tv:creative** - - **arte.tv:ddc** - **arte.tv:embed** - - **arte.tv:future** - - **arte.tv:info** - - **arte.tv:magazine** - **arte.tv:playlist** - **AsianCrush** - **AsianCrushPlaylist** @@ -313,9 +305,7 @@ - **FrontendMastersCourse** - **FrontendMastersLesson** - **Funimation** - - **FunkChannel** - - **FunkMix** - - **FunnyOrDie** + - **Funk** - **Fusion** - **Fux** - **FXNetworks** @@ -896,7 +886,6 @@ - **TF1** - **TFO** - **TheIntercept** - - **theoperaplatform** - **ThePlatform** - **ThePlatformFeed** - **TheScene** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 78fe54326..18bcb33e2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.07.02' +__version__ = '2019.07.12' From baf67a604d912722b0fe03a40e9dc5349a2208cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jul 2019 02:26:05 +0700 Subject: [PATCH 429/785] [youtube] Fix 
authentication (closes #11270) --- youtube_dl/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b570d5bae..9f661a84f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -116,6 +116,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'f.req': json.dumps(f_req), 'flowName': 'GlifWebSignIn', 'flowEntry': 'ServiceLogin', + # TODO: reverse actual botguard identifier generation algo + 'bgRequest': '["identifier",""]', }) return self._download_json( url, None, note=note, errnote=errnote, From 27019dbb4b4829b5e1910c6b714f904ce8fad680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jul 2019 03:45:58 +0700 Subject: [PATCH 430/785] [youtube] Fix is_live extraction (closes #21734) --- youtube_dl/extractor/youtube.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9f661a84f..8a3c502ba 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -27,6 +27,7 @@ from ..compat import ( compat_str, ) from ..utils import ( + bool_or_none, clean_html, dict_get, error_to_compat_str, @@ -1890,6 +1891,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if view_count is None and video_details: view_count = int_or_none(video_details.get('viewCount')) + if is_live is None: + is_live = bool_or_none(dict_get( + video_details, ('isLive', 'isLiveContent'), + skip_false_values=False)) + # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: raise ExtractorError('"rental" videos not supported. 
See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True) From 0dd58a523fffd06c126c006722850bab36bd3aa2 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 11 Jul 2019 23:09:09 +0100 Subject: [PATCH 431/785] [fivetv] relax video URL regex and support https URLs --- youtube_dl/extractor/fivetv.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/fivetv.py b/youtube_dl/extractor/fivetv.py index 9f9863746..c4c0f1b3d 100644 --- a/youtube_dl/extractor/fivetv.py +++ b/youtube_dl/extractor/fivetv.py @@ -9,7 +9,7 @@ from ..utils import int_or_none class FiveTVIE(InfoExtractor): _VALID_URL = r'''(?x) - http:// + https?:// (?:www\.)?5-tv\.ru/ (?: (?:[^/]+/)+(?P<id>\d+)| @@ -39,6 +39,7 @@ class FiveTVIE(InfoExtractor): 'duration': 180, }, }, { + # redirect to https://www.5-tv.ru/projects/1000095/izvestia-glavnoe/ 'url': 'http://www.5-tv.ru/glavnoe/#itemDetails', 'info_dict': { 'id': 'glavnoe', @@ -46,6 +47,7 @@ class FiveTVIE(InfoExtractor): 'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$', 'thumbnail': r're:^https?://.*\.jpg$', }, + 'skip': 'redirect to «Известия. 
Главное» project page', }, { 'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/', 'only_matching': True, @@ -70,7 +72,7 @@ class FiveTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - [r'<div[^>]+?class="flowplayer[^>]+?data-href="([^"]+)"', + [r'<div[^>]+?class="(?:flow)?player[^>]+?data-href="([^"]+)"', r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'], webpage, 'video url') From 16d3672ad73802043a9cccd1505909949e2ce71f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 11 Jul 2019 23:37:34 +0100 Subject: [PATCH 432/785] [espn] fix fivethirtyeight.com extraction --- youtube_dl/extractor/abcnews.py | 9 ++++++--- youtube_dl/extractor/espn.py | 16 ++++++---------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dl/extractor/abcnews.py index cd29aca77..8b407bf9c 100644 --- a/youtube_dl/extractor/abcnews.py +++ b/youtube_dl/extractor/abcnews.py @@ -15,10 +15,13 @@ class AbcNewsVideoIE(AMPIE): IE_NAME = 'abcnews:video' _VALID_URL = r'''(?x) https?:// - abcnews\.go\.com/ (?: - [^/]+/video/(?P<display_id>[0-9a-z-]+)-| - video/embed\?.*?\bid= + abcnews\.go\.com/ + (?: + [^/]+/video/(?P<display_id>[0-9a-z-]+)-| + video/embed\?.*?\bid= + )| + fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/ ) (?P<id>\d+) ''' diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 8cc9bd165..6cf05e6da 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -216,17 +216,14 @@ class FiveThirtyEightIE(InfoExtractor): _TEST = { 'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/', 'info_dict': { - 'id': '21846851', - 'ext': 'mp4', + 'id': '56032156', + 'ext': 'flv', 'title': 'FiveThirtyEight: The Raiders can still make the playoffs', 'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.', - 'timestamp': 
1513960621, - 'upload_date': '20171222', }, 'params': { 'skip_download': True, }, - 'expected_warnings': ['Unable to download f4m manifest'], } def _real_extract(self, url): @@ -234,9 +231,8 @@ class FiveThirtyEightIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - video_id = self._search_regex( - r'data-video-id=["\'](?P<id>\d+)', - webpage, 'video id', group='id') + embed_url = self._search_regex( + r'<iframe[^>]+src=["\'](https?://fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/\d+)', + webpage, 'embed url') - return self.url_result( - 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key()) + return self.url_result(embed_url, 'AbcNewsVideo') From d4ece5d359bfe5c6b87e0ef19f2b351e408e510c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 12 Jul 2019 21:56:49 +0100 Subject: [PATCH 433/785] [bleacherreport] fix Bleacher Report CMS extraction --- youtube_dl/extractor/bleacherreport.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/bleacherreport.py b/youtube_dl/extractor/bleacherreport.py index e829974ff..dc60224d0 100644 --- a/youtube_dl/extractor/bleacherreport.py +++ b/youtube_dl/extractor/bleacherreport.py @@ -71,7 +71,7 @@ class BleacherReportIE(InfoExtractor): video = article_data.get('video') if video: video_type = video['type'] - if video_type == 'cms.bleacherreport.com': + if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'): info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id'] elif video_type == 'ooyala.com': info['url'] = 'ooyala:%s' % video['id'] @@ -87,9 +87,9 @@ class BleacherReportIE(InfoExtractor): class BleacherReportCMSIE(AMPIE): - _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})' + _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})' _TESTS = [{ - 'url': 
'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1', + 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms', 'md5': '2e4b0a997f9228ffa31fada5c53d1ed1', 'info_dict': { 'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1', @@ -101,6 +101,6 @@ class BleacherReportCMSIE(AMPIE): def _real_extract(self, url): video_id = self._match_id(url) - info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id) + info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id) info['id'] = video_id return info From 82f68e4a0113f00144b55c5d2a1951793ac78818 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 12 Jul 2019 22:02:06 +0100 Subject: [PATCH 434/785] [facebook] fallback to twitter:image meta for thumbnail extraction(closes #21224) --- youtube_dl/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 789dd79d5..a3dcdca3e 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -428,7 +428,7 @@ class FacebookIE(InfoExtractor): timestamp = int_or_none(self._search_regex( r'<abbr[^>]+data-utime=["\'](\d+)', webpage, 'timestamp', default=None)) - thumbnail = self._og_search_thumbnail(webpage) + thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage) view_count = parse_count(self._search_regex( r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', From 0441d6266ca275b1b4a2ad4efdb4b3f54e318e88 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 12 Jul 2019 22:31:11 +0100 Subject: [PATCH 435/785] [rudo] remove extractor(closes #18430)(closes #18474) Covered by generic extractor --- youtube_dl/extractor/biobiochiletv.py | 19 ++++++---- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/rudo.py | 53 
--------------------------- 3 files changed, 12 insertions(+), 61 deletions(-) delete mode 100644 youtube_dl/extractor/rudo.py diff --git a/youtube_dl/extractor/biobiochiletv.py b/youtube_dl/extractor/biobiochiletv.py index b92031c8a..dc86c57c5 100644 --- a/youtube_dl/extractor/biobiochiletv.py +++ b/youtube_dl/extractor/biobiochiletv.py @@ -6,7 +6,6 @@ from ..utils import ( ExtractorError, remove_end, ) -from .rudo import RudoIE class BioBioChileTVIE(InfoExtractor): @@ -41,11 +40,15 @@ class BioBioChileTVIE(InfoExtractor): }, { 'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml', 'info_dict': { - 'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos', + 'id': 'b4xd0LK3SK', 'ext': 'mp4', - 'uploader': '(none)', - 'upload_date': '20160708', - 'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos', + # TODO: fix url_transparent information overriding + # 'uploader': 'Juan Pablo Echenique', + 'title': 'Comentario Oscar Cáceres', + }, + 'params': { + # empty m3u8 manifest + 'skip_download': True, }, }, { 'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml', @@ -60,7 +63,9 @@ class BioBioChileTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - rudo_url = RudoIE._extract_url(webpage) + rudo_url = self._search_regex( + r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', + webpage, 'embed URL', None, group='url') if not rudo_url: raise ExtractorError('No videos found') @@ -68,7 +73,7 @@ class BioBioChileTVIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) uploader = self._html_search_regex( - r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>', + 
r'<a[^>]+href=["\'](?:https?://(?:busca|www)\.biobiochile\.cl)?/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>', webpage, 'uploader', fatal=False) return { diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 555fadfaf..e88ad34a8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -967,7 +967,6 @@ from .rts import RTSIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtvnh import RTVNHIE from .rtvs import RTVSIE -from .rudo import RudoIE from .ruhd import RUHDIE from .rutube import ( RutubeIE, diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py deleted file mode 100644 index f036f6757..000000000 --- a/youtube_dl/extractor/rudo.py +++ /dev/null @@ -1,53 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - js_to_json, - get_element_by_class, - unified_strdate, -) - - -class RudoIE(InfoExtractor): - _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)' - - _TEST = { - 'url': 'http://rudo.video/vod/oTzw0MGnyG', - 'md5': '2a03a5b32dd90a04c83b6d391cf7b415', - 'info_dict': { - 'id': 'oTzw0MGnyG', - 'ext': 'mp4', - 'title': 'Comentario Tomás Mosciatti', - 'upload_date': '20160617', - }, - } - - @classmethod - def _extract_url(cls, webpage): - mobj = re.search( - r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', - webpage) - if mobj: - return mobj.group('url') - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id, encoding='iso-8859-1') - - jwplayer_data = self._parse_json(self._search_regex( - r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id, - transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s))) - - info_dict = self._parse_jwplayer_data( - jwplayer_data, video_id, 
require_title=False, m3u8_id='hls', mpd_id='dash') - - info_dict.update({ - 'title': self._og_search_title(webpage), - 'upload_date': unified_strdate(get_element_by_class('date', webpage)), - }) - - return info_dict From 57227618fe2708f9e4f1c87fdd08c49c7122c13b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 12 Jul 2019 22:50:37 +0100 Subject: [PATCH 436/785] [spike] fix Bellator extraction --- youtube_dl/extractor/spike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 21b93a5b3..7c11ea7aa 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -22,7 +22,7 @@ class BellatorIE(MTVServicesInfoExtractor): 'only_matching': True, }] - _FEED_URL = 'http://www.spike.com/feeds/mrss/' + _FEED_URL = 'http://www.bellator.com/feeds/mrss/' _GEO_COUNTRIES = ['US'] From 272355c17265e8dc921d7f1518606b15fd800112 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 12 Jul 2019 23:26:46 +0100 Subject: [PATCH 437/785] [dbtv] fix extraction --- youtube_dl/extractor/dbtv.py | 51 ++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py index f232f0dc5..aaedf2e3d 100644 --- a/youtube_dl/extractor/dbtv.py +++ b/youtube_dl/extractor/dbtv.py @@ -7,50 +7,51 @@ from .common import InfoExtractor class DBTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:[^/]+/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?' 
+ _VALID_URL = r'https?://(?:www\.)?dagbladet\.no/video/(?:(?:embed|(?P<display_id>[^/]+))/)?(?P<id>[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8})' _TESTS = [{ - 'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', - 'md5': '2e24f67936517b143a234b4cadf792ec', + 'url': 'https://www.dagbladet.no/video/PynxJnNWChE/', + 'md5': 'b8f850ba1860adbda668d367f9b77699', 'info_dict': { - 'id': '3649835190001', - 'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', + 'id': 'PynxJnNWChE', 'ext': 'mp4', 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', - 'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0', + 'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f', 'thumbnail': r're:https?://.*\.jpg', - 'timestamp': 1404039863, - 'upload_date': '20140629', - 'duration': 69.544, - 'uploader_id': '1027729757001', + 'upload_date': '20160916', + 'duration': 69, + 'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ', + 'uploader': 'Dagbladet', }, - 'add_ie': ['BrightcoveNew'] + 'add_ie': ['Youtube'] }, { - 'url': 'http://dbtv.no/3649835190001', + 'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false', 'only_matching': True, }, { - 'url': 'http://www.dbtv.no/lazyplayer/4631135248001', - 'only_matching': True, - }, { - 'url': 'http://dbtv.no/vice/5000634109001', - 'only_matching': True, - }, { - 'url': 'http://dbtv.no/filmtrailer/3359293614001', + 'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw', 'only_matching': True, }] @staticmethod def _extract_urls(webpage): return [url for _, url in re.findall( - r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/(?:lazy)?player/\d+.*?)\1', + r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1', webpage)] def _real_extract(self, url): - video_id, display_id = re.match(self._VALID_URL, url).groups() - - 
return { + display_id, video_id = re.match(self._VALID_URL, url).groups() + info = { '_type': 'url_transparent', - 'url': 'http://players.brightcove.net/1027729757001/default_default/index.html?videoId=%s' % video_id, 'id': video_id, 'display_id': display_id, - 'ie_key': 'BrightcoveNew', } + if len(video_id) == 11: + info.update({ + 'url': video_id, + 'ie_key': 'Youtube', + }) + else: + info.update({ + 'url': 'jwplatform:' + video_id, + 'ie_key': 'JWPlatform', + }) + return info From c72dc20d099bbe1dc4ede83e8f94a7bc42d81532 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 13 Jul 2019 10:13:07 +0100 Subject: [PATCH 438/785] [roosterteeth] fix free episode extraction(#16094) --- youtube_dl/extractor/roosterteeth.py | 97 ++++++++++++++-------------- 1 file changed, 47 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py index 857434540..d3eeeba62 100644 --- a/youtube_dl/extractor/roosterteeth.py +++ b/youtube_dl/extractor/roosterteeth.py @@ -4,11 +4,14 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_HTTPError, + compat_str, +) from ..utils import ( ExtractorError, int_or_none, - strip_or_none, - unescapeHTML, + str_or_none, urlencode_postdata, ) @@ -21,15 +24,14 @@ class RoosterTeethIE(InfoExtractor): 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'md5': 'e2bd7764732d785ef797700a2489f212', 'info_dict': { - 'id': '26576', + 'id': '9156', 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'ext': 'mp4', - 'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement', - 'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5', + 'title': 'Million Dollars, But... 
The Game Announcement', + 'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5', 'thumbnail': r're:^https?://.*\.png$', 'series': 'Million Dollars, But...', 'episode': 'Million Dollars, But... The Game Announcement', - 'comment_count': int, }, }, { 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', @@ -89,60 +91,55 @@ class RoosterTeethIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) + api_episode_url = 'https://svod-be.roosterteeth.com/api/v1/episodes/%s' % display_id - webpage = self._download_webpage(url, display_id) - - episode = strip_or_none(unescapeHTML(self._search_regex( - (r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', - r'<title>(?P<title>[^<]+)'), webpage, 'title', - default=None, group='title'))) - - title = strip_or_none(self._og_search_title( - webpage, default=None)) or episode - - m3u8_url = self._search_regex( - r'file\s*:\s*(["\'])(?Phttp.+?\.m3u8.*?)\1', - webpage, 'm3u8 url', default=None, group='url') - - if not m3u8_url: - if re.search(r']+class=["\']non-sponsor', webpage): - self.raise_login_required( - '%s is only available for FIRST members' % display_id) - - if re.search(r']+class=["\']golive-gate', webpage): - self.raise_login_required('%s is not available yet' % display_id) - - raise ExtractorError('Unable to extract m3u8 URL') + try: + m3u8_url = self._download_json( + api_episode_url + '/videos', display_id, + 'Downloading video JSON metadata')['data'][0]['attributes']['url'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + if self._parse_json(e.cause.read().decode(), display_id).get('access') is False: + self.raise_login_required( + '%s is only available for FIRST members' % display_id) + raise formats = self._extract_m3u8_formats( - m3u8_url, display_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id='hls') + m3u8_url, display_id, 'mp4', 'm3u8_native', 
m3u8_id='hls') self._sort_formats(formats) - description = strip_or_none(self._og_search_description(webpage)) - thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage)) + episode = self._download_json( + api_episode_url, display_id, + 'Downloading episode JSON metadata')['data'][0] + attributes = episode['attributes'] + title = attributes.get('title') or attributes['display_title'] + video_id = compat_str(episode['id']) - series = self._search_regex( - (r'

    More ([^<]+)

    ', r']+>See All ([^<]+) Videos<'), - webpage, 'series', fatal=False) - - comment_count = int_or_none(self._search_regex( - r'>Comments \((\d+)\)<', webpage, - 'comment count', fatal=False)) - - video_id = self._search_regex( - (r'containerId\s*=\s*["\']episode-(\d+)\1', - r' Date: Sat, 13 Jul 2019 12:47:02 +0100 Subject: [PATCH 439/785] [livejournal] Add new extractor(closes #21526) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/livejournal.py | 42 +++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 youtube_dl/extractor/livejournal.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e88ad34a8..75a53f54b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -579,6 +579,7 @@ from .linkedin import ( ) from .linuxacademy import LinuxAcademyIE from .litv import LiTVIE +from .livejournal import LiveJournalIE from .liveleak import ( LiveLeakIE, LiveLeakEmbedIE, diff --git a/youtube_dl/extractor/livejournal.py b/youtube_dl/extractor/livejournal.py new file mode 100644 index 000000000..3a9f4553f --- /dev/null +++ b/youtube_dl/extractor/livejournal.py @@ -0,0 +1,42 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import int_or_none + + +class LiveJournalIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P\d+)' + _TEST = { + 'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272', + 'md5': 'adaf018388572ced8a6f301ace49d4b2', + 'info_dict': { + 'id': '1263729', + 'ext': 'mp4', + 'title': 'Истребители против БПЛА', + 'upload_date': '20190624', + 'timestamp': 1561406715, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + record = self._parse_json(self._search_regex( + r'Site\.page\s*=\s*({.+?});', webpage, + 
'page data'), video_id)['video']['record'] + storage_id = compat_str(record['storageid']) + title = record.get('name') + if title: + # remove filename extension(.mp4, .mov, etc...) + title = title.rsplit('.', 1)[0] + return { + '_type': 'url_transparent', + 'id': video_id, + 'title': title, + 'thumbnail': record.get('thumbnail'), + 'timestamp': int_or_none(record.get('timecreate')), + 'url': 'eagleplatform:vc.videos.livejournal.com:' + storage_id, + 'ie_key': 'EaglePlatform', + } From 4a71ef6da677bfd08b39d5cb6f584be9f6b2fc27 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 13 Jul 2019 13:08:19 +0100 Subject: [PATCH 440/785] [dlive] Add new extractor(closes #18080) --- youtube_dl/extractor/dlive.py | 94 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 4 ++ 2 files changed, 98 insertions(+) create mode 100644 youtube_dl/extractor/dlive.py diff --git a/youtube_dl/extractor/dlive.py b/youtube_dl/extractor/dlive.py new file mode 100644 index 000000000..b81eaecce --- /dev/null +++ b/youtube_dl/extractor/dlive.py @@ -0,0 +1,94 @@ +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import int_or_none + + +class DLiveVODIE(InfoExtractor): + IE_NAME = 'dlive:vod' + _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P.+?)\+(?P[a-zA-Z0-9]+)' + _TEST = { + 'url': 'https://dlive.tv/p/pdp+3mTzOl4WR', + 'info_dict': { + 'id': '3mTzOl4WR', + 'ext': 'mp4', + 'title': 'Minecraft with james charles epic', + 'upload_date': '20190701', + 'timestamp': 1562011015, + 'uploader_id': 'pdp', + } + } + + def _real_extract(self, url): + uploader_id, vod_id = re.match(self._VALID_URL, url).groups() + broadcast = self._download_json( + 'https://graphigo.prd.dlive.tv/', vod_id, + data=json.dumps({'query': '''query { + pastBroadcast(permlink:"%s+%s") { + content + createdAt + length + playbackUrl + title + thumbnailUrl + viewCount + } +}''' % (uploader_id, vod_id)}).encode())['data']['pastBroadcast'] + title = 
broadcast['title'] + formats = self._extract_m3u8_formats( + broadcast['playbackUrl'], vod_id, 'mp4', 'm3u8_native') + self._sort_formats(formats) + return { + 'id': vod_id, + 'title': title, + 'uploader_id': uploader_id, + 'formats': formats, + 'description': broadcast.get('content'), + 'thumbnail': broadcast.get('thumbnailUrl'), + 'timestamp': int_or_none(broadcast.get('createdAt'), 1000), + 'view_count': int_or_none(broadcast.get('viewCount')), + } + + +class DLiveStreamIE(InfoExtractor): + IE_NAME = 'dlive:stream' + _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?P[\w.-]+)' + + def _real_extract(self, url): + display_name = self._match_id(url) + user = self._download_json( + 'https://graphigo.prd.dlive.tv/', display_name, + data=json.dumps({'query': '''query { + userByDisplayName(displayname:"%s") { + livestream { + content + createdAt + title + thumbnailUrl + watchingCount + } + username + } +}''' % display_name}).encode())['data']['userByDisplayName'] + livestream = user['livestream'] + title = livestream['title'] + username = user['username'] + formats = self._extract_m3u8_formats( + 'https://live.prd.dlive.tv/hls/live/%s.m3u8' % username, + display_name, 'mp4') + self._sort_formats(formats) + return { + 'id': display_name, + 'title': self._live_title(title), + 'uploader': display_name, + 'uploader_id': username, + 'formats': formats, + 'description': livestream.get('content'), + 'thumbnail': livestream.get('thumbnailUrl'), + 'is_live': True, + 'timestamp': int_or_none(livestream.get('createdAt'), 1000), + 'view_count': int_or_none(livestream.get('watchingCount')), + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 75a53f54b..15f54a214 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1255,6 +1255,10 @@ from .udn import UDNEmbedIE from .ufctv import UFCTVIE from .uktvplay import UKTVPlayIE from .digiteka import DigitekaIE +from .dlive import ( + DLiveVODIE, + DLiveStreamIE, 
+) from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE From b99f11a56b64b647366a00c335a4a55f3e9e1854 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 13 Jul 2019 14:11:57 +0100 Subject: [PATCH 441/785] [dlive] restrict DLive Stream _VALID_URL regex --- youtube_dl/extractor/dlive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dlive.py b/youtube_dl/extractor/dlive.py index b81eaecce..8787f15a6 100644 --- a/youtube_dl/extractor/dlive.py +++ b/youtube_dl/extractor/dlive.py @@ -55,7 +55,7 @@ class DLiveVODIE(InfoExtractor): class DLiveStreamIE(InfoExtractor): IE_NAME = 'dlive:stream' - _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?P[\w.-]+)' + _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?!p/)(?P[\w.-]+)' def _real_extract(self, url): display_name = self._match_id(url) From 5f562bd4bbc780e535e187efb36659247b41d6e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 00:09:39 +0700 Subject: [PATCH 442/785] [spankbang] Fix extraction (closes #21763, closes #21764) --- youtube_dl/extractor/spankbang.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index f11d728ca..eb0919e3a 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -106,6 +106,8 @@ class SpankBangIE(InfoExtractor): for format_id, format_url in stream.items(): if format_id.startswith(STREAM_URL_PREFIX): + if format_url and isinstance(format_url, list): + format_url = format_url[0] extract_format( format_id[len(STREAM_URL_PREFIX):], format_url) From f9eeeda31c1a643aced8283440983f3a45208840 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 00:21:39 +0700 Subject: [PATCH 443/785] [spankbang] Fix and improve metadata extraction --- youtube_dl/extractor/spankbang.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git 
a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index eb0919e3a..e040ada29 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, + merge_dicts, orderedSet, parse_duration, parse_resolution, @@ -26,6 +27,8 @@ class SpankBangIE(InfoExtractor): 'description': 'dillion harper masturbates on a bed', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'silly2587', + 'timestamp': 1422571989, + 'upload_date': '20150129', 'age_limit': 18, } }, { @@ -113,26 +116,29 @@ class SpankBangIE(InfoExtractor): self._sort_formats(formats) + info = self._search_json_ld(webpage, video_id, default={}) + title = self._html_search_regex( - r'(?s)]*>(.+?)', webpage, 'title') + r'(?s)]*>(.+?)', webpage, 'title', default=None) description = self._search_regex( r']+\bclass=["\']bottom[^>]+>\s*

    [^<]*

    \s*

    ([^<]+)', - webpage, 'description', fatal=False) - thumbnail = self._og_search_thumbnail(webpage) - uploader = self._search_regex( - r'class="user"[^>]*>]+>([^<]+)', + webpage, 'description', default=None) + thumbnail = self._og_search_thumbnail(webpage, default=None) + uploader = self._html_search_regex( + (r'(?s)]+class=["\']profile[^>]+>(.+?)', + r'class="user"[^>]*>]+>([^<]+)'), webpage, 'uploader', default=None) duration = parse_duration(self._search_regex( r']+\bclass=["\']right_side[^>]+>\s*([^<]+)', - webpage, 'duration', fatal=False)) + webpage, 'duration', default=None)) view_count = str_to_int(self._search_regex( - r'([\d,.]+)\s+plays', webpage, 'view count', fatal=False)) + r'([\d,.]+)\s+plays', webpage, 'view count', default=None)) age_limit = self._rta_search(webpage) - return { + return merge_dicts({ 'id': video_id, - 'title': title, + 'title': title or video_id, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, @@ -140,7 +146,8 @@ class SpankBangIE(InfoExtractor): 'view_count': view_count, 'formats': formats, 'age_limit': age_limit, - } + }, info + ) class SpankBangPlaylistIE(InfoExtractor): From b7ef93f0ab2963047953be1472a5a108d92b621c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 01:19:17 +0700 Subject: [PATCH 444/785] [twitter] Improve uploader id extraction (closes #21705) --- youtube_dl/extractor/twitter.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 41d0b6be8..cebb6238c 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -428,11 +428,22 @@ class TwitterIE(InfoExtractor): 'params': { 'skip_download': True, # requires ffmpeg }, + }, { + 'url': 'https://twitter.com/foobar/status/1087791357756956680', + 'info_dict': { + 'id': '1087791357756956680', + 'ext': 'mp4', + 'title': 'Twitter - A new is coming. 
Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!', + 'thumbnail': r're:^https?://.*\.jpg', + 'description': 'md5:66d493500c013e3e2d434195746a7f78', + 'uploader': 'Twitter', + 'uploader_id': 'Twitter', + 'duration': 61.567, + }, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - user_id = mobj.group('user_id') twid = mobj.group('id') webpage, urlh = self._download_webpage_handle( @@ -441,8 +452,13 @@ class TwitterIE(InfoExtractor): if 'twitter.com/account/suspended' in urlh.geturl(): raise ExtractorError('Account suspended by Twitter.', expected=True) - if user_id is None: - mobj = re.match(self._VALID_URL, urlh.geturl()) + user_id = None + + redirect_mobj = re.match(self._VALID_URL, urlh.geturl()) + if redirect_mobj: + user_id = redirect_mobj.group('user_id') + + if not user_id: user_id = mobj.group('user_id') username = remove_end(self._og_search_title(webpage), ' on Twitter') From ba036333bf4ebcba70deb51e77e81dca723fb54d Mon Sep 17 00:00:00 2001 From: geditorit <52565706+geditorit@users.noreply.github.com> Date: Sun, 14 Jul 2019 01:23:22 +0700 Subject: [PATCH 445/785] [youtube] Add more invidious instances to _VALID_URL (#21694) --- youtube_dl/extractor/youtube.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8a3c502ba..762611b89 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -371,10 +371,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?hooktube\.com/| (?:www\.)?yourepeat\.com/| tube\.majestyc\.net/| + # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances (?:(?:www|dev)\.)?invidio\.us/| - (?:www\.)?invidiou\.sh/| - (?:www\.)?invidious\.snopyta\.org/| + (?:(?:www|no)\.)?invidiou\.sh/| + (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/| 
(?:www\.)?invidious\.kabi\.tk/| + (?:www\.)?invidious\.enkirton\.net/| + (?:www\.)?invidious\.13ad\.de/| + (?:www\.)?tube\.poal\.co/| (?:www\.)?vid\.wxzm\.sx/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls From d89a0a8026e0010a96a1309d70f8fcc2164dd5a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 01:43:31 +0700 Subject: [PATCH 446/785] [lynda] Handle missing subtitles (closes #20490, closes #20513) --- youtube_dl/extractor/lynda.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 3084c6dff..b3d8653d0 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -117,6 +117,10 @@ class LyndaIE(LyndaBaseIE): }, { 'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html', 'only_matching': True, + }, { + # Status="NotFound", Message="Transcript not found" + 'url': 'https://www.lynda.com/ASP-NET-tutorials/What-you-should-know/5034180/2811512-4.html', + 'only_matching': True, }] def _raise_unavailable(self, video_id): @@ -247,12 +251,17 @@ class LyndaIE(LyndaBaseIE): def _get_subtitles(self, video_id): url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id - subs = self._download_json(url, None, False) + subs = self._download_webpage( + url, video_id, 'Downloading subtitles JSON', fatal=False) + if not subs or 'Status="NotFound"' in subs: + return {} + subs = self._parse_json(subs, video_id, fatal=False) + if not subs: + return {} fixed_subs = self._fix_subtitles(subs) if fixed_subs: return {'en': [{'ext': 'srt', 'data': fixed_subs}]} - else: - return {} + return {} class LyndaCourseIE(LyndaBaseIE): From c452790a796730113dd62db0e743b11045606e27 Mon Sep 17 00:00:00 2001 From: aerworker Date: Sat, 13 Jul 2019 22:38:47 +0300 Subject: [PATCH 447/785] 
[yandexmusic] Add support for multi disk albums and extract track number and disk number (closes #21420) (#21421) * [yandexmusic] extract tracks from all volumes of an album (closes #21420) * [yandexmusic] extract genre, disk_number and track_number * [yandexmusic] extract decomposed artist names * Update yandexmusic.py * Update yandexmusic.py * Update yandexmusic.py --- youtube_dl/extractor/yandexmusic.py | 63 +++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 1dfee59e9..fea817419 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -51,23 +51,43 @@ class YandexMusicTrackIE(YandexMusicBaseIE): IE_DESC = 'Яндекс.Музыка - Трек' _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P\d+)/track/(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://music.yandex.ru/album/540508/track/4878838', 'md5': 'f496818aa2f60b6c0062980d2e00dc20', 'info_dict': { 'id': '4878838', 'ext': 'mp3', - 'title': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1', + 'title': 'Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1', 'filesize': 4628061, 'duration': 193.04, 'track': 'Gypsy Eyes 1', 'album': 'Gypsy Soul', 'album_artist': 'Carlo Ambrosio', - 'artist': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari', + 'artist': 'Carlo Ambrosio & Fabio Di Bari', 'release_year': 2009, }, 'skip': 'Travis CI servers blocked by YandexMusic', - } + }, { + # multiple disks + 'url': 'http://music.yandex.ru/album/3840501/track/705105', + 'md5': 'ebe7b4e2ac7ac03fe11c19727ca6153e', + 'info_dict': { + 'id': '705105', + 'ext': 'mp3', + 'title': 'Hooverphonic - Sometimes', + 'filesize': 5743386, + 'duration': 239.27, + 'track': 'Sometimes', + 'album': 'The Best of Hooverphonic', + 'album_artist': 'Hooverphonic', + 'artist': 'Hooverphonic', + 'release_year': 2016, + 'genre': 'pop', + 'disc_number': 2, + 'track_number': 9, + }, + 'skip': 
'Travis CI servers blocked by YandexMusic', + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -110,9 +130,21 @@ class YandexMusicTrackIE(YandexMusicBaseIE): 'abr': int_or_none(download_data.get('bitrate')), } + def extract_artist_name(artist): + decomposed = artist.get('decomposed') + if not isinstance(decomposed, list): + return artist['name'] + parts = [artist['name']] + for element in decomposed: + if isinstance(element, dict) and element.get('name'): + parts.append(element['name']) + elif isinstance(element, compat_str): + parts.append(element) + return ''.join(parts) + def extract_artist(artist_list): if artist_list and isinstance(artist_list, list): - artists_names = [a['name'] for a in artist_list if a.get('name')] + artists_names = [extract_artist_name(a) for a in artist_list if a.get('name')] if artists_names: return ', '.join(artists_names) @@ -121,10 +153,17 @@ class YandexMusicTrackIE(YandexMusicBaseIE): album = albums[0] if isinstance(album, dict): year = album.get('year') + disc_number = int_or_none(try_get( + album, lambda x: x['trackPosition']['volume'])) + track_number = int_or_none(try_get( + album, lambda x: x['trackPosition']['index'])) track_info.update({ 'album': album.get('title'), 'album_artist': extract_artist(album.get('artists')), 'release_year': int_or_none(year), + 'genre': album.get('genre'), + 'disc_number': disc_number, + 'track_number': track_number, }) track_artist = extract_artist(track.get('artists')) @@ -152,7 +191,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): IE_DESC = 'Яндекс.Музыка - Альбом' _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P\d+)/?(\?|$)' - _TEST = { + _TESTS = [{ 'url': 'http://music.yandex.ru/album/540508', 'info_dict': { 'id': '540508', @@ -160,7 +199,15 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): }, 'playlist_count': 50, 'skip': 'Travis CI servers blocked by YandexMusic', - } + }, { + 'url': 'https://music.yandex.ru/album/3840501', + 
'info_dict': { + 'id': '3840501', + 'title': 'Hooverphonic - The Best of Hooverphonic (2016)', + }, + 'playlist_count': 33, + 'skip': 'Travis CI servers blocked by YandexMusic', + }] def _real_extract(self, url): album_id = self._match_id(url) @@ -169,7 +216,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): 'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id, album_id, 'Downloading album JSON') - entries = self._build_playlist(album['volumes'][0]) + entries = self._build_playlist([track for volume in album['volumes'] for track in volume]) title = '%s - %s' % (album['artists'][0]['name'], album['title']) year = album.get('year') From 2fe074a960773c2ec6f0a94a8c5fab5af8714651 Mon Sep 17 00:00:00 2001 From: hrimfaxi Date: Sun, 14 Jul 2019 03:57:44 +0800 Subject: [PATCH 448/785] [porn91] Fix extraction (#21312) --- youtube_dl/extractor/porn91.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index 24c3600fe..20eac647a 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -39,7 +39,12 @@ class Porn91IE(InfoExtractor): r'

    ([^<]+)
    ', webpage, 'title') title = title.replace('\n', '') - info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] + video_link_url = self._search_regex( + r']+id=["\']fm-video_link[^>]+>([^<]+)', + webpage, 'video link') + videopage = self._download_webpage(video_link_url, video_id) + + info_dict = self._parse_html5_media_entries(url, videopage, video_id)[0] duration = parse_duration(self._search_regex( r'时长:\s*\s*(\d+:\d+)', webpage, 'duration', fatal=False)) From 364a2cb658a0db069a746ca5e25c8b589b3c509d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 03:07:02 +0700 Subject: [PATCH 449/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/ChangeLog b/ChangeLog index 45cb2746e..bc722b73c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,28 @@ +version + +Extractors +* [porn91] Fix extraction (#21312) ++ [yandexmusic] Extract track number and disk number (#21421) ++ [yandexmusic] Add support for multi disk albums (#21420, #21421) +* [lynda] Handle missing subtitles (#20490, #20513) ++ [youtube] Add more invidious instances to URL regular expression (#21694) +* [twitter] Improve uploader id extraction (#21705) +* [spankbang] Fix and improve metadata extraction +* [spankbang] Fix extraction (#21763, #21764) ++ [dlive] Add support for dlive.tv (#18080) ++ [livejournal] Add support for livejournal.com (#21526) +* [roosterteeth] Fix free episode extraction (#16094) +* [dbtv] Fix extraction +* [bellator] Fix extraction +- [rudo] Remove extractor (#18430, #18474) +* [facebook] Fallback to twitter:image meta for thumbnail extraction (#21224) +* [bleacherreport] Fix Bleacher Report CMS extraction +* [espn] Fix fivethirtyeight.com extraction +* [5tv] Relax video URL regular expression and support https URLs +* [youtube] Fix is_live extraction (#21734) +* [youtube] Fix authentication (#11270) + + version 2019.07.12 Core From 
0250161c5272e2794f33085f9f8c3f464d8ee996 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 03:09:16 +0700 Subject: [PATCH 450/785] [yandexmusic] Add missing import --- youtube_dl/extractor/yandexmusic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index fea817419..08d35e04c 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -10,6 +10,7 @@ from ..utils import ( ExtractorError, int_or_none, float_or_none, + try_get, ) From ce80cacefd70a2c268de2fb1d5838ce66ac9a683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 03:10:49 +0700 Subject: [PATCH 451/785] release 2019.07.14 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 4 +++- youtube_dl/version.py | 2 +- 8 files changed, 17 insertions(+), 15 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index fcfadeb1f..80ca6d5f1 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.07.12** +- [ ] I've verified that I'm running youtube-dl version **2019.07.14** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] 
Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.12 + [debug] youtube-dl version 2019.07.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 7e1a3d1c0..a4f3c4dd9 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.07.12** +- [ ] I've verified that I'm running youtube-dl version **2019.07.14** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 71782a104..9d82e1cd9 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.12** +- [ ] I've verified that I'm running youtube-dl version **2019.07.14** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 6bcfde1f8..ff82a7435 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've 
verified that I'm running youtube-dl version **2019.07.12** +- [ ] I've verified that I'm running youtube-dl version **2019.07.14** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.12 + [debug] youtube-dl version 2019.07.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 89d9c63aa..f692c663d 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.12** +- [ ] I've verified that I'm running youtube-dl version **2019.07.14** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index bc722b73c..be1606586 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.07.14 Extractors * [porn91] Fix extraction (#21312) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 4e664336d..9ae6e5c96 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -223,6 +223,8 @@ - **DiscoveryNetworksDe** - **DiscoveryVR** - **Disney** + - **dlive:stream** + - **dlive:vod** - **Dotsub** - **DouyuShow** - **DouyuTV**: 斗鱼 @@ -448,6 +450,7 @@ - 
**linkedin:learning:course** - **LinuxAcademy** - **LiTV** + - **LiveJournal** - **LiveLeak** - **LiveLeakEmbed** - **livestream** @@ -754,7 +757,6 @@ - **rtve.es:television** - **RTVNH** - **RTVS** - - **Rudo** - **RUHD** - **rutube**: Rutube videos - **rutube:channel**: Rutube channels diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 18bcb33e2..8a7f4d733 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.07.12' +__version__ = '2019.07.14' From 898238e9f82e29ef139ff934f6949ddf574bd4d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 20:30:05 +0700 Subject: [PATCH 452/785] [youtube] Restrict is_live extraction (closes #21782) --- youtube_dl/extractor/youtube.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 762611b89..43a3fad9f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1896,9 +1896,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): view_count = int_or_none(video_details.get('viewCount')) if is_live is None: - is_live = bool_or_none(dict_get( - video_details, ('isLive', 'isLiveContent'), - skip_false_values=False)) + is_live = bool_or_none(video_details.get('isLive')) # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: From 2adedc477ee4c87709ca8d1c9bdfac3c31b1a57b Mon Sep 17 00:00:00 2001 From: Gary <35942108+LameLemon@users.noreply.github.com> Date: Mon, 15 Jul 2019 18:53:20 +0300 Subject: [PATCH 453/785] [gfycat] Extend _VALID_URL (closes #21779) (#21780) --- youtube_dl/extractor/gfycat.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py index eb6f85836..bbe3cb283 100644 --- a/youtube_dl/extractor/gfycat.py +++ b/youtube_dl/extractor/gfycat.py @@ -11,7 
+11,7 @@ from ..utils import ( class GfycatIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P[^-/?#]+)' + _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P[^-/?#]+)' _TESTS = [{ 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', 'info_dict': { @@ -44,6 +44,9 @@ class GfycatIE(InfoExtractor): 'categories': list, 'age_limit': 0, } + }, { + 'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish', + 'only_matching': True }, { 'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull', 'only_matching': True From 791d2e81172826ef645b62c6961c65f8c2cb2a4f Mon Sep 17 00:00:00 2001 From: geditorit <52565706+geditorit@users.noreply.github.com> Date: Mon, 15 Jul 2019 22:54:22 +0700 Subject: [PATCH 454/785] [youtube] Add support for invidious.mastodon.host (#21777) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 43a3fad9f..a87a46b3b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -378,6 +378,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?invidious\.kabi\.tk/| (?:www\.)?invidious\.enkirton\.net/| (?:www\.)?invidious\.13ad\.de/| + (?:www\.)?invidious\.mastodon\.host/| (?:www\.)?tube\.poal\.co/| (?:www\.)?vid\.wxzm\.sx/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains From f2a213d02596b603dea5be65f4778591101db5a2 Mon Sep 17 00:00:00 2001 From: tlonic Date: Mon, 15 Jul 2019 11:58:55 -0400 Subject: [PATCH 455/785] [einthusan] Add support for einthusan.com (closes #21748) (#21775) --- youtube_dl/extractor/einthusan.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index 4485bf8c1..1fb00c9b0 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -2,6 +2,7 @@ from __future__ import 
unicode_literals import json +import re from .common import InfoExtractor from ..compat import ( @@ -18,7 +19,7 @@ from ..utils import ( class EinthusanIE(InfoExtractor): - _VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?Peinthusan\.(?:tv|com))/movie/watch/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://einthusan.tv/movie/watch/9097/', 'md5': 'ff0f7f2065031b8a2cf13a933731c035', @@ -32,6 +33,9 @@ class EinthusanIE(InfoExtractor): }, { 'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi', 'only_matching': True, + }, { + 'url': 'https://einthusan.com/movie/watch/9097/', + 'only_matching': True, }] # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js @@ -41,7 +45,9 @@ class EinthusanIE(InfoExtractor): )).decode('utf-8'), video_id) def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) @@ -53,7 +59,7 @@ class EinthusanIE(InfoExtractor): page_id = self._html_search_regex( ']+data-pageid="([^"]+)"', webpage, 'page ID') video_data = self._download_json( - 'https://einthusan.tv/ajax/movie/watch/%s/' % video_id, video_id, + 'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id, data=urlencode_postdata({ 'xEvent': 'UIVideoPlayer.PingOutcome', 'xJson': json.dumps({ From 7d4dd3e5b444c43c1cc19b53689514e8deaf3849 Mon Sep 17 00:00:00 2001 From: chien-yu <32920873+chien-yu@users.noreply.github.com> Date: Mon, 15 Jul 2019 09:03:03 -0700 Subject: [PATCH 456/785] [ctsnews] Fix YouTube embeds extraction (#21678) --- youtube_dl/extractor/ctsnews.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py index d565335cf..dcda7e89d 100644 --- a/youtube_dl/extractor/ctsnews.py +++ b/youtube_dl/extractor/ctsnews.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals from 
.common import InfoExtractor from ..utils import unified_timestamp - +from .youtube import YoutubeIE class CtsNewsIE(InfoExtractor): IE_DESC = '華視新聞' @@ -14,8 +14,8 @@ class CtsNewsIE(InfoExtractor): 'info_dict': { 'id': '201501291578109', 'ext': 'mp4', - 'title': '以色列.真主黨交火 3人死亡', - 'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人...', + 'title': '以色列.真主黨交火 3人死亡 - 華視新聞網', + 'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人員也不幸罹難。大陸陝西、河南、安徽、江蘇和湖北五個省份出現大暴雪,嚴重影響陸空交通,不過九華山卻出現...', 'timestamp': 1422528540, 'upload_date': '20150129', } @@ -26,7 +26,7 @@ class CtsNewsIE(InfoExtractor): 'info_dict': { 'id': '201309031304098', 'ext': 'mp4', - 'title': '韓國31歲童顏男 貌如十多歲小孩', + 'title': '韓國31歲童顏男 貌如十多歲小孩 - 華視新聞網', 'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...', 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1378205880, @@ -62,8 +62,7 @@ class CtsNewsIE(InfoExtractor): video_url = mp4_feed['source_url'] else: self.to_screen('Not CTSPlayer video, trying Youtube...') - youtube_url = self._search_regex( - r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url') + youtube_url = YoutubeIE._extract_url(page) return self.url_result(youtube_url, ie='Youtube') From 799756a3b3c794284ca52b9af482e1f03fc46833 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jul 2019 23:47:10 +0700 Subject: [PATCH 457/785] [kaltura] Check source format URL (#21290) --- youtube_dl/extractor/kaltura.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 639d73837..0a733424c 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -103,6 +103,11 @@ class KalturaIE(InfoExtractor): { 'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto', 'only_matching': True, + }, + { + # 
unavailable source format + 'url': 'kaltura:513551:1_66x4rg7o', + 'only_matching': True, } ] @@ -306,12 +311,17 @@ class KalturaIE(InfoExtractor): f['fileExt'] = 'mp4' video_url = sign_url( '%s/flavorId/%s' % (data_url, f['id'])) + format_id = '%(fileExt)s-%(bitrate)s' % f + # Source format may not be available (e.g. kaltura:513551:1_66x4rg7o) + if f.get('isOriginal') is True and not self._is_valid_url( + video_url, entry_id, format_id): + continue # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g # -f mp4-56) vcodec = 'none' if 'videoCodecId' not in f and f.get( 'frameRate') == 0 else f.get('videoCodecId') formats.append({ - 'format_id': '%(fileExt)s-%(bitrate)s' % f, + 'format_id': format_id, 'ext': f.get('fileExt'), 'tbr': int_or_none(f['bitrate']), 'fps': int_or_none(f.get('frameRate')), From f61496863d2207718a2a6cb1591dcbc7abc282de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jul 2019 23:56:05 +0700 Subject: [PATCH 458/785] [asiancrush] Add support for yuyutv.com, midnightpulp.com and cocoro.tv (closes #21281, closes #21290) --- youtube_dl/extractor/asiancrush.py | 80 +++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/asiancrush.py b/youtube_dl/extractor/asiancrush.py index 6d71c5ad5..0348e680c 100644 --- a/youtube_dl/extractor/asiancrush.py +++ b/youtube_dl/extractor/asiancrush.py @@ -5,14 +5,12 @@ import re from .common import InfoExtractor from .kaltura import KalturaIE -from ..utils import ( - extract_attributes, - remove_end, -) +from ..utils import extract_attributes class AsianCrushIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P\d+)v\b' + _VALID_URL_BASE = r'https?://(?:www\.)?(?P(?:(?:asiancrush|yuyutv|midnightpulp)\.com|cocoro\.tv))' + _VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P\d+)v\b' % _VALID_URL_BASE _TESTS = [{ 'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/', 'md5': 
'c3b740e48d0ba002a42c0b72857beae6', @@ -20,7 +18,7 @@ class AsianCrushIE(InfoExtractor): 'id': '1_y4tmjm5r', 'ext': 'mp4', 'title': 'Women Who Flirt', - 'description': 'md5:3db14e9186197857e7063522cb89a805', + 'description': 'md5:7e986615808bcfb11756eb503a751487', 'timestamp': 1496936429, 'upload_date': '20170608', 'uploader_id': 'craig@crifkin.com', @@ -28,10 +26,27 @@ class AsianCrushIE(InfoExtractor): }, { 'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/', 'only_matching': True, + }, { + 'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/', + 'only_matching': True, + }, { + 'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/', + 'only_matching': True, + }, { + 'url': 'https://www.midnightpulp.com/video/010400v/drifters/', + 'only_matching': True, + }, { + 'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/', + 'only_matching': True, + }, { + 'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/', + 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) @@ -51,7 +66,7 @@ class AsianCrushIE(InfoExtractor): r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id') player = self._download_webpage( - 'https://api.asiancrush.com/embeddedVideoPlayer', video_id, + 'https://api.%s/embeddedVideoPlayer' % host, video_id, query={'id': entry_id}) kaltura_id = self._search_regex( @@ -63,15 +78,23 @@ class AsianCrushIE(InfoExtractor): r'/p(?:artner_id)?/(\d+)', player, 'partner id', default='513551') - return self.url_result( - 'kaltura:%s:%s' % (partner_id, kaltura_id), - ie=KalturaIE.ie_key(), video_id=kaltura_id, - video_title=title) + description = self._html_search_regex( + r'(?s)]+\bclass=["\']description["\'][^>]*>(.+?)
    ', + webpage, 'description', fatal=False) + + return { + '_type': 'url_transparent', + 'url': 'kaltura:%s:%s' % (partner_id, kaltura_id), + 'ie_key': KalturaIE.ie_key(), + 'id': video_id, + 'title': title, + 'description': description, + } class AsianCrushPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P\d+)s\b' - _TEST = { + _VALID_URL = r'%s/series/0+(?P\d+)s\b' % AsianCrushIE._VALID_URL_BASE + _TESTS = [{ 'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/', 'info_dict': { 'id': '12481', @@ -79,7 +102,16 @@ class AsianCrushPlaylistIE(InfoExtractor): 'description': 'md5:7addd7c5132a09fd4741152d96cce886', }, 'playlist_count': 20, - } + }, { + 'url': 'https://www.yuyutv.com/series/013920s/peep-show/', + 'only_matching': True, + }, { + 'url': 'https://www.midnightpulp.com/series/016375s/mononoke/', + 'only_matching': True, + }, { + 'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/', + 'only_matching': True, + }] def _real_extract(self, url): playlist_id = self._match_id(url) @@ -96,15 +128,15 @@ class AsianCrushPlaylistIE(InfoExtractor): entries.append(self.url_result( mobj.group('url'), ie=AsianCrushIE.ie_key())) - title = remove_end( - self._html_search_regex( - r'(?s)]\bid=["\']movieTitle[^>]+>(.+?)', webpage, - 'title', default=None) or self._og_search_title( - webpage, default=None) or self._html_search_meta( - 'twitter:title', webpage, 'title', - default=None) or self._search_regex( - r'([^<]+)', webpage, 'title', fatal=False), - ' | AsianCrush') + title = self._html_search_regex( + r'(?s)]\bid=["\']movieTitle[^>]+>(.+?)', webpage, + 'title', default=None) or self._og_search_title( + webpage, default=None) or self._html_search_meta( + 'twitter:title', webpage, 'title', + default=None) or self._search_regex( + r'([^<]+)', webpage, 'title', fatal=False) + if title: + title = re.sub(r'\s*\|\s*.+?$', '', title) description = self._og_search_description( webpage, default=None) 
or self._html_search_meta( From 8b4a0ebf10376e89daa9da3cd9570a3f16f8f375 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jul 2019 23:59:23 +0700 Subject: [PATCH 459/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ChangeLog b/ChangeLog index be1606586..a67b0595a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +version + +Extractors ++ [asiancrush] Add support for yuyutv.com, midnightpulp.com and cocoro.tv + (#21281, #21290) +* [kaltura] Check source format URL (#21290) +* [ctsnews] Fix YouTube embeds extraction (#21678) ++ [einthusan] Add support for einthusan.com (#21748, #21775) ++ [youtube] Add support for invidious.mastodon.host (#21777) ++ [gfycat] Extend URL regular expression (#21779, #21780) +* [youtube] Restrict is_live extraction (#21782) + + version 2019.07.14 Extractors From 2f1991ff1461b11134d2b09d6e8d681ce51d93d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 16 Jul 2019 00:01:46 +0700 Subject: [PATCH 460/785] release 2019.07.16 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 80ca6d5f1..89001802b 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.07.14** +- [ ] I've verified that I'm running youtube-dl version **2019.07.16** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all 
URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.14 + [debug] youtube-dl version 2019.07.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index a4f3c4dd9..4cc58fa42 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.07.14** +- [ ] I've verified that I'm running youtube-dl version **2019.07.16** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 9d82e1cd9..f38760b77 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.14** +- [ ] I've verified that I'm running youtube-dl version **2019.07.16** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git 
a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index ff82a7435..e4133dc4e 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.07.14** +- [ ] I've verified that I'm running youtube-dl version **2019.07.16** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.14 + [debug] youtube-dl version 2019.07.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index f692c663d..0bb6543e3 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.14** +- [ ] I've verified that I'm running youtube-dl version **2019.07.16** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index a67b0595a..fe0ca7164 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.07.16 Extractors + [asiancrush] Add support for yuyutv.com, midnightpulp.com and cocoro.tv diff --git 
a/youtube_dl/version.py b/youtube_dl/version.py index 8a7f4d733..b0f5a6b47 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.07.14' +__version__ = '2019.07.16' From 1824bfdcdff1af4bfc4f7f6ed885d45ee7e8c376 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 16 Jul 2019 22:51:10 +0100 Subject: [PATCH 461/785] [vrv] fix CMS signing query extraction(closes #21809) --- youtube_dl/extractor/vrv.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index c814a8a4a..6e51469b0 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -64,7 +64,15 @@ class VRVBaseIE(InfoExtractor): def _call_cms(self, path, video_id, note): if not self._CMS_SIGNING: - self._CMS_SIGNING = self._call_api('index', video_id, 'CMS Signing')['cms_signing'] + index = self._call_api('index', video_id, 'CMS Signing') + self._CMS_SIGNING = index.get('cms_signing') or {} + if not self._CMS_SIGNING: + for signing_policy in index.get('signing_policies', []): + signing_path = signing_policy.get('path') + if signing_path and signing_path.startswith('/cms/'): + name, value = signing_policy.get('name'), signing_policy.get('value') + if name and value: + self._CMS_SIGNING[name] = value return self._download_json( self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING, note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers()) From 5e1c39ac853bfe4da7feda2a48544cb5811873d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Wed, 17 Jul 2019 17:47:53 +0200 Subject: [PATCH 462/785] [extractor/common] Fix typo in thumbnails resolution description (#21817) --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9c3e9eec6..859786617 100644 --- 
a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -220,7 +220,7 @@ class InfoExtractor(object): * "preference" (optional, int) - quality of the image * "width" (optional, int) * "height" (optional, int) - * "resolution" (optional, string "{width}x{height"}, + * "resolution" (optional, string "{width}x{height}", deprecated) * "filesize" (optional, int) thumbnail: Full URL to a video thumbnail image. From 9c1da4a9f9fc17cffc2fa2261030c66d2a032a58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 20 Jul 2019 23:08:26 +0700 Subject: [PATCH 463/785] [extractor/generic] Restrict --default-search schemeless URLs detection pattern (closes #21842) --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 77e217460..d34fc4b15 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2226,7 +2226,7 @@ class GenericIE(InfoExtractor): default_search = 'fixup_error' if default_search in ('auto', 'auto_warning', 'fixup_error'): - if '/' in url: + if re.match(r'^[^\s/]+\.[^\s/]+/', url): self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') return self.url_result('http://' + url) elif default_search != 'fixup_error': From 2e18adec98a44ca839cbaaed7ce27d8d07f54cfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 20 Jul 2019 23:46:34 +0700 Subject: [PATCH 464/785] [youtube:playlist] Relax _VIDEO_RE (closes #21844) --- youtube_dl/extractor/youtube.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a87a46b3b..aa316ba88 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2432,7 +2432,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): (%(playlist_id)s) )""" % {'playlist_id': 
YoutubeBaseInfoExtractor._PLAYLIST_ID_RE} _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s' - _VIDEO_RE = r'href="\s*/watch\?v=(?P[0-9A-Za-z_-]{11})&[^"]*?index=(?P\d+)(?:[^>]+>(?P[^<]+))?' + _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?' IE_NAME = 'youtube:playlist' _TESTS = [{ 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re', @@ -2556,6 +2556,16 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'noplaylist': True, 'skip_download': True, }, + }, { + # https://github.com/ytdl-org/youtube-dl/issues/21844 + 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', + 'info_dict': { + 'title': 'Data Analysis with Dr Mike Pound', + 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', + 'uploader_id': 'Computerphile', + 'uploader': 'Computerphile', + }, + 'playlist_mincount': 11, }, { 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21', 'only_matching': True, From 13a75688a55f32cde316b0f7d5992ff4a1f6d279 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Jul 2019 00:01:46 +0700 Subject: [PATCH 465/785] [youtube] Fix some tests --- youtube_dl/extractor/youtube.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index aa316ba88..b2c714505 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2455,6 +2455,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'title': '29C3: Not my department', 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', + 'uploader': 'Christiaan008', + 'uploader_id': 'ChRiStIaAn008', }, 'playlist_count': 95, }, { @@ -2463,6 +2465,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'title': '[OLD]Team Fortress 2 (Class-based LP)', 'id': 'PLBB231211A4F62143', + 
'uploader': 'Wickydoo', + 'uploader_id': 'Wickydoo', }, 'playlist_mincount': 26, }, { @@ -2471,6 +2475,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'title': 'Uploads from Cauchemar', 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q', + 'uploader': 'Cauchemar', + 'uploader_id': 'Cauchemar89', }, 'playlist_mincount': 799, }, { @@ -2488,13 +2494,17 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'title': 'JODA15', 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', + 'uploader': 'milan', + 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw', } }, { 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', 'playlist_mincount': 485, 'info_dict': { - 'title': '2017 華語最新單曲 (2/24更新)', + 'title': '2018 Chinese New Singles (11/6 updated)', 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', + 'uploader': 'LBK', + 'uploader_id': 'sdragonfang', } }, { 'note': 'Embedded SWF player', @@ -2503,13 +2513,16 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'title': 'JODA7', 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ', - } + }, + 'skip': 'This playlist does not exist', }, { 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos', 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA', 'info_dict': { 'title': 'Uploads from Interstellar Movie', 'id': 'UUXw-G3eDE9trcvY2sBMM_aA', + 'uploader': 'Interstellar Movie', + 'uploader_id': 'InterstellarMovie1', }, 'playlist_mincount': 21, }, { @@ -2534,6 +2547,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'params': { 'skip_download': True, }, + 'skip': 'This video is not available.', 'add_ie': [YoutubeIE.ie_key()], }, { 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5', @@ -2545,7 +2559,6 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'uploader_id': 'backuspagemuseum', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum', 
'upload_date': '20161008', - 'license': 'Standard YouTube License', 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a', 'categories': ['Nonprofits & Activism'], 'tags': list, @@ -2732,6 +2745,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w', 'title': 'Uploads from lex will', + 'uploader': 'lex will', + 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', } }, { 'note': 'Age restricted channel', @@ -2741,6 +2756,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'id': 'UUs0ifCMCm1icqRbqhUINa0w', 'title': 'Uploads from Deus Ex', + 'uploader': 'Deus Ex', + 'uploader_id': 'DeusExOfficial', }, }, { 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA', @@ -2825,6 +2842,8 @@ class YoutubeUserIE(YoutubeChannelIE): 'info_dict': { 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ', 'title': 'Uploads from The Linux Foundation', + 'uploader': 'The Linux Foundation', + 'uploader_id': 'TheLinuxFoundation', } }, { # Only available via https://www.youtube.com/c/12minuteathlete/videos @@ -2834,6 +2853,8 @@ class YoutubeUserIE(YoutubeChannelIE): 'info_dict': { 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ', 'title': 'Uploads from 12 Minute Athlete', + 'uploader': '12 Minute Athlete', + 'uploader_id': 'the12minuteathlete', } }, { 'url': 'ytuser:phihag', @@ -2927,7 +2948,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor): 'playlist_mincount': 4, 'info_dict': { 'id': 'ThirstForScience', - 'title': 'Thirst for Science', + 'title': 'ThirstForScience', }, }, { # with "Load more" button @@ -2944,6 +2965,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor): 'id': 'UCiU1dHvZObB2iP6xkJ__Icw', 'title': 'Chem Player', }, + 'skip': 'Blocked', }] From 3b446ab3519948980630e3328b971385826ffba8 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 20 Jul 2019 20:20:30 +0100 Subject: [PATCH 466/785] [discovery] add support go.discovery.com URLs --- youtube_dl/extractor/discovery.py | 5 ++++- 1 
file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index b70c307a7..9003545ce 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -19,9 +19,9 @@ from ..compat import compat_HTTPError class DiscoveryIE(DiscoveryGoBaseIE): _VALID_URL = r'''(?x)https?:// (?P<site> + (?:(?:www|go)\.)?discovery| (?:www\.)? (?: - discovery| investigationdiscovery| discoverylife| animalplanet| @@ -56,6 +56,9 @@ class DiscoveryIE(DiscoveryGoBaseIE): }, { 'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision', 'only_matching': True, + }, { + 'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road', + 'only_matching': True, }] _GEO_COUNTRIES = ['US'] _GEO_BYPASS = False From ab794a553c36ddd690e2243450653c3ede43e606 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Jul 2019 13:20:21 +0700 Subject: [PATCH 467/785] [ctsnews] PEP 8 --- youtube_dl/extractor/ctsnews.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py index dcda7e89d..679f1d92e 100644 --- a/youtube_dl/extractor/ctsnews.py +++ b/youtube_dl/extractor/ctsnews.py @@ -5,6 +5,7 @@ from .common import InfoExtractor from ..utils import unified_timestamp from .youtube import YoutubeIE + class CtsNewsIE(InfoExtractor): IE_DESC = '華視新聞' _VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html' From 608b8a4300fab7792e637fd9b7045adf1c0cb2aa Mon Sep 17 00:00:00 2001 From: Kyle <40903431+kylepw@users.noreply.github.com> Date: Mon, 22 Jul 2019 02:59:36 +0900 Subject: [PATCH 468/785] [yahoo:japannews] Add extractor (closes #21698) (#21265) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/yahoo.py | 131 +++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) diff --git 
a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 15f54a214..06de556b7 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1448,6 +1448,7 @@ from .yahoo import ( YahooSearchIE, YahooGyaOPlayerIE, YahooGyaOIE, + YahooJapanNewsIE, ) from .yandexdisk import YandexDiskIE from .yandexmusic import ( diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index a3b5f00c8..e5ebdd180 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -1,12 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals +import hashlib import itertools import json import re from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( + compat_str, compat_urllib_parse, compat_urlparse, ) @@ -18,7 +20,9 @@ from ..utils import ( int_or_none, mimetype2ext, smuggle_url, + try_get, unescapeHTML, + url_or_none, ) from .brightcove import ( @@ -556,3 +560,130 @@ class YahooGyaOIE(InfoExtractor): 'https://gyao.yahoo.co.jp/player/%s/' % video_id.replace(':', '/'), YahooGyaOPlayerIE.ie_key(), video_id)) return self.playlist_result(entries, program_id) + + +class YahooJapanNewsIE(InfoExtractor): + IE_NAME = 'yahoo:japannews' + IE_DESC = 'Yahoo! Japan News' + _VALID_URL = r'https?://(?P<host>(?:news|headlines)\.yahoo\.co\.jp)[^\d]*(?P<id>\d[\d-]*\d)?' 
+ _GEO_COUNTRIES = ['JP'] + _TESTS = [{ + 'url': 'https://headlines.yahoo.co.jp/videonews/ann?a=20190716-00000071-ann-int', + 'info_dict': { + 'id': '1736242', + 'ext': 'mp4', + 'title': 'ムン大統領が対日批判を強化“現金化”効果は?(テレビ朝日系(ANN)) - Yahoo!ニュース', + 'description': '韓国の元徴用工らを巡る裁判の原告が弁護士が差し押さえた三菱重工業の資産を売却して - Yahoo!ニュース(テレビ朝日系(ANN))', + 'thumbnail': r're:^https?://.*\.[a-zA-Z\d]{3,4}$', + }, + 'params': { + 'skip_download': True, + }, + }, { + # geo restricted + 'url': 'https://headlines.yahoo.co.jp/hl?a=20190721-00000001-oxv-l04', + 'only_matching': True, + }, { + 'url': 'https://headlines.yahoo.co.jp/videonews/', + 'only_matching': True, + }, { + 'url': 'https://news.yahoo.co.jp', + 'only_matching': True, + }, { + 'url': 'https://news.yahoo.co.jp/byline/hashimotojunji/20190628-00131977/', + 'only_matching': True, + }, { + 'url': 'https://news.yahoo.co.jp/feature/1356', + 'only_matching': True + }] + + def _extract_formats(self, json_data, content_id): + formats = [] + + video_data = try_get( + json_data, + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list) + for vid in video_data or []: + delivery = vid.get('delivery') + url = url_or_none(vid.get('Url')) + if not delivery or not url: + continue + elif delivery == 'hls': + formats.extend( + self._extract_m3u8_formats( + url, content_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': url, + 'format_id': 'http-%s' % compat_str(vid.get('bitrate', '')), + 'height': int_or_none(vid.get('height')), + 'width': int_or_none(vid.get('width')), + 'tbr': int_or_none(vid.get('bitrate')), + }) + self._remove_duplicate_formats(formats) + self._sort_formats(formats) + + return formats + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + display_id = mobj.group('id') or host + + webpage = self._download_webpage(url, display_id) + + title = self._html_search_meta( + ['og:title', 'twitter:title'], webpage, 'title', default=None + 
) or self._html_search_regex('<title>([^<]+)', webpage, 'title') + + if display_id == host: + # Headline page (w/ multiple BC playlists) ('news.yahoo.co.jp', 'headlines.yahoo.co.jp/videonews/', ...) + stream_plists = re.findall(r'plist=(\d+)', webpage) or re.findall(r'plist["\']:\s*["\']([^"\']+)', webpage) + entries = [ + self.url_result( + smuggle_url( + 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=%s' % plist_id, + {'geo_countries': ['JP']}), + ie='BrightcoveNew', video_id=plist_id) + for plist_id in stream_plists] + return self.playlist_result(entries, playlist_title=title) + + # Article page + description = self._html_search_meta( + ['og:description', 'description', 'twitter:description'], + webpage, 'description', default=None) + thumbnail = self._og_search_thumbnail( + webpage, default=None) or self._html_search_meta( + 'twitter:image', webpage, 'thumbnail', default=None) + space_id = self._search_regex([ + r']+class=["\']yvpub-player["\'][^>]+spaceid=([^&"\']+)', + r'YAHOO\.JP\.srch\.\w+link\.onLoad[^;]+spaceID["\' ]*:["\' ]+([^"\']+)', + r' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.07.16** +- [ ] I've verified that I'm running youtube-dl version **2019.07.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.16 + [debug] youtube-dl version 2019.07.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg 
N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 4cc58fa42..aeca69974 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.07.16** +- [ ] I've verified that I'm running youtube-dl version **2019.07.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index f38760b77..e232df726 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.16** +- [ ] I've verified that I'm running youtube-dl version **2019.07.27** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index e4133dc4e..8608a085c 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.07.16** +- [ ] I've verified that I'm running youtube-dl version **2019.07.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ 
] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.16 + [debug] youtube-dl version 2019.07.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 0bb6543e3..64864a3b7 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.16** +- [ ] I've verified that I'm running youtube-dl version **2019.07.27** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 08e58524e..32070b8d5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.07.27 Extractors + [yahoo:japannews] Add support for yahoo.co.jp (#21698, #21265) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 9ae6e5c96..7cf60eefe 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1117,6 +1117,7 @@ - **Yahoo**: Yahoo screen and movies - **yahoo:gyao** - **yahoo:gyao:player** + - **yahoo:japannews**: Yahoo! 
Japan News - **YandexDisk** - **yandexmusic:album**: Яндекс.Музыка - Альбом - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b0f5a6b47..e3e37b8c5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.07.16' +__version__ = '2019.07.27' From 8dbf751aa241475dd8a7a6d3040713b5874fd057 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 30 Jul 2019 00:13:33 +0100 Subject: [PATCH 472/785] [youtube] improve title and description extraction(closes #21934) --- youtube_dl/extractor/youtube.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b2c714505..9a182fcf6 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1820,16 +1820,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_details = try_get( player_response, lambda x: x['videoDetails'], dict) or {} - # title - if 'title' in video_info: - video_title = video_info['title'][0] - elif 'title' in player_response: - video_title = video_details['title'] - else: + video_title = video_info.get('title', [None])[0] or video_details.get('title') + if not video_title: self._downloader.report_warning('Unable to extract video title') video_title = '_' - # description description_original = video_description = get_element_by_id("eow-description", video_webpage) if video_description: @@ -1854,11 +1849,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ''', replace_url, video_description) video_description = clean_html(video_description) else: - fd_mobj = re.search(r' Date: Tue, 30 Jul 2019 09:41:23 +0700 Subject: [PATCH 473/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 32070b8d5..0dbfc4dbf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + 
+Extractors +* [youtube] Fix and improve title and description extraction (#21934) + + version 2019.07.27 Extractors From 85c2c4b4abea4618be8013d41f6ba9e95c4e5e40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 30 Jul 2019 09:43:47 +0700 Subject: [PATCH 474/785] release 2019.07.30 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 06322bb2f..ccd033716 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.07.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.30** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.27 + [debug] youtube-dl version 2019.07.30 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md 
b/.github/ISSUE_TEMPLATE/2_site_support_request.md index aeca69974..8709937ad 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.07.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.30** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index e232df726..c3a555ed3 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.30** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 8608a085c..07042a466 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.07.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.30** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you 
run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.27 + [debug] youtube-dl version 2019.07.30 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 64864a3b7..4cf75a2eb 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.30** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 0dbfc4dbf..f6f1f7e38 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.07.30 Extractors * [youtube] Fix and improve title and description extraction (#21934) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e3e37b8c5..04dc83605 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.07.27' +__version__ = '2019.07.30' From c2d125d99f81aa33429b2158acd9a90524575378 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 31 Jul 2019 00:14:08 +0700 Subject: [PATCH 475/785] [youtube] Improve metadata extraction for age gate content (closes #21943) --- youtube_dl/extractor/youtube.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9a182fcf6..1aee0e465 100644 --- 
a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1700,6 +1700,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def extract_token(v_info): return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token')) + def extract_player_response(player_response, video_id): + pl_response = str_or_none(player_response) + if not pl_response: + return + pl_response = self._parse_json(pl_response, video_id, fatal=False) + if isinstance(pl_response, dict): + add_dash_mpd_pr(pl_response) + return pl_response + player_response = {} # Get video info @@ -1722,7 +1731,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): note='Refetching age-gated info webpage', errnote='unable to download video info webpage') video_info = compat_parse_qs(video_info_webpage) + pl_response = video_info.get('player_response', [None])[0] + player_response = extract_player_response(pl_response, video_id) add_dash_mpd(video_info) + view_count = extract_view_count(video_info) else: age_gate = False video_info = None @@ -1745,11 +1757,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): is_live = True sts = ytplayer_config.get('sts') if not player_response: - pl_response = str_or_none(args.get('player_response')) - if pl_response: - pl_response = self._parse_json(pl_response, video_id, fatal=False) - if isinstance(pl_response, dict): - player_response = pl_response + player_response = extract_player_response(args.get('player_response'), video_id) if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): add_dash_mpd_pr(player_response) # We also try looking in get_video_info since it may contain different dashmpd @@ -1781,9 +1789,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): get_video_info = compat_parse_qs(video_info_webpage) if not player_response: pl_response = get_video_info.get('player_response', [None])[0] - if isinstance(pl_response, dict): - player_response = pl_response - add_dash_mpd_pr(player_response) + player_response = 
extract_player_response(pl_response, video_id) add_dash_mpd(get_video_info) if view_count is None: view_count = extract_view_count(get_video_info) From 2c8b1a21e8901904ab674264f5eda118bca992a5 Mon Sep 17 00:00:00 2001 From: smed79 <1873139+smed79@users.noreply.github.com> Date: Tue, 30 Jul 2019 19:40:50 +0100 Subject: [PATCH 476/785] [openload] Add support for oload.best (#21913) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 11e92e471..030355257 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' + _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' _VALID_URL = r'''(?x) https?:// (?P @@ -368,6 +368,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.biz/f/bEk3Gp8ARr4/', 'only_matching': True, + }, { + 'url': 'https://oload.best/embed/kkz9JgVZeWc/', + 'only_matching': True, }, { 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', 'only_matching': True, From 07ab44c420a79d1faae09d00323242746e522c4c Mon Sep 17 00:00:00 2001 From: CeruleanSky Date: Tue, 30 Jul 2019 14:43:49 -0400 Subject: [PATCH 477/785] [dlive] Relax _VALID_URL (#21909) --- youtube_dl/extractor/dlive.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dlive.py b/youtube_dl/extractor/dlive.py index 8787f15a6..d95c67a5b 100644 --- a/youtube_dl/extractor/dlive.py +++ b/youtube_dl/extractor/dlive.py @@ -9,8 +9,8 @@ from ..utils import 
int_or_none class DLiveVODIE(InfoExtractor): IE_NAME = 'dlive:vod' - _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P.+?)\+(?P[a-zA-Z0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P.+?)\+(?P[^/?#&]+)' + _TESTS = [{ 'url': 'https://dlive.tv/p/pdp+3mTzOl4WR', 'info_dict': { 'id': '3mTzOl4WR', @@ -20,7 +20,10 @@ class DLiveVODIE(InfoExtractor): 'timestamp': 1562011015, 'uploader_id': 'pdp', } - } + }, { + 'url': 'https://dlive.tv/p/pdpreplay+D-RD-xSZg', + 'only_matching': True, + }] def _real_extract(self, url): uploader_id, vod_id = re.match(self._VALID_URL, url).groups() From 72791634127cc3093592c807225ec684af1cfcc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 31 Jul 2019 02:31:19 +0700 Subject: [PATCH 478/785] [tvn24] Fix metadata extraction (closes #21833, closes #21834) --- youtube_dl/extractor/tvn24.py | 42 +++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py index 6590e1fd0..39f57ae6b 100644 --- a/youtube_dl/extractor/tvn24.py +++ b/youtube_dl/extractor/tvn24.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( int_or_none, + NO_DEFAULT, unescapeHTML, ) @@ -20,6 +21,18 @@ class TVN24IE(InfoExtractor): 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".', 'thumbnail': 're:https?://.*[.]jpeg', } + }, { + # different layout + 'url': 'https://tvnmeteo.tvn24.pl/magazyny/maja-w-ogrodzie,13/odcinki-online,1,4,1,0/pnacza-ptaki-i-iglaki-odc-691-hgtv-odc-29,1771763.html', + 'info_dict': { + 'id': '1771763', + 'ext': 'mp4', + 'title': 'Pnącza, ptaki i iglaki (odc. 691 /HGTV odc. 
29)', + 'thumbnail': 're:https?://.*', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html', 'only_matching': True, @@ -35,18 +48,21 @@ class TVN24IE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) + display_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, display_id) - title = self._og_search_title(webpage) + title = self._og_search_title( + webpage, default=None) or self._search_regex( + r']+class=["\']magazineItemHeader[^>]+>(.+?)(?!\1).+?)\1' % attr, webpage, - name, group='json', fatal=fatal) or '{}', - video_id, transform_source=unescapeHTML, fatal=fatal) + name, group='json', default=default, fatal=fatal) or '{}', + display_id, transform_source=unescapeHTML, fatal=fatal) quality_data = extract_json('data-quality', 'formats') @@ -59,16 +75,24 @@ class TVN24IE(InfoExtractor): }) self._sort_formats(formats) - description = self._og_search_description(webpage) + description = self._og_search_description(webpage, default=None) thumbnail = self._og_search_thumbnail( webpage, default=None) or self._html_search_regex( r'\bdata-poster=(["\'])(?P(?!\1).+?)\1', webpage, 'thumbnail', group='url') + video_id = None + share_params = extract_json( - 'data-share-params', 'share params', fatal=False) + 'data-share-params', 'share params', default=None) if isinstance(share_params, dict): - video_id = share_params.get('id') or video_id + video_id = share_params.get('id') + + if not video_id: + video_id = self._search_regex( + r'data-vid-id=["\'](\d+)', webpage, 'video id', + default=None) or self._search_regex( + r',(\d+)\.html', url, 'video id', default=display_id) return { 'id': video_id, From 766c4f6090fdea635f50597a3c5d60643e3a2913 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 31 Jul 2019 02:32:02 +0700 Subject: [PATCH 479/785] [tvn24] Fix test 
--- youtube_dl/extractor/tvn24.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py index 39f57ae6b..de0fb5063 100644 --- a/youtube_dl/extractor/tvn24.py +++ b/youtube_dl/extractor/tvn24.py @@ -18,7 +18,7 @@ class TVN24IE(InfoExtractor): 'id': '1584444', 'ext': 'mp4', 'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"', - 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".', + 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości Szkła kontaktowego.', 'thumbnail': 're:https?://.*[.]jpeg', } }, { From 9a37ff82f17383336251afcd80821620dd86ee95 Mon Sep 17 00:00:00 2001 From: Sen Jiang Date: Wed, 31 Jul 2019 13:45:02 -0700 Subject: [PATCH 480/785] [mgtv] Extract format_note (#21881) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit format_note should now show 标清, 高清, 超清, 蓝光, etc. --- youtube_dl/extractor/mgtv.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index 7ae2e3c3b..71fc3ec56 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -82,6 +82,7 @@ class MGTVIE(InfoExtractor): 'http_headers': { 'Referer': url, }, + 'format_note': stream.get('name'), }) self._sort_formats(formats) From 826dcff99cd0a44ec5fa94f0e0201f5115d097ef Mon Sep 17 00:00:00 2001 From: cantandwont <52587695+cantandwont@users.noreply.github.com> Date: Thu, 1 Aug 2019 06:54:39 +1000 Subject: [PATCH 481/785] Output batch filename when it could not be read (#21915) --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 165c975dd..9a659fc65 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -94,7 +94,7 @@ def _real_main(argv=None): if opts.verbose: write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') 
except IOError: - sys.exit('ERROR: batch file could not be read') + sys.exit('ERROR: batch file %s could not be read' % opts.batchfile) all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls _enc = preferredencoding() all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] From 535111657b507d4f4454160aaf2587e7ce6b9936 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 1 Aug 2019 22:44:38 +0100 Subject: [PATCH 482/785] [discovery] use API call for video data extraction(#21808) --- youtube_dl/extractor/discovery.py | 59 ++++++++++++++----------------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index 9003545ce..c4b90cd90 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -5,14 +5,8 @@ import re import string from .discoverygo import DiscoveryGoBaseIE -from ..compat import ( - compat_str, - compat_urllib_parse_unquote, -) -from ..utils import ( - ExtractorError, - try_get, -) +from ..compat import compat_urllib_parse_unquote +from ..utils import ExtractorError from ..compat import compat_HTTPError @@ -40,15 +34,15 @@ class DiscoveryIE(DiscoveryGoBaseIE): cookingchanneltv| motortrend ) - )\.com(?P/tv-shows/[^/]+/(?:video|full-episode)s/(?P[^./?#]+))''' + )\.com/tv-shows/[^/]+/(?:video|full-episode)s/(?P[^./?#]+)''' _TESTS = [{ - 'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley', + 'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry', 'info_dict': { - 'id': '5a2d9b4d6b66d17a5026e1fd', + 'id': '5a2f35ce6b66d17a5026e29e', 'ext': 'mp4', - 'title': 'Dave Foley', - 'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f', - 'duration': 608, + 'title': 'Riding with Matthew Perry', + 'description': 'md5:a34333153e79bc4526019a5129e7f878', + 'duration': 84, }, 'params': { 'skip_download': True, # requires ffmpeg 
@@ -62,17 +56,10 @@ class DiscoveryIE(DiscoveryGoBaseIE): }] _GEO_COUNTRIES = ['US'] _GEO_BYPASS = False + _API_BASE_URL = 'https://api.discovery.com/v1/' def _real_extract(self, url): - site, path, display_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, display_id) - - react_data = self._parse_json(self._search_regex( - r'window\.__reactTransmitPacket\s*=\s*({.+?});', - webpage, 'react data'), display_id) - content_blocks = react_data['layout'][path]['contentBlocks'] - video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0] - video_id = video['id'] + site, display_id = re.match(self._VALID_URL, url).groups() access_token = None cookies = self._get_cookies(url) @@ -82,27 +69,33 @@ class DiscoveryIE(DiscoveryGoBaseIE): if auth_storage_cookie and auth_storage_cookie.value: auth_storage = self._parse_json(compat_urllib_parse_unquote( compat_urllib_parse_unquote(auth_storage_cookie.value)), - video_id, fatal=False) or {} + display_id, fatal=False) or {} access_token = auth_storage.get('a') or auth_storage.get('access_token') if not access_token: access_token = self._download_json( - 'https://%s.com/anonymous' % site, display_id, query={ + 'https://%s.com/anonymous' % site, display_id, + 'Downloading token JSON metadata', query={ 'authRel': 'authorization', - 'client_id': try_get( - react_data, lambda x: x['application']['apiClientId'], - compat_str) or '3020a40c2356a645b4b4', + 'client_id': '3020a40c2356a645b4b4', 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), 'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site, })['access_token'] - try: - headers = self.geo_verification_headers() - headers['Authorization'] = 'Bearer ' + access_token + headers = self.geo_verification_headers() + headers['Authorization'] = 'Bearer ' + access_token + try: + video = self._download_json( + self._API_BASE_URL + 'content/videos', + 
display_id, 'Downloading content JSON metadata', + headers=headers, query={ + 'slug': display_id, + })[0] + video_id = video['id'] stream = self._download_json( - 'https://api.discovery.com/v1/streaming/video/' + video_id, - display_id, headers=headers) + self._API_BASE_URL + 'streaming/video/' + video_id, + display_id, 'Downloading streaming JSON metadata', headers=headers) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): e_description = self._parse_json( From 07f3a05c87619d01c195cad8cd57ec72291ad78d Mon Sep 17 00:00:00 2001 From: Kyle <40903431+kylepw@users.noreply.github.com> Date: Fri, 2 Aug 2019 06:49:01 +0900 Subject: [PATCH 483/785] [CONTRIBUTING.md] Add some more coding conventions (#21939) --- CONTRIBUTING.md | 64 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cd9ccbe96..d0e0a5637 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -366,3 +366,67 @@ duration = float_or_none(video.get('durationMs'), scale=1000) view_count = int_or_none(video.get('views')) ``` +### Inline values + +Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult. + +#### Example + +Correct: + +```python +title = self._html_search_regex(r'([^<]+)', webpage, 'title') +``` + +Incorrect: + +```python +TITLE_RE = r'([^<]+)' +# ...some lines of code... +title = self._html_search_regex(TITLE_RE, webpage, 'title') +``` + +### Collapse fallbacks + +Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of meta values. 
+ +#### Example + +Good: + +```python +description = self._html_search_meta( + ['og:description', 'description', 'twitter:description'], + webpage, 'description', default=None) +``` + +Unwieldy: + +```python +description = ( + self._og_search_description(webpage, default=None) + or self._html_search_meta('description', webpage, default=None) + or self._html_search_meta('twitter:description', webpage, default=None)) +``` + +### Trailing parentheses + +Always move trailing parentheses after the last argument. + +#### Example + +Correct: + +```python + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list) +``` + +Incorrect: + +```python + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list, +) +``` + From 33b529fabd282a371d3a4c21ee861badd20dae28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Aug 2019 05:03:25 +0700 Subject: [PATCH 484/785] [yandexvideo] Add support for DASH formats (#21971) --- youtube_dl/extractor/yandexvideo.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/yandexvideo.py b/youtube_dl/extractor/yandexvideo.py index 1aea95383..46529be05 100644 --- a/youtube_dl/extractor/yandexvideo.py +++ b/youtube_dl/extractor/yandexvideo.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + determine_ext, int_or_none, url_or_none, ) @@ -47,6 +48,10 @@ class YandexVideoIE(InfoExtractor): # episode, sports 'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d', 'only_matching': True, + }, { + # DASH with DRM + 'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8', + 'only_matching': True, }] def _real_extract(self, url): @@ -59,13 +64,22 @@ class YandexVideoIE(InfoExtractor): 'disable_trackings': 1, })['content'] - m3u8_url = url_or_none(content.get('content_url')) or url_or_none( + 
content_url = url_or_none(content.get('content_url')) or url_or_none( content['streams'][0]['url']) title = content.get('title') or content.get('computed_title') - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') + ext = determine_ext(content_url) + + if ext == 'm3u8': + formats = self._extract_m3u8_formats( + content_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + elif ext == 'mpd': + formats = self._extract_mpd_formats( + content_url, video_id, mpd_id='dash') + else: + formats = [{'url': content_url}] + self._sort_formats(formats) description = content.get('description') From be306d6a313903a3ebdb8a8ff055bb6b58c9f818 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Aug 2019 05:25:01 +0700 Subject: [PATCH 485/785] [tvigle] Fix extraction and add support for HLS and DASH formats (closes #21967) --- youtube_dl/extractor/tvigle.py | 53 +++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py index 3475ef4c3..180259aba 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -9,6 +9,8 @@ from ..utils import ( float_or_none, int_or_none, parse_age_limit, + try_get, + url_or_none, ) @@ -23,11 +25,10 @@ class TvigleIE(InfoExtractor): _TESTS = [ { 'url': 'http://www.tvigle.ru/video/sokrat/', - 'md5': '36514aed3657d4f70b4b2cef8eb520cd', 'info_dict': { 'id': '1848932', 'display_id': 'sokrat', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Сократ', 'description': 'md5:d6b92ffb7217b4b8ebad2e7665253c17', 'duration': 6586, @@ -37,7 +38,6 @@ class TvigleIE(InfoExtractor): }, { 'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/', - 'md5': 'e7efe5350dd5011d0de6550b53c3ba7b', 'info_dict': { 'id': '5142516', 'ext': 'flv', @@ -62,7 +62,7 @@ class TvigleIE(InfoExtractor): webpage = 
self._download_webpage(url, display_id) video_id = self._html_search_regex( (r']+class=["\']player["\'][^>]+id=["\'](\d+)', - r'var\s+cloudId\s*=\s*["\'](\d+)', + r'cloudId\s*=\s*["\'](\d+)', r'class="video-preview current_playing" id="(\d+)"'), webpage, 'video id') @@ -90,21 +90,40 @@ class TvigleIE(InfoExtractor): age_limit = parse_age_limit(item.get('ageRestrictions')) formats = [] - for vcodec, fmts in item['videos'].items(): + for vcodec, url_or_fmts in item['videos'].items(): if vcodec == 'hls': - continue - for format_id, video_url in fmts.items(): - if format_id == 'm3u8': + m3u8_url = url_or_none(url_or_fmts) + if not m3u8_url: continue - height = self._search_regex( - r'^(\d+)[pP]$', format_id, 'height', default=None) - formats.append({ - 'url': video_url, - 'format_id': '%s-%s' % (vcodec, format_id), - 'vcodec': vcodec, - 'height': int_or_none(height), - 'filesize': int_or_none(item.get('video_files_size', {}).get(vcodec, {}).get(format_id)), - }) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif vcodec == 'dash': + mpd_url = url_or_none(url_or_fmts) + if not mpd_url: + continue + formats.extend(self._extract_mpd_formats( + mpd_url, video_id, mpd_id='dash', fatal=False)) + else: + if not isinstance(url_or_fmts, dict): + continue + for format_id, video_url in url_or_fmts.items(): + if format_id == 'm3u8': + continue + video_url = url_or_none(video_url) + if not video_url: + continue + height = self._search_regex( + r'^(\d+)[pP]$', format_id, 'height', default=None) + filesize = int_or_none(try_get( + item, lambda x: x['video_files_size'][vcodec][format_id])) + formats.append({ + 'url': video_url, + 'format_id': '%s-%s' % (vcodec, format_id), + 'vcodec': vcodec, + 'height': int_or_none(height), + 'filesize': filesize, + }) self._sort_formats(formats) return { From 2e9522b06173f2c5cfb2ba020958242d2a93feb7 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Aug 2019 05:36:32 +0700 Subject: [PATCH 486/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ChangeLog b/ChangeLog index f6f1f7e38..c650e25d5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +version + +Extractors ++ [tvigle] Add support for HLS and DASH formats (#21967) +* [tvigle] Fix extraction (#21967) ++ [yandexvideo] Add support for DASH formats (#21971) +* [discovery] Use API call for video data extraction (#21808) ++ [mgtv] Extract format_note (#21881) +* [tvn24] Fix metadata extraction (#21833, #21834) +* [dlive] Relax URL regular expression (#21909) ++ [openload] Add support for oload.best (#21913) +* [youtube] Improve metadata extraction for age gate content (#21943) + + version 2019.07.30 Extractors From 4f2d735803f723a8d8d6ffbbb1dd6b203f71af58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Aug 2019 05:37:54 +0700 Subject: [PATCH 487/785] release 2019.08.02 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +- .../ISSUE_TEMPLATE/2_site_support_request.md | 4 +- .../ISSUE_TEMPLATE/3_site_feature_request.md | 4 +- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 +- CONTRIBUTING.md | 64 ------------------- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 8 files changed, 14 insertions(+), 78 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index ccd033716..4d3894ad3 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.07.30** +- [ ] I've verified that I'm running youtube-dl version **2019.08.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special 
characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.30 + [debug] youtube-dl version 2019.08.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 8709937ad..796e11e54 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.07.30** +- [ ] I've verified that I'm running youtube-dl version **2019.08.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index c3a555ed3..aa2348548 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.30** +- [ ] I've verified that I'm running youtube-dl version **2019.08.02** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md 
b/.github/ISSUE_TEMPLATE/4_bug_report.md index 07042a466..5b2501a65 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.07.30** +- [ ] I've verified that I'm running youtube-dl version **2019.08.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.30 + [debug] youtube-dl version 2019.08.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 4cf75a2eb..d1758a95c 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.30** +- [ ] I've verified that I'm running youtube-dl version **2019.08.02** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d0e0a5637..cd9ccbe96 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -366,67 +366,3 @@ duration = float_or_none(video.get('durationMs'), scale=1000) view_count = int_or_none(video.get('views')) ``` -### Inline values - 
-Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult. - -#### Example - -Correct: - -```python -title = self._html_search_regex(r'([^<]+)', webpage, 'title') -``` - -Incorrect: - -```python -TITLE_RE = r'([^<]+)' -# ...some lines of code... -title = self._html_search_regex(TITLE_RE, webpage, 'title') -``` - -### Collapse fallbacks - -Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of meta values. - -#### Example - -Good: - -```python -description = self._html_search_meta( - ['og:description', 'description', 'twitter:description'], - webpage, 'description', default=None) -``` - -Unwieldy: - -```python -description = ( - self._og_search_description(webpage, default=None) - or self._html_search_meta('description', webpage, default=None) - or self._html_search_meta('twitter:description', webpage, default=None)) -``` - -### Trailing parentheses - -Always move trailing parentheses after the last argument. 
- -#### Example - -Correct: - -```python - lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], - list) -``` - -Incorrect: - -```python - lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], - list, -) -``` - diff --git a/ChangeLog b/ChangeLog index c650e25d5..7db147498 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.08.02 Extractors + [tvigle] Add support for HLS and DASH formats (#21967) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 04dc83605..0f7fdb23d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.07.30' +__version__ = '2019.08.02' From d9d3a5a816253f14ee33623662690293365013e0 Mon Sep 17 00:00:00 2001 From: Sergey M Date: Fri, 2 Aug 2019 05:54:56 +0700 Subject: [PATCH 488/785] [README.md] Move code from #21939 to the right place --- README.md | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/README.md b/README.md index 8c48a3012..c39b13616 100644 --- a/README.md +++ b/README.md @@ -1216,6 +1216,72 @@ Incorrect: 'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' ``` +### Inline values + +Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult. + +#### Example + +Correct: + +```python +title = self._html_search_regex(r'([^<]+)', webpage, 'title') +``` + +Incorrect: + +```python +TITLE_RE = r'([^<]+)' +# ...some lines of code... +title = self._html_search_regex(TITLE_RE, webpage, 'title') +``` + +### Collapse fallbacks + +Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns. 
+ +#### Example + +Good: + +```python +description = self._html_search_meta( + ['og:description', 'description', 'twitter:description'], + webpage, 'description', default=None) +``` + +Unwieldy: + +```python +description = ( + self._og_search_description(webpage, default=None) + or self._html_search_meta('description', webpage, default=None) + or self._html_search_meta('twitter:description', webpage, default=None)) +``` + +Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`. + +### Trailing parentheses + +Always move trailing parentheses after the last argument. + +#### Example + +Correct: + +```python + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list) +``` + +Incorrect: + +```python + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list, +) +``` + ### Use convenience conversion and parsing functions Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. 
From 995f319b0605188d145c78b88319d38b69130132 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 2 Aug 2019 18:08:26 +0100 Subject: [PATCH 489/785] [discovery] limit video data by show slug(closes #21980) --- youtube_dl/extractor/discovery.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index c4b90cd90..6287ca685 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -34,7 +34,7 @@ class DiscoveryIE(DiscoveryGoBaseIE): cookingchanneltv| motortrend ) - )\.com/tv-shows/[^/]+/(?:video|full-episode)s/(?P[^./?#]+)''' + )\.com/tv-shows/(?P[^/]+)/(?:video|full-episode)s/(?P[^./?#]+)''' _TESTS = [{ 'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry', 'info_dict': { @@ -53,13 +53,17 @@ class DiscoveryIE(DiscoveryGoBaseIE): }, { 'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road', 'only_matching': True, + }, { + # using `show_slug` is important to get the correct video data + 'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special', + 'only_matching': True, }] _GEO_COUNTRIES = ['US'] _GEO_BYPASS = False _API_BASE_URL = 'https://api.discovery.com/v1/' def _real_extract(self, url): - site, display_id = re.match(self._VALID_URL, url).groups() + site, show_slug, display_id = re.match(self._VALID_URL, url).groups() access_token = None cookies = self._get_cookies(url) @@ -91,6 +95,7 @@ class DiscoveryIE(DiscoveryGoBaseIE): display_id, 'Downloading content JSON metadata', headers=headers, query={ 'slug': display_id, + 'show_slug': show_slug, })[0] video_id = video['id'] stream = self._download_json( From 5efbc1366f4e4d9d4969cbfb404657349a5b3f99 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 2 Aug 2019 19:38:35 +0100 Subject: [PATCH 490/785] [roosterteeth] add support for watch URLs --- 
youtube_dl/extractor/roosterteeth.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py index d3eeeba62..8d88ee499 100644 --- a/youtube_dl/extractor/roosterteeth.py +++ b/youtube_dl/extractor/roosterteeth.py @@ -17,7 +17,7 @@ from ..utils import ( class RoosterTeethIE(InfoExtractor): - _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P[^/?#&]+)' _LOGIN_URL = 'https://roosterteeth.com/login' _NETRC_MACHINE = 'roosterteeth' _TESTS = [{ @@ -49,6 +49,9 @@ class RoosterTeethIE(InfoExtractor): # only available for FIRST members 'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one', 'only_matching': True, + }, { + 'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement', + 'only_matching': True, }] def _login(self): From eb9c9c74a6a2f9e13d0efaef304416b30354e5a3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 3 Aug 2019 10:29:20 +0100 Subject: [PATCH 491/785] [vimeo] fix album extraction closes #1933 closes #15704 closes #15855 closes #18967 closes #21986 --- youtube_dl/extractor/vimeo.py | 58 +++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index b5b44a79a..ddf375c6c 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -2,12 +2,14 @@ from __future__ import unicode_literals import base64 +import functools import json import re import itertools from .common import InfoExtractor from ..compat import ( + compat_kwargs, compat_HTTPError, compat_str, compat_urlparse, @@ -19,6 +21,7 @@ from ..utils import ( int_or_none, merge_dicts, NO_DEFAULT, + OnDemandPagedList, parse_filesize, qualities, 
RegexNotFoundError, @@ -98,6 +101,13 @@ class VimeoBaseInfoExtractor(InfoExtractor): webpage, 'vuid', group='vuid') return xsrft, vuid + def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs): + vimeo_config = self._search_regex( + r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', + webpage, 'vimeo config', *args, **compat_kwargs(kwargs)) + if vimeo_config: + return self._parse_json(vimeo_config, video_id) + def _set_vimeo_cookie(self, name, value): self._set_cookie('vimeo.com', name, value) @@ -253,7 +263,7 @@ class VimeoIE(VimeoBaseInfoExtractor): \. )? vimeo(?Ppro)?\.com/ - (?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) + (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) (?:.*?/)? (?: (?: @@ -580,11 +590,9 @@ class VimeoIE(VimeoBaseInfoExtractor): # and latter we extract those that are Vimeo specific. self.report_extraction(video_id) - vimeo_config = self._search_regex( - r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', webpage, - 'vimeo config', default=None) + vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None) if vimeo_config: - seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {}) + seed_status = vimeo_config.get('seed_status', {}) if seed_status.get('state') == 'failed': raise ExtractorError( '%s said: %s' % (self.IE_NAME, seed_status['title']), @@ -905,7 +913,7 @@ class VimeoUserIE(VimeoChannelIE): class VimeoAlbumIE(VimeoChannelIE): IE_NAME = 'vimeo:album' - _VALID_URL = r'https://vimeo\.com/album/(?P\d+)(?:$|[?#]|/(?!video))' + _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P\d+)(?:$|[?#]|/(?!video))' _TITLE_RE = r'
    ', webpage, 'description', default=None) or self._og_search_description(webpage) From 2906631e1230617883cdef8e227b369a9c98c9fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 1 Oct 2019 23:16:46 +0700 Subject: [PATCH 556/785] [viewlift] Fix URL matching --- youtube_dl/extractor/viewlift.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/viewlift.py b/youtube_dl/extractor/viewlift.py index 391419d9e..851ad936c 100644 --- a/youtube_dl/extractor/viewlift.py +++ b/youtube_dl/extractor/viewlift.py @@ -179,6 +179,10 @@ class ViewLiftIE(ViewLiftBaseIE): 'only_matching': True, }] + @classmethod + def suitable(cls, url): + return False if ViewLiftEmbedIE.suitable(url) else super(ViewLiftIE, cls).suitable(url) + def _real_extract(self, url): domain, display_id = re.match(self._VALID_URL, url).groups() From 74bc299453884bc4e802ca225815d3134b9510cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Oct 2019 02:03:22 +0700 Subject: [PATCH 557/785] [teachable] Skip login when already logged in (closes #22572) --- youtube_dl/extractor/teachable.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/teachable.py b/youtube_dl/extractor/teachable.py index c1a9deafe..7d2e34b3b 100644 --- a/youtube_dl/extractor/teachable.py +++ b/youtube_dl/extractor/teachable.py @@ -48,6 +48,16 @@ class TeachableBaseIE(InfoExtractor): 'https://%s/sign_in' % site, None, 'Downloading %s login page' % site) + def is_logged(webpage): + return any(re.search(p, webpage) for p in ( + r'class=["\']user-signout', + r']+\bhref=["\']/sign_out', + r'Log\s+[Oo]ut\s*<')) + + if is_logged(login_page): + self._logged_in = True + return + login_url = compat_str(urlh.geturl()) login_form = self._hidden_inputs(login_page) @@ -78,10 +88,7 @@ class TeachableBaseIE(InfoExtractor): 'Go to https://%s/ and accept.' 
% (site, site), expected=True) # Successful login - if any(re.search(p, response) for p in ( - r'class=["\']user-signout', - r']+\bhref=["\']/sign_out', - r'>\s*Log out\s*<')): + if is_logged(response): self._logged_in = True return From 25e911a968f6675a2c06f0d60a09a86972aadc40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 3 Oct 2019 00:53:07 +0700 Subject: [PATCH 558/785] [extractor/common] Make _is_valid_url more relaxed --- youtube_dl/extractor/common.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 859786617..50d48c40d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1424,12 +1424,10 @@ class InfoExtractor(object): try: self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers) return True - except ExtractorError as e: - if isinstance(e.cause, compat_urllib_error.URLError): - self.to_screen( - '%s: %s URL is invalid, skipping' % (video_id, item)) - return False - raise + except ExtractorError: + self.to_screen( + '%s: %s URL is invalid, skipping' % (video_id, item)) + return False def http_scheme(self): """ Either "http:" or "https:", depending on the user's preferences """ From aaf9d904aa77bfe60714393c0ab413c32cca8a39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 3 Oct 2019 00:55:46 +0700 Subject: [PATCH 559/785] [orf:tvthek] Make manifest requests non fatal (refs #22578) --- youtube_dl/extractor/orf.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 499be0029..3425f7602 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -86,12 +86,13 @@ class ORFTVthekIE(InfoExtractor): if value: format_id_list.append(value) format_id = '-'.join(format_id_list) - if determine_ext(fd['src']) == 'm3u8': + ext = determine_ext(src) + if ext == 'm3u8': 
formats.extend(self._extract_m3u8_formats( - fd['src'], video_id, 'mp4', m3u8_id=format_id)) - elif determine_ext(fd['src']) == 'f4m': + src, video_id, 'mp4', m3u8_id=format_id, fatal=False)) + elif ext == 'f4m': formats.extend(self._extract_f4m_formats( - fd['src'], video_id, f4m_id=format_id)) + src, video_id, f4m_id=format_id, fatal=False)) else: formats.append({ 'format_id': format_id, From 894b3826f5a2e1742010c20554a6a1b9e98a51ee Mon Sep 17 00:00:00 2001 From: sofutru <54445344+sofutru@users.noreply.github.com> Date: Fri, 4 Oct 2019 18:52:15 +0700 Subject: [PATCH 560/785] [youtube] Add support for yt.lelux.fi (#22597) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a3364a14e..6bd56f340 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -391,6 +391,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?tube\.poal\.co/| (?:www\.)?vid\.wxzm\.sx/| (?:www\.)?yt\.elukerio\.org/| + (?:www\.)?yt\.lelux\.fi/| (?:www\.)?kgg2m7yk5aybusll\.onion/| (?:www\.)?qklhadlycap4cnod\.onion/| (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/| From ca20b1304818a8d2d8eadfbe6f5387284e7ebc4d Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Fri, 4 Oct 2019 13:57:18 +0200 Subject: [PATCH 561/785] [nrktv:seriebase] Fix extraction (#22596) --- youtube_dl/extractor/nrk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 5f43e692f..60933f069 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -406,7 +406,7 @@ class NRKTVSerieBaseIE(InfoExtractor): def _extract_series(self, webpage, display_id, fatal=True): config = self._parse_json( self._search_regex( - (r'INITIAL_DATA_*\s*=\s*({.+?})\s*;', + (r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;', r'({.+?})\s*,\s*"[^"]+"\s*\)\s*'), webpage, 'config', default='{}' if not 
fatal else NO_DEFAULT), display_id, fatal=False) From 9679a62a283f1384c7572ec78f7996e1276d5d7a Mon Sep 17 00:00:00 2001 From: kr4ssi <44404263+kr4ssi@users.noreply.github.com> Date: Fri, 4 Oct 2019 13:57:51 +0200 Subject: [PATCH 562/785] [openload] Add support for oload.monster (#22592) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 1fe581780..66e38cdb4 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -246,7 +246,7 @@ class OpenloadIE(InfoExtractor): _DOMAINS = r''' (?: openload\.(?:co|io|link|pw)| - oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|online|press|pw|life|live|space|services|website|vip)| + oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|online|monster|press|pw|life|live|space|services|website|vip)| oladblock\.(?:services|xyz|me)|openloed\.co ) ''' @@ -365,6 +365,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.online/f/W8o2UfN1vNY/', 'only_matching': True, + }, { + 'url': 'https://oload.monster/f/W8o2UfN1vNY/', + 'only_matching': True, }, { 'url': 'https://oload.press/embed/drTBl1aOTvk/', 'only_matching': True, From 76e510b92c4a1c4b0001f892504ba2cbb4b8d486 Mon Sep 17 00:00:00 2001 From: sofutru <54445344+sofutru@users.noreply.github.com> Date: Fri, 4 Oct 2019 19:01:03 +0700 Subject: [PATCH 563/785] [youtube] Remove support for invidious.enkirton.net (#22543) --- youtube_dl/extractor/youtube.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6bd56f340..5e397324b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -383,7 +383,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:(?:www|no)\.)?invidiou\.sh/| (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/| (?:www\.)?invidious\.kabi\.tk/| - 
(?:www\.)?invidious\.enkirton\.net/| (?:www\.)?invidious\.13ad\.de/| (?:www\.)?invidious\.mastodon\.host/| (?:www\.)?invidious\.nixnet\.xyz/| From 4e72d02f39f0d8e9ae9bbe8233c157bef3b58bdf Mon Sep 17 00:00:00 2001 From: Stephan Date: Fri, 4 Oct 2019 14:05:35 +0200 Subject: [PATCH 564/785] [xvideos] Extend _VALID_URL (#22471) --- youtube_dl/extractor/xvideos.py | 39 ++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 166bcf443..8fc64914c 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -17,7 +17,8 @@ class XVideosIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - (?:www\.)?xvideos\.com/video| + (?:[^/]+\.)?xvideos2?\.com/video| + (?:www\.)?xvideos\.es/video| flashservice\.xvideos\.com/embedframe/| static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video= ) @@ -39,6 +40,42 @@ class XVideosIE(InfoExtractor): }, { 'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838', 'only_matching': True, + }, { + 'url': 'http://xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'https://xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'https://xvideos.es/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'https://www.xvideos.es/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'http://xvideos.es/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'http://www.xvideos.es/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'http://fr.xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'https://fr.xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'http://it.xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 
'https://it.xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'http://de.xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'https://de.xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True }] def _real_extract(self, url): From c2915de82e0ee793330d553899347ec54a4b834e Mon Sep 17 00:00:00 2001 From: Patrice Levesque Date: Fri, 4 Oct 2019 08:14:31 -0400 Subject: [PATCH 565/785] [telequebec] Add support for coucou.telequebec.tv (#22482) --- youtube_dl/extractor/telequebec.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py index 6965c127b..911385d01 100644 --- a/youtube_dl/extractor/telequebec.py +++ b/youtube_dl/extractor/telequebec.py @@ -22,7 +22,13 @@ class TeleQuebecBaseIE(InfoExtractor): class TeleQuebecIE(TeleQuebecBaseIE): - _VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P\d+)' + _VALID_URL = r'''(?x) + https?:// + (?: + zonevideo\.telequebec\.tv/media| + coucou\.telequebec\.tv/videos + )/(?P\d+) + ''' _TESTS = [{ # available till 01.01.2023 'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane', @@ -41,6 +47,9 @@ class TeleQuebecIE(TeleQuebecBaseIE): # no description 'url': 'http://zonevideo.telequebec.tv/media/30261', 'only_matching': True, + }, { + 'url': 'https://coucou.telequebec.tv/videos/41788/idee-de-genie/l-heure-du-bain', + 'only_matching': True, }] def _real_extract(self, url): From b64045cd2a564bb44ef917803678ca362f412eb4 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 4 Oct 2019 13:17:16 +0100 Subject: [PATCH 566/785] [peertube] Update instances (#22414) --- youtube_dl/extractor/peertube.py | 397 +++++++++++++++++++++++++++---- 1 file changed, 347 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/peertube.py 
b/youtube_dl/extractor/peertube.py index b50543e32..d3a83ea2b 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -18,81 +18,385 @@ from ..utils import ( class PeerTubeIE(InfoExtractor): _INSTANCES_RE = r'''(?: # Taken from https://instances.joinpeertube.org/instances + peertube\.rainbowswingers\.net| + tube\.stanisic\.nl| + peer\.suiri\.us| + medias\.libox\.fr| + videomensoif\.ynh\.fr| + peertube\.travelpandas\.eu| + peertube\.rachetjay\.fr| + peertube\.montecsys\.fr| + tube\.eskuero\.me| + peer\.tube| + peertube\.umeahackerspace\.se| + tube\.nx-pod\.de| + video\.monsieurbidouille\.fr| tube\.openalgeria\.org| - peertube\.pointsecu\.fr| + vid\.lelux\.fi| + video\.anormallostpod\.ovh| + tube\.crapaud-fou\.org| + peertube\.stemy\.me| + lostpod\.space| + exode\.me| + peertube\.snargol\.com| + vis\.ion\.ovh| + videosdulib\.re| + v\.mbius\.io| + videos\.judrey\.eu| + peertube\.osureplayviewer\.xyz| + peertube\.mathieufamily\.ovh| + www\.videos-libr\.es| + fightforinfo\.com| + peertube\.fediverse\.ru| + peertube\.oiseauroch\.fr| + video\.nesven\.eu| + v\.bearvideo\.win| + video\.qoto\.org| + justporn\.cc| + video\.vny\.fr| + peervideo\.club| + tube\.taker\.fr| + peertube\.chantierlibre\.org| + tube\.ipfixe\.info| + tube\.kicou\.info| + tube\.dodsorf\.as| + videobit\.cc| + video\.yukari\.moe| + videos\.elbinario\.net| + hkvideo\.live| + pt\.tux\.tf| + www\.hkvideo\.live| + FIGHTFORINFO\.com| + pt\.765racing\.com| + peertube\.gnumeria\.eu\.org| + nordenmedia\.com| + peertube\.co\.uk| + tube\.darfweb\.eu| + tube\.kalah-france\.org| + 0ch\.in| + vod\.mochi\.academy| + film\.node9\.org| + peertube\.hatthieves\.es| + video\.fitchfamily\.org| + peertube\.ddns\.net| + video\.ifuncle\.kr| + video\.fdlibre\.eu| + tube\.22decembre\.eu| + peertube\.harmoniescreatives\.com| + tube\.fabrigli\.fr| + video\.thedwyers\.co| + video\.bruitbruit\.com| + peertube\.foxfam\.club| + peer\.philoxweb\.be| + videos\.bugs\.social| + peertube\.malbert\.xyz| + 
peertube\.bilange\.ca| + libretube\.net| + diytelevision\.com| + peertube\.fedilab\.app| + libre\.video| + video\.mstddntfdn\.online| + us\.tv| + peertube\.sl-network\.fr| + peertube\.dynlinux\.io| + peertube\.david\.durieux\.family| + peertube\.linuxrocks\.online| + peerwatch\.xyz| + v\.kretschmann\.social| + tube\.otter\.sh| + yt\.is\.nota\.live| + tube\.dragonpsi\.xyz| + peertube\.boneheadmedia\.com| + videos\.funkwhale\.audio| + watch\.44con\.com| + peertube\.gcaillaut\.fr| + peertube\.icu| + pony\.tube| + spacepub\.space| + tube\.stbr\.io| + v\.mom-gay\.faith| + tube\.port0\.xyz| + peertube\.simounet\.net| + play\.jergefelt\.se| + peertube\.zeteo\.me| + tube\.danq\.me| + peertube\.kerenon\.com| + tube\.fab-l3\.org| + tube\.calculate\.social| + peertube\.mckillop\.org| + tube\.netzspielplatz\.de| + vod\.ksite\.de| + peertube\.laas\.fr| + tube\.govital\.net| + peertube\.stephenson\.cc| + bistule\.nohost\.me| + peertube\.kajalinifi\.de| + video\.ploud\.jp| + video\.omniatv\.com| + peertube\.ffs2play\.fr| + peertube\.leboulaire\.ovh| + peertube\.tronic-studio\.com| + peertube\.public\.cat| + peertube\.metalbanana\.net| + video\.1000i100\.fr| + peertube\.alter-nativ-voll\.de| + tube\.pasa\.tf| + tube\.worldofhauru\.xyz| + pt\.kamp\.site| + peertube\.teleassist\.fr| + videos\.mleduc\.xyz| + conf\.tube| + media\.privacyinternational\.org| + pt\.forty-two\.nl| + video\.halle-leaks\.de| + video\.grosskopfgames\.de| + peertube\.schaeferit\.de| + peertube\.jackbot\.fr| + tube\.extinctionrebellion\.fr| + peertube\.f-si\.org| + video\.subak\.ovh| + videos\.koweb\.fr| + peertube\.zergy\.net| + peertube\.roflcopter\.fr| + peertube\.floss-marketing-school\.com| + vloggers\.social| + peertube\.iriseden\.eu| + videos\.ubuntu-paris\.org| + peertube\.mastodon\.host| + armstube\.com| + peertube\.s2s\.video| + peertube\.lol| + tube\.open-plug\.eu| + open\.tube| + peertube\.ch| + peertube\.normandie-libre\.fr| + peertube\.slat\.org| + video\.lacaveatonton\.ovh| + peertube\.uno| + 
peertube\.servebeer\.com| + peertube\.fedi\.quebec| + tube\.h3z\.jp| + tube\.plus200\.com| + peertube\.eric\.ovh| + tube\.metadocs\.cc| + tube\.unmondemeilleur\.eu| + gouttedeau\.space| + video\.antirep\.net| + nrop\.cant\.at| + tube\.ksl-bmx\.de| + tube\.plaf\.fr| + tube\.tchncs\.de| + video\.devinberg\.com| + hitchtube\.fr| + peertube\.kosebamse\.com| + yunopeertube\.myddns\.me| + peertube\.varney\.fr| + peertube\.anon-kenkai\.com| + tube\.maiti\.info| + tubee\.fr| + videos\.dinofly\.com| + toobnix\.org| + videotape\.me| + voca\.tube| + video\.heromuster\.com| + video\.lemediatv\.fr| + video\.up\.edu\.ph| + balafon\.video| + video\.ivel\.fr| + thickrips\.cloud| + pt\.laurentkruger\.fr| + video\.monarch-pass\.net| + peertube\.artica\.center| + video\.alternanet\.fr| + indymotion\.fr| + fanvid\.stopthatimp\.net| + video\.farci\.org| + v\.lesterpig\.com| + video\.okaris\.de| + tube\.pawelko\.net| + peertube\.mablr\.org| + tube\.fede\.re| + pytu\.be| + evertron\.tv| + devtube\.dev-wiki\.de| + raptube\.antipub\.org| + video\.selea\.se| + peertube\.mygaia\.org| + video\.oh14\.de| + peertube\.livingutopia\.org| + peertube\.the-penguin\.de| + tube\.thechangebook\.org| + tube\.anjara\.eu| + pt\.pube\.tk| + video\.samedi\.pm| + mplayer\.demouliere\.eu| + widemus\.de| + peertube\.me| + peertube\.zapashcanon\.fr| + video\.latavernedejohnjohn\.fr| + peertube\.pcservice46\.fr| + peertube\.mazzonetto\.eu| + video\.irem\.univ-paris-diderot\.fr| + video\.livecchi\.cloud| + alttube\.fr| + video\.coop\.tools| + video\.cabane-libre\.org| + peertube\.openstreetmap\.fr| + videos\.alolise\.org| + irrsinn\.video| + video\.antopie\.org| + scitech\.video| + tube2\.nemsia\.org| + video\.amic37\.fr| + peertube\.freeforge\.eu| + video\.arbitrarion\.com| + video\.datsemultimedia\.com| + stoptrackingus\.tv| + peertube\.ricostrongxxx\.com| + docker\.videos\.lecygnenoir\.info| + peertube\.togart\.de| + tube\.postblue\.info| + videos\.domainepublic\.net| + peertube\.cyber-tribal\.com| + 
video\.gresille\.org| + peertube\.dsmouse\.net| + cinema\.yunohost\.support| + tube\.theocevaer\.fr| + repro\.video| + tube\.4aem\.com| + quaziinc\.com| + peertube\.metawurst\.space| + videos\.wakapo\.com| + video\.ploud\.fr| + video\.freeradical\.zone| + tube\.valinor\.fr| + refuznik\.video| + pt\.kircheneuenburg\.de| + peertube\.asrun\.eu| + peertube\.lagob\.fr| + videos\.side-ways\.net| + 91video\.online| + video\.valme\.io| + video\.taboulisme\.com| + videos-libr\.es| + tv\.mooh\.fr| + nuage\.acostey\.fr| + video\.monsieur-a\.fr| + peertube\.librelois\.fr| + videos\.pair2jeux\.tube| + videos\.pueseso\.club| + peer\.mathdacloud\.ovh| + media\.assassinate-you\.net| + vidcommons\.org| + ptube\.rousset\.nom\.fr| + tube\.cyano\.at| + videos\.squat\.net| + video\.iphodase\.fr| + peertube\.makotoworkshop\.org| + peertube\.serveur\.slv-valbonne\.fr| + vault\.mle\.party| + hostyour\.tv| + videos\.hack2g2\.fr| + libre\.tube| + pire\.artisanlogiciel\.net| + videos\.numerique-en-commun\.fr| + video\.netsyms\.com| + video\.die-partei\.social| + video\.writeas\.org| + peertube\.swarm\.solvingmaz\.es| + tube\.pericoloso\.ovh| + watching\.cypherpunk\.observer| + videos\.adhocmusic\.com| + tube\.rfc1149\.net| + peertube\.librelabucm\.org| + videos\.numericoop\.fr| + peertube\.koehn\.com| + peertube\.anarchmusicall\.net| + tube\.kampftoast\.de| + vid\.y-y\.li| + peertube\.xtenz\.xyz| + diode\.zone| + tube\.egf\.mn| + peertube\.nomagic\.uk| + visionon\.tv| + videos\.koumoul\.com| + video\.rastapuls\.com| + video\.mantlepro\.com| + video\.deadsuperhero\.com| + peertube\.musicstudio\.pro| + peertube\.we-keys\.fr| + artitube\.artifaille\.fr| + peertube\.ethernia\.net| + tube\.midov\.pl| + peertube\.fr| + watch\.snoot\.tube| + peertube\.donnadieu\.fr| + argos\.aquilenet\.fr| + tube\.nemsia\.org| + tube\.bruniau\.net| + videos\.darckoune\.moe| + tube\.traydent\.info| + dev\.videos\.lecygnenoir\.info| + peertube\.nayya\.org| + peertube\.live| + peertube\.mofgao\.space| + 
video\.lequerrec\.eu| + peertube\.amicale\.net| + aperi\.tube| + tube\.ac-lyon\.fr| + video\.lw1\.at| + www\.yiny\.org| + videos\.pofilo\.fr| + tube\.lou\.lt| + choob\.h\.etbus\.ch| + tube\.hoga\.fr| + peertube\.heberge\.fr| + video\.obermui\.de| + videos\.cloudfrancois\.fr| + betamax\.video| + video\.typica\.us| + tube\.piweb\.be| + video\.blender\.org| + peertube\.cat| + tube\.kdy\.ch| + pe\.ertu\.be| + peertube\.social| + videos\.lescommuns\.org| + tv\.datamol\.org| + videonaute\.fr| + dialup\.express| peertube\.nogafa\.org| - peertube\.pl| megatube\.lilomoino\.fr| peertube\.tamanoir\.foucry\.net| - peertube\.inapurna\.org| - peertube\.netzspielplatz\.de| - video\.deadsuperhero\.com| peertube\.devosi\.org| peertube\.1312\.media| - tube\.worldofhauru\.xyz| tube\.bootlicker\.party| skeptikon\.fr| - peertube\.geekshell\.fr| - tube\.opportunis\.me| - peertube\.peshane\.net| video\.blueline\.mg| tube\.homecomputing\.fr| - videos\.cloudfrancois\.fr| - peertube\.viviers-fibre\.net| tube\.ouahpiti\.info| video\.tedomum\.net| video\.g3l\.org| fontube\.fr| peertube\.gaialabs\.ch| - peertube\.extremely\.online| - peertube\.public-infrastructure\.eu| tube\.kher\.nl| peertube\.qtg\.fr| - tube\.22decembre\.eu| - facegirl\.me| video\.migennes\.net| - janny\.moe| tube\.p2p\.legal| - video\.atlanti\.se| troll\.tv| - peertube\.geekael\.fr| - vid\.leotindall\.com| - video\.anormallostpod\.ovh| - p-tube\.h3z\.jp| - tube\.darfweb\.eu| videos\.iut-orsay\.fr| peertube\.solidev\.net| - videos\.symphonie-of-code\.fr| - testtube\.ortg\.de| videos\.cemea\.org| - peertube\.gwendalavir\.eu| video\.passageenseine\.fr| videos\.festivalparminous\.org| peertube\.touhoppai\.moe| - peertube\.duckdns\.org| sikke\.fi| - peertube\.mastodon\.host| - firedragonvideos\.com| - vidz\.dou\.bet| - peertube\.koehn\.com| peer\.hostux\.social| share\.tube| peertube\.walkingmountains\.fr| - medias\.libox\.fr| - peertube\.moe| - peertube\.xyz| - jp\.peertube\.network| videos\.benpro\.fr| - tube\.otter\.sh| - 
peertube\.angristan\.xyz| peertube\.parleur\.net| - peer\.ecutsa\.fr| peertube\.heraut\.eu| - peertube\.tifox\.fr| - peertube\.maly\.io| - vod\.mochi\.academy| - exode\.me| - coste\.video| tube\.aquilenet\.fr| peertube\.gegeweb\.eu| framatube\.org| @@ -100,18 +404,11 @@ class PeerTubeIE(InfoExtractor): tube\.conferences-gesticulees\.net| peertube\.datagueule\.tv| video\.lqdn\.fr| - meilleurtube\.delire\.party| tube\.mochi\.academy| - peertube\.dav\.li| media\.zat\.im| - pytu\.be| - peertube\.valvin\.fr| - peertube\.nsa\.ovh| video\.colibris-outilslibres\.org| - video\.hispagatos\.org| tube\.svnet\.fr| peertube\.video| - videos\.lecygnenoir\.info| peertube3\.cpy\.re| peertube2\.cpy\.re| videos\.tcit\.fr| @@ -126,7 +423,7 @@ class PeerTubeIE(InfoExtractor): (?P%s) ''' % (_INSTANCES_RE, _UUID_RE) _TESTS = [{ - 'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c', + 'url': 'https://peertube.cpy.re/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c', 'md5': '80f24ff364cc9d333529506a263e7feb', 'info_dict': { 'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c', From fd4db1ebc231b65bea91add4cd55ce564b05eee3 Mon Sep 17 00:00:00 2001 From: axelerometer <54915681+axelerometer@users.noreply.github.com> Date: Fri, 4 Oct 2019 15:22:01 +0300 Subject: [PATCH 567/785] [chaturbate] Extend _VALID_URL (#22309) --- youtube_dl/extractor/chaturbate.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py index e2b828d8a..656e715ae 100644 --- a/youtube_dl/extractor/chaturbate.py +++ b/youtube_dl/extractor/chaturbate.py @@ -7,7 +7,7 @@ from ..utils import ExtractorError class ChaturbateIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?P[^/?#]+)' + _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P[^/?&#]+)' _TESTS = [{ 'url': 'https://www.chaturbate.com/siswet19/', 'info_dict': { @@ -21,6 +21,9 @@ class 
ChaturbateIE(InfoExtractor): 'skip_download': True, }, 'skip': 'Room is offline', + }, { + 'url': 'https://chaturbate.com/fullvideo/?b=caylin', + 'only_matching': True, }, { 'url': 'https://en.chaturbate.com/siswet19/', 'only_matching': True, @@ -32,7 +35,8 @@ class ChaturbateIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - url, video_id, headers=self.geo_verification_headers()) + 'https://chaturbate.com/%s/' % video_id, video_id, + headers=self.geo_verification_headers()) m3u8_urls = [] From 0b87beefe60fb6ae52529603fd5826364146dfb7 Mon Sep 17 00:00:00 2001 From: Anh Nhan Nguyen Date: Fri, 4 Oct 2019 14:27:58 +0200 Subject: [PATCH 568/785] [gfycat] Extend _VALID_URL (#22225) --- youtube_dl/extractor/gfycat.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py index bbe3cb283..18a30fe67 100644 --- a/youtube_dl/extractor/gfycat.py +++ b/youtube_dl/extractor/gfycat.py @@ -11,7 +11,7 @@ from ..utils import ( class GfycatIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P[^-/?#]+)' + _VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P[^-/?#\.]+)' _TESTS = [{ 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', 'info_dict': { @@ -53,6 +53,12 @@ class GfycatIE(InfoExtractor): }, { 'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball', 'only_matching': True + }, { + 'url': 'https://thumbs.gfycat.com/acceptablehappygoluckyharborporpoise-size_restricted.gif', + 'only_matching': True + }, { + 'url': 'https://giant.gfycat.com/acceptablehappygoluckyharborporpoise.mp4', + 'only_matching': True }] def _real_extract(self, url): From 3a37f2c3be16bb75a12d0617b5bc80ee6cab0f61 Mon Sep 17 00:00:00 2001 From: bitraid Date: Fri, 4 Oct 2019 15:48:20 +0300 Subject: [PATCH 569/785] [wimp] Remove extractor (closes #22088) (#22091) --- 
youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/wimp.py | 54 ------------------------------ 2 files changed, 55 deletions(-) delete mode 100644 youtube_dl/extractor/wimp.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 44120cae2..a2d6e5314 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1413,7 +1413,6 @@ from .weibo import ( WeiboMobileIE ) from .weiqitv import WeiqiTVIE -from .wimp import WimpIE from .wistia import WistiaIE from .worldstarhiphop import WorldStarHipHopIE from .wsj import ( diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py deleted file mode 100644 index ea234e3c5..000000000 --- a/youtube_dl/extractor/wimp.py +++ /dev/null @@ -1,54 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from .youtube import YoutubeIE - - -class WimpIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P[^/]+)' - _TESTS = [{ - 'url': 'http://www.wimp.com/maru-is-exhausted/', - 'md5': 'ee21217ffd66d058e8b16be340b74883', - 'info_dict': { - 'id': 'maru-is-exhausted', - 'ext': 'mp4', - 'title': 'Maru is exhausted.', - 'description': 'md5:57e099e857c0a4ea312542b684a869b8', - } - }, { - 'url': 'http://www.wimp.com/clowncar/', - 'md5': '5c31ad862a90dc5b1f023956faec13fe', - 'info_dict': { - 'id': 'cG4CEr2aiSg', - 'ext': 'webm', - 'title': 'Basset hound clown car...incredible!', - 'description': '5 of my Bassets crawled in this dog loo! 
www.bellinghambassets.com\n\nFor licensing/usage please contact: licensing(at)jukinmediadotcom', - 'upload_date': '20140303', - 'uploader': 'Gretchen Hoey', - 'uploader_id': 'gretchenandjeff1', - }, - 'add_ie': ['Youtube'], - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - youtube_id = self._search_regex( - (r"videoId\s*:\s*[\"']([0-9A-Za-z_-]{11})[\"']", - r'data-id=["\']([0-9A-Za-z_-]{11})'), - webpage, 'video URL', default=None) - if youtube_id: - return self.url_result(youtube_id, YoutubeIE.ie_key()) - - info_dict = self._extract_jwplayer_data( - webpage, video_id, require_title=False) - - info_dict.update({ - 'id': video_id, - 'title': self._og_search_title(webpage), - 'description': self._og_search_description(webpage), - }) - - return info_dict From 05446d483d089d0bc7fa3037900dadc856d3e687 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Oct 2019 20:14:45 +0700 Subject: [PATCH 570/785] [telequebec:squat] Add support for squat.telequebec.tv (closes #18503) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/telequebec.py | 47 ++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a2d6e5314..8d3e433c3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1129,6 +1129,7 @@ from .telegraaf import TelegraafIE from .telemb import TeleMBIE from .telequebec import ( TeleQuebecIE, + TeleQuebecSquatIE, TeleQuebecEmissionIE, TeleQuebecLiveIE, ) diff --git a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py index 911385d01..ae9f66787 100644 --- a/youtube_dl/extractor/telequebec.py +++ b/youtube_dl/extractor/telequebec.py @@ -7,6 +7,7 @@ from ..utils import ( int_or_none, smuggle_url, try_get, + unified_timestamp, ) @@ -70,6 +71,52 @@ class TeleQuebecIE(TeleQuebecBaseIE): return info +class 
TeleQuebecSquatIE(InfoExtractor): + _VALID_URL = r'https://squat\.telequebec\.tv/videos/(?P\d+)' + _TESTS = [{ + 'url': 'https://squat.telequebec.tv/videos/9314', + 'info_dict': { + 'id': 'd59ae78112d542e793d83cc9d3a5b530', + 'ext': 'mp4', + 'title': 'Poupeflekta', + 'description': 'md5:2f0718f8d2f8fece1646ee25fb7bce75', + 'duration': 1351, + 'timestamp': 1569057600, + 'upload_date': '20190921', + 'series': 'Miraculous : Les Aventures de Ladybug et Chat Noir', + 'season': 'Saison 3', + 'season_number': 3, + 'episode_number': 57, + }, + 'params': { + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_json( + 'https://squat.api.telequebec.tv/v1/videos/%s' % video_id, + video_id) + + media_id = video['sourceId'] + + return { + '_type': 'url_transparent', + 'url': 'http://zonevideo.telequebec.tv/media/%s' % media_id, + 'ie_key': TeleQuebecIE.ie_key(), + 'id': media_id, + 'title': video.get('titre'), + 'description': video.get('description'), + 'timestamp': unified_timestamp(video.get('datePublication')), + 'series': video.get('container'), + 'season': video.get('saison'), + 'season_number': int_or_none(video.get('noSaison')), + 'episode_number': int_or_none(video.get('episode')), + } + + class TeleQuebecEmissionIE(TeleQuebecBaseIE): _VALID_URL = r'''(?x) https?:// From 4bf568d36cf516b38e4634e07bd8b4c3d33324f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 5 Oct 2019 21:43:31 +0700 Subject: [PATCH 571/785] [pornhub:uservideos:upload] Fix extraction (closes #22619) --- youtube_dl/extractor/pornhub.py | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 11b8cfcf7..ba0ad7da2 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -403,6 +403,15 @@ class PornHubUserIE(PornHubPlaylistBaseIE): class 
PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): + @staticmethod + def _has_more(webpage): + return re.search( + r'''(?x) + ]+\bclass=["\']page_next| + ]+\brel=["\']next| + ]+\bid=["\']moreDataBtn + ''', webpage) is not None + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) host = mobj.group('host') @@ -411,13 +420,11 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): page = int_or_none(self._search_regex( r'\bpage=(\d+)', url, 'page', default=None)) - page_url = self._make_page_url(url) - entries = [] for page_num in (page, ) if page is not None else itertools.count(1): try: webpage = self._download_webpage( - page_url, item_id, 'Downloading page %d' % page_num, + url, item_id, 'Downloading page %d' % page_num, query={'page': page_num}) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: @@ -547,18 +554,6 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) else super(PornHubPagedVideoListIE, cls).suitable(url)) - def _make_page_url(self, url): - return url - - @staticmethod - def _has_more(webpage): - return re.search( - r'''(?x) - ]+\bclass=["\']page_next| - ]+\brel=["\']next| - ]+\bid=["\']moreDataBtn - ''', webpage) is not None - class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos/upload)' @@ -572,11 +567,3 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', 'only_matching': True, }] - - def _make_page_url(self, url): - mobj = re.match(self._VALID_URL, url) - return '%s/ajax' % mobj.group('url') - - @staticmethod - def _has_more(webpage): - return True From 560d3b7d7c86a0bfff36d59cb977fd3c01b10ad8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 5 Oct 2019 
22:04:49 +0700 Subject: [PATCH 572/785] [redtube] Improve metadata extraction (closes #22492, closes #22615) --- youtube_dl/extractor/redtube.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 10311a81a..5c84028ef 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, + merge_dicts, str_to_int, unified_strdate, url_or_none, @@ -45,11 +46,14 @@ class RedTubeIE(InfoExtractor): if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']): raise ExtractorError('Video %s has been removed' % video_id, expected=True) - title = self._html_search_regex( - (r']+class="(?:video_title_text|videoTitle)[^"]*">(?P(?:(?!\1).)+)</h\1>', - r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',), - webpage, 'title', group='title', - default=None) or self._og_search_title(webpage) + info = self._search_json_ld(webpage, video_id, default={}) + + if not info.get('title'): + info['title'] = self._html_search_regex( + (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>', + r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',), + webpage, 'title', group='title', + default=None) or self._og_search_title(webpage) formats = [] sources = self._parse_json( @@ -88,28 +92,28 @@ class RedTubeIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) upload_date = unified_strdate(self._search_regex( - r'<span[^>]+>ADDED ([^<]+)<', - webpage, 'upload date', fatal=False)) + r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<', + webpage, 'upload date', default=None)) duration = int_or_none(self._og_search_property( 'video:duration', webpage, default=None) or self._search_regex( r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None)) view_count = 
str_to_int(self._search_regex( (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)', - r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)'), - webpage, 'view count', fatal=False)) + r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)', + r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'), + webpage, 'view count', default=None)) # No self-labeling, but they describe themselves as # "Home of Videos Porno" age_limit = 18 - return { + return merge_dicts(info, { 'id': video_id, 'ext': 'mp4', - 'title': title, 'thumbnail': thumbnail, 'upload_date': upload_date, 'duration': duration, 'view_count': view_count, 'age_limit': age_limit, 'formats': formats, - } + }) From d4bb825b83a87813f54d007febd79d2f3dcee7b9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 9 Oct 2019 11:07:46 +0100 Subject: [PATCH 573/785] [globo] fix format extraction(closes #20319) --- youtube_dl/extractor/globo.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index fb8f7679b..b9c400a57 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -96,6 +96,8 @@ class GloboIE(InfoExtractor): video = self._download_json( 'http://api.globovideos.com/videos/%s/playlist' % video_id, video_id)['videos'][0] + if video.get('encrypted') is True: + raise ExtractorError('This video is DRM protected.', expected=True) title = video['title'] @@ -109,8 +111,8 @@ class GloboIE(InfoExtractor): security = self._download_json( 'http://security.video.globo.com/videos/%s/hash' % video_id, video_id, 'Downloading security hash for %s' % resource_id, query={ - 'player': 'flash', - 'version': '17.0.0.132', + 'player': 'desktop', + 'version': '5.19.1', 'resource_id': resource_id, }) @@ -122,19 +124,18 @@ class GloboIE(InfoExtractor): '%s returned error: %s' % (self.IE_NAME, message), expected=True) continue - hash_code = security_hash[:2] - received_time = 
security_hash[2:12] - received_random = security_hash[12:22] - received_md5 = security_hash[22:] + assert security_hash[:2] in ('04', '14') + received_time = security_hash[3:13] + received_md5 = security_hash[24:] sign_time = compat_str(int(received_time) + 86400) padding = '%010d' % random.randint(1, 10000000000) - md5_data = (received_md5 + sign_time + padding + '0xFF01DD').encode() + md5_data = (received_md5 + sign_time + padding + '0xAC10FD').encode() signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=') - signed_hash = hash_code + received_time + received_random + sign_time + padding + signed_md5 + signed_hash = security_hash[:23] + sign_time + padding + signed_md5 - signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash') + signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '') if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'): formats.extend(self._extract_m3u8_formats( signed_url, resource_id, 'mp4', entry_protocol='m3u8_native', From 1907f06e7b0689840b75810e5ad2683581f83924 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 10 Oct 2019 00:11:41 +0700 Subject: [PATCH 574/785] [kaltura] Fix embed info strip (refs #22658) --- youtube_dl/extractor/kaltura.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 0a733424c..1c486c038 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -151,7 +151,8 @@ class KalturaIE(InfoExtractor): if mobj: embed_info = mobj.groupdict() for k, v in embed_info.items(): - embed_info[k] = v.strip() + if v: + embed_info[k] = v.strip() url = 'kaltura:%(partner_id)s:%(id)s' % embed_info escaped_pid = re.escape(embed_info['partner_id']) service_url = re.search( From 07b50f616e407c8b7b2c183298acbb58e2ddf09b Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 10 Oct 2019 00:24:03 +0700 Subject: [PATCH 575/785] [kaltura] Fix service URL extraction (closes #22658) --- youtube_dl/extractor/kaltura.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 1c486c038..2d38b758b 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -155,11 +155,11 @@ class KalturaIE(InfoExtractor): embed_info[k] = v.strip() url = 'kaltura:%(partner_id)s:%(id)s' % embed_info escaped_pid = re.escape(embed_info['partner_id']) - service_url = re.search( - r'<script[^>]+src=["\']((?:https?:)?//.+?)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid), + service_mobj = re.search( + r'<script[^>]+src=(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid), webpage) - if service_url: - url = smuggle_url(url, {'service_url': service_url.group(1)}) + if service_mobj: + url = smuggle_url(url, {'service_url': service_mobj.group('id')}) return url def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs): From 2765c47a8c4e7154fa0a9be0bb63f3bcba592b10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 10 Oct 2019 03:40:01 +0700 Subject: [PATCH 576/785] [promptfile] Remove extractor (closes #6239) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/promptfile.py | 70 ------------------------------ 2 files changed, 71 deletions(-) delete mode 100644 youtube_dl/extractor/promptfile.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8d3e433c3..f393683da 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -893,7 +893,6 @@ from .puhutv import ( PuhuTVSerieIE, ) from .presstv import PressTVIE -from .promptfile import PromptFileIE from .prosiebensat1 import 
ProSiebenSat1IE from .puls4 import Puls4IE from .pyvideo import PyvideoIE diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py deleted file mode 100644 index 23ac93d7e..000000000 --- a/youtube_dl/extractor/promptfile.py +++ /dev/null @@ -1,70 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - ExtractorError, - urlencode_postdata, -) - - -class PromptFileIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)' - _TEST = { - 'url': 'http://www.promptfile.com/l/86D1CE8462-576CAAE416', - 'md5': '5a7e285a26e0d66d9a263fae91bc92ce', - 'info_dict': { - 'id': '86D1CE8462-576CAAE416', - 'ext': 'mp4', - 'title': 'oceans.mp4', - 'thumbnail': r're:^https?://.*\.jpg$', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - if re.search(r'<div.+id="not_found_msg".+>(?!We are).+</div>[^-]', webpage) is not None: - raise ExtractorError('Video %s does not exist' % video_id, - expected=True) - - chash = self._search_regex( - r'val\("([^"]*)"\s*\+\s*\$\("#chash"\)', webpage, 'chash') - fields = self._hidden_inputs(webpage) - keys = list(fields.keys()) - chash_key = keys[0] if len(keys) == 1 else next( - key for key in keys if key.startswith('cha')) - fields[chash_key] = chash + fields[chash_key] - - webpage = self._download_webpage( - url, video_id, 'Downloading video page', - data=urlencode_postdata(fields), - headers={'Content-type': 'application/x-www-form-urlencoded'}) - - video_url = self._search_regex( - (r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*Download File', - r'<a[^>]+href=(["\'])(?P<url>https?://(?:www\.)?promptfile\.com/file/(?:(?!\1).)+)\1'), - webpage, 'video url', group='url') - title = self._html_search_regex( - r'<span.+title="([^"]+)">', webpage, 'title') - thumbnail = self._html_search_regex( - r'<div 
id="player_overlay">.*button>.*?<img src="([^"]+)"', - webpage, 'thumbnail', fatal=False, flags=re.DOTALL) - - formats = [{ - 'format_id': 'sd', - 'url': video_url, - 'ext': determine_ext(title), - }] - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - } From c317b6163b294f4cdc2d1dff96e1a63da1bae910 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 10 Oct 2019 00:01:37 +0100 Subject: [PATCH 577/785] [vessel] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/generic.py | 6 -- youtube_dl/extractor/vessel.py | 157 ----------------------------- 3 files changed, 164 deletions(-) delete mode 100644 youtube_dl/extractor/vessel.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f393683da..7a1e0dad6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1282,7 +1282,6 @@ from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veehd import VeeHDIE from .veoh import VeohIE -from .vessel import VesselIE from .vesti import VestiIE from .vevo import ( VevoIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index d1725d98b..ec43c5ae4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -77,7 +77,6 @@ from .instagram import InstagramIE from .liveleak import LiveLeakIE from .threeqsdn import ThreeQSDNIE from .theplatform import ThePlatformIE -from .vessel import VesselIE from .kaltura import KalturaIE from .eagleplatform import EaglePlatformIE from .facebook import FacebookIE @@ -2491,11 +2490,6 @@ class GenericIE(InfoExtractor): if tp_urls: return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform') - # Look for Vessel embeds - vessel_urls = VesselIE._extract_urls(webpage) - if vessel_urls: - return self.playlist_from_matches(vessel_urls, video_id, video_title, 
ie=VesselIE.ie_key()) - # Look for embedded rtl.nl player matches = re.findall( r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"', diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py deleted file mode 100644 index 31eee0ba7..000000000 --- a/youtube_dl/extractor/vessel.py +++ /dev/null @@ -1,157 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - parse_iso8601, - sanitized_Request, -) - - -class VesselIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z-_]+)' - _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s' - _LOGIN_URL = 'https://www.vessel.com/api/account/login' - _NETRC_MACHINE = 'vessel' - _TESTS = [{ - 'url': 'https://www.vessel.com/videos/HDN7G5UMs', - 'md5': '455cdf8beb71c6dd797fd2f3818d05c4', - 'info_dict': { - 'id': 'HDN7G5UMs', - 'ext': 'mp4', - 'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20150317', - 'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?', - 'timestamp': int, - }, - }, { - 'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346', - 'only_matching': True, - }, { - 'url': 'https://www.vessel.com/videos/F01_dsLj1', - 'only_matching': True, - }, { - 'url': 'https://www.vessel.com/videos/RRX-sir-J', - 'only_matching': True, - }] - - @staticmethod - def _extract_urls(webpage): - return [url for _, url in re.findall( - r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z-_]+.*?)\1', - webpage)] - - @staticmethod - def make_json_request(url, data): - payload = json.dumps(data).encode('utf-8') - req = sanitized_Request(url, payload) - req.add_header('Content-Type', 'application/json; 
charset=utf-8') - return req - - @staticmethod - def find_assets(data, asset_type, asset_id=None): - for asset in data.get('assets', []): - if not asset.get('type') == asset_type: - continue - elif asset_id is not None and not asset.get('id') == asset_id: - continue - else: - yield asset - - def _check_access_rights(self, data): - access_info = data.get('__view', {}) - if not access_info.get('allow_access', True): - err_code = access_info.get('error_code') or '' - if err_code == 'ITEM_PAID_ONLY': - raise ExtractorError( - 'This video requires subscription.', expected=True) - else: - raise ExtractorError( - 'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True) - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - self.report_login() - data = { - 'client_id': 'web', - 'type': 'password', - 'user_key': username, - 'password': password, - } - login_request = VesselIE.make_json_request(self._LOGIN_URL, data) - self._download_webpage(login_request, None, False, 'Wrong login info') - - def _real_initialize(self): - self._login() - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - data = self._parse_json(self._search_regex( - r'App\.bootstrapData\((.*?)\);', webpage, 'data'), video_id) - asset_id = data['model']['data']['id'] - - req = VesselIE.make_json_request( - self._API_URL_TEMPLATE % asset_id, {'client': 'web'}) - data = self._download_json(req, video_id) - video_asset_id = data.get('main_video_asset') - - self._check_access_rights(data) - - try: - video_asset = next( - VesselIE.find_assets(data, 'video', asset_id=video_asset_id)) - except StopIteration: - raise ExtractorError('No video assets found') - - formats = [] - for f in video_asset.get('sources', []): - location = f.get('location') - if not location: - continue - name = f.get('name') - if name == 'hls-index': - 
formats.extend(self._extract_m3u8_formats( - location, video_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id='m3u8', fatal=False)) - elif name == 'dash-index': - formats.extend(self._extract_mpd_formats( - location, video_id, mpd_id='dash', fatal=False)) - else: - formats.append({ - 'format_id': name, - 'tbr': f.get('bitrate'), - 'height': f.get('height'), - 'width': f.get('width'), - 'url': location, - }) - self._sort_formats(formats) - - thumbnails = [] - for im_asset in VesselIE.find_assets(data, 'image'): - thumbnails.append({ - 'url': im_asset['location'], - 'width': im_asset.get('width', 0), - 'height': im_asset.get('height', 0), - }) - - return { - 'id': video_id, - 'title': data['title'], - 'formats': formats, - 'thumbnails': thumbnails, - 'description': data.get('short_description'), - 'duration': data.get('duration'), - 'comment_count': data.get('comment_count'), - 'like_count': data.get('like_count'), - 'view_count': data.get('view_count'), - 'timestamp': parse_iso8601(data.get('released_at')), - } From 311ee457314359662c975cd29f2ee58ad068db49 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 14 Oct 2019 18:36:25 +0100 Subject: [PATCH 578/785] [nbc] switch to graphql api(closes #18581)(closes #22693)(closes #22701) --- youtube_dl/extractor/nbc.py | 39 ++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 3282f84ee..10680b202 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -10,7 +10,6 @@ from .adobepass import AdobePassIE from ..compat import compat_urllib_parse_unquote from ..utils import ( smuggle_url, - try_get, update_url_query, int_or_none, ) @@ -85,27 +84,41 @@ class NBCIE(AdobePassIE): permalink, video_id = re.match(self._VALID_URL, url).groups() permalink = 'http' + compat_urllib_parse_unquote(permalink) response = self._download_json( - 'https://api.nbc.com/v3/videos', 
video_id, query={ - 'filter[permalink]': permalink, - 'fields[videos]': 'description,entitlement,episodeNumber,guid,keywords,seasonNumber,title,vChipRating', - 'fields[shows]': 'shortTitle', - 'include': 'show.shortTitle', + 'https://friendship.nbc.co/v2/graphql', video_id, query={ + 'query': '''{ + page(name: "%s", platform: web, type: VIDEO, userId: "0") { + data { + ... on VideoPageData { + description + episodeNumber + keywords + locked + mpxAccountId + mpxGuid + rating + seasonNumber + secondaryTitle + seriesShortTitle + } + } + } +}''' % permalink, }) - video_data = response['data'][0]['attributes'] + video_data = response['data']['page']['data'] query = { 'mbr': 'true', 'manifest': 'm3u', } - video_id = video_data['guid'] - title = video_data['title'] - if video_data.get('entitlement') == 'auth': + video_id = video_data['mpxGuid'] + title = video_data['secondaryTitle'] + if video_data.get('locked'): resource = self._get_mvpd_resource( 'nbcentertainment', title, video_id, - video_data.get('vChipRating')) + video_data.get('rating')) query['auth'] = self._extract_mvpd_auth( url, video_id, 'nbcentertainment', resource) theplatform_url = smuggle_url(update_url_query( - 'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id, + 'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id), query), {'force_smil_url': True}) return { '_type': 'url_transparent', @@ -117,7 +130,7 @@ class NBCIE(AdobePassIE): 'season_number': int_or_none(video_data.get('seasonNumber')), 'episode_number': int_or_none(video_data.get('episodeNumber')), 'episode': title, - 'series': try_get(response, lambda x: x['included'][0]['attributes']['shortTitle']), + 'series': video_data.get('seriesShortTitle'), 'ie_key': 'ThePlatform', } From a1ee23e98fe2ec80b8726829927fcae1267e76b1 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 14 Oct 2019 18:37:35 +0100 Subject: [PATCH 579/785] [vimeo] fix VHX 
embed extraction --- youtube_dl/extractor/vimeo.py | 97 ++++------------------------------- 1 file changed, 9 insertions(+), 88 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index ddf375c6c..5dc38e243 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -23,7 +23,6 @@ from ..utils import ( NO_DEFAULT, OnDemandPagedList, parse_filesize, - qualities, RegexNotFoundError, sanitized_Request, smuggle_url, @@ -211,6 +210,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): video_uploader_url = owner.get('url') return { + 'id': video_id, 'title': self._live_title(video_title) if is_live else video_title, 'uploader': owner.get('name'), 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None, @@ -730,7 +730,6 @@ class VimeoIE(VimeoBaseInfoExtractor): channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None info_dict = { - 'id': video_id, 'formats': formats, 'timestamp': unified_timestamp(timestamp), 'description': video_description, @@ -1061,7 +1060,6 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): if source_format: info_dict['formats'].append(source_format) self._vimeo_sort_formats(info_dict['formats']) - info_dict['id'] = video_id return info_dict @@ -1115,94 +1113,17 @@ class VimeoLikesIE(VimeoChannelIE): return self._extract_videos(user_id, 'https://vimeo.com/%s/likes' % user_id) -class VHXEmbedIE(InfoExtractor): +class VHXEmbedIE(VimeoBaseInfoExtractor): IE_NAME = 'vhx:embed' _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)' - def _call_api(self, video_id, access_token, path='', query=None): - return self._download_json( - 'https://api.vhx.tv/videos/' + video_id + path, video_id, headers={ - 'Authorization': 'Bearer ' + access_token, - }, query=query) - def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - credentials = self._parse_json(self._search_regex( - 
r'(?s)credentials\s*:\s*({.+?}),', webpage, - 'config'), video_id, js_to_json) - access_token = credentials['access_token'] - - query = {} - for k, v in credentials.items(): - if k in ('authorization', 'authUserToken', 'ticket') and v and v != 'undefined': - if k == 'authUserToken': - query['auth_user_token'] = v - else: - query[k] = v - files = self._call_api(video_id, access_token, '/files', query) - - formats = [] - for f in files: - href = try_get(f, lambda x: x['_links']['source']['href']) - if not href: - continue - method = f.get('method') - if method == 'hls': - formats.extend(self._extract_m3u8_formats( - href, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - elif method == 'dash': - formats.extend(self._extract_mpd_formats( - href, video_id, mpd_id='dash', fatal=False)) - else: - fmt = { - 'filesize': int_or_none(try_get(f, lambda x: x['size']['bytes'])), - 'format_id': 'http', - 'preference': 1, - 'url': href, - 'vcodec': f.get('codec'), - } - quality = f.get('quality') - if quality: - fmt.update({ - 'format_id': 'http-' + quality, - 'height': int_or_none(self._search_regex(r'(\d+)p', quality, 'height', default=None)), - }) - formats.append(fmt) - self._sort_formats(formats) - - video_data = self._call_api(video_id, access_token) - title = video_data.get('title') or video_data['name'] - - subtitles = {} - for subtitle in try_get(video_data, lambda x: x['tracks']['subtitles'], list) or []: - lang = subtitle.get('srclang') or subtitle.get('label') - for _link in subtitle.get('_links', {}).values(): - href = _link.get('href') - if not href: - continue - subtitles.setdefault(lang, []).append({ - 'url': href, - }) - - q = qualities(['small', 'medium', 'large', 'source']) - thumbnails = [] - for thumbnail_id, thumbnail_url in video_data.get('thumbnail', {}).items(): - thumbnails.append({ - 'id': thumbnail_id, - 'url': thumbnail_url, - 'preference': q(thumbnail_id), - }) - - return { - 'id': video_id, - 'title': title, - 'description': 
video_data.get('description'), - 'duration': int_or_none(try_get(video_data, lambda x: x['duration']['seconds'])), - 'formats': formats, - 'subtitles': subtitles, - 'thumbnails': thumbnails, - 'timestamp': unified_timestamp(video_data.get('created_at')), - 'view_count': int_or_none(video_data.get('plays_count')), - } + config_url = self._parse_json(self._search_regex( + r'window\.OTTData\s*=\s*({.+})', webpage, + 'ott data'), video_id, js_to_json)['config_url'] + config = self._download_json(config_url, video_id) + info = self._parse_config(config, video_id) + self._vimeo_sort_formats(info['formats']) + return info From 7e05df71b7d8c0e1ea9beafff48275ef3c9e27d2 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 15 Oct 2019 00:10:22 +0100 Subject: [PATCH 580/785] [nexx] handle result list(closes #22666) --- youtube_dl/extractor/nexx.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py index 82d526c22..f9aad83c4 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -295,13 +295,23 @@ class NexxIE(InfoExtractor): video = None + def find_video(result): + if isinstance(result, dict): + return result + elif isinstance(result, list): + vid = int(video_id) + for v in result: + if try_get(v, lambda x: x['general']['ID'], int) == vid: + return v + return None + response = self._download_json( 'https://arc.nexx.cloud/api/video/%s.json' % video_id, video_id, fatal=False) if response and isinstance(response, dict): result = response.get('result') - if result and isinstance(result, dict): - video = result + if result: + video = find_video(result) # not all videos work via arc, e.g. 
nexx:741:1269984 if not video: @@ -348,7 +358,7 @@ class NexxIE(InfoExtractor): request_token = hashlib.md5( ''.join((op, domain_id, secret)).encode('utf-8')).hexdigest() - video = self._call_api( + result = self._call_api( domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={ 'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description', 'addInteractionOptions': '1', @@ -363,6 +373,7 @@ class NexxIE(InfoExtractor): 'X-Request-CID': cid, 'X-Request-Token': request_token, }) + video = find_video(result) general = video['general'] title = general['title'] From 2af01c0293db53dc80c552df3986d0e088b65b76 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 15 Oct 2019 15:18:51 +0100 Subject: [PATCH 581/785] [bokecc] improve player params extraction(closes #22638) --- youtube_dl/extractor/bokecc.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/bokecc.py b/youtube_dl/extractor/bokecc.py index 86a7f4d7d..6017e8344 100644 --- a/youtube_dl/extractor/bokecc.py +++ b/youtube_dl/extractor/bokecc.py @@ -11,8 +11,8 @@ from ..utils import ExtractorError class BokeCCBaseIE(InfoExtractor): def _extract_bokecc_formats(self, webpage, video_id, format_id=None): player_params_str = self._html_search_regex( - r'<(?:script|embed)[^>]+src="http://p\.bokecc\.com/player\?([^"]+)', - webpage, 'player params') + r'<(?:script|embed)[^>]+src=(?P<q>["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P<query>.+?)(?P=q)', + webpage, 'player params', group='query') player_params = compat_parse_qs(player_params_str) @@ -36,9 +36,9 @@ class BokeCCIE(BokeCCBaseIE): _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' _TESTS = [{ - 'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B', + 'url': 'http://union.bokecc.com/playvideo.bo?vid=E0ABAE9D4F509B189C33DC5901307461&uid=FE644790DE9D154A', 'info_dict': { - 'id': 
'CD0C5D3C8614B28B_E44D40C15E65EA30', + 'id': 'FE644790DE9D154A_E0ABAE9D4F509B189C33DC5901307461', 'ext': 'flv', 'title': 'BokeCC Video', }, From 30eb05cb41d95a73f7baff8da9ec1d6a50b08f50 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 15 Oct 2019 19:54:53 +0100 Subject: [PATCH 582/785] [globo] extract subtitles(closes #22713) --- youtube_dl/extractor/globo.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index b9c400a57..9ad1d95fb 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -102,10 +102,18 @@ class GloboIE(InfoExtractor): title = video['title'] formats = [] + subtitles = {} for resource in video['resources']: resource_id = resource.get('_id') resource_url = resource.get('url') - if not resource_id or not resource_url: + resource_type = resource.get('type') + if not resource_url or (resource_type == 'media' and not resource_id) or resource_type not in ('subtitle', 'media'): + continue + + if resource_type == 'subtitle': + subtitles.setdefault(resource.get('language') or 'por', []).append({ + 'url': resource_url, + }) continue security = self._download_json( @@ -165,7 +173,8 @@ class GloboIE(InfoExtractor): 'duration': duration, 'uploader': uploader, 'uploader_id': uploader_id, - 'formats': formats + 'formats': formats, + 'subtitles': subtitles, } From 974311b5aa1a53564a00915b9228af30e2a5b40d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 15 Oct 2019 21:01:59 +0100 Subject: [PATCH 583/785] [vimeo] improve album videos id extraction(closes #22599) --- youtube_dl/extractor/vimeo.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 5dc38e243..9abd59d98 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -938,7 +938,7 @@ class VimeoAlbumIE(VimeoChannelIE): def 
_fetch_page(self, album_id, authorizaion, hashed_pass, page): api_page = page + 1 query = { - 'fields': 'link', + 'fields': 'link,uri', 'page': api_page, 'per_page': self._PAGE_SIZE, } @@ -953,7 +953,9 @@ class VimeoAlbumIE(VimeoChannelIE): link = video.get('link') if not link: continue - yield self.url_result(link, VimeoIE.ie_key(), VimeoIE._match_id(link)) + uri = video.get('uri') + video_id = self._search_regex(r'/videos/(\d+)', uri, 'video_id', default=None) if uri else None + yield self.url_result(link, VimeoIE.ie_key(), video_id) def _real_extract(self, url): album_id = self._match_id(url) From 173190f5e3946173daea0539cf0e749cb14acd12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 16 Oct 2019 03:25:13 +0700 Subject: [PATCH 584/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/ChangeLog b/ChangeLog index 80681a9ae..8a59398d9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,39 @@ +version <unreleased> + +Core +* [extractor/common] Make _is_valid_url more relaxed + +Extractors +* [vimeo] Improve album videos id extraction (#22599) ++ [globo] Extract subtitles (#22713) +* [bokecc] Improve player params extraction (#22638) +* [nexx] Handle result list (#22666) +* [vimeo] Fix VHX embed extraction +* [nbc] Switch to graphql API (#18581, #22693, #22701) +- [vessel] Remove extractor +- [promptfile] Remove extractor (#6239) +* [kaltura] Fix service URL extraction (#22658) +* [kaltura] Fix embed info strip (#22658) +* [globo] Fix format extraction (#20319) +* [redtube] Improve metadata extraction (#22492, #22615) +* [pornhub:uservideos:upload] Fix extraction (#22619) ++ [telequebec:squat] Add support for squat.telequebec.tv (#18503) +- [wimp] Remove extractor (#22088, #22091) ++ [gfycat] Extend URL regular expression (#22225) ++ [chaturbate] Extend URL regular expression (#22309) +* [peertube] Update instances (#22414) ++ [telequebec] 
Add support for coucou.telequebec.tv (#22482) ++ [xvideos] Extend URL regular expression (#22471) +- [youtube] Remove support for invidious.enkirton.net (#22543) ++ [openload] Add support for oload.monster (#22592) +* [nrktv:seriebase] Fix extraction (#22596) ++ [youtube] Add support for yt.lelux.fi (#22597) +* [orf:tvthek] Make manifest requests non fatal (#22578) +* [teachable] Skip login when already logged in (#22572) +* [viewlift] Improve extraction (#22545) +* [nonktube] Fix extraction (#22544) + + version 2019.09.28 Core From 7815d6b74373feb90d969b5fcde7df11702fa5d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 16 Oct 2019 03:26:47 +0700 Subject: [PATCH 585/785] release 2019.10.16 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 4 +--- youtube_dl/version.py | 2 +- 8 files changed, 15 insertions(+), 17 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 2fea0120e..5cd9f0dc0 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. 
Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.09.28** +- [ ] I've verified that I'm running youtube-dl version **2019.10.16** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.09.28 + [debug] youtube-dl version 2019.10.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 6116acc79..6cc34796a 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. 
Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.09.28** +- [ ] I've verified that I'm running youtube-dl version **2019.10.16** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 79d1a7f3c..0b7911e79 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. 
If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.09.28** +- [ ] I've verified that I'm running youtube-dl version **2019.10.16** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 9bda3d440..a6f417d38 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.09.28** +- [ ] I've verified that I'm running youtube-dl version **2019.10.16** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.09.28 + [debug] youtube-dl version 2019.10.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 581344917..3fe753b62 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.09.28** +- [ ] I've verified that I'm running youtube-dl version **2019.10.16** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 8a59398d9..dc5c32a1f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.10.16 Core * [extractor/common] Make _is_valid_url more relaxed diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 35275278b..0cbad28ea 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -694,7 +694,6 @@ - **PornoXO** - **PornTube** - **PressTV** - - **PromptFile** - **prosiebensat1**: ProSiebenSat.1 Digital - **puhutv** - **puhutv:serie** @@ -884,6 +883,7 @@ - **TeleQuebec** - **TeleQuebecEmission** - **TeleQuebecLive** + - **TeleQuebecSquat** - **TeleTask** - **Telewebion** - **TennisTV** @@ -991,7 +991,6 @@ - **VeeHD** - **Veoh** - **verystream** - - **Vessel** - **Vesti**: Вести.Ru - **Vevo** - **VevoPlaylist** @@ -1090,7 +1089,6 @@ - **Weibo** - **WeiboMobile** - **WeiqiTV**: WQTV - - **Wimp** - **Wistia** - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **WorldStarHipHop** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c3eafb068..53889b7cb 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.09.28' +__version__ = '2019.10.16' From 6d394a66f54216cc2b0b68fadd958eaf455c2778 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 16 Oct 2019 12:03:46 +0100 Subject: [PATCH 586/785] [atresplayer] fix extraction(closes #16277)(closes #16716) --- youtube_dl/extractor/atresplayer.py | 213 +++++++++------------------- 
1 file changed, 64 insertions(+), 149 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index ae1c09427..b96218f6c 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -1,202 +1,117 @@ from __future__ import unicode_literals -import time -import hmac -import hashlib import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import compat_HTTPError from ..utils import ( ExtractorError, - float_or_none, int_or_none, - sanitized_Request, urlencode_postdata, - xpath_text, ) class AtresPlayerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html' + _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})' _NETRC_MACHINE = 'atresplayer' _TESTS = [ { - 'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html', - 'md5': 'efd56753cda1bb64df52a3074f62e38a', + 'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/', 'info_dict': { - 'id': 'capitulo-10-especial-solidario-nochebuena', + 'id': '5d4aa2c57ed1a88fc715a615', 'ext': 'mp4', - 'title': 'Especial Solidario de Nochebuena', - 'description': 'md5:e2d52ff12214fa937107d21064075bf1', - 'duration': 5527.6, - 'thumbnail': r're:^https?://.*\.jpg$', + 'title': 'Capítulo 7: Asuntos pendientes', + 'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc', + 'duration': 3413, + }, + 'params': { + 'format': 'bestvideo', }, 'skip': 'This video is only available for registered users' }, { - 'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html', - 'md5': '6e52cbb513c405e403dbacb7aacf8747', - 'info_dict': { - 'id': 
'capitulo-112-david-bustamante', - 'ext': 'flv', - 'title': 'David Bustamante', - 'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6', - 'duration': 1439.0, - 'thumbnail': r're:^https?://.*\.jpg$', - }, + 'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/', + 'only_matching': True, }, { - 'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html', + 'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/', 'only_matching': True, }, ] - - _USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J' - _MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)' - _TIMESTAMP_SHIFT = 30000 - - _TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json' - _URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json' - _PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s' - _EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s' - - _LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check' - - _ERRORS = { - 'UNPUBLISHED': 'We\'re sorry, but this video is not yet available.', - 'DELETED': 'This video has expired and is no longer available for online streaming.', - 'GEOUNPUBLISHED': 'We\'re sorry, but this video is not available in your region due to right restrictions.', - # 'PREMIUM': 'PREMIUM', - } + _API_BASE = 'https://api.atresplayer.com/' def _real_initialize(self): self._login() + def _handle_error(self, e, code): + if isinstance(e.cause, compat_HTTPError) and e.cause.code == code: + error = self._parse_json(e.cause.read(), None) + if error.get('error') == 'required_registered': + self.raise_login_required() + raise ExtractorError(error['error_description'], expected=True) 
+ raise + def _login(self): username, password = self._get_login_info() if username is None: return - login_form = { - 'j_username': username, - 'j_password': password, - } + self._request_webpage( + self._API_BASE + 'login', None, 'Downloading login page') - request = sanitized_Request( - self._LOGIN_URL, urlencode_postdata(login_form)) - request.add_header('Content-Type', 'application/x-www-form-urlencoded') - response = self._download_webpage( - request, None, 'Logging in') + try: + target_url = self._download_json( + 'https://account.atresmedia.com/api/login', None, + 'Logging in', headers={ + 'Content-Type': 'application/x-www-form-urlencoded' + }, data=urlencode_postdata({ + 'username': username, + 'password': password, + }))['targetUrl'] + except ExtractorError as e: + self._handle_error(e, 400) - error = self._html_search_regex( - r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>', - response, 'error', default=None) - if error: - raise ExtractorError( - 'Unable to login: %s' % error, expected=True) + self._request_webpage(target_url, None, 'Following Target URL') def _real_extract(self, url): - video_id = self._match_id(url) + display_id, video_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, video_id) + try: + episode = self._download_json( + self._API_BASE + 'client/v1/player/episode/' + video_id, video_id) + except ExtractorError as e: + self._handle_error(e, 403) - episode_id = self._search_regex( - r'episode="([^"]+)"', webpage, 'episode id') - - request = sanitized_Request( - self._PLAYER_URL_TEMPLATE % episode_id, - headers={'User-Agent': self._USER_AGENT}) - player = self._download_json(request, episode_id, 'Downloading player JSON') - - episode_type = player.get('typeOfEpisode') - error_message = self._ERRORS.get(episode_type) - if error_message: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, error_message), expected=True) + title = episode['titulo'] formats = [] - video_url = 
player.get('urlVideo') - if video_url: - format_info = { - 'url': video_url, - 'format_id': 'http', - } - mobj = re.search(r'(?P<bitrate>\d+)K_(?P<width>\d+)x(?P<height>\d+)', video_url) - if mobj: - format_info.update({ - 'width': int_or_none(mobj.group('width')), - 'height': int_or_none(mobj.group('height')), - 'tbr': int_or_none(mobj.group('bitrate')), - }) - formats.append(format_info) - - timestamp = int_or_none(self._download_webpage( - self._TIME_API_URL, - video_id, 'Downloading timestamp', fatal=False), 1000, time.time()) - timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT) - token = hmac.new( - self._MAGIC.encode('ascii'), - (episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5 - ).hexdigest() - - request = sanitized_Request( - self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token), - headers={'User-Agent': self._USER_AGENT}) - - fmt_json = self._download_json( - request, video_id, 'Downloading windows video JSON') - - result = fmt_json.get('resultDes') - if result.lower() != 'ok': - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, result), expected=True) - - for format_id, video_url in fmt_json['resultObject'].items(): - if format_id == 'token' or not video_url.startswith('http'): + for source in episode.get('sources', []): + src = source.get('src') + if not src: continue - if 'geodeswowsmpra3player' in video_url: - # f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0] - # f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path) - # this videos are protected by DRM, the f4m downloader doesn't support them - continue - video_url_hd = video_url.replace('free_es', 'es') - formats.extend(self._extract_f4m_formats( - video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds', - fatal=False)) - formats.extend(self._extract_mpd_formats( - video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash', - fatal=False)) + src_type = source.get('type') + if src_type == 
'application/vnd.apple.mpegurl': + formats.extend(self._extract_m3u8_formats( + src, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif src_type == 'application/dash+xml': + formats.extend(self._extract_mpd_formats( + src, video_id, mpd_id='dash', fatal=False)) self._sort_formats(formats) - path_data = player.get('pathData') - - episode = self._download_xml( - self._EPISODE_URL_TEMPLATE % path_data, video_id, - 'Downloading episode XML') - - duration = float_or_none(xpath_text( - episode, './media/asset/info/technical/contentDuration', 'duration')) - - art = episode.find('./media/asset/info/art') - title = xpath_text(art, './name', 'title') - description = xpath_text(art, './description', 'description') - thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail') - - subtitles = {} - subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle') - if subtitle_url: - subtitles['es'] = [{ - 'ext': 'srt', - 'url': subtitle_url, - }] + heartbeat = episode.get('heartbeat') or {} + omniture = episode.get('omniture') or {} + get_meta = lambda x: heartbeat.get(x) or omniture.get(x) return { + 'display_id': display_id, 'id': video_id, 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, + 'description': episode.get('descripcion'), + 'thumbnail': episode.get('imgPoster'), + 'duration': int_or_none(episode.get('duration')), 'formats': formats, - 'subtitles': subtitles, + 'channel': get_meta('channel'), + 'season': get_meta('season'), + 'episode_number': int_or_none(get_meta('episodeNumber')), } From e29e96a9f5bc390789d176d509f592e208aa30d8 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 16 Oct 2019 15:06:48 +0100 Subject: [PATCH 587/785] [dumpert] fix extraction(closes #22428)(closes #22564) --- youtube_dl/extractor/dumpert.py | 83 +++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git 
a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py index be2e3d378..d9d9afdec 100644 --- a/youtube_dl/extractor/dumpert.py +++ b/youtube_dl/extractor/dumpert.py @@ -1,20 +1,17 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..compat import compat_b64decode from ..utils import ( + int_or_none, qualities, - sanitized_Request, ) class DumpertIE(InfoExtractor): - _VALID_URL = r'(?P<protocol>https?)://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)' + _VALID_URL = r'(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)' _TESTS = [{ - 'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/', + 'url': 'https://www.dumpert.nl/item/6646981_951bc60f', 'md5': '1b9318d7d5054e7dcb9dc7654f21d643', 'info_dict': { 'id': '6646981/951bc60f', @@ -24,46 +21,60 @@ class DumpertIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', } }, { - 'url': 'http://www.dumpert.nl/embed/6675421/dc440fe7/', + 'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7', + 'only_matching': True, + }, { + 'url': 'http://legacy.dumpert.nl/mediabase/6646981/951bc60f', + 'only_matching': True, + }, { + 'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7', 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - protocol = mobj.group('protocol') - - url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id) - req = sanitized_Request(url) - req.add_header('Cookie', 'nsfw=1; cpc=10') - webpage = self._download_webpage(req, video_id) - - files_base64 = self._search_regex( - r'data-files="([^"]+)"', webpage, 'data files') - - files = self._parse_json( - compat_b64decode(files_base64).decode('utf-8'), - video_id) + video_id = self._match_id(url).replace('_', '/') + item = self._download_json( + 'http://api-live.dumpert.nl/mobile_api/json/info/' + video_id.replace('/', '_'), 
+ video_id)['items'][0] + title = item['title'] + media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO') quality = qualities(['flv', 'mobile', 'tablet', '720p']) - - formats = [{ - 'url': video_url, - 'format_id': format_id, - 'quality': quality(format_id), - } for format_id, video_url in files.items() if format_id != 'still'] + formats = [] + for variant in media.get('variants', []): + uri = variant.get('uri') + if not uri: + continue + version = variant.get('version') + formats.append({ + 'url': uri, + 'format_id': version, + 'quality': quality(version), + }) self._sort_formats(formats) - title = self._html_search_meta( - 'title', webpage) or self._og_search_title(webpage) - description = self._html_search_meta( - 'description', webpage) or self._og_search_description(webpage) - thumbnail = files.get('still') or self._og_search_thumbnail(webpage) + thumbnails = [] + stills = item.get('stills') or {} + for t in ('thumb', 'still'): + for s in ('', '-medium', '-large'): + still_id = t + s + still_url = stills.get(still_id) + if not still_url: + continue + thumbnails.append({ + 'id': still_id, + 'url': still_url, + }) + + stats = item.get('stats') or {} return { 'id': video_id, 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'formats': formats + 'description': item.get('description'), + 'thumbnails': thumbnails, + 'formats': formats, + 'duration': int_or_none(media.get('duration')), + 'like_count': int_or_none(stats.get('kudos_total')), + 'view_count': int_or_none(stats.get('views_total')), } From 2b115b9460502944d6088cf42810c440495128a3 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 16 Oct 2019 15:41:58 +0100 Subject: [PATCH 588/785] [servingsys] Remove extractor(closes #22639) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/servingsys.py | 72 ------------------------------ 2 files changed, 73 deletions(-) delete mode 100644 youtube_dl/extractor/servingsys.py diff --git 
a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7a1e0dad6..53d527440 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -995,7 +995,6 @@ from .scrippsnetworks import ScrippsNetworksWatchIE from .seeker import SeekerIE from .senateisvp import SenateISVPIE from .sendtonews import SendtoNewsIE -from .servingsys import ServingSysIE from .servus import ServusIE from .sevenplus import SevenPlusIE from .sexu import SexuIE diff --git a/youtube_dl/extractor/servingsys.py b/youtube_dl/extractor/servingsys.py deleted file mode 100644 index c013d678f..000000000 --- a/youtube_dl/extractor/servingsys.py +++ /dev/null @@ -1,72 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, -) - - -class ServingSysIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^.]+\.)?serving-sys\.com/BurstingPipe/adServer\.bs\?.*?&pli=(?P<id>[0-9]+)' - - _TEST = { - 'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?', - 'info_dict': { - 'id': '5349193', - 'title': 'AdAPPter_Hyundai_demo', - }, - 'playlist': [{ - 'md5': 'baed851342df6846eb8677a60a011a0f', - 'info_dict': { - 'id': '29955898', - 'ext': 'flv', - 'title': 'AdAPPter_Hyundai_demo (1)', - 'duration': 74, - 'tbr': 1378, - 'width': 640, - 'height': 400, - }, - }, { - 'md5': '979b4da2655c4bc2d81aeb915a8c5014', - 'info_dict': { - 'id': '29907998', - 'ext': 'flv', - 'title': 'AdAPPter_Hyundai_demo (2)', - 'duration': 34, - 'width': 854, - 'height': 480, - 'tbr': 516, - }, - }], - 'params': { - 'playlistend': 2, - }, - '_skip': 'Blocked in the US [sic]', - } - - def _real_extract(self, url): - pl_id = self._match_id(url) - vast_doc = self._download_xml(url, pl_id) - - title = vast_doc.find('.//AdTitle').text - media = vast_doc.find('.//MediaFile').text - info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL') - - doc = 
self._download_xml(info_url, pl_id, 'Downloading video info') - entries = [{ - '_type': 'video', - 'id': a.attrib['id'], - 'title': '%s (%s)' % (title, a.attrib['assetID']), - 'url': a.attrib['URL'], - 'duration': int_or_none(a.attrib.get('length')), - 'tbr': int_or_none(a.attrib.get('bitrate')), - 'height': int_or_none(a.attrib.get('height')), - 'width': int_or_none(a.attrib.get('width')), - } for a in doc.findall('.//AdditionalAssets/asset')] - - return { - '_type': 'playlist', - 'id': pl_id, - 'title': title, - 'entries': entries, - } From d07866f13efac39bf3f0b331870a15e0f5e98057 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 16 Oct 2019 15:45:45 +0100 Subject: [PATCH 589/785] [mit] Remove support for video.mit.edu(closes #22403) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/mit.py | 24 ------------------------ 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 53d527440..ea47b99f6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -644,7 +644,7 @@ from .minhateca import MinhatecaIE from .ministrygrid import MinistryGridIE from .minoto import MinotoIE from .miomio import MioMioIE -from .mit import TechTVMITIE, MITIE, OCWMITIE +from .mit import TechTVMITIE, OCWMITIE from .mitele import MiTeleIE from .mixcloud import ( MixcloudIE, diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py index 1aea78d11..e1506a745 100644 --- a/youtube_dl/extractor/mit.py +++ b/youtube_dl/extractor/mit.py @@ -65,30 +65,6 @@ class TechTVMITIE(InfoExtractor): } -class MITIE(TechTVMITIE): - IE_NAME = 'video.mit.edu' - _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)' - - _TEST = { - 'url': 'http://video.mit.edu/watch/the-government-is-profiling-you-13222/', - 'md5': '7db01d5ccc1895fc5010e9c9e13648da', - 'info_dict': { - 'id': '21783', - 'ext': 'mp4', - 'title': 'The Government is 
Profiling You', - 'description': 'md5:ad5795fe1e1623b73620dbfd47df9afd', - }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - page_title = mobj.group('title') - webpage = self._download_webpage(url, page_title) - embed_url = self._search_regex( - r'<iframe .*?src="(.+?)"', webpage, 'embed url') - return self.url_result(embed_url) - - class OCWMITIE(InfoExtractor): IE_NAME = 'ocw.mit.edu' _VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)' From bc48773ed4c068adfe67078714814035660e5ca4 Mon Sep 17 00:00:00 2001 From: MobiDotS <msaad615@gmail.com> Date: Wed, 16 Oct 2019 10:13:35 -0500 Subject: [PATCH 590/785] [twitch] update VOD URL matching (closes #22395) (#22727) --- youtube_dl/extractor/twitch.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 0500e33a6..ca7676fe2 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -248,7 +248,7 @@ class TwitchVodIE(TwitchItemBaseIE): https?:// (?: (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/| - player\.twitch\.tv/\?.*?\bvideo=v + player\.twitch\.tv/\?.*?\bvideo=v? 
) (?P<id>\d+) ''' @@ -306,6 +306,9 @@ class TwitchVodIE(TwitchItemBaseIE): }, { 'url': 'https://www.twitch.tv/northernlion/video/291940395', 'only_matching': True, + }, { + 'url': 'https://player.twitch.tv/?video=480452374', + 'only_matching': True, }] def _real_extract(self, url): From 000115759485797be719c71716c1ac35f003ba6c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 16 Oct 2019 23:57:40 +0100 Subject: [PATCH 591/785] [atresplayer] Add coding cookie --- youtube_dl/extractor/atresplayer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index b96218f6c..c2cec9845 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import re From 86f63633c8e7c62ce245d1352d4d381efb614466 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 17 Oct 2019 13:20:16 +0100 Subject: [PATCH 592/785] [audioboom] improve metadata extraction --- youtube_dl/extractor/audioboom.py | 34 +++++++++++++++++-------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/audioboom.py b/youtube_dl/extractor/audioboom.py index 393f381c6..c51837b40 100644 --- a/youtube_dl/extractor/audioboom.py +++ b/youtube_dl/extractor/audioboom.py @@ -2,22 +2,25 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import float_or_none +from ..utils import ( + clean_html, + float_or_none, +) class AudioBoomIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)' _TESTS = [{ - 'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0', - 'md5': '63a8d73a055c6ed0f1e51921a10a5a76', + 'url': 'https://audioboom.com/posts/7398103-asim-chaudhry', + 'md5': '7b00192e593ff227e6a315486979a42d', 'info_dict': { - 'id': '4279833', + 'id': '7398103', 'ext': 'mp3', - 
'title': '3/09/2016 Czaban Hour 3', - 'description': 'Guest: Nate Davis - NFL free agency, Guest: Stan Gans', - 'duration': 2245.72, - 'uploader': 'SB Nation A.M.', - 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio', + 'title': 'Asim Chaudhry', + 'description': 'md5:2f3fef17dacc2595b5362e1d7d3602fc', + 'duration': 4000.99, + 'uploader': 'Sue Perkins: An hour or so with...', + 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins', } }, { 'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0', @@ -32,8 +35,8 @@ class AudioBoomIE(InfoExtractor): clip = None clip_store = self._parse_json( - self._search_regex( - r'data-new-clip-store=(["\'])(?P<json>{.*?"clipId"\s*:\s*%s.*?})\1' % video_id, + self._html_search_regex( + r'data-new-clip-store=(["\'])(?P<json>{.+?})\1', webpage, 'clip store', default='{}', group='json'), video_id, fatal=False) if clip_store: @@ -47,14 +50,15 @@ class AudioBoomIE(InfoExtractor): audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property( 'audio', webpage, 'audio url') - title = from_clip('title') or self._og_search_title(webpage) - description = from_clip('description') or self._og_search_description(webpage) + title = from_clip('title') or self._html_search_meta( + ['og:title', 'og:audio:title', 'audio_title'], webpage) + description = from_clip('description') or clean_html(from_clip('formattedDescription')) or self._og_search_description(webpage) duration = float_or_none(from_clip('duration') or self._html_search_meta( 'weibo:audio:duration', webpage)) - uploader = from_clip('author') or self._og_search_property( - 'audio:artist', webpage, 'uploader', fatal=False) + uploader = from_clip('author') or self._html_search_meta( + ['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader') uploader_url = from_clip('author_url') or self._html_search_meta( 'audioboo:channel', webpage, 'uploader url') From 
755541a4c8ac3dd4e8b9abd0c7df95182a1f3fd4 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 17 Oct 2019 13:21:44 +0100 Subject: [PATCH 593/785] [mangomolo] fix video format extraction and add support for player URLs --- youtube_dl/extractor/generic.py | 8 ++++++-- youtube_dl/extractor/mangomolo.py | 17 +++++++++++------ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ec43c5ae4..5ed952b29 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2962,10 +2962,14 @@ class GenericIE(InfoExtractor): # Look for Mangomolo embeds mobj = re.search( - r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/ + r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?// + (?: + admin\.mangomolo\.com/analytics/index\.php/customers/embed| + player\.mangomolo\.com/v1 + )/ (?: video\?.*?\bid=(?P<video_id>\d+)| - index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+) + (?:index|live)\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+) ).+?)\1''', webpage) if mobj is not None: info = { diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py index 482175a34..acee370e9 100644 --- a/youtube_dl/extractor/mangomolo.py +++ b/youtube_dl/extractor/mangomolo.py @@ -10,18 +10,21 @@ from ..utils import int_or_none class MangomoloBaseIE(InfoExtractor): + _BASE_REGEX = r'https?://(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)' + def _get_real_id(self, page_id): return page_id def _real_extract(self, url): page_id = self._get_real_id(self._match_id(url)) - webpage = self._download_webpage(url, page_id) + webpage = self._download_webpage( + 'https://player.mangomolo.com/v1/%s?%s' % (self._TYPE, url.split('?')[1]), page_id) hidden_inputs = self._hidden_inputs(webpage) m3u8_entry_protocol = 'm3u8' if 
self._IS_LIVE else 'm3u8_native' format_url = self._html_search_regex( [ - r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)', + r'(?:file|src)\s*:\s*"(https?://[^"]+?/playlist\.m3u8)', r'<a[^>]+href="(rtsp://[^"]+)"' ], webpage, 'format url') formats = self._extract_wowza_formats( @@ -39,14 +42,16 @@ class MangomoloBaseIE(InfoExtractor): class MangomoloVideoIE(MangomoloBaseIE): - IE_NAME = 'mangomolo:video' - _VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)' + _TYPE = 'video' + IE_NAME = 'mangomolo:' + _TYPE + _VALID_URL = MangomoloBaseIE._BASE_REGEX + r'video\?.*?\bid=(?P<id>\d+)' _IS_LIVE = False class MangomoloLiveIE(MangomoloBaseIE): - IE_NAME = 'mangomolo:live' - _VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' + _TYPE = 'live' + IE_NAME = 'mangomolo:' + _TYPE + _VALID_URL = MangomoloBaseIE._BASE_REGEX + r'(live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' _IS_LIVE = True def _get_real_id(self, page_id): From 59296bae7ec6d15b0df37dce34bdd96381c0e743 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 17 Oct 2019 13:26:45 +0100 Subject: [PATCH 594/785] [xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 --- youtube_dl/extractor/xfileshare.py | 192 +++++++++++++---------------- 1 file changed, 86 insertions(+), 106 deletions(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index b38c7a7b3..48ef07ed1 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -4,37 +4,64 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_chr from ..utils import ( 
decode_packed_codes, determine_ext, ExtractorError, int_or_none, - NO_DEFAULT, + js_to_json, urlencode_postdata, ) +# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58 +def aa_decode(aa_code): + symbol_table = [ + ('7', '((゚ー゚) + (o^_^o))'), + ('6', '((o^_^o) +(o^_^o))'), + ('5', '((゚ー゚) + (゚Θ゚))'), + ('2', '((o^_^o) - (゚Θ゚))'), + ('4', '(゚ー゚)'), + ('3', '(o^_^o)'), + ('1', '(゚Θ゚)'), + ('0', '(c^_^o)'), + ] + delim = '(゚Д゚)[゚ε゚]+' + ret = '' + for aa_char in aa_code.split(delim): + for val, pat in symbol_table: + aa_char = aa_char.replace(pat, val) + aa_char = aa_char.replace('+ ', '') + m = re.match(r'^\d+', aa_char) + if m: + ret += compat_chr(int(m.group(0), 8)) + else: + m = re.match(r'^u([\da-f]+)', aa_char) + if m: + ret += compat_chr(int(m.group(1), 16)) + return ret + + class XFileShareIE(InfoExtractor): _SITES = ( - (r'daclips\.(?:in|com)', 'DaClips'), - (r'filehoot\.com', 'FileHoot'), - (r'gorillavid\.(?:in|com)', 'GorillaVid'), - (r'movpod\.in', 'MovPod'), - (r'powerwatch\.pw', 'PowerWatch'), - (r'rapidvideo\.ws', 'Rapidvideo.ws'), + (r'clipwatching\.com', 'ClipWatching'), + (r'gounlimited\.to', 'GoUnlimited'), + (r'govid\.me', 'GoVid'), + (r'holavid\.com', 'HolaVid'), + (r'streamty\.com', 'Streamty'), (r'thevideobee\.to', 'TheVideoBee'), - (r'vidto\.(?:me|se)', 'Vidto'), - (r'streamin\.to', 'Streamin.To'), - (r'xvidstage\.com', 'XVIDSTAGE'), - (r'vidabc\.com', 'Vid ABC'), + (r'uqload\.com', 'Uqload'), (r'vidbom\.com', 'VidBom'), (r'vidlo\.us', 'vidlo'), - (r'rapidvideo\.(?:cool|org)', 'RapidVideo.TV'), - (r'fastvideo\.me', 'FastVideo.me'), + (r'vidlocker\.xyz', 'VidLocker'), + (r'vidshare\.tv', 'VidShare'), + (r'vup\.to', 'VUp'), + (r'xvideosharing\.com', 'XVideoSharing'), ) IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1]) - _VALID_URL = (r'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' + _VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' % '|'.join(site 
for site in list(zip(*_SITES))[0])) _FILE_NOT_FOUND_REGEXES = ( @@ -43,82 +70,14 @@ class XFileShareIE(InfoExtractor): ) _TESTS = [{ - 'url': 'http://gorillavid.in/06y9juieqpmi', - 'md5': '5ae4a3580620380619678ee4875893ba', + 'url': 'http://xvideosharing.com/fq65f94nd2ve', + 'md5': '4181f63957e8fe90ac836fa58dc3c8a6', 'info_dict': { - 'id': '06y9juieqpmi', + 'id': 'fq65f94nd2ve', 'ext': 'mp4', - 'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ', + 'title': 'sample', 'thumbnail': r're:http://.*\.jpg', }, - }, { - 'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html', - 'only_matching': True, - }, { - 'url': 'http://daclips.in/3rso4kdn6f9m', - 'md5': '1ad8fd39bb976eeb66004d3a4895f106', - 'info_dict': { - 'id': '3rso4kdn6f9m', - 'ext': 'mp4', - 'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc', - 'thumbnail': r're:http://.*\.jpg', - } - }, { - 'url': 'http://movpod.in/0wguyyxi1yca', - 'only_matching': True, - }, { - 'url': 'http://filehoot.com/3ivfabn7573c.html', - 'info_dict': { - 'id': '3ivfabn7573c', - 'ext': 'mp4', - 'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4', - 'thumbnail': r're:http://.*\.jpg', - }, - 'skip': 'Video removed', - }, { - 'url': 'http://vidto.me/ku5glz52nqe1.html', - 'info_dict': { - 'id': 'ku5glz52nqe1', - 'ext': 'mp4', - 'title': 'test' - } - }, { - 'url': 'http://powerwatch.pw/duecjibvicbu', - 'info_dict': { - 'id': 'duecjibvicbu', - 'ext': 'mp4', - 'title': 'Big Buck Bunny trailer', - }, - }, { - 'url': 'http://xvidstage.com/e0qcnl03co6z', - 'info_dict': { - 'id': 'e0qcnl03co6z', - 'ext': 'mp4', - 'title': 'Chucky Prank 2015.mp4', - }, - }, { - # removed by administrator - 'url': 'http://xvidstage.com/amfy7atlkx25', - 'only_matching': True, - }, { - 'url': 'http://vidabc.com/i8ybqscrphfv', - 'info_dict': { - 'id': 'i8ybqscrphfv', - 'ext': 'mp4', - 'title': 're:Beauty and the Beast 2017', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 
'http://www.rapidvideo.cool/b667kprndr8w', - 'only_matching': True, - }, { - 'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html', - 'only_matching': True, - }, { - 'url': 'http://vidto.se/1tx1pf6t12cg.html', - 'only_matching': True, }] @staticmethod @@ -131,10 +90,9 @@ class XFileShareIE(InfoExtractor): webpage)] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + host, video_id = re.match(self._VALID_URL, url).groups() - url = 'http://%s/%s' % (mobj.group('host'), video_id) + url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id) webpage = self._download_webpage(url, video_id) if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES): @@ -142,7 +100,7 @@ class XFileShareIE(InfoExtractor): fields = self._hidden_inputs(webpage) - if fields['op'] == 'download1': + if fields.get('op') == 'download1': countdown = int_or_none(self._search_regex( r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>', webpage, 'countdown', default=None)) @@ -160,13 +118,37 @@ class XFileShareIE(InfoExtractor): (r'style="z-index: [0-9]+;">([^<]+)</span>', r'<td nowrap>([^<]+)</td>', r'h4-fine[^>]*>([^<]+)<', - r'>Watch (.+) ', + r'>Watch (.+)[ <]', r'<h2 class="video-page-head">([^<]+)</h2>', - r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<'), # streamin.to + r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<', # streamin.to + r'title\s*:\s*"([^"]+)"'), # govid.me webpage, 'title', default=None) or self._og_search_title( webpage, default=None) or video_id).strip() - def extract_formats(default=NO_DEFAULT): + for regex, func in ( + (r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes), + (r'(゚.+)', aa_decode)): + obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None) + if obf_code: + webpage = webpage.replace(obf_code, func(obf_code)) + + formats = [] + + 
jwplayer_data = self._search_regex( + [ + r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);', + r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);', + ], webpage, + 'jwplayer data', default=None) + if jwplayer_data: + jwplayer_data = self._parse_json( + jwplayer_data.replace(r"\'", "'"), video_id, js_to_json) + if jwplayer_data: + formats = self._parse_jwplayer_data( + jwplayer_data, video_id, False, + m3u8_id='hls', mpd_id='dash')['formats'] + + if not formats: urls = [] for regex in ( r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1', @@ -177,6 +159,12 @@ class XFileShareIE(InfoExtractor): video_url = mobj.group('url') if video_url not in urls: urls.append(video_url) + + sources = self._search_regex( + r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None) + if sources: + urls.extend(self._parse_json(sources, video_id)) + formats = [] for video_url in urls: if determine_ext(video_url) == 'm3u8': @@ -189,21 +177,13 @@ class XFileShareIE(InfoExtractor): 'url': video_url, 'format_id': 'sd', }) - if not formats and default is not NO_DEFAULT: - return default - self._sort_formats(formats) - return formats - - formats = extract_formats(default=None) - - if not formats: - webpage = decode_packed_codes(self._search_regex( - r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))", - webpage, 'packed code')) - formats = extract_formats() + self._sort_formats(formats) thumbnail = self._search_regex( - r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None) + [ + r'<video[^>]+poster="([^"]+)"', + r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],', + ], webpage, 'thumbnail', default=None) return { 'id': video_id, From 34e3885bc9e3aecab104b96eabce03854ac8f7a2 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 17 Oct 2019 15:55:44 +0100 Subject: [PATCH 595/785] [viewster->contv] remove viewster extractor and add support for contv.com --- youtube_dl/extractor/contv.py | 118 
++++++++++++++++ youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/viewster.py | 217 ----------------------------- 3 files changed, 119 insertions(+), 218 deletions(-) create mode 100644 youtube_dl/extractor/contv.py delete mode 100644 youtube_dl/extractor/viewster.py diff --git a/youtube_dl/extractor/contv.py b/youtube_dl/extractor/contv.py new file mode 100644 index 000000000..84b462d40 --- /dev/null +++ b/youtube_dl/extractor/contv.py @@ -0,0 +1,118 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + float_or_none, + int_or_none, +) + + +class CONtvIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?contv\.com/details-movie/(?P<id>[^/]+)' + _TESTS = [{ + 'url': 'https://www.contv.com/details-movie/CEG10022949/days-of-thrills-&-laughter', + 'info_dict': { + 'id': 'CEG10022949', + 'ext': 'mp4', + 'title': 'Days Of Thrills & Laughter', + 'description': 'md5:5d6b3d0b1829bb93eb72898c734802eb', + 'upload_date': '20180703', + 'timestamp': 1530634789.61, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'https://www.contv.com/details-movie/CLIP-show_fotld_bts/fight-of-the-living-dead:-behind-the-scenes-bites', + 'info_dict': { + 'id': 'CLIP-show_fotld_bts', + 'title': 'Fight of the Living Dead: Behind the Scenes Bites', + }, + 'playlist_mincount': 7, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + details = self._download_json( + 'http://metax.contv.live.junctiontv.net/metax/2.5/details/' + video_id, + video_id, query={'device': 'web'}) + + if details.get('type') == 'episodic': + seasons = self._download_json( + 'http://metax.contv.live.junctiontv.net/metax/2.5/seriesfeed/json/' + video_id, + video_id) + entries = [] + for season in seasons: + for episode in season.get('episodes', []): + episode_id = episode.get('id') + if not episode_id: + continue + entries.append(self.url_result( + 'https://www.contv.com/details-movie/' + 
episode_id, + CONtvIE.ie_key(), episode_id)) + return self.playlist_result(entries, video_id, details.get('title')) + + m_details = details['details'] + title = details['title'] + + formats = [] + + media_hls_url = m_details.get('media_hls_url') + if media_hls_url: + formats.extend(self._extract_m3u8_formats( + media_hls_url, video_id, 'mp4', + m3u8_id='hls', fatal=False)) + + media_mp4_url = m_details.get('media_mp4_url') + if media_mp4_url: + formats.append({ + 'format_id': 'http', + 'url': media_mp4_url, + }) + + self._sort_formats(formats) + + subtitles = {} + captions = m_details.get('captions') or {} + for caption_url in captions.values(): + subtitles.setdefault('en', []).append({ + 'url': caption_url + }) + + thumbnails = [] + for image in m_details.get('images', []): + image_url = image.get('url') + if not image_url: + continue + thumbnails.append({ + 'url': image_url, + 'width': int_or_none(image.get('width')), + 'height': int_or_none(image.get('height')), + }) + + description = None + for p in ('large_', 'medium_', 'small_', ''): + d = m_details.get(p + 'description') + if d: + description = d + break + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnails': thumbnails, + 'description': description, + 'timestamp': float_or_none(details.get('metax_added_on'), 1000), + 'subtitles': subtitles, + 'duration': float_or_none(m_details.get('duration'), 1000), + 'view_count': int_or_none(details.get('num_watched')), + 'like_count': int_or_none(details.get('num_fav')), + 'categories': details.get('category'), + 'tags': details.get('tags'), + 'season_number': int_or_none(details.get('season')), + 'episode_number': int_or_none(details.get('episode')), + 'release_year': int_or_none(details.get('pub_year')), + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ea47b99f6..1db21529f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -231,6 +231,7 @@ from 
.commonprotocols import ( RtmpIE, ) from .condenast import CondeNastIE +from .contv import CONtvIE from .corus import CorusIE from .cracked import CrackedIE from .crackle import CrackleIE @@ -1322,7 +1323,6 @@ from .viewlift import ( ViewLiftIE, ViewLiftEmbedIE, ) -from .viewster import ViewsterIE from .viidea import ViideaIE from .vimeo import ( VimeoIE, diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py deleted file mode 100644 index 6e318479c..000000000 --- a/youtube_dl/extractor/viewster.py +++ /dev/null @@ -1,217 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_urllib_parse_unquote, -) -from ..utils import ( - determine_ext, - ExtractorError, - int_or_none, - parse_iso8601, - sanitized_Request, - HEADRequest, - url_basename, -) - - -class ViewsterIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)' - _TESTS = [{ - # movie, Type=Movie - 'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/', - 'md5': 'e642d1b27fcf3a4ffa79f194f5adde36', - 'info_dict': { - 'id': '1140-11855-000', - 'ext': 'mp4', - 'title': 'The listening Project', - 'description': 'md5:bac720244afd1a8ea279864e67baa071', - 'timestamp': 1214870400, - 'upload_date': '20080701', - 'duration': 4680, - }, - }, { - # series episode, Type=Episode - 'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/', - 'md5': '9243079a8531809efe1b089db102c069', - 'info_dict': { - 'id': '1284-19427-001', - 'ext': 'mp4', - 'title': 'The World and a Wall', - 'description': 'md5:24814cf74d3453fdf5bfef9716d073e3', - 'timestamp': 1428192000, - 'upload_date': '20150405', - 'duration': 1500, - }, - }, { - # serie, Type=Serie - 'url': 'http://www.viewster.com/serie/1303-19426-000/', - 'info_dict': { - 'id': '1303-19426-000', - 'title': 'Is It Wrong to Try to Pick up Girls in a 
Dungeon?', - 'description': 'md5:eeda9bef25b0d524b3a29a97804c2f11', - }, - 'playlist_count': 13, - }, { - # unfinished serie, no Type - 'url': 'http://www.viewster.com/serie/1284-19427-000/baby-steps-season-2/', - 'info_dict': { - 'id': '1284-19427-000', - 'title': 'Baby Steps—Season 2', - 'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1', - }, - 'playlist_mincount': 16, - }, { - # geo restricted series - 'url': 'https://www.viewster.com/serie/1280-18794-002/', - 'only_matching': True, - }, { - # geo restricted video - 'url': 'https://www.viewster.com/serie/1280-18794-002/what-is-extraterritoriality-lawo/', - 'only_matching': True, - }] - - _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01' - - def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True, query={}): - request = sanitized_Request(url) - request.add_header('Accept', self._ACCEPT_HEADER) - request.add_header('Auth-token', self._AUTH_TOKEN) - return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal, query=query) - - def _real_extract(self, url): - video_id = self._match_id(url) - # Get 'api_token' cookie - self._request_webpage( - HEADRequest('http://www.viewster.com/'), - video_id, headers=self.geo_verification_headers()) - cookies = self._get_cookies('http://www.viewster.com/') - self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value) - - info = self._download_json( - 'https://public-api.viewster.com/search/%s' % video_id, - video_id, 'Downloading entry JSON') - - entry_id = info.get('Id') or info['id'] - - # unfinished serie has no Type - if info.get('Type') in ('Serie', None): - try: - episodes = self._download_json( - 'https://public-api.viewster.com/series/%s/episodes' % entry_id, - video_id, 'Downloading series JSON') - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: - self.raise_geo_restricted() - else: - raise - entries = [ - self.url_result( - 
'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster') - for episode in episodes] - title = (info.get('Title') or info['Synopsis']['Title']).strip() - description = info.get('Synopsis', {}).get('Detailed') - return self.playlist_result(entries, video_id, title, description) - - formats = [] - for language_set in info.get('LanguageSets', []): - manifest_url = None - m3u8_formats = [] - audio = language_set.get('Audio') or '' - subtitle = language_set.get('Subtitle') or '' - base_format_id = audio - if subtitle: - base_format_id += '-%s' % subtitle - - def concat(suffix, sep='-'): - return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix - - medias = self._download_json( - 'https://public-api.viewster.com/movies/%s/videos' % entry_id, - video_id, fatal=False, query={ - 'mediaTypes': ['application/f4m+xml', 'application/x-mpegURL', 'video/mp4'], - 'language': audio, - 'subtitle': subtitle, - }) - if not medias: - continue - for media in medias: - video_url = media.get('Uri') - if not video_url: - continue - ext = determine_ext(video_url) - if ext == 'f4m': - manifest_url = video_url - video_url += '&' if '?' in video_url else '?' 
- video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1' - formats.extend(self._extract_f4m_formats( - video_url, video_id, f4m_id=concat('hds'))) - elif ext == 'm3u8': - manifest_url = video_url - m3u8_formats = self._extract_m3u8_formats( - video_url, video_id, 'mp4', m3u8_id=concat('hls'), - fatal=False) # m3u8 sometimes fail - if m3u8_formats: - formats.extend(m3u8_formats) - else: - qualities_basename = self._search_regex( - r'/([^/]+)\.csmil/', - manifest_url, 'qualities basename', default=None) - if not qualities_basename: - continue - QUALITIES_RE = r'((,\d+k)+,?)' - qualities = self._search_regex( - QUALITIES_RE, qualities_basename, - 'qualities', default=None) - if not qualities: - continue - qualities = list(map(lambda q: int(q[:-1]), qualities.strip(',').split(','))) - qualities.sort() - http_template = re.sub(QUALITIES_RE, r'%dk', qualities_basename) - http_url_basename = url_basename(video_url) - if m3u8_formats: - self._sort_formats(m3u8_formats) - m3u8_formats = list(filter( - lambda f: f.get('vcodec') != 'none', m3u8_formats)) - if len(qualities) == len(m3u8_formats): - for q, m3u8_format in zip(qualities, m3u8_formats): - f = m3u8_format.copy() - f.update({ - 'url': video_url.replace(http_url_basename, http_template % q), - 'format_id': f['format_id'].replace('hls', 'http'), - 'protocol': 'http', - }) - formats.append(f) - else: - for q in qualities: - formats.append({ - 'url': video_url.replace(http_url_basename, http_template % q), - 'ext': 'mp4', - 'format_id': 'http-%d' % q, - 'tbr': q, - }) - - if not formats and not info.get('VODSettings'): - self.raise_geo_restricted() - - self._sort_formats(formats) - - synopsis = info.get('Synopsis') or {} - # Prefer title outside synopsis since it's less messy - title = (info.get('Title') or synopsis['Title']).strip() - description = synopsis.get('Detailed') or (info.get('Synopsis') or {}).get('Short') - duration = int_or_none(info.get('Duration')) - timestamp = parse_iso8601(info.get('ReleaseDate')) - - 
return { - 'id': video_id, - 'title': title, - 'description': description, - 'timestamp': timestamp, - 'duration': duration, - 'formats': formats, - } From 824fa51165d92ceee01589bf995ebbf009df328c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 18 Oct 2019 04:03:53 +0700 Subject: [PATCH 596/785] [utils] Improve subtitles_filename (closes #22753) --- test/test_utils.py | 6 ++++++ youtube_dl/YoutubeDL.py | 2 +- youtube_dl/postprocessor/ffmpeg.py | 8 ++++---- youtube_dl/utils.py | 4 ++-- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 659c6ece5..3920542bb 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -74,6 +74,7 @@ from youtube_dl.utils import ( str_to_int, strip_jsonp, strip_or_none, + subtitles_filename, timeconvert, unescapeHTML, unified_strdate, @@ -261,6 +262,11 @@ class TestUtil(unittest.TestCase): self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp') self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp') + def test_subtitles_filename(self): + self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt') + self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt') + self.assertEqual(subtitles_filename('abc.unexpected_ext', 'en', 'vtt', 'ext'), 'abc.unexpected_ext.en.vtt') + def test_remove_start(self): self.assertEqual(remove_start(None, 'A - '), None) self.assertEqual(remove_start('A - B', 'A - '), 'B') diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c3d1407f9..f5cb46308 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1814,7 +1814,7 @@ class YoutubeDL(object): ie = self.get_info_extractor(info_dict['extractor_key']) for sub_lang, sub_info in subtitles.items(): sub_format = sub_info['ext'] - sub_filename = subtitles_filename(filename, sub_lang, sub_format) + sub_filename = subtitles_filename(filename, sub_lang, sub_format, 
info_dict.get('ext')) if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format)) else: diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 70416c25e..fd3f921a8 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -393,7 +393,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): sub_ext = sub_info['ext'] if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': sub_langs.append(lang) - sub_filenames.append(subtitles_filename(filename, lang, sub_ext)) + sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext)) else: if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt': webm_vtt_warn = True @@ -606,9 +606,9 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): self._downloader.to_screen( '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext) continue - old_file = subtitles_filename(filename, lang, ext) + old_file = subtitles_filename(filename, lang, ext, info.get('ext')) sub_filenames.append(old_file) - new_file = subtitles_filename(filename, lang, new_ext) + new_file = subtitles_filename(filename, lang, new_ext, info.get('ext')) if ext in ('dfxp', 'ttml', 'tt'): self._downloader.report_warning( @@ -616,7 +616,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): 'which results in style information loss') dfxp_file = old_file - srt_file = subtitles_filename(filename, lang, 'srt') + srt_file = subtitles_filename(filename, lang, 'srt', info.get('ext')) with open(dfxp_file, 'rb') as f: srt_data = dfxp2srt(f.read()) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 798757241..53117ea90 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2906,8 +2906,8 @@ def determine_ext(url, default_ext='unknown_video'): return default_ext -def subtitles_filename(filename, sub_lang, sub_format): - return 
filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format +def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None): + return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext) def date_from_str(date_str): From 2297c0d7d977921dca865e6c9cbc7ee5282ba8ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 19 Oct 2019 23:56:36 +0700 Subject: [PATCH 597/785] [facebook] Bypass download rate limits (closes #21018) --- youtube_dl/extractor/facebook.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index a3dcdca3e..a56f85c21 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -405,6 +405,11 @@ class FacebookIE(InfoExtractor): if not formats: raise ExtractorError('Cannot find video formats') + # Downloads with browser's User-Agent are rate limited. Working around + # with non-browser User-Agent. 
+ for f in formats: + f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1' + self._sort_formats(formats) video_title = self._html_search_regex( From b4818e3c7a718428d3366c34da8e21e2f416f5e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 22 Oct 2019 00:02:22 +0700 Subject: [PATCH 598/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/ChangeLog b/ChangeLog index dc5c32a1f..045349b05 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,26 @@ +version <unreleased> + +Core +* [utils] Improve subtitles_filename (#22753) + +Extractors +* [facebook] Bypass download rate limits (#21018) ++ [contv] Add support for contv.com +- [viewster] Remove extractor +* [xfileshare] Improve extractor (#17032, #17906, #18237, #18239) + * Update the list of domains + + Add support for aa-encoded video data + * Improve jwplayer format extraction + + Add support for Clappr sources +* [mangomolo] Fix video format extraction and add support for player URLs +* [audioboom] Improve metadata extraction +* [twitch] Update VOD URL matching (#22395, #22727) +- [mit] Remove support for video.mit.edu (#22403) +- [servingsys] Remove extractor (#22639) +* [dumpert] Fix extraction (#22428, #22564) +* [atresplayer] Fix extraction (#16277, #16716) + + version 2019.10.16 Core From 820215f0e34813089d559fed24a398d9e91810e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 22 Oct 2019 00:09:02 +0700 Subject: [PATCH 599/785] release 2019.10.22 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 6 ++---- youtube_dl/version.py | 2 +- 8 files changed, 16 insertions(+), 18 
deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 5cd9f0dc0..f1afe704c 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.10.16** +- [ ] I've verified that I'm running youtube-dl version **2019.10.22** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.10.16 + [debug] youtube-dl version 2019.10.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 6cc34796a..a4dc9b005 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.10.16** +- [ ] I've verified that I'm running youtube-dl version **2019.10.22** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 0b7911e79..5bf86adce 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.10.16** +- [ ] I've verified that I'm running youtube-dl version **2019.10.22** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index a6f417d38..7aa5534e5 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.10.16** +- [ ] I've verified that I'm running youtube-dl version **2019.10.22** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.10.16 + [debug] youtube-dl version 2019.10.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 3fe753b62..5d3645e3d 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.10.16** +- [ ] I've verified that I'm running youtube-dl version **2019.10.22** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 045349b05..64233b03b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.10.22 Core * [utils] Improve subtitles_filename (#22753) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 0cbad28ea..a1b0edeeb 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -183,6 +183,7 @@ - **ComedyCentralShortname** - **ComedyCentralTV** - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED + - **CONtv** - **Corus** - **Coub** - **Cracked** @@ -784,7 +785,6 @@ - **Seeker** - **SenateISVP** - **SendtoNews** - - **ServingSys** - **Servus** - **Sexu** - **SeznamZpravy** @@ -1005,7 +1005,6 @@ - **Viddler** - **Videa** - **video.google:search**: Google Video search - - **video.mit.edu** - **VideoDetective** - **videofy.me** - **videomore** @@ -1023,7 +1022,6 @@ - **vier:videos** - **ViewLift** - **ViewLiftEmbed** - - **Viewster** - **Viidea** - **viki** - **viki:channel** @@ -1097,7 +1095,7 @@ - **WWE** - **XBef** - **XboxClips** - - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me + - **XFileShare**: XFileShare based sites: ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, 
VidBom, vidlo, VidLocker, VidShare, VUp, XVideoSharing - **XHamster** - **XHamsterEmbed** - **XHamsterUser** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 53889b7cb..39b355b9e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.10.16' +__version__ = '2019.10.22' From 0c2d10d225f61ac1fb534d8ed1788250401465b2 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 22 Oct 2019 17:49:50 +0100 Subject: [PATCH 600/785] [globo] handle alternative hash signing method --- youtube_dl/extractor/globo.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 9ad1d95fb..60d842d3a 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -132,18 +132,24 @@ class GloboIE(InfoExtractor): '%s returned error: %s' % (self.IE_NAME, message), expected=True) continue - assert security_hash[:2] in ('04', '14') - received_time = security_hash[3:13] - received_md5 = security_hash[24:] - - sign_time = compat_str(int(received_time) + 86400) + hash_code = security_hash[:2] padding = '%010d' % random.randint(1, 10000000000) + if hash_code in ('04', '14'): + received_time = security_hash[3:13] + received_md5 = security_hash[24:] + hash_prefix = security_hash[:23] + elif hash_code in ('02', '12', '03', '13'): + received_time = security_hash[2:12] + received_md5 = security_hash[22:] + padding += '1' + hash_prefix = '05' + security_hash[:22] - md5_data = (received_md5 + sign_time + padding + '0xAC10FD').encode() + padded_sign_time = compat_str(int(received_time) + 86400) + padding + md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode() signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=') - signed_hash = security_hash[:23] + sign_time + padding + signed_md5 - + signed_hash = hash_prefix + 
padded_sign_time + signed_md5 signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '') + if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'): formats.extend(self._extract_m3u8_formats( signed_url, resource_id, 'mp4', entry_protocol='m3u8_native', From 07154c793065bca816793186590d8d6461e07478 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 22 Oct 2019 17:53:47 +0100 Subject: [PATCH 601/785] [facebook] extract subtitles(closes #22777) --- youtube_dl/extractor/ceskatelevize.py | 2 ++ youtube_dl/extractor/facebook.py | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 1ec58f7d8..7cb4efb74 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -147,6 +147,8 @@ class CeskaTelevizeIE(InfoExtractor): is_live = item.get('type') == 'LIVE' formats = [] for format_id, stream_url in item.get('streamUrls', {}).items(): + if 'drmOnly=true' in stream_url: + continue if 'playerType=flash' in stream_url: stream_formats = self._extract_m3u8_formats( stream_url, playlist_id, 'mp4', 'm3u8_native', diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index a56f85c21..c723726b7 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -379,6 +379,7 @@ class FacebookIE(InfoExtractor): if not video_data: raise ExtractorError('Cannot parse data') + subtitles = {} formats = [] for f in video_data: format_id = f['stream_type'] @@ -402,6 +403,9 @@ class FacebookIE(InfoExtractor): if dash_manifest: formats.extend(self._parse_mpd_formats( compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest)))) + subtitles_src = f[0].get('subtitles_src') + if subtitles_src: + subtitles.setdefault('en', []).append({'url': subtitles_src}) if not formats: raise 
ExtractorError('Cannot find video formats') @@ -447,6 +451,7 @@ class FacebookIE(InfoExtractor): 'timestamp': timestamp, 'thumbnail': thumbnail, 'view_count': view_count, + 'subtitles': subtitles, } return webpage, info_dict From 162bcc68dc73706699b559fffdd8bed3db6643b9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 24 Oct 2019 12:53:33 +0100 Subject: [PATCH 602/785] [puhutv] improve extraction - fix subtitles extraction - transform HLS URLs to http URLs - improve metadata extraction --- youtube_dl/extractor/puhutv.py | 90 ++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index 5465e8ab7..fb704a3c4 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -25,21 +25,21 @@ class PuhuTVIE(InfoExtractor): _TESTS = [{ # film 'url': 'https://puhutv.com/sut-kardesler-izle', - 'md5': 'fbd8f2d8e7681f8bcd51b592475a6ae7', + 'md5': 'a347470371d56e1585d1b2c8dab01c96', 'info_dict': { 'id': '5085', 'display_id': 'sut-kardesler', 'ext': 'mp4', 'title': 'Süt Kardeşler', - 'description': 'md5:405fd024df916ca16731114eb18e511a', + 'description': 'md5:ca09da25b7e57cbb5a9280d6e48d17aa', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 4832.44, 'creator': 'Arzu Film', - 'timestamp': 1469778212, - 'upload_date': '20160729', + 'timestamp': 1561062602, + 'upload_date': '20190620', 'release_year': 1976, 'view_count': int, - 'tags': ['Aile', 'Komedi', 'Klasikler'], + 'tags': list, }, }, { # episode, geo restricted, bypassable with --geo-verification-proxy @@ -64,9 +64,10 @@ class PuhuTVIE(InfoExtractor): display_id)['data'] video_id = compat_str(info['id']) - title = info.get('name') or info['title']['name'] + show = info.get('title') or {} + title = info.get('name') or show['name'] if info.get('display_name'): - title = '%s %s' % (title, info.get('display_name')) + title = '%s %s' % (title, info['display_name']) try: 
videos = self._download_json( @@ -78,17 +79,36 @@ class PuhuTVIE(InfoExtractor): self.raise_geo_restricted() raise + urls = [] formats = [] + + def add_http_from_hls(m3u8_f): + http_url = m3u8_f['url'].replace('/hls/', '/mp4/').replace('/chunklist.m3u8', '.mp4') + if http_url != m3u8_f['url']: + f = m3u8_f.copy() + f.update({ + 'format_id': f['format_id'].replace('hls', 'http'), + 'protocol': 'http', + 'url': http_url, + }) + formats.append(f) + for video in videos['data']['videos']: media_url = url_or_none(video.get('url')) - if not media_url: + if not media_url or media_url in urls: continue + urls.append(media_url) + playlist = video.get('is_playlist') - if video.get('stream_type') == 'hls' and playlist is True: - formats.extend(self._extract_m3u8_formats( + if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url: + m3u8_formats = self._extract_m3u8_formats( media_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) + m3u8_id='hls', fatal=False) + for m3u8_f in m3u8_formats: + formats.append(m3u8_f) + add_http_from_hls(m3u8_f) continue + quality = int_or_none(video.get('quality')) f = { 'url': media_url, @@ -96,34 +116,29 @@ class PuhuTVIE(InfoExtractor): 'height': quality } video_format = video.get('video_format') - if video_format == 'hls' and playlist is False: + is_hls = (video_format == 'hls' or '/hls/' in media_url or '/chunklist.m3u8' in media_url) and playlist is False + if is_hls: format_id = 'hls' f['protocol'] = 'm3u8_native' elif video_format == 'mp4': format_id = 'http' - else: continue if quality: format_id += '-%sp' % quality f['format_id'] = format_id formats.append(f) + if is_hls: + add_http_from_hls(f) self._sort_formats(formats) - description = try_get( - info, lambda x: x['title']['description'], - compat_str) or info.get('description') - timestamp = unified_timestamp(info.get('created_at')) creator = try_get( - info, lambda x: x['title']['producer']['name'], compat_str) + 
show, lambda x: x['producer']['name'], compat_str) - duration = float_or_none( - try_get(info, lambda x: x['content']['duration_in_ms'], int), - scale=1000) - view_count = try_get(info, lambda x: x['content']['watch_count'], int) + content = info.get('content') or {} images = try_get( - info, lambda x: x['content']['images']['wide'], dict) or {} + content, lambda x: x['images']['wide'], dict) or {} thumbnails = [] for image_id, image_url in images.items(): if not isinstance(image_url, compat_str): @@ -137,14 +152,8 @@ class PuhuTVIE(InfoExtractor): }) thumbnails.append(t) - release_year = try_get(info, lambda x: x['title']['released_at'], int) - - season_number = int_or_none(info.get('season_number')) - season_id = str_or_none(info.get('season_id')) - episode_number = int_or_none(info.get('episode_number')) - tags = [] - for genre in try_get(info, lambda x: x['title']['genres'], list) or []: + for genre in show.get('genres') or []: if not isinstance(genre, dict): continue genre_name = genre.get('name') @@ -152,12 +161,11 @@ class PuhuTVIE(InfoExtractor): tags.append(genre_name) subtitles = {} - for subtitle in try_get( - info, lambda x: x['content']['subtitles'], list) or []: + for subtitle in content.get('subtitles') or []: if not isinstance(subtitle, dict): continue lang = subtitle.get('language') - sub_url = url_or_none(subtitle.get('url')) + sub_url = url_or_none(subtitle.get('url') or subtitle.get('file')) if not lang or not isinstance(lang, compat_str) or not sub_url: continue subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{ @@ -168,15 +176,15 @@ class PuhuTVIE(InfoExtractor): 'id': video_id, 'display_id': display_id, 'title': title, - 'description': description, - 'season_id': season_id, - 'season_number': season_number, - 'episode_number': episode_number, - 'release_year': release_year, - 'timestamp': timestamp, + 'description': info.get('description') or show.get('description'), + 'season_id': str_or_none(info.get('season_id')), + 'season_number': 
int_or_none(info.get('season_number')), + 'episode_number': int_or_none(info.get('episode_number')), + 'release_year': int_or_none(show.get('released_at')), + 'timestamp': unified_timestamp(info.get('created_at')), 'creator': creator, - 'view_count': view_count, - 'duration': duration, + 'view_count': int_or_none(content.get('watch_count')), + 'duration': float_or_none(content.get('duration_in_ms'), 1000), 'tags': tags, 'subtitles': subtitles, 'thumbnails': thumbnails, From 416c3ca7f53dab76b9e5ec46a0c0335698252c2d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 25 Oct 2019 19:27:28 +0100 Subject: [PATCH 603/785] [odnoklassniki] add support for Schemeless embed extraction --- youtube_dl/extractor/generic.py | 7 ++++--- youtube_dl/extractor/odnoklassniki.py | 9 +++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5ed952b29..f66cae0eb 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -118,6 +118,7 @@ from .foxnews import FoxNewsIE from .viqeo import ViqeoIE from .expressen import ExpressenIE from .zype import ZypeIE +from .odnoklassniki import OdnoklassnikiIE class GenericIE(InfoExtractor): @@ -2627,9 +2628,9 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url'), 'VK') # Look for embedded Odnoklassniki player - mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage) - if mobj is not None: - return self.url_result(mobj.group('url'), 'Odnoklassniki') + odnoklassniki_url = OdnoklassnikiIE._extract_url(webpage) + if odnoklassniki_url: + return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key()) # Look for embedded ivi player mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 
114b93c07..7ed9fac55 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import ( compat_etree_fromstring, @@ -121,6 +123,13 @@ class OdnoklassnikiIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage) + if mobj: + return mobj.group('url') + def _real_extract(self, url): start_time = int_or_none(compat_parse_qs( compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0]) From 3c989818e7dc7706da069312bbdd040165a97517 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 25 Oct 2019 19:35:07 +0100 Subject: [PATCH 604/785] [vk] improve extraction - add support for Odnoklassniki embeds - update tests - extract more video from user lists(closes #4470) - fix wall post audio extraction(closes #18332) - improve error detection(closes #22568) --- youtube_dl/extractor/vk.py | 329 +++++++++++++++++++------------------ 1 file changed, 173 insertions(+), 156 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 8b6dc0e24..c289fcad3 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -12,7 +12,6 @@ from ..utils import ( get_element_by_class, int_or_none, orderedSet, - remove_start, str_or_none, str_to_int, unescapeHTML, @@ -21,6 +20,7 @@ from ..utils import ( urlencode_postdata, ) from .dailymotion import DailymotionIE +from .odnoklassniki import OdnoklassnikiIE from .pladform import PladformIE from .vimeo import VimeoIE from .youtube import YoutubeIE @@ -60,6 +60,18 @@ class VKBaseIE(InfoExtractor): def _real_initialize(self): self._login() + def _download_payload(self, path, video_id, data, fatal=True): + data['al'] = 1 + code, payload = self._download_json( + 
'https://vk.com/%s.php' % path, video_id, + data=urlencode_postdata(data), fatal=fatal, + headers={'X-Requested-With': 'XMLHttpRequest'})['payload'] + if code == '3': + self.raise_login_required() + elif code == '8': + raise ExtractorError(clean_html(payload[0][1:-1]), expected=True) + return payload + class VKIE(VKBaseIE): IE_NAME = 'vk' @@ -96,7 +108,6 @@ class VKIE(VKBaseIE): }, { 'url': 'http://vk.com/video205387401_165548505', - 'md5': '6c0aeb2e90396ba97035b9cbde548700', 'info_dict': { 'id': '205387401_165548505', 'ext': 'mp4', @@ -110,18 +121,18 @@ class VKIE(VKBaseIE): }, { 'note': 'Embedded video', - 'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1', - 'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a', + 'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa', + 'md5': '7babad3b85ea2e91948005b1b8b0cb84', 'info_dict': { - 'id': '32194266_162925554', + 'id': '-77521_162222515', 'ext': 'mp4', - 'uploader': 'Vladimir Gavrin', - 'title': 'Lin Dan', - 'duration': 101, - 'upload_date': '20120730', - 'view_count': int, + 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*', + 'title': 'ProtivoGunz - Хуёвая песня', + 'duration': 195, + 'upload_date': '20120212', + 'timestamp': 1329049880, + 'uploader_id': '-77521', }, - 'skip': 'This video has been removed from public access.', }, { # VIDEO NOW REMOVED @@ -138,18 +149,19 @@ class VKIE(VKBaseIE): 'upload_date': '20121218', 'view_count': int, }, - 'skip': 'Requires vk account credentials', + 'skip': 'Removed', }, { 'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d', - 'md5': '4d7a5ef8cf114dfa09577e57b2993202', 'info_dict': { 'id': '-43215063_168067957', 'ext': 'mp4', - 'uploader': 'Киномания - лучшее из мира кино', + 'uploader': 'Bro Mazter', 'title': ' ', 'duration': 7291, 'upload_date': '20140328', + 'uploader_id': '223413403', + 'timestamp': 1396018030, }, 'skip': 'Requires vk account credentials', }, @@ -165,7 +177,7 @@ 
class VKIE(VKBaseIE): 'upload_date': '20140626', 'view_count': int, }, - 'skip': 'Only works from Russia', + 'skip': 'Removed', }, { # video (removed?) only available with list id @@ -247,6 +259,9 @@ class VKIE(VKBaseIE): 'uploader_id': '-387766', 'timestamp': 1475137527, }, + 'params': { + 'skip_download': True, + }, }, { # live stream, hls and rtmp links, most likely already finished live @@ -288,80 +303,94 @@ class VKIE(VKBaseIE): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') + mv_data = {} if video_id: - info_url = 'https://vk.com/al_video.php?act=show_inline&al=1&video=' + video_id + data = { + 'act': 'show_inline', + 'video': video_id, + } # Some videos (removed?) can only be downloaded with list id specified list_id = mobj.group('list_id') if list_id: - info_url += '&list=%s' % list_id + data['list'] = list_id + + payload = self._download_payload('al_video', video_id, data) + info_page = payload[1] + opts = payload[-1] + mv_data = opts.get('mvData') or {} + player = opts.get('player') or {} else: - info_url = 'http://vk.com/video_ext.php?' + mobj.group('embed_query') video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id')) - info_page = self._download_webpage(info_url, video_id) + info_page = self._download_webpage( + 'http://vk.com/video_ext.php?' 
+ mobj.group('embed_query'), video_id) - error_message = self._html_search_regex( - [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>', - r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'], - info_page, 'error message', default=None) - if error_message: - raise ExtractorError(error_message, expected=True) + error_message = self._html_search_regex( + [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>', + r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'], + info_page, 'error message', default=None) + if error_message: + raise ExtractorError(error_message, expected=True) - if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page): - raise ExtractorError( - 'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.', - expected=True) + if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page): + raise ExtractorError( + 'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.', + expected=True) - ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.' + ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.' - ERRORS = { - r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<': - ERROR_COPYRIGHT, + ERRORS = { + r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<': + ERROR_COPYRIGHT, - r'>The video .*? was removed from public access by request of the copyright holder.<': - ERROR_COPYRIGHT, + r'>The video .*? 
was removed from public access by request of the copyright holder.<': + ERROR_COPYRIGHT, - r'<!>Please log in or <': - 'Video %s is only available for registered users, ' - 'use --username and --password options to provide account credentials.', + r'<!>Please log in or <': + 'Video %s is only available for registered users, ' + 'use --username and --password options to provide account credentials.', - r'<!>Unknown error': - 'Video %s does not exist.', + r'<!>Unknown error': + 'Video %s does not exist.', - r'<!>Видео временно недоступно': - 'Video %s is temporarily unavailable.', + r'<!>Видео временно недоступно': + 'Video %s is temporarily unavailable.', - r'<!>Access denied': - 'Access denied to video %s.', + r'<!>Access denied': + 'Access denied to video %s.', - r'<!>Видеозапись недоступна, так как её автор был заблокирован.': - 'Video %s is no longer available, because its author has been blocked.', + r'<!>Видеозапись недоступна, так как её автор был заблокирован.': + 'Video %s is no longer available, because its author has been blocked.', - r'<!>This video is no longer available, because its author has been blocked.': - 'Video %s is no longer available, because its author has been blocked.', + r'<!>This video is no longer available, because its author has been blocked.': + 'Video %s is no longer available, because its author has been blocked.', - r'<!>This video is no longer available, because it has been deleted.': - 'Video %s is no longer available, because it has been deleted.', + r'<!>This video is no longer available, because it has been deleted.': + 'Video %s is no longer available, because it has been deleted.', - r'<!>The video .+? is not available in your region.': - 'Video %s is not available in your region.', - } + r'<!>The video .+? 
is not available in your region.': + 'Video %s is not available in your region.', + } - for error_re, error_msg in ERRORS.items(): - if re.search(error_re, info_page): - raise ExtractorError(error_msg % video_id, expected=True) + for error_re, error_msg in ERRORS.items(): + if re.search(error_re, info_page): + raise ExtractorError(error_msg % video_id, expected=True) + + player = self._parse_json(self._search_regex( + r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', + info_page, 'player params'), video_id) youtube_url = YoutubeIE._extract_url(info_page) if youtube_url: - return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) + return self.url_result(youtube_url, YoutubeIE.ie_key()) vimeo_url = VimeoIE._extract_url(url, info_page) if vimeo_url is not None: - return self.url_result(vimeo_url) + return self.url_result(vimeo_url, VimeoIE.ie_key()) pladform_url = PladformIE._extract_url(info_page) if pladform_url: - return self.url_result(pladform_url) + return self.url_result(pladform_url, PladformIE.ie_key()) m_rutube = re.search( r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page) @@ -374,6 +403,10 @@ class VKIE(VKBaseIE): if dailymotion_urls: return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key()) + odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page) + if odnoklassniki_url: + return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key()) + m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page) if m_opts: m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1)) @@ -383,38 +416,7 @@ class VKIE(VKBaseIE): opts_url = 'http:' + opts_url return self.url_result(opts_url) - # vars does not look to be served anymore since 24.10.2016 - data = self._parse_json( - self._search_regex( - r'var\s+vars\s*=\s*({.+?});', info_page, 'vars', default='{}'), - video_id, fatal=False) - - # <!json> is served instead - if not data: - data = self._parse_json( - self._search_regex( - 
[r'<!json>\s*({.+?})\s*<!>', r'<!json>\s*({.+})'], - info_page, 'json', default='{}'), - video_id) - if data: - data = data['player']['params'][0] - - if not data: - data = self._parse_json( - self._search_regex( - r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', info_page, - 'player params', default='{}'), - video_id) - if data: - data = data['params'][0] - - # <!--{...} - if not data: - data = self._parse_json( - self._search_regex( - r'<!--\s*({.+})', info_page, 'payload'), - video_id)['payload'][-1][-1]['player']['params'][0] - + data = player['params'][0] title = unescapeHTML(data['md_title']) # 2 = live @@ -463,12 +465,12 @@ class VKIE(VKBaseIE): 'title': title, 'thumbnail': data.get('jpg'), 'uploader': data.get('md_author'), - 'uploader_id': str_or_none(data.get('author_id')), - 'duration': data.get('duration'), + 'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')), + 'duration': int_or_none(data.get('duration') or mv_data.get('duration')), 'timestamp': timestamp, 'view_count': view_count, - 'like_count': int_or_none(data.get('liked')), - 'dislike_count': int_or_none(data.get('nolikes')), + 'like_count': int_or_none(mv_data.get('likes')), + 'comment_count': int_or_none(mv_data.get('commcount')), 'is_live': is_live, } @@ -482,7 +484,6 @@ class VKUserVideosIE(VKBaseIE): 'url': 'http://vk.com/videos205387401', 'info_dict': { 'id': '205387401', - 'title': "Tom Cruise's Videos", }, 'playlist_mincount': 4, }, { @@ -498,22 +499,25 @@ class VKUserVideosIE(VKBaseIE): 'url': 'http://new.vk.com/videos205387401', 'only_matching': True, }] + _VIDEO = collections.namedtuple( + 'Video', ['owner_id', 'id', 'thumb', 'title', 'flags', 'duration', 'hash', 'moder_acts', 'owner', 'date', 'views', 'platform', 'blocked', 'music_video_meta']) def _real_extract(self, url): page_id = self._match_id(url) - webpage = self._download_webpage(url, page_id) + l = self._download_payload('al_video', page_id, { + 'act': 'load_videos_silent', + 'oid': page_id, + 
})[0]['']['list'] - entries = [ - self.url_result( - 'http://vk.com/video' + video_id, 'VK', video_id=video_id) - for video_id in orderedSet(re.findall(r'href="/video(-?[0-9_]+)"', webpage))] + entries = [] + for video in l: + v = self._VIDEO._make(video) + video_id = '%d_%d' % (v.owner_id, v.id) + entries.append(self.url_result( + 'http://vk.com/video' + video_id, 'VK', video_id=video_id)) - title = unescapeHTML(self._search_regex( - r'<title>\s*([^<]+?)\s+\|\s+\d+\s+videos', - webpage, 'title', default=page_id)) - - return self.playlist_result(entries, page_id, title) + return self.playlist_result(entries, page_id) class VKWallPostIE(VKBaseIE): @@ -523,15 +527,15 @@ class VKWallPostIE(VKBaseIE): # public page URL, audio playlist 'url': 'https://vk.com/bs.official?w=wall-23538238_35', 'info_dict': { - 'id': '23538238_35', - 'title': 'Black Shadow - Wall post 23538238_35', + 'id': '-23538238_35', + 'title': 'Black Shadow - Wall post -23538238_35', 'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c', }, 'playlist': [{ 'md5': '5ba93864ec5b85f7ce19a9af4af080f6', 'info_dict': { 'id': '135220665_111806521', - 'ext': 'mp3', + 'ext': 'mp4', 'title': 'Black Shadow - Слепое Верование', 'duration': 370, 'uploader': 'Black Shadow', @@ -542,18 +546,16 @@ class VKWallPostIE(VKBaseIE): 'md5': '4cc7e804579122b17ea95af7834c9233', 'info_dict': { 'id': '135220665_111802303', - 'ext': 'mp3', + 'ext': 'mp4', 'title': 'Black Shadow - Война - Негасимое Бездны Пламя!', 'duration': 423, 'uploader': 'Black Shadow', 'artist': 'Black Shadow', 'track': 'Война - Негасимое Бездны Пламя!', }, - 'params': { - 'skip_download': True, - }, }], 'params': { + 'skip_download': True, 'usenetrc': True, }, 'skip': 'Requires vk account credentials', @@ -562,7 +564,7 @@ class VKWallPostIE(VKBaseIE): 'url': 'https://vk.com/wall85155021_6319', 'info_dict': { 'id': '85155021_6319', - 'title': 'Sergey Gorbunov - Wall post 85155021_6319', + 'title': 'Сергей Горбунов - Wall post 85155021_6319', }, 
'playlist_count': 1, 'params': { @@ -578,58 +580,73 @@ class VKWallPostIE(VKBaseIE): 'url': 'https://m.vk.com/wall-23538238_35', 'only_matching': True, }] + _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/=' + _AUDIO = collections.namedtuple( + 'Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads', 'subtitle', 'main_artists', 'feat_artists', 'album', 'track_code', 'restriction', 'album_part', 'new_stats', 'access_key']) + + def _decode(self, enc): + dec = '' + e = n = 0 + for c in enc: + r = self._BASE64_CHARS.index(c) + cond = n % 4 + e = 64 * e + r if cond else r + n += 1 + if cond: + dec += chr(255 & e >> (-2 * n & 6)) + return dec + + def _unmask_url(self, mask_url, vk_id): + if 'audio_api_unavailable' in mask_url: + extra = mask_url.split('?extra=')[1].split('#') + func, base = self._decode(extra[1]).split(chr(11)) + assert (func == 'i') + mask_url = list(self._decode(extra[0])) + url_len = len(mask_url) + indexes = [None] * url_len + index = int(base) ^ vk_id + for n in range(url_len - 1, -1, -1): + index = (url_len * (n + 1) ^ index + n) % url_len + indexes[n] = index + for n in range(1, url_len): + c = mask_url[n] + index = indexes[url_len - 1 - n] + mask_url[n] = mask_url[index] + mask_url[index] = c + mask_url = ''.join(mask_url) + return mask_url def _real_extract(self, url): post_id = self._match_id(url) - wall_url = 'https://vk.com/wall%s' % post_id - - post_id = remove_start(post_id, '-') - - webpage = self._download_webpage(wall_url, post_id) - - error = self._html_search_regex( - r'>Error</div>\s*<div[^>]+class=["\']body["\'][^>]*>([^<]+)', - webpage, 'error', default=None) - if error: - raise ExtractorError('VK said: %s' % error, expected=True) + webpage = self._download_payload('wkview', post_id, { + 'act': 'show', + 'w': 'wall' + post_id, + })[1] description = 
clean_html(get_element_by_class('wall_post_text', webpage)) uploader = clean_html(get_element_by_class('author', webpage)) - thumbnail = self._og_search_thumbnail(webpage) entries = [] - audio_ids = re.findall(r'data-full-id=["\'](\d+_\d+)', webpage) - if audio_ids: - al_audio = self._download_webpage( - 'https://vk.com/al_audio.php', post_id, - note='Downloading audio info', fatal=False, - data=urlencode_postdata({ - 'act': 'reload_audio', - 'al': '1', - 'ids': ','.join(audio_ids) - })) - if al_audio: - Audio = collections.namedtuple( - 'Audio', ['id', 'user_id', 'url', 'track', 'artist', 'duration']) - audios = self._parse_json( - self._search_regex( - r'<!json>(.+?)<!>', al_audio, 'audios', default='[]'), - post_id, fatal=False, transform_source=unescapeHTML) - if isinstance(audios, list): - for audio in audios: - a = Audio._make(audio[:6]) - entries.append({ - 'id': '%s_%s' % (a.user_id, a.id), - 'url': a.url, - 'title': '%s - %s' % (a.artist, a.track) if a.artist and a.track else a.id, - 'thumbnail': thumbnail, - 'duration': a.duration, - 'uploader': uploader, - 'artist': a.artist, - 'track': a.track, - }) + for audio in re.findall(r'data-audio="([^"]+)', webpage): + audio = self._parse_json(unescapeHTML(audio), post_id) + a = self._AUDIO._make(audio) + if not a.url: + continue + title = unescapeHTML(a.title) + entries.append({ + 'id': '%s_%s' % (a.owner_id, a.id), + 'url': self._unmask_url(a.url, a.ads['vk_id']), + 'title': '%s - %s' % (a.performer, title) if a.performer else title, + 'thumbnail': a.cover_url.split(',') if a.cover_url else None, + 'duration': a.duration, + 'uploader': uploader, + 'artist': a.performer, + 'track': title, + 'ext': 'mp4', + 'protocol': 'm3u8', + }) for video in re.finditer( r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage): From 42cd0824b3975e6ce500d8cecd60e1fc077a758b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 26 Oct 2019 00:06:05 +0100 Subject: [PATCH 605/785] [vk] remove 
assert statement --- youtube_dl/extractor/vk.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index c289fcad3..4c8ca4f41 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -600,7 +600,6 @@ class VKWallPostIE(VKBaseIE): if 'audio_api_unavailable' in mask_url: extra = mask_url.split('?extra=')[1].split('#') func, base = self._decode(extra[1]).split(chr(11)) - assert (func == 'i') mask_url = list(self._decode(extra[0])) url_len = len(mask_url) indexes = [None] * url_len From 235dbb434bfa724718c37d8af0a61baf93b775be Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 26 Oct 2019 14:57:42 +0100 Subject: [PATCH 606/785] [discoverynetworks] add support for dplay.co.uk --- youtube_dl/extractor/discoverynetworks.py | 63 +++++++---------------- 1 file changed, 19 insertions(+), 44 deletions(-) diff --git a/youtube_dl/extractor/discoverynetworks.py b/youtube_dl/extractor/discoverynetworks.py index fba1ef221..607a54948 100644 --- a/youtube_dl/extractor/discoverynetworks.py +++ b/youtube_dl/extractor/discoverynetworks.py @@ -3,63 +3,38 @@ from __future__ import unicode_literals import re -from .brightcove import BrightcoveLegacyIE from .dplay import DPlayIE -from ..compat import ( - compat_parse_qs, - compat_urlparse, -) -from ..utils import smuggle_url class DiscoveryNetworksDeIE(DPlayIE): - _VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>discovery|tlc|animalplanet|dmax)\.de/ - (?: - .*\#(?P<id>\d+)| - (?:[^/]+/)*videos/(?P<display_id>[^/?#]+)| - programme/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+) - )''' + _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)' _TESTS = [{ - 'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001', + 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100', 'info_dict': { - 'id': 
'3235167922001', + 'id': '78867', 'ext': 'mp4', - 'title': 'Breaking Amish: Die Welt da draußen', - 'description': ( - 'Vier Amische und eine Mennonitin wagen in New York' - ' den Sprung in ein komplett anderes Leben. Begleitet sie auf' - ' ihrem spannenden Weg.'), - 'timestamp': 1396598084, - 'upload_date': '20140404', - 'uploader_id': '1659832546', + 'title': 'Die Welt da draußen', + 'description': 'md5:61033c12b73286e409d99a41742ef608', + 'timestamp': 1554069600, + 'upload_date': '20190331', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, }, }, { - 'url': 'http://www.dmax.de/programme/storage-hunters-uk/videos/storage-hunters-uk-episode-6/', + 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316', 'only_matching': True, }, { - 'url': 'http://www.discovery.de/#5332316765001', + 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B', 'only_matching': True, }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s' def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - alternate_id = mobj.group('alternate_id') - if alternate_id: - self._initialize_geo_bypass({ - 'countries': ['DE'], - }) - return self._get_disco_api_info( - url, '%s/%s' % (mobj.group('programme'), alternate_id), - 'sonic-eu1-prod.disco-api.com', mobj.group('site') + 'de') - brightcove_id = mobj.group('id') - if not brightcove_id: - title = mobj.group('title') - webpage = self._download_webpage(url, title) - brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) - brightcove_id = compat_parse_qs(compat_urlparse.urlparse( - brightcove_legacy_url).query)['@videoPlayer'][0] - return self.url_result(smuggle_url( - self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['DE']}), - 'BrightcoveNew', brightcove_id) + domain, programme, alternate_id = re.match(self._VALID_URL, 
url).groups() + country = 'GB' if domain == 'dplay.co.uk' else 'DE' + realm = 'questuk' if country == 'GB' else domain.replace('.', '') + return self._get_disco_api_info( + url, '%s/%s' % (programme, alternate_id), + 'sonic-eu1-prod.disco-api.com', realm, country) From 0b98f3a7517601b7d2aabc789997016b9c3c24f2 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 26 Oct 2019 14:58:29 +0100 Subject: [PATCH 607/785] [dplay] improve extraction - add support for dplay.fi, dplay.jp and es.dplay.com(closes #16969) - fix it.dplay.com extraction(closes #22826) - update tests - extract creator, tags and thumbnails - handle playback API call errors --- youtube_dl/extractor/dplay.py | 397 ++++++++++------------------- youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 133 insertions(+), 269 deletions(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index ebf59512c..d9c3d59cd 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -1,74 +1,68 @@ # coding: utf-8 from __future__ import unicode_literals -import json import re -import time from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_str, - compat_urlparse, -) +from ..compat import compat_HTTPError from ..utils import ( determine_ext, ExtractorError, float_or_none, int_or_none, - remove_end, - try_get, - unified_strdate, unified_timestamp, - update_url_query, - urljoin, - USER_AGENTS, ) class DPlayIE(InfoExtractor): - _VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:video(?:er|s)/)?(?P<id>[^/]+/[^/?#]+)' + _VALID_URL = r'''(?x)https?:// + (?P<domain> + (?:www\.)?(?P<host>dplay\.(?P<country>dk|fi|jp|se|no))| + (?P<subdomain_country>es|it)\.dplay\.com + )/[^/]+/(?P<id>[^/]+/[^/?#]+)''' _TESTS = [{ # non geo restricted, via secure api, unsigned download hls URL - 'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/', 
+ 'url': 'https://www.dplay.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101', 'info_dict': { - 'id': '3172', - 'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet', + 'id': '13628', + 'display_id': 'nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101', 'ext': 'mp4', 'title': 'Svensken lär sig njuta av livet', 'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8', - 'duration': 2650, - 'timestamp': 1365454320, + 'duration': 2649.856, + 'timestamp': 1365453720, 'upload_date': '20130408', - 'creator': 'Kanal 5 (Home)', + 'creator': 'Kanal 5', 'series': 'Nugammalt - 77 händelser som format Sverige', 'season_number': 1, 'episode_number': 1, - 'age_limit': 0, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, }, }, { # geo restricted, via secure api, unsigned download hls URL - 'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/', + 'url': 'http://www.dplay.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster', 'info_dict': { - 'id': '70816', - 'display_id': 'mig-og-min-mor/season-6-episode-12', + 'id': '104465', + 'display_id': 'ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster', 'ext': 'mp4', - 'title': 'Episode 12', - 'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90', - 'duration': 2563, - 'timestamp': 1429696800, - 'upload_date': '20150422', - 'creator': 'Kanal 4 (Home)', - 'series': 'Mig og min mor', - 'season_number': 6, - 'episode_number': 12, - 'age_limit': 0, + 'title': 'Ted Bundy: Mind Of A Monster', + 'description': 'md5:8b780f6f18de4dae631668b8a9637995', + 'duration': 5290.027, + 'timestamp': 1570694400, + 'upload_date': '20191010', + 'creator': 'ID - Investigation Discovery', + 'series': 'Ted Bundy: Mind Of A Monster', + 'season_number': 1, + 'episode_number': 1, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, }, - }, { - # geo restricted, via direct 
unsigned hls URL - 'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/', - 'only_matching': True, }, { # disco-api 'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7', @@ -89,19 +83,59 @@ class DPlayIE(InfoExtractor): 'format': 'bestvideo', 'skip_download': True, }, + 'skip': 'Available for Premium users', }, { - - 'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3', + 'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/', + 'md5': '2b808ffb00fc47b884a172ca5d13053c', + 'info_dict': { + 'id': '6918', + 'display_id': 'biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij', + 'ext': 'mp4', + 'title': 'Luigi Di Maio: la psicosi di Stanislawskij', + 'description': 'md5:3c7a4303aef85868f867a26f5cc14813', + 'thumbnail': r're:^https?://.*\.jpe?g', + 'upload_date': '20160524', + 'timestamp': 1464076800, + 'series': 'Biografie imbarazzanti', + 'season_number': 1, + 'episode': 'Episode 1', + 'episode_number': 1, + }, + }, { + 'url': 'https://es.dplay.com/dmax/la-fiebre-del-oro/temporada-8-episodio-1/', + 'info_dict': { + 'id': '21652', + 'display_id': 'la-fiebre-del-oro/temporada-8-episodio-1', + 'ext': 'mp4', + 'title': 'Episodio 1', + 'description': 'md5:b9dcff2071086e003737485210675f69', + 'thumbnail': r're:^https?://.*\.png', + 'upload_date': '20180709', + 'timestamp': 1531173540, + 'series': 'La fiebre del oro', + 'season_number': 8, + 'episode': 'Episode 1', + 'episode_number': 1, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.dplay.fi/videot/shifting-gears-with-aaron-kaufman/episode-16', 'only_matching': True, }, { - 'url': 'https://www.dplay.se/videos/sofias-anglar/sofias-anglar-1001', + 'url': 'https://www.dplay.jp/video/gold-rush/24086', 'only_matching': True, }] - def _get_disco_api_info(self, url, display_id, disco_host, realm): - disco_base = 'https://' + disco_host + def _get_disco_api_info(self, url, display_id, 
disco_host, realm, country): + geo_countries = [country.upper()] + self._initialize_geo_bypass({ + 'countries': geo_countries, + }) + disco_base = 'https://%s/' % disco_host token = self._download_json( - '%s/token' % disco_base, display_id, 'Downloading token', + disco_base + 'token', display_id, 'Downloading token', query={ 'realm': realm, })['data']['attributes']['token'] @@ -110,17 +144,30 @@ class DPlayIE(InfoExtractor): 'Authorization': 'Bearer ' + token, } video = self._download_json( - '%s/content/videos/%s' % (disco_base, display_id), display_id, + disco_base + 'content/videos/' + display_id, display_id, headers=headers, query={ - 'include': 'show' + 'include': 'images,primaryChannel,show,tags' }) video_id = video['data']['id'] info = video['data']['attributes'] - title = info['name'] + title = info['name'].strip() formats = [] - for format_id, format_dict in self._download_json( - '%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id), - display_id, headers=headers)['data']['attributes']['streaming'].items(): + try: + streaming = self._download_json( + disco_base + 'playback/videoPlaybackInfo/' + video_id, + display_id, headers=headers)['data']['attributes']['streaming'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + info = self._parse_json(e.cause.read().decode('utf-8'), display_id) + error = info['errors'][0] + error_code = error.get('code') + if error_code == 'access.denied.geoblocked': + self.raise_geo_restricted(countries=geo_countries) + elif error_code == 'access.denied.missingpackage': + self.raise_login_required() + raise ExtractorError(info['errors'][0]['detail'], expected=True) + raise + for format_id, format_dict in streaming.items(): if not isinstance(format_dict, dict): continue format_url = format_dict.get('url') @@ -142,235 +189,55 @@ class DPlayIE(InfoExtractor): }) self._sort_formats(formats) - series = None - try: - included = video.get('included') - if isinstance(included, 
list): - show = next(e for e in included if e.get('type') == 'show') - series = try_get( - show, lambda x: x['attributes']['name'], compat_str) - except StopIteration: - pass + creator = series = None + tags = [] + thumbnails = [] + included = video.get('included') or [] + if isinstance(included, list): + for e in included: + attributes = e.get('attributes') + if not attributes: + continue + e_type = e.get('type') + if e_type == 'channel': + creator = attributes.get('name') + elif e_type == 'image': + src = attributes.get('src') + if src: + thumbnails.append({ + 'url': src, + 'width': int_or_none(attributes.get('width')), + 'height': int_or_none(attributes.get('height')), + }) + if e_type == 'show': + series = attributes.get('name') + elif e_type == 'tag': + name = attributes.get('name') + if name: + tags.append(name) return { 'id': video_id, 'display_id': display_id, 'title': title, 'description': info.get('description'), - 'duration': float_or_none( - info.get('videoDuration'), scale=1000), + 'duration': float_or_none(info.get('videoDuration'), 1000), 'timestamp': unified_timestamp(info.get('publishStart')), 'series': series, 'season_number': int_or_none(info.get('seasonNumber')), 'episode_number': int_or_none(info.get('episodeNumber')), 'age_limit': int_or_none(info.get('minimum_age')), + 'creator': creator, + 'tags': tags, + 'thumbnails': thumbnails, 'formats': formats, } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) display_id = mobj.group('id') - domain = mobj.group('domain') - - self._initialize_geo_bypass({ - 'countries': [mobj.group('country').upper()], - }) - - webpage = self._download_webpage(url, display_id) - - video_id = self._search_regex( - r'data-video-id=["\'](\d+)', webpage, 'video id', default=None) - - if not video_id: - host = mobj.group('host') - return self._get_disco_api_info( - url, display_id, 'disco-api.' 
+ host, host.replace('.', '')) - - info = self._download_json( - 'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id), - video_id)['data'][0] - - title = info['title'] - - PROTOCOLS = ('hls', 'hds') - formats = [] - - def extract_formats(protocol, manifest_url): - if protocol == 'hls': - m3u8_formats = self._extract_m3u8_formats( - manifest_url, video_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False) - # Sometimes final URLs inside m3u8 are unsigned, let's fix this - # ourselves. Also fragments' URLs are only served signed for - # Safari user agent. - query = compat_urlparse.parse_qs(compat_urlparse.urlparse(manifest_url).query) - for m3u8_format in m3u8_formats: - m3u8_format.update({ - 'url': update_url_query(m3u8_format['url'], query), - 'http_headers': { - 'User-Agent': USER_AGENTS['Safari'], - }, - }) - formats.extend(m3u8_formats) - elif protocol == 'hds': - formats.extend(self._extract_f4m_formats( - manifest_url + '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0', - video_id, f4m_id=protocol, fatal=False)) - - domain_tld = domain.split('.')[-1] - if domain_tld in ('se', 'dk', 'no'): - for protocol in PROTOCOLS: - # Providing dsc-geo allows to bypass geo restriction in some cases - self._set_cookie( - 'secure.dplay.%s' % domain_tld, 'dsc-geo', - json.dumps({ - 'countryCode': domain_tld.upper(), - 'expiry': (time.time() + 20 * 60) * 1000, - })) - stream = self._download_json( - 'https://secure.dplay.%s/secure/api/v2/user/authorization/stream/%s?stream_type=%s' - % (domain_tld, video_id, protocol), video_id, - 'Downloading %s stream JSON' % protocol, fatal=False) - if stream and stream.get(protocol): - extract_formats(protocol, stream[protocol]) - - # The last resort is to try direct unsigned hls/hds URLs from info dictionary. - # Sometimes this does work even when secure API with dsc-geo has failed (e.g. - # http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/). 
- if not formats: - for protocol in PROTOCOLS: - if info.get(protocol): - extract_formats(protocol, info[protocol]) - - self._sort_formats(formats) - - subtitles = {} - for lang in ('se', 'sv', 'da', 'nl', 'no'): - for format_id in ('web_vtt', 'vtt', 'srt'): - subtitle_url = info.get('subtitles_%s_%s' % (lang, format_id)) - if subtitle_url: - subtitles.setdefault(lang, []).append({'url': subtitle_url}) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': info.get('video_metadata_longDescription'), - 'duration': int_or_none(info.get('video_metadata_length'), scale=1000), - 'timestamp': int_or_none(info.get('video_publish_date')), - 'creator': info.get('video_metadata_homeChannel'), - 'series': info.get('video_metadata_show'), - 'season_number': int_or_none(info.get('season')), - 'episode_number': int_or_none(info.get('episode')), - 'age_limit': int_or_none(info.get('minimum_age')), - 'formats': formats, - 'subtitles': subtitles, - } - - -class DPlayItIE(InfoExtractor): - _VALID_URL = r'https?://it\.dplay\.com/[^/]+/[^/]+/(?P<id>[^/?#]+)' - _GEO_COUNTRIES = ['IT'] - _TEST = { - 'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/', - 'md5': '2b808ffb00fc47b884a172ca5d13053c', - 'info_dict': { - 'id': '6918', - 'display_id': 'luigi-di-maio-la-psicosi-di-stanislawskij', - 'ext': 'mp4', - 'title': 'Biografie imbarazzanti: Luigi Di Maio: la psicosi di Stanislawskij', - 'description': 'md5:3c7a4303aef85868f867a26f5cc14813', - 'thumbnail': r're:^https?://.*\.jpe?g', - 'upload_date': '20160524', - 'series': 'Biografie imbarazzanti', - 'season_number': 1, - 'episode': 'Luigi Di Maio: la psicosi di Stanislawskij', - 'episode_number': 1, - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - title = remove_end(self._og_search_title(webpage), ' | Dplay') - - video_id = None - - info = self._search_regex( - 
r'playback_json\s*:\s*JSON\.parse\s*\(\s*("(?:\\.|[^"\\])+?")', - webpage, 'playback JSON', default=None) - if info: - for _ in range(2): - info = self._parse_json(info, display_id, fatal=False) - if not info: - break - else: - video_id = try_get(info, lambda x: x['data']['id']) - - if not info: - info_url = self._search_regex( - (r'playback_json_url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', - r'url\s*[:=]\s*["\'](?P<url>(?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)'), - webpage, 'info url', group='url') - - info_url = urljoin(url, info_url) - video_id = info_url.rpartition('/')[-1] - - try: - info = self._download_json( - info_url, display_id, headers={ - 'Authorization': 'Bearer %s' % self._get_cookies(url).get( - 'dplayit_token').value, - 'Referer': url, - }) - if isinstance(info, compat_str): - info = self._parse_json(info, display_id) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403): - info = self._parse_json(e.cause.read().decode('utf-8'), display_id) - error = info['errors'][0] - if error.get('code') == 'access.denied.geoblocked': - self.raise_geo_restricted( - msg=error.get('detail'), countries=self._GEO_COUNTRIES) - raise ExtractorError(info['errors'][0]['detail'], expected=True) - raise - - hls_url = info['data']['attributes']['streaming']['hls']['url'] - - formats = self._extract_m3u8_formats( - hls_url, display_id, ext='mp4', entry_protocol='m3u8_native', - m3u8_id='hls') - self._sort_formats(formats) - - series = self._html_search_regex( - r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>', - webpage, 'series', fatal=False) - episode = self._search_regex( - r'<p[^>]+class=["\'].*?\bdesc_ep\b.*?["\'][^>]*>\s*<br/>\s*<b>([^<]+)', - webpage, 'episode', fatal=False) - - mobj = re.search( - r'(?s)<span[^>]+class=["\']dates["\'][^>]*>.+?\bS\.(?P<season_number>\d+)\s+E\.(?P<episode_number>\d+)\s*-\s*(?P<upload_date>\d{2}/\d{2}/\d{4})', - webpage) - if mobj: - season_number = 
int(mobj.group('season_number')) - episode_number = int(mobj.group('episode_number')) - upload_date = unified_strdate(mobj.group('upload_date')) - else: - season_number = episode_number = upload_date = None - - return { - 'id': compat_str(video_id or display_id), - 'display_id': display_id, - 'title': title, - 'description': self._og_search_description(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), - 'series': series, - 'season_number': season_number, - 'episode': episode, - 'episode_number': episode_number, - 'upload_date': upload_date, - 'formats': formats, - } + domain = mobj.group('domain').lstrip('www.') + country = mobj.group('country') or mobj.group('subdomain_country') + host = 'disco-api.' + domain if domain.startswith('dplay.') else 'eu2-prod.disco-api.com' + return self._get_disco_api_info( + url, display_id, host, 'dplay' + country, country) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 1db21529f..a8fe0de1a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -277,10 +277,7 @@ from .douyutv import ( DouyuShowIE, DouyuTVIE, ) -from .dplay import ( - DPlayIE, - DPlayItIE, -) +from .dplay import DPlayIE from .dreisat import DreiSatIE from .drbonanza import DRBonanzaIE from .drtuber import DrTuberIE From 548c395716b1d5aa215e526fcb052a03926c1573 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 27 Oct 2019 17:52:46 +0100 Subject: [PATCH 608/785] [soundcloud] improve extraction - improve format extraction(closes #22123) - extract uploader_id and uploader_url(closes #21916) - extract all known thumbnails(closes #19071)(closes #20659) - fix extration for private playlists(closes #20976) - add support for playlist embeds(#20976) - skip preview formats(closes #22806) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 6 +- youtube_dl/extractor/soundcloud.py | 497 ++++++++++++++--------------- 3 files changed, 248 
insertions(+), 256 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a8fe0de1a..388c1ebe6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1033,6 +1033,7 @@ from .snotr import SnotrIE from .sohu import SohuIE from .sonyliv import SonyLIVIE from .soundcloud import ( + SoundcloudEmbedIE, SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index f66cae0eb..1c0780e98 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -80,7 +80,7 @@ from .theplatform import ThePlatformIE from .kaltura import KalturaIE from .eagleplatform import EaglePlatformIE from .facebook import FacebookIE -from .soundcloud import SoundcloudIE +from .soundcloud import SoundcloudEmbedIE from .tunein import TuneInBaseIE from .vbox7 import Vbox7IE from .dbtv import DBTVIE @@ -2749,9 +2749,9 @@ class GenericIE(InfoExtractor): return self.url_result(myvi_url) # Look for embedded soundcloud player - soundcloud_urls = SoundcloudIE._extract_urls(webpage) + soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage) if soundcloud_urls: - return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key()) + return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML) # Look for tunein player tunein_urls = TuneInBaseIE._extract_urls(webpage) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 05538f3d6..875b9d887 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -11,14 +11,13 @@ from .common import ( from ..compat import ( compat_str, compat_urlparse, - compat_urllib_parse_urlencode, ) from ..utils import ( ExtractorError, float_or_none, + HEADRequest, int_or_none, KNOWN_EXTENSIONS, - merge_dicts, mimetype2ext, str_or_none, try_get, @@ -28,6 
+27,20 @@ from ..utils import ( ) +class SoundcloudEmbedIE(InfoExtractor): + _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?url=(?P<id>.*)' + + @staticmethod + def _extract_urls(webpage): + return [m.group('url') for m in re.finditer( + r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1', + webpage)] + + def _real_extract(self, url): + return self.url_result(compat_urlparse.parse_qs( + compat_urlparse.urlparse(url).query)['url'][0]) + + class SoundcloudIE(InfoExtractor): """Information extractor for soundcloud.com To access the media, the uid of the song and a stream token @@ -44,9 +57,8 @@ class SoundcloudIE(InfoExtractor): (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) (?P<title>[\w\d-]+)/? (?P<token>[^?]+?)?(?:[?].*)?$) - |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) + |(?:api(?:-v2)?\.soundcloud\.com/tracks/(?P<track_id>\d+) (?:/?\?secret_token=(?P<secret_token>[^&]+))?) - |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*) ) ''' IE_NAME = 'soundcloud' @@ -60,6 +72,7 @@ class SoundcloudIE(InfoExtractor): 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d', 'uploader': 'E.T. 
ExTerrestrial Music', + 'uploader_id': '1571244', 'timestamp': 1349920598, 'upload_date': '20121011', 'duration': 143.216, @@ -79,6 +92,7 @@ class SoundcloudIE(InfoExtractor): 'title': 'Goldrushed', 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', 'uploader': 'The Royal Concept', + 'uploader_id': '9615865', 'timestamp': 1337635207, 'upload_date': '20120521', 'duration': 30, @@ -92,6 +106,7 @@ class SoundcloudIE(InfoExtractor): # rtmp 'skip_download': True, }, + 'skip': 'Preview', }, # private link { @@ -103,6 +118,7 @@ class SoundcloudIE(InfoExtractor): 'title': 'Youtube - Dl Test Video \'\' Ä↭', 'description': 'test chars: \"\'/\\ä↭', 'uploader': 'jaimeMF', + 'uploader_id': '69767071', 'timestamp': 1386604920, 'upload_date': '20131209', 'duration': 9.927, @@ -123,6 +139,7 @@ class SoundcloudIE(InfoExtractor): 'title': 'Youtube - Dl Test Video \'\' Ä↭', 'description': 'test chars: \"\'/\\ä↭', 'uploader': 'jaimeMF', + 'uploader_id': '69767071', 'timestamp': 1386604920, 'upload_date': '20131209', 'duration': 9.927, @@ -143,6 +160,7 @@ class SoundcloudIE(InfoExtractor): 'title': 'Bus Brakes', 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66', 'uploader': 'oddsamples', + 'uploader_id': '73680509', 'timestamp': 1389232924, 'upload_date': '20140109', 'duration': 17.346, @@ -163,6 +181,7 @@ class SoundcloudIE(InfoExtractor): 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366', 'uploader': 'Ori Uplift Music', + 'uploader_id': '12563093', 'timestamp': 1504206263, 'upload_date': '20170831', 'duration': 7449.096, @@ -183,6 +202,7 @@ class SoundcloudIE(InfoExtractor): 'title': 'Sideways (Prod. 
Mad Real)', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 'uploader': 'garyvee', + 'uploader_id': '2366352', 'timestamp': 1488152409, 'upload_date': '20170226', 'duration': 207.012, @@ -207,6 +227,7 @@ class SoundcloudIE(InfoExtractor): 'title': 'Mezzo Valzer', 'description': 'md5:4138d582f81866a530317bae316e8b61', 'uploader': 'Giovanni Sarani', + 'uploader_id': '3352531', 'timestamp': 1551394171, 'upload_date': '20190228', 'duration': 180.157, @@ -221,114 +242,81 @@ class SoundcloudIE(InfoExtractor): } ] + _API_BASE = 'https://api.soundcloud.com/' + _API_V2_BASE = 'https://api-v2.soundcloud.com/' + _BASE_URL = 'https://soundcloud.com/' _CLIENT_ID = 'BeGVhOrGmfboy1LtiHTQF6Ejpt9ULJCI' + _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg' - @staticmethod - def _extract_urls(webpage): - return [m.group('url') for m in re.finditer( - r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1', - webpage)] + _ARTWORK_MAP = { + 'mini': 16, + 'tiny': 20, + 'small': 32, + 'badge': 47, + 't67x67': 67, + 'large': 100, + 't300x300': 300, + 'crop': 400, + 't500x500': 500, + 'original': 0, + } @classmethod def _resolv_url(cls, url): - return 'https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID + return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url + '&client_id=' + cls._CLIENT_ID - def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None): + def _extract_info_dict(self, info, full_title=None, secret_token=None, version=2): track_id = compat_str(info['id']) title = info['title'] - name = full_title or track_id - if quiet: - self.report_extraction(name) - thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url') - if isinstance(thumbnail, compat_str): - thumbnail = thumbnail.replace('-large', '-t500x500') - username = try_get(info, lambda x: x['user']['username'], compat_str) - - def extract_count(key): - return int_or_none(info.get('%s_count' % key)) - - like_count = 
extract_count('favoritings') - if like_count is None: - like_count = extract_count('likes') - - result = { - 'id': track_id, - 'uploader': username, - 'timestamp': unified_timestamp(info.get('created_at')), - 'title': title, - 'description': info.get('description'), - 'thumbnail': thumbnail, - 'duration': float_or_none(info.get('duration'), 1000), - 'webpage_url': info.get('permalink_url'), - 'license': info.get('license'), - 'view_count': extract_count('playback'), - 'like_count': like_count, - 'comment_count': extract_count('comment'), - 'repost_count': extract_count('reposts'), - 'genre': info.get('genre'), - } + track_base_url = self._API_BASE + 'tracks/%s' % track_id format_urls = set() formats = [] query = {'client_id': self._CLIENT_ID} - if secret_token is not None: + if secret_token: query['secret_token'] = secret_token - if info.get('downloadable', False): - # We can build a direct link to the song + + if info.get('downloadable'): format_url = update_url_query( - 'https://api.soundcloud.com/tracks/%s/download' % track_id, query) + info.get('download_url') or track_base_url + '/download', query) format_urls.add(format_url) + if version == 2: + v1_info = self._download_json( + track_base_url, track_id, query=query, fatal=False) or {} + else: + v1_info = info formats.append({ 'format_id': 'download', - 'ext': info.get('original_format', 'mp3'), + 'ext': v1_info.get('original_format') or 'mp3', + 'filesize': int_or_none(v1_info.get('original_content_size')), 'url': format_url, - 'vcodec': 'none', 'preference': 10, }) - # Old API, does not work for some tracks (e.g. 
- # https://soundcloud.com/giovannisarani/mezzo-valzer) - format_dict = self._download_json( - 'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id, - track_id, 'Downloading track url', query=query, fatal=False) + def invalid_url(url): + return not url or url in format_urls or re.search(r'/(?:preview|playlist)/0/30/', url) - if format_dict: - for key, stream_url in format_dict.items(): - if stream_url in format_urls: - continue - format_urls.add(stream_url) - ext, abr = 'mp3', None - mobj = re.search(r'_([^_]+)_(\d+)_url', key) - if mobj: - ext, abr = mobj.groups() - abr = int(abr) - if key.startswith('http'): - stream_formats = [{ - 'format_id': key, - 'ext': ext, - 'url': stream_url, - }] - elif key.startswith('rtmp'): - # The url doesn't have an rtmp app, we have to extract the playpath - url, path = stream_url.split('mp3:', 1) - stream_formats = [{ - 'format_id': key, - 'url': url, - 'play_path': 'mp3:' + path, - 'ext': 'flv', - }] - elif key.startswith('hls'): - stream_formats = self._extract_m3u8_formats( - stream_url, track_id, ext, entry_protocol='m3u8_native', - m3u8_id=key, fatal=False) - else: - continue - - if abr: - for f in stream_formats: - f['abr'] = abr - - formats.extend(stream_formats) + def add_format(f, protocol): + mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url) + if mobj: + for k, v in mobj.groupdict().items(): + if not f.get(k): + f[k] = v + format_id_list = [] + if protocol: + format_id_list.append(protocol) + for k in ('ext', 'abr'): + v = f.get(k) + if v: + format_id_list.append(v) + abr = f.get('abr') + if abr: + f['abr'] = int(abr) + f.update({ + 'format_id': '_'.join(format_id_list), + 'protocol': 'm3u8_native' if protocol == 'hls' else 'http', + }) + formats.append(f) # New API transcodings = try_get( @@ -337,129 +325,165 @@ class SoundcloudIE(InfoExtractor): if not isinstance(t, dict): continue format_url = url_or_none(t.get('url')) - if not format_url: + if not format_url or t.get('snipped') 
or '/preview/' in format_url: continue stream = self._download_json( - update_url_query(format_url, query), track_id, fatal=False) + format_url, track_id, query=query, fatal=False) if not isinstance(stream, dict): continue stream_url = url_or_none(stream.get('url')) - if not stream_url: - continue - if stream_url in format_urls: + if invalid_url(stream_url): continue format_urls.add(stream_url) - protocol = try_get(t, lambda x: x['format']['protocol'], compat_str) + stream_format = t.get('format') or {} + protocol = stream_format.get('protocol') if protocol != 'hls' and '/hls' in format_url: protocol = 'hls' ext = None preset = str_or_none(t.get('preset')) if preset: ext = preset.split('_')[0] - if ext not in KNOWN_EXTENSIONS: - mimetype = try_get( - t, lambda x: x['format']['mime_type'], compat_str) - ext = mimetype2ext(mimetype) or 'mp3' - format_id_list = [] - if protocol: - format_id_list.append(protocol) - format_id_list.append(ext) - format_id = '_'.join(format_id_list) - formats.append({ + if ext not in KNOWN_EXTENSIONS: + ext = mimetype2ext(stream_format.get('mime_type')) + add_format({ 'url': stream_url, - 'format_id': format_id, 'ext': ext, - 'protocol': 'm3u8_native' if protocol == 'hls' else 'http', - }) + }, 'http' if protocol == 'progressive' else protocol) + + if not formats: + # Old API, does not work for some tracks (e.g. + # https://soundcloud.com/giovannisarani/mezzo-valzer) + # and might serve preview URLs (e.g. 
+ # http://www.soundcloud.com/snbrn/ele) + format_dict = self._download_json( + track_base_url + '/streams', track_id, + 'Downloading track url', query=query, fatal=False) or {} + + for key, stream_url in format_dict.items(): + if invalid_url(stream_url): + continue + format_urls.add(stream_url) + mobj = re.search(r'(http|hls)_([^_]+)_(\d+)_url', key) + if mobj: + protocol, ext, abr = mobj.groups() + add_format({ + 'abr': abr, + 'ext': ext, + 'url': stream_url, + }, protocol) if not formats: # We fallback to the stream_url in the original info, this # cannot be always used, sometimes it can give an HTTP 404 error - formats.append({ - 'format_id': 'fallback', - 'url': update_url_query(info['stream_url'], query), - 'ext': 'mp3', - }) - self._check_formats(formats, track_id) + urlh = self._request_webpage( + HEADRequest(info.get('stream_url') or track_base_url + '/stream'), + track_id, query=query, fatal=False) + if urlh: + stream_url = urlh.geturl() + if not invalid_url(stream_url): + add_format({'url': stream_url}, 'http') for f in formats: f['vcodec'] = 'none' self._sort_formats(formats) - result['formats'] = formats - return result + user = info.get('user') or {} + + thumbnails = [] + artwork_url = info.get('artwork_url') + thumbnail = artwork_url or user.get('avatar_url') + if isinstance(thumbnail, compat_str): + if re.search(self._IMAGE_REPL_RE, thumbnail): + for image_id, size in self._ARTWORK_MAP.items(): + i = { + 'id': image_id, + 'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail), + } + if image_id == 'tiny' and not artwork_url: + size = 18 + elif image_id == 'original': + i['preference'] = 10 + if size: + i.update({ + 'width': size, + 'height': size, + }) + thumbnails.append(i) + else: + thumbnails = [{'url': thumbnail}] + + def extract_count(key): + return int_or_none(info.get('%s_count' % key)) + + return { + 'id': track_id, + 'uploader': user.get('username'), + 'uploader_id': str_or_none(user.get('id')) or user.get('permalink'), + 
'uploader_url': user.get('permalink_url'), + 'timestamp': unified_timestamp(info.get('created_at')), + 'title': title, + 'description': info.get('description'), + 'thumbnails': thumbnails, + 'duration': float_or_none(info.get('duration'), 1000), + 'webpage_url': info.get('permalink_url'), + 'license': info.get('license'), + 'view_count': extract_count('playback'), + 'like_count': extract_count('favoritings') or extract_count('likes'), + 'comment_count': extract_count('comment'), + 'repost_count': extract_count('reposts'), + 'genre': info.get('genre'), + 'formats': formats + } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) - if mobj is None: - raise ExtractorError('Invalid URL: %s' % url) + mobj = re.match(self._VALID_URL, url) track_id = mobj.group('track_id') - new_info = {} - if track_id is not None: - info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID + query = { + 'client_id': self._CLIENT_ID, + } + if track_id: + info_json_url = self._API_V2_BASE + 'tracks/' + track_id full_title = track_id token = mobj.group('secret_token') if token: - info_json_url += '&secret_token=' + token - elif mobj.group('player'): - query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - real_url = query['url'][0] - # If the token is in the query of the original url we have to - # manually add it - if 'secret_token' in query: - real_url += '?secret_token=' + query['secret_token'][0] - return self.url_result(real_url) + query['secret_token'] = token else: - # extract uploader (which is in the url) - uploader = mobj.group('uploader') - # extract simple title (uploader + slug of song title) - slug_title = mobj.group('title') + full_title = resolve_title = '%s/%s' % mobj.group('uploader', 'title') token = mobj.group('token') - full_title = resolve_title = '%s/%s' % (uploader, slug_title) if token: resolve_title += '/%s' % token + info_json_url = self._resolv_url(self._BASE_URL + 
resolve_title) - webpage = self._download_webpage(url, full_title, fatal=False) - if webpage: - entries = self._parse_json( - self._search_regex( - r'var\s+c\s*=\s*(\[.+?\])\s*,\s*o\s*=Date\b', webpage, - 'data', default='[]'), full_title, fatal=False) - if entries: - for e in entries: - if not isinstance(e, dict): - continue - if e.get('id') != 67: - continue - data = try_get(e, lambda x: x['data'][0], dict) - if data: - new_info = data - break - info_json_url = self._resolv_url( - 'https://soundcloud.com/%s' % resolve_title) - - # Contains some additional info missing from new_info + version = 2 info = self._download_json( - info_json_url, full_title, 'Downloading info JSON') + info_json_url, full_title, 'Downloading info JSON', query=query, fatal=False) + if not info: + info = self._download_json( + info_json_url.replace(self._API_V2_BASE, self._API_BASE), + full_title, 'Downloading info JSON', query=query) + version = 1 - return self._extract_info_dict( - merge_dicts(info, new_info), full_title, secret_token=token) + return self._extract_info_dict(info, full_title, token, version) class SoundcloudPlaylistBaseIE(SoundcloudIE): - @staticmethod - def _extract_id(e): - return compat_str(e['id']) if e.get('id') else None - - def _extract_track_entries(self, tracks): - return [ - self.url_result( - track['permalink_url'], SoundcloudIE.ie_key(), - video_id=self._extract_id(track)) - for track in tracks if track.get('permalink_url')] + def _extract_track_entries(self, tracks, token=None): + entries = [] + for track in tracks: + track_id = str_or_none(track.get('id')) + url = track.get('permalink_url') + if not url: + if not track_id: + continue + url = self._API_V2_BASE + 'tracks/' + track_id + if token: + url += '?secret_token=' + token + entries.append(self.url_result( + url, SoundcloudIE.ie_key(), track_id)) + return entries class SoundcloudSetIE(SoundcloudPlaylistBaseIE): @@ -480,41 +504,28 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE): def _real_extract(self, 
url): mobj = re.match(self._VALID_URL, url) - # extract uploader (which is in the url) - uploader = mobj.group('uploader') - # extract simple title (uploader + slug of song title) - slug_title = mobj.group('slug_title') - full_title = '%s/sets/%s' % (uploader, slug_title) - url = 'https://soundcloud.com/%s/sets/%s' % (uploader, slug_title) - + full_title = '%s/sets/%s' % mobj.group('uploader', 'slug_title') token = mobj.group('token') if token: full_title += '/' + token - url += '/' + token - resolv_url = self._resolv_url(url) - info = self._download_json(resolv_url, full_title) + info = self._download_json(self._resolv_url( + self._BASE_URL + full_title), full_title) if 'errors' in info: msgs = (compat_str(err['error_message']) for err in info['errors']) raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs)) - entries = self._extract_track_entries(info['tracks']) + entries = self._extract_track_entries(info['tracks'], token) - return { - '_type': 'playlist', - 'entries': entries, - 'id': '%s' % info['id'], - 'title': info['title'], - } + return self.playlist_result( + entries, str_or_none(info.get('id')), info.get('title')) class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): - _API_V2_BASE = 'https://api-v2.soundcloud.com' - def _extract_playlist(self, base_url, playlist_id, playlist_title): COMMON_QUERY = { - 'limit': 50, + 'limit': 2000000000, 'client_id': self._CLIENT_ID, 'linked_partitioning': '1', } @@ -522,12 +533,13 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): query = COMMON_QUERY.copy() query['offset'] = 0 - next_href = base_url + '?' 
+ compat_urllib_parse_urlencode(query) + next_href = base_url entries = [] for i in itertools.count(): response = self._download_json( - next_href, playlist_id, 'Downloading track page %s' % (i + 1)) + next_href, playlist_id, + 'Downloading track page %s' % (i + 1), query=query) collection = response['collection'] @@ -546,9 +558,8 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): continue return self.url_result( permalink_url, - ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, - video_id=self._extract_id(cand), - video_title=cand.get('title')) + SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, + str_or_none(cand.get('id')), cand.get('title')) for e in collection: entry = resolve_entry((e, e.get('track'), e.get('playlist'))) @@ -559,11 +570,10 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): if not next_href: break - parsed_next_href = compat_urlparse.urlparse(response['next_href']) - qs = compat_urlparse.parse_qs(parsed_next_href.query) - qs.update(COMMON_QUERY) - next_href = compat_urlparse.urlunparse( - parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True))) + next_href = response['next_href'] + parsed_next_href = compat_urlparse.urlparse(next_href) + query = compat_urlparse.parse_qs(parsed_next_href.query) + query.update(COMMON_QUERY) return { '_type': 'playlist', @@ -609,7 +619,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): 'url': 'https://soundcloud.com/jcv246/sets', 'info_dict': { 'id': '12982173', - 'title': 'Jordi / cv (Playlists)', + 'title': 'Jordi / cv (Sets)', }, 'playlist_mincount': 2, }, { @@ -636,39 +646,29 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): }] _BASE_URL_MAP = { - 'all': '%s/stream/users/%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - 'albums': '%s/users/%%s/albums' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - 'sets': 
'%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - 'reposts': '%s/stream/users/%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - 'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - 'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - } - - _TITLE_MAP = { - 'all': 'All', - 'tracks': 'Tracks', - 'albums': 'Albums', - 'sets': 'Playlists', - 'reposts': 'Reposts', - 'likes': 'Likes', - 'spotlight': 'Spotlight', + 'all': 'stream/users/%s', + 'tracks': 'users/%s/tracks', + 'albums': 'users/%s/albums', + 'sets': 'users/%s/playlists', + 'reposts': 'stream/users/%s/reposts', + 'likes': 'users/%s/likes', + 'spotlight': 'users/%s/spotlight', } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) uploader = mobj.group('user') - url = 'https://soundcloud.com/%s/' % uploader - resolv_url = self._resolv_url(url) user = self._download_json( - resolv_url, uploader, 'Downloading user info') + self._resolv_url(self._BASE_URL + uploader), + uploader, 'Downloading user info') resource = mobj.group('rsrc') or 'all' return self._extract_playlist( - self._BASE_URL_MAP[resource] % user['id'], compat_str(user['id']), - '%s (%s)' % (user['username'], self._TITLE_MAP[resource])) + self._API_V2_BASE + self._BASE_URL_MAP[resource] % user['id'], + str_or_none(user.get('id')), + '%s (%s)' % (user['username'], resource.capitalize())) class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): @@ -678,7 +678,7 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): 'url': 'https://soundcloud.com/stations/track/officialsundial/your-text', 'info_dict': { 'id': '286017854', - 'title': 'Track station: your-text', + 'title': 'Track station: your text', }, 'playlist_mincount': 47, }] @@ -686,19 +686,17 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): def _real_extract(self, url): track_name = self._match_id(url) - webpage = self._download_webpage(url, track_name) - + 
track = self._download_json(self._resolv_url(url), track_name) track_id = self._search_regex( - r'soundcloud:track-stations:(\d+)', webpage, 'track id') + r'soundcloud:track-stations:(\d+)', track['id'], 'track id') return self._extract_playlist( - '%s/stations/soundcloud:track-stations:%s/tracks' - % (self._API_V2_BASE, track_id), - track_id, 'Track station: %s' % track_name) + self._API_V2_BASE + 'stations/%s/tracks' % track['id'], + track_id, 'Track station: %s' % track['title']) class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): - _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' + _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' IE_NAME = 'soundcloud:playlist' _TESTS = [{ 'url': 'https://api.soundcloud.com/playlists/4110309', @@ -713,29 +711,22 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) playlist_id = mobj.group('id') - base_url = '%s//api.soundcloud.com/playlists/%s.json?' 
% (self.http_scheme(), playlist_id) - data_dict = { + query = { 'client_id': self._CLIENT_ID, } token = mobj.group('token') - if token: - data_dict['secret_token'] = token + query['secret_token'] = token - data = compat_urllib_parse_urlencode(data_dict) data = self._download_json( - base_url + data, playlist_id, 'Downloading playlist') + self._API_V2_BASE + 'playlists/' + playlist_id, + playlist_id, 'Downloading playlist', query=query) - entries = self._extract_track_entries(data['tracks']) + entries = self._extract_track_entries(data['tracks'], token) - return { - '_type': 'playlist', - 'id': playlist_id, - 'title': data.get('title'), - 'description': data.get('description'), - 'entries': entries, - } + return self.playlist_result( + entries, playlist_id, data.get('title'), data.get('description')) class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): @@ -753,18 +744,18 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): _SEARCH_KEY = 'scsearch' _MAX_RESULTS_PER_PAGE = 200 _DEFAULT_RESULTS_PER_PAGE = 50 - _API_V2_BASE = 'https://api-v2.soundcloud.com' def _get_collection(self, endpoint, collection_id, **query): limit = min( query.get('limit', self._DEFAULT_RESULTS_PER_PAGE), self._MAX_RESULTS_PER_PAGE) - query['limit'] = limit - query['client_id'] = self._CLIENT_ID - query['linked_partitioning'] = '1' - query['offset'] = 0 - data = compat_urllib_parse_urlencode(query) - next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data) + query.update({ + 'limit': limit, + 'client_id': self._CLIENT_ID, + 'linked_partitioning': 1, + 'offset': 0, + }) + next_url = update_url_query(self._API_V2_BASE + endpoint, query) collected_results = 0 @@ -791,5 +782,5 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): break def _get_n_results(self, query, n): - tracks = self._get_collection('/search/tracks', query, limit=n, q=query) + tracks = self._get_collection('search/tracks', query, limit=n, q=query) return self.playlist_result(tracks, 
playlist_title=query) From dd90451f0f4867480c5ed8cb3588b30312204e3f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 27 Oct 2019 22:02:46 +0100 Subject: [PATCH 609/785] [tenplay] Add new extractor(closes #21446) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tenplay.py | 55 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/tenplay.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 388c1ebe6..339a141a5 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1133,6 +1133,7 @@ from .telequebec import ( from .teletask import TeleTaskIE from .telewebion import TelewebionIE from .tennistv import TennisTVIE +from .tenplay import TenPlayIE from .testurl import TestURLIE from .tf1 import TF1IE from .tfo import TFOIE diff --git a/youtube_dl/extractor/tenplay.py b/youtube_dl/extractor/tenplay.py new file mode 100644 index 000000000..dff44a4e2 --- /dev/null +++ b/youtube_dl/extractor/tenplay.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + parse_age_limit, + parse_iso8601, + smuggle_url, +) + + +class TenPlayIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/[^/]+/episodes/[^/]+/[^/]+/(?P<id>tpv\d{6}[a-z]{5})' + _TEST = { + 'url': 'https://10play.com.au/masterchef/episodes/season-1/masterchef-s1-ep-1/tpv190718kwzga', + 'info_dict': { + 'id': '6060533435001', + 'ext': 'mp4', + 'title': 'MasterChef - S1 Ep. 
1', + 'description': 'md5:4fe7b78e28af8f2d900cd20d900ef95c', + 'age_limit': 10, + 'timestamp': 1240828200, + 'upload_date': '20090427', + 'uploader_id': '2199827728001', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + } + } + BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s' + + def _real_extract(self, url): + content_id = self._match_id(url) + data = self._download_json( + 'https://10play.com.au/api/video/' + content_id, content_id) + video = data.get('video') or {} + metadata = data.get('metaData') or {} + brightcove_id = video.get('videoId') or metadata['showContentVideoId'] + brightcove_url = smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + {'geo_countries': ['AU']}) + + return { + '_type': 'url_transparent', + 'url': brightcove_url, + 'id': content_id, + 'title': video.get('title') or metadata.get('pageContentName') or metadata.get('showContentName'), + 'description': video.get('description'), + 'age_limit': parse_age_limit(video.get('showRatingClassification') or metadata.get('showProgramClassification')), + 'series': metadata.get('showName'), + 'season': metadata.get('showContentSeason'), + 'timestamp': parse_iso8601(metadata.get('contentPublishDate') or metadata.get('pageContentPublishDate')), + 'ie_key': 'BrightcoveNew', + } From 71fa0b04f9099090f43f6747632a9bdc3a4b1015 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 28 Oct 2019 13:30:30 +0100 Subject: [PATCH 610/785] [makertv] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/makertv.py | 32 ------------------------------ 2 files changed, 33 deletions(-) delete mode 100644 youtube_dl/extractor/makertv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 339a141a5..4229518fd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -604,7 +604,6 @@ from .mailru import ( 
MailRuMusicIE, MailRuMusicSearchIE, ) -from .makertv import MakerTVIE from .malltv import MallTVIE from .mangomolo import ( MangomoloVideoIE, diff --git a/youtube_dl/extractor/makertv.py b/youtube_dl/extractor/makertv.py deleted file mode 100644 index 8eda69cfc..000000000 --- a/youtube_dl/extractor/makertv.py +++ /dev/null @@ -1,32 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class MakerTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer\.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})' - _TEST = { - 'url': 'http://www.maker.tv/video/Fh3QgymL9gsc', - 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e', - 'info_dict': { - 'id': 'Fh3QgymL9gsc', - 'ext': 'mp4', - 'title': 'Maze Runner: The Scorch Trials Official Movie Review', - 'description': 'md5:11ff3362d7ef1d679fdb649f6413975a', - 'upload_date': '20150918', - 'timestamp': 1442549540, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - jwplatform_id = self._search_regex(r'jw_?id="([^"]+)"', webpage, 'jwplatform id') - - return { - '_type': 'url_transparent', - 'id': video_id, - 'url': 'jwplatform:%s' % jwplatform_id, - 'ie_key': 'JWPlatform', - } From 80c2126e80bc41f7b66d325c4c67c61887c58fb0 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 28 Oct 2019 13:32:35 +0100 Subject: [PATCH 611/785] [thesun] fix extraction(closes #16966) --- youtube_dl/extractor/thesun.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/thesun.py b/youtube_dl/extractor/thesun.py index 22d003776..15d4a6932 100644 --- a/youtube_dl/extractor/thesun.py +++ b/youtube_dl/extractor/thesun.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from .ooyala import OoyalaIE +from ..utils import extract_attributes class TheSunIE(InfoExtractor): @@ -16,6 
+16,7 @@ class TheSunIE(InfoExtractor): }, 'playlist_count': 2, } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' def _real_extract(self, url): article_id = self._match_id(url) @@ -23,10 +24,15 @@ class TheSunIE(InfoExtractor): webpage = self._download_webpage(url, article_id) entries = [] - for ooyala_id in re.findall( - r'<[^>]+\b(?:id\s*=\s*"thesun-ooyala-player-|data-content-id\s*=\s*")([^"]+)', + for video in re.findall( + r'<video[^>]+data-video-id-pending=[^>]+>', webpage): - entries.append(OoyalaIE._build_url_result(ooyala_id)) + attrs = extract_attributes(video) + video_id = attrs['data-video-id-pending'] + account_id = attrs.get('data-account', '5067014667001') + entries.append(self.url_result( + self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id), + 'BrightcoveNew', video_id)) return self.playlist_result( entries, article_id, self._og_search_title(webpage, fatal=False)) From 0f9d53566a5956854af77173c0e910ed7454aadf Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 28 Oct 2019 15:17:06 +0100 Subject: [PATCH 612/785] [la7] update Kaltura service URL(closes #22358) --- youtube_dl/extractor/la7.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py index 6373268c4..c3b4ffa7e 100644 --- a/youtube_dl/extractor/la7.py +++ b/youtube_dl/extractor/la7.py @@ -20,7 +20,7 @@ class LA7IE(InfoExtractor): 'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722', 'md5': '8b613ffc0c4bf9b9e377169fc19c214c', 'info_dict': { - 'id': 'inccool8-02-10-2015-163722', + 'id': '0_42j6wd36', 'ext': 'mp4', 'title': 'Inc.Cool8', 'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. 
dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico', @@ -57,7 +57,7 @@ class LA7IE(InfoExtractor): return { '_type': 'url_transparent', 'url': smuggle_url('kaltura:103:%s' % player_data['vid'], { - 'service_url': 'http://kdam.iltrovatore.it', + 'service_url': 'http://nkdam.iltrovatore.it', }), 'id': video_id, 'title': player_data['title'], From 3e252cca0e81aef55b0288f86991bb566878a9fc Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 28 Oct 2019 17:39:01 +0100 Subject: [PATCH 613/785] [macgamestore] remove extractor Covered by generic extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/macgamestore.py | 42 ---------------------------- 2 files changed, 43 deletions(-) delete mode 100644 youtube_dl/extractor/macgamestore.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4229518fd..1807744be 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -598,7 +598,6 @@ from .lynda import ( LyndaCourseIE ) from .m6 import M6IE -from .macgamestore import MacGameStoreIE from .mailru import ( MailRuIE, MailRuMusicIE, diff --git a/youtube_dl/extractor/macgamestore.py b/youtube_dl/extractor/macgamestore.py deleted file mode 100644 index 43db9929c..000000000 --- a/youtube_dl/extractor/macgamestore.py +++ /dev/null @@ -1,42 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ExtractorError - - -class MacGameStoreIE(InfoExtractor): - IE_NAME = 'macgamestore' - IE_DESC = 'MacGameStore trailers' - _VALID_URL = r'https?://(?:www\.)?macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)' - - _TEST = { - 'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450', - 'md5': '8649b8ea684b6666b4c5be736ecddc61', - 'info_dict': { - 'id': '2450', - 'ext': 'm4v', - 'title': 'Crow', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - 
webpage = self._download_webpage( - url, video_id, 'Downloading trailer page') - - if '>Missing Media<' in webpage: - raise ExtractorError( - 'Trailer %s does not exist' % video_id, expected=True) - - video_title = self._html_search_regex( - r'<title>MacGameStore: (.*?) Trailer', webpage, 'title') - - video_url = self._html_search_regex( - r'(?s)', - webpage, 'video URL') - - return { - 'id': video_id, - 'url': video_url, - 'title': video_title - } From 831b732da1d0796a1927af8767d76af780cc90f0 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Oct 2019 17:41:17 +0100 Subject: [PATCH 614/785] [learnr] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/learnr.py | 33 ------------------------------ 2 files changed, 34 deletions(-) delete mode 100644 youtube_dl/extractor/learnr.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 1807744be..9f3a5f8a5 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -546,7 +546,6 @@ from .lcp import ( LcpPlayIE, LcpIE, ) -from .learnr import LearnrIE from .lecture2go import Lecture2GoIE from .lecturio import ( LecturioIE, diff --git a/youtube_dl/extractor/learnr.py b/youtube_dl/extractor/learnr.py deleted file mode 100644 index 1435e090e..000000000 --- a/youtube_dl/extractor/learnr.py +++ /dev/null @@ -1,33 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class LearnrIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?learnr\.pro/view/video/(?P[0-9]+)' - _TEST = { - 'url': 'http://www.learnr.pro/view/video/51624-web-development-tutorial-for-beginners-1-how-to-build-webpages-with-html-css-javascript', - 'md5': '3719fdf0a68397f49899e82c308a89de', - 'info_dict': { - 'id': '51624', - 'ext': 'mp4', - 'title': 'Web Development Tutorial for Beginners (#1) - How to build webpages with HTML, CSS, Javascript', - 'description': 
'md5:b36dbfa92350176cdf12b4d388485503', - 'uploader': 'LearnCode.academy', - 'uploader_id': 'learncodeacademy', - 'upload_date': '20131021', - }, - 'add_ie': ['Youtube'], - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - return { - '_type': 'url_transparent', - 'url': self._search_regex( - r"videoId\s*:\s*'([^']+)'", webpage, 'youtube id'), - 'id': video_id, - } From b3c2fa6dad607da6455a13d232461d4380e4b53c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Oct 2019 17:42:33 +0100 Subject: [PATCH 615/785] [tutv] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/tutv.py | 36 ------------------------------ 2 files changed, 37 deletions(-) delete mode 100644 youtube_dl/extractor/tutv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9f3a5f8a5..39282b785 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1183,7 +1183,6 @@ from .tunein import ( ) from .tunepk import TunePkIE from .turbo import TurboIE -from .tutv import TutvIE from .tv2 import ( TV2IE, TV2ArticleIE, diff --git a/youtube_dl/extractor/tutv.py b/youtube_dl/extractor/tutv.py deleted file mode 100644 index 362318b24..000000000 --- a/youtube_dl/extractor/tutv.py +++ /dev/null @@ -1,36 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_parse_qs, -) - - -class TutvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P[^/?]+)' - _TEST = { - 'url': 'http://tu.tv/videos/robots-futbolistas', - 'md5': '0cd9e28ad270488911b0d2a72323395d', - 'info_dict': { - 'id': '2973058', - 'ext': 'mp4', - 'title': 'Robots futbolistas', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 
'internal video ID') - - data_content = self._download_webpage( - 'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info') - video_url = compat_b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8') - - return { - 'id': internal_id, - 'url': video_url, - 'title': self._og_search_title(webpage), - } From 702984eca955f61811078c33337faf9eebeb48c8 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Oct 2019 17:49:05 +0100 Subject: [PATCH 616/785] [hark] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/hark.py | 33 ------------------------------ 2 files changed, 34 deletions(-) delete mode 100644 youtube_dl/extractor/hark.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 39282b785..114ede8b9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -428,7 +428,6 @@ from .googlesearch import GoogleSearchIE from .goshgay import GoshgayIE from .gputechconf import GPUTechConfIE from .groupon import GrouponIE -from .hark import HarkIE from .hbo import HBOIE from .hearthisat import HearThisAtIE from .heise import HeiseIE diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py deleted file mode 100644 index 342a6130e..000000000 --- a/youtube_dl/extractor/hark.py +++ /dev/null @@ -1,33 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class HarkIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?hark\.com/clips/(?P.+?)-.+' - _TEST = { - 'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013', - 'md5': '6783a58491b47b92c7c1af5a77d4cbee', - 'info_dict': { - 'id': 'mmbzyhkgny', - 'ext': 'mp3', - 'title': 'Obama: \'Beyond The Afghan Theater, We Only Target Al Qaeda\' on May 23, 2013', - 'description': 'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at 
addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.', - 'duration': 11, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - data = self._download_json( - 'http://www.hark.com/clips/%s.json' % video_id, video_id) - - return { - 'id': video_id, - 'url': data['url'], - 'title': data['name'], - 'description': data.get('description'), - 'thumbnail': data.get('image_original'), - 'duration': data.get('duration'), - } From 895e5c03db310ee97d585360ef8e6ae117e4cbd6 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Oct 2019 19:31:20 +0100 Subject: [PATCH 617/785] [nbcnews] fix extraction closes #12569 closes #12576 closes #21703 closes #21923 --- youtube_dl/extractor/nbc.py | 86 +++++++++++++++++++++++++++---------- 1 file changed, 63 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 10680b202..5bc39d002 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -9,9 +9,13 @@ from .theplatform import ThePlatformIE from .adobepass import AdobePassIE from ..compat import compat_urllib_parse_unquote from ..utils import ( - smuggle_url, - update_url_query, int_or_none, + js_to_json, + parse_duration, + smuggle_url, + try_get, + unified_timestamp, + update_url_query, ) @@ -285,13 +289,12 @@ class NBCNewsIE(ThePlatformIE): _TESTS = [ { 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880', - 'md5': 'af1adfa51312291a017720403826bb64', + 'md5': 'cf4bc9e6ce0130f00f545d80ecedd4bf', 'info_dict': { 'id': '269389891880', 'ext': 'mp4', 'title': 'How Twitter Reacted To The Snowden Interview', 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', - 'uploader': 'NBCU-NEWS', 'timestamp': 1401363060, 'upload_date': '20140529', }, @@ -309,28 +312,26 @@ class NBCNewsIE(ThePlatformIE): }, { 'url': 
'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844', - 'md5': '73135a2e0ef819107bbb55a5a9b2a802', + 'md5': '8eb831eca25bfa7d25ddd83e85946548', 'info_dict': { 'id': '394064451844', 'ext': 'mp4', 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)', 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5', 'timestamp': 1423104900, - 'uploader': 'NBCU-NEWS', 'upload_date': '20150205', }, }, { 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456', - 'md5': 'a49e173825e5fcd15c13fc297fced39d', + 'md5': '4a8c4cec9e1ded51060bdda36ff0a5c0', 'info_dict': { - 'id': '529953347624', + 'id': 'n431456', 'ext': 'mp4', - 'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up', - 'description': 'md5:c8be487b2d80ff0594c005add88d8351', + 'title': "Volkswagen U.S. Chief: We 'Totally Screwed Up'", + 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301', 'upload_date': '20150922', 'timestamp': 1442917800, - 'uploader': 'NBCU-NEWS', }, }, { @@ -343,7 +344,6 @@ class NBCNewsIE(ThePlatformIE): 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1', 'upload_date': '20160420', 'timestamp': 1461152093, - 'uploader': 'NBCU-NEWS', }, }, { @@ -357,7 +357,6 @@ class NBCNewsIE(ThePlatformIE): 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1406937606, 'upload_date': '20140802', - 'uploader': 'NBCU-NEWS', }, }, { @@ -373,20 +372,61 @@ class NBCNewsIE(ThePlatformIE): def _real_extract(self, url): video_id = self._match_id(url) - if not video_id.isdigit(): - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, video_id) - data = self._parse_json(self._search_regex( - r'window\.__data\s*=\s*({.+});', webpage, - 'bootstrap json'), video_id) - video_id = data['article']['content'][0]['primaryMedia']['video']['mpxMetadata']['id'] + data = self._parse_json(self._search_regex( + 
r'window\.__data\s*=\s*({.+});', webpage, + 'bootstrap json'), video_id, js_to_json) + video_data = try_get(data, lambda x: x['video']['current'], dict) + if not video_data: + video_data = data['article']['content'][0]['primaryMedia']['video'] + title = video_data['headline']['primary'] + + formats = [] + for va in video_data.get('videoAssets', []): + public_url = va.get('publicUrl') + if not public_url: + continue + if '://link.theplatform.com/' in public_url: + public_url = update_url_query(public_url, {'format': 'redirect'}) + format_id = va.get('format') + if format_id == 'M3U': + formats.extend(self._extract_m3u8_formats( + public_url, video_id, 'mp4', 'm3u8_native', + m3u8_id=format_id, fatal=False)) + continue + tbr = int_or_none(va.get('bitrate'), 1000) + if tbr: + format_id += '-%d' % tbr + formats.append({ + 'format_id': format_id, + 'url': public_url, + 'width': int_or_none(va.get('width')), + 'height': int_or_none(va.get('height')), + 'tbr': tbr, + 'ext': 'mp4', + }) + self._sort_formats(formats) + + subtitles = {} + closed_captioning = video_data.get('closedCaptioning') + if closed_captioning: + for cc_url in closed_captioning.values(): + if not cc_url: + continue + subtitles.setdefault('en', []).append({ + 'url': cc_url, + }) return { - '_type': 'url_transparent', 'id': video_id, - # http://feed.theplatform.com/f/2E2eJC/nbcnews also works - 'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {'byId': video_id}), - 'ie_key': 'ThePlatformFeed', + 'title': title, + 'description': try_get(video_data, lambda x: x['description']['primary']), + 'thumbnail': try_get(video_data, lambda x: x['primaryImage']['url']['primary']), + 'duration': parse_duration(video_data.get('duration')), + 'timestamp': unified_timestamp(video_data.get('datePublished')), + 'formats': formats, + 'subtitles': subtitles, } From 83e49259bfd4e0b54a4b53c30742109555087e3a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Oct 2019 19:45:42 +0100 Subject: 
[PATCH 618/785] [internetvideoarchive] fix extraction --- youtube_dl/extractor/internetvideoarchive.py | 92 ++++++-------------- 1 file changed, 28 insertions(+), 64 deletions(-) diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 76cc5ec3e..59b0a90c3 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -1,15 +1,13 @@ from __future__ import unicode_literals +import json +import re + from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_urlparse, ) -from ..utils import ( - determine_ext, - int_or_none, - xpath_text, -) class InternetVideoArchiveIE(InfoExtractor): @@ -20,7 +18,7 @@ class InternetVideoArchiveIE(InfoExtractor): 'info_dict': { 'id': '194487', 'ext': 'mp4', - 'title': 'KICK-ASS 2', + 'title': 'Kick-Ass 2', 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a', }, 'params': { @@ -33,68 +31,34 @@ class InternetVideoArchiveIE(InfoExtractor): def _build_json_url(query): return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query - @staticmethod - def _build_xml_url(query): - return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' 
+ query - def _real_extract(self, url): - query = compat_urlparse.urlparse(url).query - query_dic = compat_parse_qs(query) - video_id = query_dic['publishedid'][0] - - if '/player/' in url: - configuration = self._download_json(url, video_id) - - # There are multiple videos in the playlist whlie only the first one - # matches the video played in browsers - video_info = configuration['playlist'][0] - title = video_info['title'] - - formats = [] - for source in video_info['sources']: - file_url = source['file'] - if determine_ext(file_url) == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( - file_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) - file_url = m3u8_formats[0]['url'] - formats.extend(self._extract_f4m_formats( - file_url.replace('.m3u8', '.f4m'), - video_id, f4m_id='hds', fatal=False)) - formats.extend(self._extract_mpd_formats( - file_url.replace('.m3u8', '.mpd'), - video_id, mpd_id='dash', fatal=False)) - else: - a_format = { - 'url': file_url, - } - - if source.get('label') and source['label'][-4:] == ' kbs': - tbr = int_or_none(source['label'][:-4]) - a_format.update({ - 'tbr': tbr, - 'format_id': 'http-%d' % tbr, - }) - formats.append(a_format) - - self._sort_formats(formats) - - description = video_info.get('description') - thumbnail = video_info.get('image') - else: - configuration = self._download_xml(url, video_id) - formats = [{ - 'url': xpath_text(configuration, './file', 'file URL', fatal=True), - }] - thumbnail = xpath_text(configuration, './image', 'thumbnail') - title = 'InternetVideoArchive video %s' % video_id - description = None + query = compat_parse_qs(compat_urlparse.urlparse(url).query) + video_id = query['publishedid'][0] + data = self._download_json( + 'https://video.internetvideoarchive.net/videojs7/videojs7.ivasettings.ashx', + video_id, data=json.dumps({ + 'customerid': query['customerid'][0], + 'publishedid': video_id, + }).encode()) + title = 
data['Title'] + formats = self._extract_m3u8_formats( + data['VideoUrl'], video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False) + file_url = formats[0]['url'] + if '.ism/' in file_url: + replace_url = lambda x: re.sub(r'\.ism/[^?]+', '.ism/' + x, file_url) + formats.extend(self._extract_f4m_formats( + replace_url('.f4m'), video_id, f4m_id='hds', fatal=False)) + formats.extend(self._extract_mpd_formats( + replace_url('.mpd'), video_id, mpd_id='dash', fatal=False)) + formats.extend(self._extract_ism_formats( + replace_url('Manifest'), video_id, ism_id='mss', fatal=False)) + self._sort_formats(formats) return { 'id': video_id, 'title': title, 'formats': formats, - 'thumbnail': thumbnail, - 'description': description, + 'thumbnail': data.get('PosterUrl'), + 'description': data.get('Description'), } From 0086726e8674e9edec0682e7a84275c3c25ce646 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Oct 2019 19:48:34 +0100 Subject: [PATCH 619/785] [videodetective] fix extraction --- youtube_dl/extractor/videodetective.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py index a19411a05..fe70db713 100644 --- a/youtube_dl/extractor/videodetective.py +++ b/youtube_dl/extractor/videodetective.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urlparse from .internetvideoarchive import InternetVideoArchiveIE @@ -13,7 +12,7 @@ class VideoDetectiveIE(InfoExtractor): 'info_dict': { 'id': '194487', 'ext': 'mp4', - 'title': 'KICK-ASS 2', + 'title': 'Kick-Ass 2', 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a', }, 'params': { @@ -24,7 +23,7 @@ class VideoDetectiveIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - og_video = self._og_search_video_url(webpage) - query = compat_urlparse.urlparse(og_video).query - 
return self.url_result(InternetVideoArchiveIE._build_json_url(query), ie=InternetVideoArchiveIE.ie_key()) + query = 'customerid=69249&publishedid=' + video_id + return self.url_result( + InternetVideoArchiveIE._build_json_url(query), + ie=InternetVideoArchiveIE.ie_key()) From cfabc505984acb3830aeac7759d913bb885d64b6 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Oct 2019 22:55:01 +0100 Subject: [PATCH 620/785] [mtv] fix extraction for mtv.de (closes #22113) --- youtube_dl/extractor/mtv.py | 51 ++++++++++++++----------------------- 1 file changed, 19 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 7a3b57abd..7e95ca18e 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -425,14 +425,14 @@ class MTVVideoIE(MTVServicesInfoExtractor): class MTVDEIE(MTVServicesInfoExtractor): IE_NAME = 'mtv.de' - _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P\d+)-[^/#?]+/*(?:[#?].*)?$' + _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:musik/videoclips|folgen|news)/(?P[0-9a-z]+)' _TESTS = [{ - 'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum', + 'url': 'http://www.mtv.de/musik/videoclips/2gpnv7/Traum', 'info_dict': { - 'id': 'music_video-a50bc5f0b3aa4b3190aa', - 'ext': 'flv', - 'title': 'MusicVideo_cro-traum', - 'description': 'Cro - Traum', + 'id': 'd5d472bc-f5b7-11e5-bffd-a4badb20dab5', + 'ext': 'mp4', + 'title': 'Traum', + 'description': 'Traum', }, 'params': { # rtmp download @@ -441,11 +441,12 @@ class MTVDEIE(MTVServicesInfoExtractor): 'skip': 'Blocked at Travis CI', }, { # mediagen URL without query (e.g. 
http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97) - 'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen', + 'url': 'http://www.mtv.de/folgen/6b1ylu/teen-mom-2-enthuellungen-S5-F1', 'info_dict': { - 'id': 'local_playlist-f5ae778b9832cc837189', - 'ext': 'flv', - 'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1', + 'id': '1e5a878b-31c5-11e7-a442-0e40cf2fc285', + 'ext': 'mp4', + 'title': 'Teen Mom 2', + 'description': 'md5:dc65e357ef7e1085ed53e9e9d83146a7', }, 'params': { # rtmp download @@ -453,7 +454,7 @@ class MTVDEIE(MTVServicesInfoExtractor): }, 'skip': 'Blocked at Travis CI', }, { - 'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3', + 'url': 'http://www.mtv.de/news/glolix/77491-mtv-movies-spotlight--pixels--teil-3', 'info_dict': { 'id': 'local_playlist-4e760566473c4c8c5344', 'ext': 'mp4', @@ -466,25 +467,11 @@ class MTVDEIE(MTVServicesInfoExtractor): }, 'skip': 'Das Video kann zur Zeit nicht abgespielt werden.', }] + _GEO_COUNTRIES = ['DE'] + _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed' - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - playlist = self._parse_json( - self._search_regex( - r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'), - video_id) - - def _mrss_url(item): - return item['mrss'] + item.get('mrssvars', '') - - # news pages contain single video in playlist with different id - if len(playlist) == 1: - return self._get_videos_info_from_url(_mrss_url(playlist[0]), video_id) - - for item in playlist: - item_id = item.get('id') - if item_id and compat_str(item_id) == video_id: - return self._get_videos_info_from_url(_mrss_url(item), video_id) + def _get_feed_query(self, uri): + return { + 'arcEp': 'mtv.de', + 'mgid': uri, + } From 3cdcebf5470a56df7d52e6f8acbcde5b4b9f0241 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Oct 2019 
23:31:14 +0100 Subject: [PATCH 621/785] [mtv] add support for mtvjapan.com --- youtube_dl/extractor/mtv.py | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 7e95ca18e..fedd5f46b 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import re @@ -349,33 +350,29 @@ class MTVIE(MTVServicesInfoExtractor): }] -class MTV81IE(InfoExtractor): - IE_NAME = 'mtv81' - _VALID_URL = r'https?://(?:www\.)?mtv81\.com/videos/(?P[^/?#.]+)' +class MTVJapanIE(MTVServicesInfoExtractor): + IE_NAME = 'mtvjapan' + _VALID_URL = r'https?://(?:www\.)?mtvjapan\.com/videos/(?P[0-9a-z]+)' _TEST = { - 'url': 'http://www.mtv81.com/videos/artist-to-watch/the-godfather-of-japanese-hip-hop-segment-1/', - 'md5': '1edbcdf1e7628e414a8c5dcebca3d32b', + 'url': 'http://www.mtvjapan.com/videos/prayht/fresh-info-cadillac-escalade', 'info_dict': { - 'id': '5e14040d-18a4-47c4-a582-43ff602de88e', + 'id': 'bc01da03-6fe5-4284-8880-f291f4e368f5', 'ext': 'mp4', - 'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer', - 'description': '"Unlocking the Truth" premieres August 17th at 11/10c.', - 'timestamp': 1468846800, - 'upload_date': '20160718', + 'title': '【Fresh Info】Cadillac ESCALADE Sport Edition', + }, + 'params': { + 'skip_download': True, }, } + _GEO_COUNTRIES = ['JP'] + _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed' - def _extract_mgid(self, webpage): - return self._search_regex( - r'getTheVideo\((["\'])(?Pmgid:.+?)\1', webpage, - 'mgid', group='id') - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - mgid = self._extract_mgid(webpage) - return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) + def _get_feed_query(self, uri): + return { + 'arcEp': 'mtvjapan.com', + 'mgid': uri, + } 
class MTVVideoIE(MTVServicesInfoExtractor): From 01358b9fc198cafb619a03ed5ad7865a74805611 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Oct 2019 23:34:31 +0100 Subject: [PATCH 622/785] [extractors] add import for MTVJapanIE --- youtube_dl/extractor/extractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 114ede8b9..c10bcbcc1 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -666,7 +666,7 @@ from .mtv import ( MTVVideoIE, MTVServicesEmbeddedIE, MTVDEIE, - MTV81IE, + MTVJapanIE, ) from .muenchentv import MuenchenTVIE from .musicplayon import MusicPlayOnIE From dd90a21c28cb1ec592e5961a5f67556edfb3ce87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 29 Oct 2019 05:49:36 +0700 Subject: [PATCH 623/785] [go] Add support for abc.com and freeform.com (closes #22823, closes #22864) --- youtube_dl/extractor/go.py | 44 ++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 03e48f4ea..107059023 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -40,8 +40,8 @@ class GoIE(AdobePassIE): 'resource_id': 'Disney', } } - _VALID_URL = r'https?://(?:(?:(?P%s)\.)?go|(?Pdisneynow))\.com/(?:(?:[^/]+/)*(?Pvdka\w+)|(?:[^/]+/)*(?P[^/?#]+))'\ - % '|'.join(list(_SITE_INFO.keys()) + ['disneynow']) + _VALID_URL = r'https?://(?:(?:(?P%s)\.)?go|(?Pabc|freeform|disneynow))\.com/(?:(?:[^/]+/)*(?Pvdka\w+)|(?:[^/]+/)*(?P[^/?#]+))'\ + % '|'.join(list(_SITE_INFO.keys())) _TESTS = [{ 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', 'info_dict': { @@ -54,6 +54,7 @@ class GoIE(AdobePassIE): # m3u8 download 'skip_download': True, }, + 'skip': 'This content is no longer available.', }, { 'url': 'http://watchdisneyxd.go.com/doraemon', 'info_dict': { @@ -61,6 +62,34 @@ 
class GoIE(AdobePassIE): 'id': 'SH55574025', }, 'playlist_mincount': 51, + }, { + 'url': 'http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood', + 'info_dict': { + 'id': 'VDKA3609139', + 'ext': 'mp4', + 'title': 'This Guilty Blood', + 'description': 'md5:f18e79ad1c613798d95fdabfe96cd292', + 'age_limit': 14, + }, + 'params': { + 'geo_bypass_ip_block': '3.244.239.0/24', + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet', + 'info_dict': { + 'id': 'VDKA13435179', + 'ext': 'mp4', + 'title': 'The Bet', + 'description': 'md5:c66de8ba2e92c6c5c113c3ade84ab404', + 'age_limit': 14, + }, + 'params': { + 'geo_bypass_ip_block': '3.244.239.0/24', + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding', 'only_matching': True, @@ -95,10 +124,13 @@ class GoIE(AdobePassIE): if not video_id or not site_info: webpage = self._download_webpage(url, display_id or video_id) video_id = self._search_regex( - # There may be inner quotes, e.g. data-video-id="'VDKA3609139'" - # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood - r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', - default=video_id) + ( + # There may be inner quotes, e.g. 
data-video-id="'VDKA3609139'" + # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood + r'data-video-id=["\']*(VDKA\w+)', + # https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet + r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)' + ), webpage, 'video id', default=video_id) if not site_info: brand = self._search_regex( (r'data-brand=\s*["\']\s*(\d+)', From aef9f87ea4dcfe483c5b776f1c37310766ad818d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 29 Oct 2019 05:52:15 +0700 Subject: [PATCH 624/785] [go] Improve and beautify _VALID_URL --- youtube_dl/extractor/go.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 107059023..03cfba91f 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -40,8 +40,17 @@ class GoIE(AdobePassIE): 'resource_id': 'Disney', } } - _VALID_URL = r'https?://(?:(?:(?P%s)\.)?go|(?Pabc|freeform|disneynow))\.com/(?:(?:[^/]+/)*(?Pvdka\w+)|(?:[^/]+/)*(?P[^/?#]+))'\ - % '|'.join(list(_SITE_INFO.keys())) + _VALID_URL = r'''(?x) + https?:// + (?: + (?:(?P%s)\.)?go| + (?Pabc|freeform|disneynow) + )\.com/ + (?: + (?:[^/]+/)*(?P[Vv][Dd][Kk][Aa]\w+)| + (?:[^/]+/)*(?P[^/?\#]+) + ) + ''' % '|'.join(list(_SITE_INFO.keys())) _TESTS = [{ 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', 'info_dict': { From 0d7392e68b7ebb7215651da0784e859d7bdff826 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 29 Oct 2019 05:54:32 +0700 Subject: [PATCH 625/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/ChangeLog b/ChangeLog index 64233b03b..b664368a1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,48 @@ +version + +Extractors ++ [go] Add support for abc.com and freeform.com (#22823, #22864) ++ [mtv] Add support for mtvjapan.com +* [mtv] 
Fix extraction for mtv.de (#22113) +* [videodetective] Fix extraction +* [internetvideoarchive] Fix extraction +* [nbcnews] Fix extraction (#12569, #12576, #21703, #21923) +- [hark] Remove extractor +- [tutv] Remove extractor +- [learnr] Remove extractor +- [macgamestore] Remove extractor +* [la7] Update Kaltura service URL (#22358) +* [thesun] Fix extraction (#16966) +- [makertv] Remove extractor ++ [tenplay] Add support for 10play.com.au (#21446) +* [soundcloud] Improve extraction + * Improve format extraction (#22123) + + Extract uploader_id and uploader_url (#21916) + + Extract all known thumbnails (#19071, #20659) + * Fix extration for private playlists (#20976) + + Add support for playlist embeds (#20976) + * Skip preview formats (#22806) +* [dplay] Improve extraction + + Add support for dplay.fi, dplay.jp and es.dplay.com (#16969) + * Fix it.dplay.com extraction (#22826) + + Extract creator, tags and thumbnails + * Handle playback API call errors ++ [discoverynetworks] Add support for dplay.co.uk +* [vk] Improve extraction + + Add support for Odnoklassniki embeds + + Extract more videos from user lists (#4470) + + Fix wall post audio extraction (#18332) + * Improve error detection (#22568) ++ [odnoklassniki] Add support for embeds +* [puhutv] Improve extraction + * Fix subtitles extraction + * Transform HLS URLs to HTTP URLs + * Improve metadata extraction +* [ceskatelevize] Skip DRM media ++ [facebook] Extract subtitles (#22777) +* [globo] Handle alternative hash signing method + + version 2019.10.22 Core From 53896ca5be9a629c2cbaceb3fe43c707bb217437 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 29 Oct 2019 06:10:20 +0700 Subject: [PATCH 626/785] [utils] Actualize major IPv4 address blocks per country --- youtube_dl/utils.py | 71 +++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 53117ea90..aed988b88 100644 --- 
a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -4979,7 +4979,7 @@ class ISO3166Utils(object): class GeoUtils(object): # Major IPv4 address blocks per country _country_ip_map = { - 'AD': '85.94.160.0/19', + 'AD': '46.172.224.0/19', 'AE': '94.200.0.0/13', 'AF': '149.54.0.0/17', 'AG': '209.59.64.0/18', @@ -4987,28 +4987,30 @@ class GeoUtils(object): 'AL': '46.99.0.0/16', 'AM': '46.70.0.0/15', 'AO': '105.168.0.0/13', - 'AP': '159.117.192.0/21', + 'AP': '182.50.184.0/21', + 'AQ': '23.154.160.0/24', 'AR': '181.0.0.0/12', 'AS': '202.70.112.0/20', - 'AT': '84.112.0.0/13', + 'AT': '77.116.0.0/14', 'AU': '1.128.0.0/11', 'AW': '181.41.0.0/18', - 'AZ': '5.191.0.0/16', + 'AX': '185.217.4.0/22', + 'AZ': '5.197.0.0/16', 'BA': '31.176.128.0/17', 'BB': '65.48.128.0/17', 'BD': '114.130.0.0/16', 'BE': '57.0.0.0/8', - 'BF': '129.45.128.0/17', + 'BF': '102.178.0.0/15', 'BG': '95.42.0.0/15', 'BH': '37.131.0.0/17', 'BI': '154.117.192.0/18', 'BJ': '137.255.0.0/16', - 'BL': '192.131.134.0/24', + 'BL': '185.212.72.0/23', 'BM': '196.12.64.0/18', 'BN': '156.31.0.0/16', 'BO': '161.56.0.0/16', 'BQ': '161.0.80.0/20', - 'BR': '152.240.0.0/12', + 'BR': '191.128.0.0/12', 'BS': '24.51.64.0/18', 'BT': '119.2.96.0/19', 'BW': '168.167.0.0/16', @@ -5016,20 +5018,20 @@ class GeoUtils(object): 'BZ': '179.42.192.0/18', 'CA': '99.224.0.0/11', 'CD': '41.243.0.0/16', - 'CF': '196.32.200.0/21', - 'CG': '197.214.128.0/17', + 'CF': '197.242.176.0/21', + 'CG': '160.113.0.0/16', 'CH': '85.0.0.0/13', - 'CI': '154.232.0.0/14', + 'CI': '102.136.0.0/14', 'CK': '202.65.32.0/19', 'CL': '152.172.0.0/14', - 'CM': '165.210.0.0/15', + 'CM': '102.244.0.0/14', 'CN': '36.128.0.0/10', 'CO': '181.240.0.0/12', 'CR': '201.192.0.0/12', 'CU': '152.206.0.0/15', 'CV': '165.90.96.0/19', 'CW': '190.88.128.0/17', - 'CY': '46.198.0.0/15', + 'CY': '31.153.0.0/16', 'CZ': '88.100.0.0/14', 'DE': '53.0.0.0/8', 'DJ': '197.241.0.0/17', @@ -5046,6 +5048,7 @@ class GeoUtils(object): 'EU': '2.16.0.0/13', 'FI': '91.152.0.0/13', 'FJ': 
'144.120.0.0/16', + 'FK': '80.73.208.0/21', 'FM': '119.252.112.0/20', 'FO': '88.85.32.0/19', 'FR': '90.0.0.0/9', @@ -5055,8 +5058,8 @@ class GeoUtils(object): 'GE': '31.146.0.0/16', 'GF': '161.22.64.0/18', 'GG': '62.68.160.0/19', - 'GH': '45.208.0.0/14', - 'GI': '85.115.128.0/19', + 'GH': '154.160.0.0/12', + 'GI': '95.164.0.0/16', 'GL': '88.83.0.0/19', 'GM': '160.182.0.0/15', 'GN': '197.149.192.0/18', @@ -5085,13 +5088,13 @@ class GeoUtils(object): 'JE': '87.244.64.0/18', 'JM': '72.27.0.0/17', 'JO': '176.29.0.0/16', - 'JP': '126.0.0.0/8', + 'JP': '133.0.0.0/8', 'KE': '105.48.0.0/12', 'KG': '158.181.128.0/17', 'KH': '36.37.128.0/17', 'KI': '103.25.140.0/22', 'KM': '197.255.224.0/20', - 'KN': '198.32.32.0/19', + 'KN': '198.167.192.0/19', 'KP': '175.45.176.0/22', 'KR': '175.192.0.0/10', 'KW': '37.36.0.0/14', @@ -5099,10 +5102,10 @@ class GeoUtils(object): 'KZ': '2.72.0.0/13', 'LA': '115.84.64.0/18', 'LB': '178.135.0.0/16', - 'LC': '192.147.231.0/24', + 'LC': '24.92.144.0/20', 'LI': '82.117.0.0/19', 'LK': '112.134.0.0/15', - 'LR': '41.86.0.0/19', + 'LR': '102.183.0.0/16', 'LS': '129.232.0.0/17', 'LT': '78.56.0.0/13', 'LU': '188.42.0.0/16', @@ -5127,7 +5130,7 @@ class GeoUtils(object): 'MT': '46.11.0.0/16', 'MU': '105.16.0.0/12', 'MV': '27.114.128.0/18', - 'MW': '105.234.0.0/16', + 'MW': '102.70.0.0/15', 'MX': '187.192.0.0/11', 'MY': '175.136.0.0/13', 'MZ': '197.218.0.0/15', @@ -5158,23 +5161,23 @@ class GeoUtils(object): 'PW': '202.124.224.0/20', 'PY': '181.120.0.0/14', 'QA': '37.210.0.0/15', - 'RE': '139.26.0.0/16', + 'RE': '102.35.0.0/16', 'RO': '79.112.0.0/13', - 'RS': '178.220.0.0/14', + 'RS': '93.86.0.0/15', 'RU': '5.136.0.0/13', - 'RW': '105.178.0.0/15', + 'RW': '41.186.0.0/16', 'SA': '188.48.0.0/13', 'SB': '202.1.160.0/19', 'SC': '154.192.0.0/11', - 'SD': '154.96.0.0/13', + 'SD': '102.120.0.0/13', 'SE': '78.64.0.0/12', - 'SG': '152.56.0.0/14', + 'SG': '8.128.0.0/10', 'SI': '188.196.0.0/14', 'SK': '78.98.0.0/15', - 'SL': '197.215.0.0/17', + 'SL': 
'102.143.0.0/17', 'SM': '89.186.32.0/19', 'SN': '41.82.0.0/15', - 'SO': '197.220.64.0/19', + 'SO': '154.115.192.0/18', 'SR': '186.179.128.0/17', 'SS': '105.235.208.0/21', 'ST': '197.159.160.0/19', @@ -5197,15 +5200,15 @@ class GeoUtils(object): 'TV': '202.2.96.0/19', 'TW': '120.96.0.0/11', 'TZ': '156.156.0.0/14', - 'UA': '93.72.0.0/13', - 'UG': '154.224.0.0/13', - 'US': '3.0.0.0/8', + 'UA': '37.52.0.0/14', + 'UG': '102.80.0.0/13', + 'US': '6.0.0.0/8', 'UY': '167.56.0.0/13', - 'UZ': '82.215.64.0/18', + 'UZ': '84.54.64.0/18', 'VA': '212.77.0.0/19', - 'VC': '24.92.144.0/20', + 'VC': '207.191.240.0/21', 'VE': '186.88.0.0/13', - 'VG': '172.103.64.0/18', + 'VG': '66.81.192.0/20', 'VI': '146.226.0.0/16', 'VN': '14.160.0.0/11', 'VU': '202.80.32.0/20', @@ -5214,8 +5217,8 @@ class GeoUtils(object): 'YE': '134.35.0.0/16', 'YT': '41.242.116.0/22', 'ZA': '41.0.0.0/11', - 'ZM': '165.56.0.0/13', - 'ZW': '41.85.192.0/19', + 'ZM': '102.144.0.0/13', + 'ZW': '102.177.192.0/18', } @classmethod From cae0bbc53831eed38c4af3755de43e223c503270 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 29 Oct 2019 06:11:09 +0700 Subject: [PATCH 627/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ChangeLog b/ChangeLog index b664368a1..2957b7ced 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ version +Core +* [utils] Actualize major IPv4 address blocks per country + Extractors + [go] Add support for abc.com and freeform.com (#22823, #22864) + [mtv] Add support for mtvjapan.com From c4bd9cb7bb57c6e4bbc04fb054dfea14d4ecb171 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 29 Oct 2019 06:12:33 +0700 Subject: [PATCH 628/785] release 2019.10.29 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- 
.github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 10 +++------- youtube_dl/version.py | 2 +- 8 files changed, 17 insertions(+), 21 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index f1afe704c..f82502bd1 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.10.22** +- [ ] I've verified that I'm running youtube-dl version **2019.10.29** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.10.22 + [debug] youtube-dl version 2019.10.29 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index a4dc9b005..5ef983d43 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.10.22** +- [ ] I've verified that I'm running youtube-dl version **2019.10.29** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked 
that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 5bf86adce..8f05aa79f 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.10.22** +- [ ] I've verified that I'm running youtube-dl version **2019.10.29** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 7aa5534e5..e90900d8d 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.10.22** +- [ ] I've verified that I'm running youtube-dl version **2019.10.29** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.10.22 + [debug] youtube-dl version 2019.10.29 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md 
b/.github/ISSUE_TEMPLATE/5_feature_request.md index 5d3645e3d..7021d7397 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.10.22** +- [ ] I've verified that I'm running youtube-dl version **2019.10.29** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 2957b7ced..fcab1102c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.10.29 Core * [utils] Actualize major IPv4 address blocks per country diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a1b0edeeb..af905db5a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -232,7 +232,6 @@ - **DouyuShow** - **DouyuTV**: 斗鱼 - **DPlay** - - **DPlayIt** - **DRBonanza** - **Dropbox** - **DrTuber** @@ -339,7 +338,6 @@ - **Goshgay** - **GPUTechConf** - **Groupon** - - **Hark** - **hbo** - **HearThisAt** - **Heise** @@ -432,7 +430,6 @@ - **Lcp** - **LcpPlay** - **Le**: 乐视网 - - **Learnr** - **Lecture2Go** - **Lecturio** - **LecturioCourse** @@ -466,11 +463,9 @@ - **lynda**: lynda.com videos - **lynda:course**: lynda.com online courses - **m6** - - **macgamestore**: MacGameStore trailers - **mailru**: Видео@Mail.Ru - **mailru:music**: Музыка@Mail.Ru - **mailru:music:search**: Музыка@Mail.Ru - - **MakerTV** - **MallTV** - **mangomolo:live** - **mangomolo:video** @@ -526,8 +521,8 @@ - **mtg**: MTG services - **mtv** - **mtv.de** - - **mtv81** - **mtv:video** + - **mtvjapan** - **mtvservices:embedded** - **MuenchenTV**: münchen.tv - **MusicPlayOn** @@ -815,6 +810,7 @@ - **soundcloud:set** - **soundcloud:trackstation** - **soundcloud:user** + - **SoundcloudEmbed** - **soundgasm** - **soundgasm:profile** - **southpark.cc.com** @@ -887,6 +883,7 @@ - **TeleTask** - **Telewebion** - **TennisTV** + - **TenPlay** - 
**TF1** - **TFO** - **TheIntercept** @@ -925,7 +922,6 @@ - **tunein:topic** - **TunePk** - **Turbo** - - **Tutv** - **tv.dfb.de** - **TV2** - **tv2.hu** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 39b355b9e..924f26ca8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.10.22' +__version__ = '2019.10.29' From 7455832f311843663b416968b9e5a0a0c6134d8d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 29 Oct 2019 09:43:17 +0100 Subject: [PATCH 629/785] [fox9] fix extraction --- youtube_dl/extractor/extractors.py | 5 +++- youtube_dl/extractor/fox9.py | 43 +++++++++++++++--------------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c10bcbcc1..15f96fb8f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -367,7 +367,10 @@ from .fourtube import ( FuxIE, ) from .fox import FOXIE -from .fox9 import FOX9IE +from .fox9 import ( + FOX9IE, + FOX9NewsIE, +) from .foxgay import FoxgayIE from .foxnews import ( FoxNewsIE, diff --git a/youtube_dl/extractor/fox9.py b/youtube_dl/extractor/fox9.py index 17dfffa7b..91f8f7b8a 100644 --- a/youtube_dl/extractor/fox9.py +++ b/youtube_dl/extractor/fox9.py @@ -1,13 +1,23 @@ # coding: utf-8 from __future__ import unicode_literals -from .anvato import AnvatoIE +from .common import InfoExtractor -class FOX9IE(AnvatoIE): - _VALID_URL = r'https?://(?:www\.)?fox9\.com/(?:[^/]+/)+(?P\d+)-story' - _TESTS = [{ - 'url': 'http://www.fox9.com/news/215123287-story', +class FOX9IE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?fox9\.com/video/(?P\d+)' + + def _real_extract(self, url): + video_id = self._match_id(url) + return self.url_result( + 'anvato:anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b:' + video_id, + 'Anvato', video_id) + + +class FOX9NewsIE(InfoExtractor): + _VALID_URL = 
r'https?://(?:www\.)?fox9\.com/news/(?P[^/?&#]+)' + _TEST = { + 'url': 'https://www.fox9.com/news/black-bear-in-tree-draws-crowd-in-downtown-duluth-minnesota', 'md5': 'd6e1b2572c3bab8a849c9103615dd243', 'info_dict': { 'id': '314473', @@ -21,22 +31,11 @@ class FOX9IE(AnvatoIE): 'categories': ['News', 'Sports'], 'tags': ['news', 'video'], }, - }, { - 'url': 'http://www.fox9.com/news/investigators/214070684-story', - 'only_matching': True, - }] + } def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - video_id = self._parse_json( - self._search_regex( - r"this\.videosJson\s*=\s*'(\[.+?\])';", - webpage, 'anvato playlist'), - video_id)[0]['video'] - - return self._get_anvato_videos( - 'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b', - video_id) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + anvato_id = self._search_regex( + r'anvatoId\s*:\s*[\'"](\d+)', webpage, 'anvato id') + return self.url_result('https://www.fox9.com/video/' + anvato_id, 'FOX9') From 8989349e6dcaa98204f77fb9f1e15a86eecb823d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 29 Oct 2019 09:44:07 +0100 Subject: [PATCH 630/785] [onet] improve extraction - add support for onet100.vod.pl domain - extract m3u8 formats - correct audio only format info --- youtube_dl/extractor/onet.py | 54 ++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py index 58da1bc27..e55b2ac89 100644 --- a/youtube_dl/extractor/onet.py +++ b/youtube_dl/extractor/onet.py @@ -20,6 +20,8 @@ from ..utils import ( class OnetBaseIE(InfoExtractor): + _URL_BASE_RE = r'https?://(?:(?:www\.)?onet\.tv|onet100\.vod\.pl)/[a-z]/' + def _search_mvp_id(self, webpage): return self._search_regex( r'id=(["\'])mvp:(?P.+?)\1', webpage, 'mvp id', group='id') @@ -45,7 +47,7 @@ class OnetBaseIE(InfoExtractor): video = 
response['result'].get('0') formats = [] - for _, formats_dict in video['formats'].items(): + for format_type, formats_dict in video['formats'].items(): if not isinstance(formats_dict, dict): continue for format_id, format_list in formats_dict.items(): @@ -56,21 +58,31 @@ class OnetBaseIE(InfoExtractor): if not video_url: continue ext = determine_ext(video_url) - if format_id == 'ism': + if format_id.startswith('ism'): formats.extend(self._extract_ism_formats( video_url, video_id, 'mss', fatal=False)) elif ext == 'mpd': formats.extend(self._extract_mpd_formats( video_url, video_id, mpd_id='dash', fatal=False)) + elif format_id.startswith('hls'): + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) else: - formats.append({ + http_f = { 'url': video_url, 'format_id': format_id, - 'height': int_or_none(f.get('vertical_resolution')), - 'width': int_or_none(f.get('horizontal_resolution')), 'abr': float_or_none(f.get('audio_bitrate')), - 'vbr': float_or_none(f.get('video_bitrate')), - }) + } + if format_type == 'audio': + http_f['vcodec'] = 'none' + else: + http_f.update({ + 'height': int_or_none(f.get('vertical_resolution')), + 'width': int_or_none(f.get('horizontal_resolution')), + 'vbr': float_or_none(f.get('video_bitrate')), + }) + formats.append(http_f) self._sort_formats(formats) meta = video.get('meta', {}) @@ -105,12 +117,12 @@ class OnetMVPIE(OnetBaseIE): class OnetIE(OnetBaseIE): - _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P[0-9a-z-]+)/(?P[0-9a-z]+)' + _VALID_URL = OnetBaseIE._URL_BASE_RE + r'[a-z]+/(?P[0-9a-z-]+)/(?P[0-9a-z]+)' IE_NAME = 'onet.tv' - _TEST = { + _TESTS = [{ 'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc', - 'md5': 'e3ffbf47590032ac3f27249204173d50', + 'md5': '436102770fb095c75b8bb0392d3da9ff', 'info_dict': { 'id': 'qbpyqc', 'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd', @@ -120,7 
+132,10 @@ class OnetIE(OnetBaseIE): 'upload_date': '20160705', 'timestamp': 1467721580, }, - } + }, { + 'url': 'https://onet100.vod.pl/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -140,18 +155,21 @@ class OnetIE(OnetBaseIE): class OnetChannelIE(OnetBaseIE): - _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/(?P[a-z]+)(?:[?#]|$)' + _VALID_URL = OnetBaseIE._URL_BASE_RE + r'(?P[a-z]+)(?:[?#]|$)' IE_NAME = 'onet.tv:channel' - _TEST = { + _TESTS = [{ 'url': 'http://onet.tv/k/openerfestival', 'info_dict': { 'id': 'openerfestival', - 'title': 'Open\'er Festival Live', - 'description': 'Dziękujemy, że oglądaliście transmisje. Zobaczcie nasze relacje i wywiady z artystami.', + 'title': "Open'er Festival", + 'description': "Tak było na Open'er Festival 2016! Oglądaj nasze reportaże i wywiady z artystami.", }, - 'playlist_mincount': 46, - } + 'playlist_mincount': 35, + }, { + 'url': 'https://onet100.vod.pl/k/openerfestival', + 'only_matching': True, + }] def _real_extract(self, url): channel_id = self._match_id(url) @@ -173,7 +191,7 @@ class OnetChannelIE(OnetBaseIE): 'Downloading channel %s - add --no-playlist to just download video %s' % ( channel_id, video_name)) matches = re.findall( - r']+href=[\'"](https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/[0-9a-z-]+/[0-9a-z]+)', + r']+href=[\'"](%s[a-z]+/[0-9a-z-]+/[0-9a-z]+)' % self._URL_BASE_RE, webpage) entries = [ self.url_result(video_link, OnetIE.ie_key()) From c56b2ac43ca27b32fb4f7b230d851a61b5fc7cbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 30 Oct 2019 02:21:03 +0700 Subject: [PATCH 631/785] [tv2dk] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tv2dk.py | 82 ++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 youtube_dl/extractor/tv2dk.py diff --git a/youtube_dl/extractor/extractors.py 
b/youtube_dl/extractor/extractors.py index 15f96fb8f..5d20ba863 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1189,6 +1189,7 @@ from .tv2 import ( TV2IE, TV2ArticleIE, ) +from .tv2dk import TV2DKIE from .tv2hu import TV2HuIE from .tv4 import TV4IE from .tv5mondeplus import TV5MondePlusIE diff --git a/youtube_dl/extractor/tv2dk.py b/youtube_dl/extractor/tv2dk.py new file mode 100644 index 000000000..eb39424df --- /dev/null +++ b/youtube_dl/extractor/tv2dk.py @@ -0,0 +1,82 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import extract_attributes + + +class TV2DKIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? + (?: + tvsyd| + tv2ostjylland| + tvmidtvest| + tv2fyn| + tv2east| + tv2lorry| + tv2nord + )\.dk/ + (:[^/]+/)* + (?P[^/?\#&]+) + ''' + _TESTS = [{ + 'url': 'https://www.tvsyd.dk/nyheder/28-10-2019/1930/1930-28-okt-2019?autoplay=1#player', + 'info_dict': { + 'id': '0_52jmwa0p', + 'ext': 'mp4', + 'title': '19:30 - 28. okt. 
2019', + 'timestamp': 1572290248, + 'upload_date': '20191028', + 'uploader_id': 'tvsyd', + 'duration': 1347, + 'view_count': int, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['Kaltura'], + }, { + 'url': 'https://www.tv2ostjylland.dk/artikel/minister-gaar-ind-i-sag-om-diabetes-teknologi', + 'only_matching': True, + }, { + 'url': 'https://www.tv2ostjylland.dk/nyheder/28-10-2019/22/2200-nyhederne-mandag-d-28-oktober-2019?autoplay=1#player', + 'only_matching': True, + }, { + 'url': 'https://www.tvmidtvest.dk/nyheder/27-10-2019/1930/1930-27-okt-2019', + 'only_matching': True, + }, { + 'url': 'https://www.tv2fyn.dk/artikel/fyn-kan-faa-landets-foerste-fabrik-til-groent-jetbraendstof', + 'only_matching': True, + }, { + 'url': 'https://www.tv2east.dk/artikel/gods-faar-indleveret-tonsvis-af-aebler-100-kilo-aebler-gaar-til-en-aeblebrandy', + 'only_matching': True, + }, { + 'url': 'https://www.tv2lorry.dk/koebenhavn/rasmus-paludan-evakueret-til-egen-demonstration#player', + 'only_matching': True, + }, { + 'url': 'https://www.tv2nord.dk/artikel/dybt-uacceptabelt', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + entries = [] + for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage): + video = extract_attributes(video_el) + kaltura_id = video.get('data-entryid') + if not kaltura_id: + continue + partner_id = video.get('data-partnerid') + if not partner_id: + continue + entries.append(self.url_result( + 'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura', + video_id=kaltura_id)) + return self.playlist_result(entries) From 9a621ddc3a42769f107f8bd0d67b2c7073ea8256 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 30 Oct 2019 02:21:52 +0700 Subject: [PATCH 632/785] [tv2] Fix and improve extraction (closes #22787) --- youtube_dl/extractor/tv2.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py index d5071e8a5..1b6590767 100644 --- a/youtube_dl/extractor/tv2.py +++ b/youtube_dl/extractor/tv2.py @@ -11,6 +11,7 @@ from ..utils import ( js_to_json, parse_iso8601, remove_end, + try_get, ) @@ -44,7 +45,14 @@ class TV2IE(InfoExtractor): data = self._download_json( 'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol), video_id, 'Downloading play JSON')['playback'] - for item in data['items']['item']: + items = try_get(data, lambda x: x['items']['item']) + if not items: + continue + if not isinstance(items, list): + items = [items] + for item in items: + if not isinstance(item, dict): + continue video_url = item.get('url') if not video_url or video_url in format_urls: continue From 45f4a433894556301204b704caca7d6a14286287 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 30 Oct 2019 23:07:35 +0100 Subject: [PATCH 633/785] [yahoo] improve extraction - add support for live streams(closes #3597)(closes #3779)(closes #22178) - bypass cookie consent page for european domains(closes #16948)(closes #22576) - add generic support for embeds(closes #20332) --- youtube_dl/extractor/yahoo.py | 672 +++++++++++++--------------------- 1 file changed, 264 insertions(+), 408 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index e5ebdd180..ee68096d0 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -3,453 +3,309 @@ from __future__ import unicode_literals import hashlib import itertools -import json import re from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( compat_str, compat_urllib_parse, - compat_urlparse, ) from ..utils import ( clean_html, - determine_ext, - ExtractorError, - extract_attributes, int_or_none, mimetype2ext, + parse_iso8601, smuggle_url, try_get, - unescapeHTML, url_or_none, ) -from .brightcove import ( - BrightcoveLegacyIE, - BrightcoveNewIE, -) 
-from .nbc import NBCSportsVPlayerIE +from .brightcove import BrightcoveNewIE class YahooIE(InfoExtractor): IE_DESC = 'Yahoo screen and movies' - _VALID_URL = r'(?Phttps?://(?:(?P[a-zA-Z]{2})\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?:(?P.+)?-)?(?P[0-9]+)(?:-[a-z]+)?(?:\.html)?' - _TESTS = [ - { - 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', - 'info_dict': { - 'id': '2d25e626-2378-391f-ada0-ddaf1417e588', - 'ext': 'mp4', - 'title': 'Julian Smith & Travis Legg Watch Julian Smith', - 'description': 'Julian and Travis watch Julian Smith', - 'duration': 6863, - }, + _VALID_URL = r'(?Phttps?://(?:(?P[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P[^?&#]*-[0-9]+)\.html)' + _TESTS = [{ + 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', + 'info_dict': { + 'id': '2d25e626-2378-391f-ada0-ddaf1417e588', + 'ext': 'mp4', + 'title': 'Julian Smith & Travis Legg Watch Julian Smith', + 'description': 'Julian and Travis watch Julian Smith', + 'duration': 6863, + 'timestamp': 1369812016, + 'upload_date': '20130529', }, - { - 'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html', - 'md5': '251af144a19ebc4a033e8ba91ac726bb', - 'info_dict': { - 'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9', - 'ext': 'mp4', - 'title': 'Codefellas - The Cougar Lies with Spanish Moss', - 'description': 'md5:66b627ab0a282b26352136ca96ce73c1', - 'duration': 151, - }, - 'skip': 'HTTP Error 404', + }, { + 'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed', + 'md5': '7993e572fac98e044588d0b5260f4352', + 'info_dict': { + 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb', + 'ext': 'mp4', + 'title': "Yahoo Saves 'Community'", + 'description': 'md5:4d4145af2fd3de00cbb6c1d664105053', + 'duration': 170, + 'timestamp': 1406838636, + 'upload_date': '20140731', }, - { - 'url': 
'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed', - 'md5': '7993e572fac98e044588d0b5260f4352', - 'info_dict': { - 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb', - 'ext': 'mp4', - 'title': "Yahoo Saves 'Community'", - 'description': 'md5:4d4145af2fd3de00cbb6c1d664105053', - 'duration': 170, - } - }, - { - 'url': 'https://tw.news.yahoo.com/%E6%95%A2%E5%95%8F%E5%B8%82%E9%95%B7%20%E9%BB%83%E7%A7%80%E9%9C%9C%E6%89%B9%E8%B3%B4%E6%B8%85%E5%BE%B7%20%E9%9D%9E%E5%B8%B8%E9%AB%98%E5%82%B2-034024051.html', - 'md5': '45c024bad51e63e9b6f6fad7a43a8c23', - 'info_dict': { - 'id': 'cac903b3-fcf4-3c14-b632-643ab541712f', - 'ext': 'mp4', - 'title': '敢問市長/黃秀霜批賴清德「非常高傲」', - 'description': '直言台南沒捷運 交通居五都之末', - 'duration': 396, - }, - }, - { - 'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html', - 'md5': '71298482f7c64cbb7fa064e4553ff1c1', - 'info_dict': { - 'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58', - 'ext': 'webm', - 'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder', - 'description': 'md5:f66c890e1490f4910a9953c941dee944', - 'duration': 97, - } - }, - { - 'url': 'https://ca.sports.yahoo.com/video/program-makes-hockey-more-affordable-013127711.html', - 'md5': '57e06440778b1828a6079d2f744212c4', - 'info_dict': { - 'id': 'c9fa2a36-0d4d-3937-b8f6-cc0fb1881e73', - 'ext': 'mp4', - 'title': 'Program that makes hockey more affordable not offered in Manitoba', - 'description': 'md5:c54a609f4c078d92b74ffb9bf1f496f4', - 'duration': 121, - }, - 'skip': 'Video gone', - }, { - 'url': 'https://ca.finance.yahoo.com/news/hackers-sony-more-trouble-well-154609075.html', - 'info_dict': { - 'id': '154609075', - }, - 'playlist': [{ - 'md5': '000887d0dc609bc3a47c974151a40fb8', - 'info_dict': { - 'id': 'e624c4bc-3389-34de-9dfc-025f74943409', - 'ext': 'mp4', - 'title': '\'The Interview\' TV Spot: War', - 'description': 'The Interview', - 'duration': 30, - }, - }, { - 'md5': '81bc74faf10750fe36e4542f9a184c66', - 
'info_dict': { - 'id': '1fc8ada0-718e-3abe-a450-bf31f246d1a9', - 'ext': 'mp4', - 'title': '\'The Interview\' TV Spot: Guys', - 'description': 'The Interview', - 'duration': 30, - }, - }], - }, { - 'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html', - 'md5': '88e209b417f173d86186bef6e4d1f160', - 'info_dict': { - 'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521', - 'ext': 'mp4', - 'title': 'China Moses Is Crazy About the Blues', - 'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0', - 'duration': 128, - } - }, { - 'url': 'https://in.lifestyle.yahoo.com/video/connect-dots-dark-side-virgo-090247395.html', - 'md5': 'd9a083ccf1379127bf25699d67e4791b', - 'info_dict': { - 'id': '52aeeaa3-b3d1-30d8-9ef8-5d0cf05efb7c', - 'ext': 'mp4', - 'title': 'Connect the Dots: Dark Side of Virgo', - 'description': 'md5:1428185051cfd1949807ad4ff6d3686a', - 'duration': 201, - }, - 'skip': 'Domain name in.lifestyle.yahoo.com gone', - }, { - 'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html', - 'md5': '989396ae73d20c6f057746fb226aa215', - 'info_dict': { - 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1', - 'ext': 'mp4', - 'title': '\'True Story\' Trailer', - 'description': 'True Story', - 'duration': 150, - }, - }, { - 'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html', - 'only_matching': True, - }, { - 'note': 'NBC Sports embeds', - 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313', - 'info_dict': { - 'id': '9CsDKds0kvHI', - 'ext': 'flv', - 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', - 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', - 'upload_date': '20150313', - 'uploader': 'NBCU-SPORTS', - 'timestamp': 1426270238, - } - }, { - 'url': 'https://tw.news.yahoo.com/-100120367.html', - 'only_matching': True, - }, { - # Query result is embedded in webpage, but 
explicit request to video API fails with geo restriction - 'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html', - 'md5': '4fbafb9c9b6f07aa8f870629f6671b35', - 'info_dict': { - 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504', - 'ext': 'mp4', - 'title': 'Communitary - Community Episode 1: Ladders', - 'description': 'md5:8fc39608213295748e1e289807838c97', - 'duration': 1646, - }, - }, { - # it uses an alias to get the video_id - 'url': 'https://www.yahoo.com/movies/the-stars-of-daddys-home-have-very-different-212843197.html', - 'info_dict': { - 'id': '40eda9c8-8e5f-3552-8745-830f67d0c737', - 'ext': 'mp4', - 'title': 'Will Ferrell & Mark Wahlberg Are Pro-Spanking', - 'description': 'While they play feuding fathers in \'Daddy\'s Home,\' star Will Ferrell & Mark Wahlberg share their true feelings on parenthood.', - }, - }, - { - # config['models']['applet_model']['data']['sapi'] has no query - 'url': 'https://www.yahoo.com/music/livenation/event/galactic-2016', - 'md5': 'dac0c72d502bc5facda80c9e6d5c98db', - 'info_dict': { - 'id': 'a6015640-e9e5-3efb-bb60-05589a183919', - 'ext': 'mp4', - 'description': 'Galactic', - 'title': 'Dolla Diva (feat. 
Maggie Koerner)', - }, - 'skip': 'redirect to https://www.yahoo.com/music', - }, - { - # yahoo://article/ - 'url': 'https://www.yahoo.com/movies/video/true-story-trailer-173000497.html', - 'info_dict': { - 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1', - 'ext': 'mp4', - 'title': "'True Story' Trailer", - 'description': 'True Story', - }, - 'params': { - 'skip_download': True, - }, - }, - { - # ytwnews://cavideo/ - 'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html', - 'info_dict': { - 'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff', - 'ext': 'mp4', - 'title': '單車天使 - 中文版預', - 'description': '中文版預', - }, - 'params': { - 'skip_download': True, - }, - }, - { - # custom brightcove - 'url': 'https://au.tv.yahoo.com/plus7/sunrise/-/watch/37083565/clown-entertainers-say-it-is-hurting-their-business/', - 'info_dict': { - 'id': '5575377707001', - 'ext': 'mp4', - 'title': "Clown entertainers say 'It' is hurting their business", - 'description': 'Stephen King s horror film has much to answer for. 
Jelby and Mr Loopy the Clowns join us.', - 'timestamp': 1505341164, - 'upload_date': '20170913', - 'uploader_id': '2376984109001', - }, - 'params': { - 'skip_download': True, - }, - }, - { - # custom brightcove, geo-restricted to Australia, bypassable - 'url': 'https://au.tv.yahoo.com/plus7/sunrise/-/watch/37263964/sunrise-episode-wed-27-sep/', - 'only_matching': True, + }, { + 'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html', + 'md5': '0b51660361f0e27c9789e7037ef76f4b', + 'info_dict': { + 'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58', + 'ext': 'mp4', + 'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder', + 'description': 'md5:f66c890e1490f4910a9953c941dee944', + 'duration': 97, + 'timestamp': 1414489862, + 'upload_date': '20141028', } - ] + }, { + 'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html', + 'md5': '88e209b417f173d86186bef6e4d1f160', + 'info_dict': { + 'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521', + 'ext': 'mp4', + 'title': 'China Moses Is Crazy About the Blues', + 'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0', + 'duration': 128, + 'timestamp': 1385722202, + 'upload_date': '20131129', + } + }, { + 'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html', + 'md5': '2a9752f74cb898af5d1083ea9f661b58', + 'info_dict': { + 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1', + 'ext': 'mp4', + 'title': '\'True Story\' Trailer', + 'description': 'True Story', + 'duration': 150, + 'timestamp': 1418919206, + 'upload_date': '20141218', + }, + }, { + 'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html', + 'only_matching': True, + }, { + 'note': 'NBC Sports embeds', + 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313', + 'info_dict': { + 'id': '9CsDKds0kvHI', + 'ext': 'flv', + 'description': 
'md5:df390f70a9ba7c95ff1daace988f0d8d', + 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', + 'upload_date': '20150313', + 'uploader': 'NBCU-SPORTS', + 'timestamp': 1426270238, + }, + }, { + 'url': 'https://tw.news.yahoo.com/-100120367.html', + 'only_matching': True, + }, { + # Query result is embedded in webpage, but explicit request to video API fails with geo restriction + 'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html', + 'md5': '4fbafb9c9b6f07aa8f870629f6671b35', + 'info_dict': { + 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504', + 'ext': 'mp4', + 'title': 'Communitary - Community Episode 1: Ladders', + 'description': 'md5:8fc39608213295748e1e289807838c97', + 'duration': 1646, + 'timestamp': 1440436550, + 'upload_date': '20150824', + 'series': 'Communitary', + 'season_number': 6, + 'episode_number': 1, + }, + }, { + # ytwnews://cavideo/ + 'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html', + 'info_dict': { + 'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff', + 'ext': 'mp4', + 'title': '單車天使 - 中文版預', + 'description': '中文版預', + 'timestamp': 1476696196, + 'upload_date': '20161017', + }, + 'params': { + 'skip_download': True, + }, + }, { + # Contains both a Yahoo hosted video and multiple Youtube embeds + 'url': 'https://www.yahoo.com/entertainment/gwen-stefani-reveals-the-pop-hit-she-passed-on-assigns-it-to-her-voice-contestant-instead-033045672.html', + 'info_dict': { + 'id': '46c5d95a-528f-3d03-b732-732fcadd51de', + 'title': 'Gwen Stefani reveals the pop hit she passed on, assigns it to her \'Voice\' contestant instead', + 'description': 'Gwen decided not to record this hit herself, but she decided it was the perfect fit for Kyndall Inskeep.', + }, + 'playlist': [{ + 'info_dict': { + 'id': '966d4262-4fd1-3aaa-b45b-049ca6e38ba6', + 'ext': 'mp4', + 'title': 'Gwen Stefani reveals she turned down one of Sia\'s best songs', + 'description': 'On "The Voice" Tuesday, Gwen Stefani told Taylor 
Swift which Sia hit was almost hers.', + 'timestamp': 1572406500, + 'upload_date': '20191030', + }, + }, { + 'info_dict': { + 'id': '352CFDOQrKg', + 'ext': 'mp4', + 'title': 'Kyndal Inskeep "Performs the Hell Out of" Sia\'s "Elastic Heart" - The Voice Knockouts 2019', + 'description': 'md5:35b61e94c2ae214bc965ff4245f80d11', + 'uploader': 'The Voice', + 'uploader_id': 'NBCTheVoice', + 'upload_date': '20191029', + }, + }], + 'params': { + 'playlistend': 2, + }, + }, { + 'url': 'https://malaysia.news.yahoo.com/video/bystanders-help-ontario-policeman-bust-190932818.html', + 'only_matching': True, + }, { + 'url': 'https://es-us.noticias.yahoo.com/es-la-puerta-irrompible-que-110539379.html', + 'only_matching': True, + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - page_id = mobj.group('id') - display_id = mobj.group('display_id') or page_id - host = mobj.group('host') - webpage, urlh = self._download_webpage_handle(url, display_id) - if 'err=404' in urlh.geturl(): - raise ExtractorError('Video gone', expected=True) - - # Look for iframed media first - entries = [] - iframe_urls = re.findall(r']+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage) - for idx, iframe_url in enumerate(iframe_urls): - entries.append(self.url_result(host + iframe_url, 'Yahoo')) - if entries: - return self.playlist_result(entries, page_id) - - # Look for NBCSports iframes - nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage) - if nbc_sports_url: - return self.url_result(nbc_sports_url, NBCSportsVPlayerIE.ie_key()) - - # Look for Brightcove Legacy Studio embeds - bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) - if bc_url: - return self.url_result(bc_url, BrightcoveLegacyIE.ie_key()) - - def brightcove_url_result(bc_url): - return self.url_result( - smuggle_url(bc_url, {'geo_countries': [mobj.group('country')]}), - BrightcoveNewIE.ie_key()) - - # Look for Brightcove New Studio embeds - bc_url = BrightcoveNewIE._extract_url(self, webpage) - if 
bc_url: - return brightcove_url_result(bc_url) - - brightcove_iframe = self._search_regex( - r'(]+data-video-id=["\']\d+[^>]+>)', webpage, - 'brightcove iframe', default=None) - if brightcove_iframe: - attr = extract_attributes(brightcove_iframe) - src = attr.get('src') - if src: - parsed_src = compat_urlparse.urlparse(src) - qs = compat_urlparse.parse_qs(parsed_src.query) - account_id = qs.get('accountId', ['2376984109001'])[0] - brightcove_id = attr.get('data-video-id') or qs.get('videoId', [None])[0] - if account_id and brightcove_id: - return brightcove_url_result( - 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' - % (account_id, brightcove_id)) - - # Query result is often embedded in webpage as JSON. Sometimes explicit requests - # to video API results in a failure with geo restriction reason therefore using - # embedded query result when present sounds reasonable. - config_json = self._search_regex( - r'window\.Af\.bootstrap\[[^\]]+\]\s*=\s*({.*?"applet_type"\s*:\s*"td-applet-videoplayer".*?});(?:|$)', - webpage, 'videoplayer applet', default=None) - if config_json: - config = self._parse_json(config_json, display_id, fatal=False) - if config: - sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi') - if sapi and 'query' in sapi: - info = self._extract_info(display_id, sapi, webpage) - self._sort_formats(info['formats']) - return info - - items_json = self._search_regex( - r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, - default=None) - if items_json is None: - alias = self._search_regex( - r'"aliases":{"video":"(.*?)"', webpage, 'alias', default=None) - if alias is not None: - alias_info = self._download_json( - 'https://www.yahoo.com/_td/api/resource/VideoService.videos;video_aliases=["%s"]' % alias, - display_id, 'Downloading alias info') - video_id = alias_info[0]['id'] - else: - CONTENT_ID_REGEXES = [ - r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"', - 
r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"', - r'"first_videoid"\s*:\s*"([^"]+)"', - r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id), - r']data-uuid=["\']([^"\']+)', - r']+yahoo://article/view\?.*\buuid=([^&"\']+)', - r']+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']', - ] - video_id = self._search_regex( - CONTENT_ID_REGEXES, webpage, 'content ID') + url, country, display_id = re.match(self._VALID_URL, url).groups() + if not country: + country = 'us' else: - items = json.loads(items_json) - info = items['mediaItems']['query']['results']['mediaObj'][0] - # The 'meta' field is not always in the video webpage, we request it - # from another page - video_id = info['id'] - return self._get_info(video_id, display_id, webpage) + country = country.split('-')[0] + api_base = 'https://%s.yahoo.com/_td/api/resource/' % country - def _extract_info(self, display_id, query, webpage): - info = query['query']['results']['mediaObj'][0] - meta = info.get('meta') - video_id = info.get('id') + for i, uuid in enumerate(['url=' + url, 'ymedia-alias=' + display_id]): + content = self._download_json( + api_base + 'content;getDetailView=true;uuids=["%s"]' % uuid, + display_id, 'Downloading content JSON metadata', fatal=i == 1) + if content: + item = content['items'][0] + break - if not meta: - msg = info['status'].get('msg') - if msg: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, msg), expected=True) - raise ExtractorError('Unable to extract media object meta') + if item.get('type') != 'video': + entries = [] + cover = item.get('cover') or {} + if cover.get('type') == 'yvideo': + cover_url = cover.get('url') + if cover_url: + entries.append(self.url_result( + cover_url, 'Yahoo', cover.get('uuid'))) + + for e in item.get('body', []): + if e.get('type') == 'videoIframe': + iframe_url = e.get('url') + if not iframe_url: + continue + entries.append(self.url_result(iframe_url)) + + return self.playlist_result( + entries, 
item.get('uuid'), + item.get('title'), item.get('summary')) + + video_id = item['uuid'] + video = self._download_json( + api_base + 'VideoService.videos;view=full;video_ids=["%s"]' % video_id, + video_id, 'Downloading video JSON metadata')[0] + title = video['title'] + + if country == 'malaysia': + country = 'my' + + is_live = video.get('live_state') == 'live' + fmts = ('m3u8',) if is_live else ('web', 'mp4') + + urls = [] formats = [] - for s in info['streams']: - tbr = int_or_none(s.get('bitrate')) - format_info = { - 'width': int_or_none(s.get('width')), - 'height': int_or_none(s.get('height')), - 'tbr': tbr, - } - - host = s['host'] - path = s['path'] - if host.startswith('rtmp'): - fmt = 'rtmp' - format_info.update({ - 'url': host, - 'play_path': path, - 'ext': 'flv', - }) - else: - if s.get('format') == 'm3u8_playlist': - fmt = 'hls' - format_info.update({ - 'protocol': 'm3u8_native', - 'ext': 'mp4', - }) - else: - fmt = format_info['ext'] = determine_ext(path) - format_url = compat_urlparse.urljoin(host, path) - format_info['url'] = format_url - format_info['format_id'] = fmt + ('-%d' % tbr if tbr else '') - formats.append(format_info) - - closed_captions = self._html_search_regex( - r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions', - default='[]') - - cc_json = self._parse_json(closed_captions, video_id, fatal=False) subtitles = {} - if cc_json: - for closed_caption in cc_json: - lang = closed_caption['lang'] - if lang not in subtitles: - subtitles[lang] = [] - subtitles[lang].append({ - 'url': closed_caption['url'], - 'ext': mimetype2ext(closed_caption['content_type']), + for fmt in fmts: + media_obj = self._download_json( + 'https://video-api.yql.yahoo.com/v1/video/sapi/streams/' + video_id, + video_id, 'Downloading %s JSON metadata' % fmt, + headers=self.geo_verification_headers(), query={ + 'format': fmt, + 'region': country.upper(), + })['query']['results']['mediaObj'][0] + msg = media_obj.get('status', {}).get('msg') + + for s in 
media_obj.get('streams', []): + host = s.get('host') + path = s.get('path') + if not host or not path: + continue + s_url = host + path + if s.get('format') == 'm3u8': + formats.extend(self._extract_m3u8_formats( + s_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + continue + tbr = int_or_none(s.get('bitrate')) + formats.append({ + 'url': s_url, + 'format_id': fmt + ('-%d' % tbr if tbr else ''), + 'width': int_or_none(s.get('width')), + 'height': int_or_none(s.get('height')), + 'tbr': tbr, + 'fps': int_or_none(s.get('framerate')), }) + for cc in media_obj.get('closedcaptions', []): + cc_url = cc.get('url') + if not cc_url or cc_url in urls: + continue + urls.append(cc_url) + subtitles.setdefault(cc.get('lang') or 'en-US', []).append({ + 'url': cc_url, + 'ext': mimetype2ext(cc.get('content_type')), + }) + + streaming_url = video.get('streaming_url') + if streaming_url and not is_live: + formats.extend(self._extract_m3u8_formats( + streaming_url, video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False)) + + if not formats and msg == 'geo restricted': + self.raise_geo_restricted() + + self._sort_formats(formats) + + thumbnails = [] + for thumb in video.get('thumbnails', []): + thumb_url = thumb.get('url') + if not thumb_url: + continue + thumbnails.append({ + 'id': thumb.get('tag'), + 'url': thumb.get('url'), + 'width': int_or_none(thumb.get('width')), + 'height': int_or_none(thumb.get('height')), + }) + + series_info = video.get('series_info') or {} + return { 'id': video_id, - 'display_id': display_id, - 'title': unescapeHTML(meta['title']), + 'title': self._live_title(title) if is_live else title, 'formats': formats, - 'description': clean_html(meta['description']), - 'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage), - 'duration': int_or_none(meta.get('duration')), + 'display_id': display_id, + 'thumbnails': thumbnails, + 'description': clean_html(video.get('description')), + 'timestamp': 
parse_iso8601(video.get('publish_time')), 'subtitles': subtitles, + 'duration': int_or_none(video.get('duration')), + 'view_count': int_or_none(video.get('view_count')), + 'is_live': is_live, + 'series': video.get('show_name'), + 'season_number': int_or_none(series_info.get('season_number')), + 'episode_number': int_or_none(series_info.get('episode_number')), } - def _get_info(self, video_id, display_id, webpage): - region = self._search_regex( - r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"', - webpage, 'region', fatal=False, default='US').upper() - formats = [] - info = {} - for fmt in ('webm', 'mp4'): - query_result = self._download_json( - 'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' + video_id, - display_id, 'Downloading %s video info' % fmt, query={ - 'protocol': 'http', - 'region': region, - 'format': fmt, - }) - info = self._extract_info(display_id, query_result, webpage) - formats.extend(info['formats']) - formats.extend(self._extract_m3u8_formats( - 'http://video.media.yql.yahoo.com/v1/hls/%s?region=%s' % (video_id, region), - video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - self._sort_formats(formats) - info['formats'] = formats - return info - class YahooSearchIE(SearchInfoExtractor): IE_DESC = 'Yahoo screen search' From 8040a0d35e11f7b2bf6d698175ab0b12424d696f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 30 Oct 2019 23:52:09 +0100 Subject: [PATCH 634/785] [yahoo] fix typo --- youtube_dl/extractor/yahoo.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index ee68096d0..6c6bd76e8 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -51,10 +51,10 @@ class YahooIE(InfoExtractor): }, }, { 'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html', - 'md5': '0b51660361f0e27c9789e7037ef76f4b', + 'md5': '71298482f7c64cbb7fa064e4553ff1c1', 'info_dict': { 'id': 
'b3affa53-2e14-3590-852b-0e0db6cd1a58', - 'ext': 'mp4', + 'ext': 'webm', 'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder', 'description': 'md5:f66c890e1490f4910a9953c941dee944', 'duration': 97, @@ -164,6 +164,7 @@ class YahooIE(InfoExtractor): 'params': { 'playlistend': 2, }, + 'expected_warnings': ['HTTP Error 404'], }, { 'url': 'https://malaysia.news.yahoo.com/video/bystanders-help-ontario-policeman-bust-190932818.html', 'only_matching': True, @@ -219,7 +220,7 @@ class YahooIE(InfoExtractor): country = 'my' is_live = video.get('live_state') == 'live' - fmts = ('m3u8',) if is_live else ('web', 'mp4') + fmts = ('m3u8',) if is_live else ('webm', 'mp4') urls = [] formats = [] From 237513e801671a51cc45d6a2fe5e7df69517958e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 31 Oct 2019 07:38:53 +0100 Subject: [PATCH 635/785] [yahoo] restore support for cbs suffixed URLs --- test/test_all_urls.py | 6 ------ youtube_dl/extractor/yahoo.py | 5 ++++- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 465ce0050..81056a999 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -123,12 +123,6 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs']) self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs']) - def test_yahoo_https(self): - # https://github.com/ytdl-org/youtube-dl/issues/2701 - self.assertMatch( - 'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', - ['Yahoo']) - def test_no_duplicated_ie_names(self): name_accu = collections.defaultdict(list) for ie in self.ies: diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 6c6bd76e8..f041cf5de 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -25,7 +25,7 @@ from .brightcove import BrightcoveNewIE class YahooIE(InfoExtractor): IE_DESC = 'Yahoo screen 
and movies' - _VALID_URL = r'(?Phttps?://(?:(?P[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P[^?&#]*-[0-9]+)\.html)' + _VALID_URL = r'(?Phttps?://(?:(?P[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P[^?&#]*-[0-9]+)(?:-[a-z]+)?\.html)' _TESTS = [{ 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', 'info_dict': { @@ -171,6 +171,9 @@ class YahooIE(InfoExtractor): }, { 'url': 'https://es-us.noticias.yahoo.com/es-la-puerta-irrompible-que-110539379.html', 'only_matching': True, + }, { + 'url': 'https://www.yahoo.com/entertainment/v/longtime-cbs-news-60-minutes-032036500-cbs.html', + 'only_matching': True, }] def _real_extract(self, url): From 3cf70bf1590ce364dc223197ba804cb70e704760 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 31 Oct 2019 07:44:21 +0100 Subject: [PATCH 636/785] [yahoo] make cbs URL suffix part of the media alias --- youtube_dl/extractor/yahoo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index f041cf5de..b9a9e88a0 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -25,7 +25,7 @@ from .brightcove import BrightcoveNewIE class YahooIE(InfoExtractor): IE_DESC = 'Yahoo screen and movies' - _VALID_URL = r'(?Phttps?://(?:(?P[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P[^?&#]*-[0-9]+)(?:-[a-z]+)?\.html)' + _VALID_URL = r'(?Phttps?://(?:(?P[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P[^?&#]*-[0-9]+(?:-[a-z]+)?)\.html)' _TESTS = [{ 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', 'info_dict': { From e993f1a0959fc04507b1cb2efeb610ae628d6d98 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 31 Oct 2019 08:13:10 +0100 Subject: [PATCH 637/785] [mixcloud] fix cloudcast data extraction(closes #22821) --- 
youtube_dl/extractor/mixcloud.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index bf5353ef9..e5f631506 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -86,9 +86,10 @@ class MixcloudIE(InfoExtractor): r'', webpage, 'play info'), 'play info') for item in full_info_json: - item_data = try_get( - item, lambda x: x['cloudcast']['data']['cloudcastLookup'], - dict) + item_data = try_get(item, [ + lambda x: x['cloudcast']['data']['cloudcastLookup'], + lambda x: x['cloudcastLookup']['data']['cloudcastLookup'], + ], dict) if try_get(item_data, lambda x: x['streamInfo']['url']): info_json = item_data break From 274bf5e4c58bceed4ff8c283d77457bf1cb76d3e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 1 Nov 2019 11:37:41 +0100 Subject: [PATCH 638/785] [kakao] improve extraction - support embed URLs - support Kakao Legacy vid based embed URLs - only extract fields used for extraction - strip description and extract tags --- youtube_dl/extractor/kakao.py | 45 +++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/kakao.py b/youtube_dl/extractor/kakao.py index 7fa140b0c..96f918b75 100644 --- a/youtube_dl/extractor/kakao.py +++ b/youtube_dl/extractor/kakao.py @@ -6,14 +6,15 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( int_or_none, + strip_or_none, unified_timestamp, update_url_query, ) class KakaoIE(InfoExtractor): - _VALID_URL = r'https?://tv\.kakao\.com/channel/(?P\d+)/cliplink/(?P\d+)' - _API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks' + _VALID_URL = r'https?://(?:play-)?tv\.kakao\.com/(?:channel/\d+|embed/player)/cliplink/(?P\d+|[^?#&]+@my)' + _API_BASE_TMPL = 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/' _TESTS = [{ 'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083', @@ -36,7 +37,7 @@ class 
KakaoIE(InfoExtractor): 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회', 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)', 'uploader_id': 2653210, - 'uploader': '쇼 음악중심', + 'uploader': '쇼! 음악중심', 'timestamp': 1485684628, 'upload_date': '20170129', } @@ -44,6 +45,8 @@ class KakaoIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + display_id = video_id.rstrip('@my') + api_base = self._API_BASE_TMPL % video_id player_header = { 'Referer': update_url_query( @@ -55,20 +58,22 @@ class KakaoIE(InfoExtractor): }) } - QUERY_COMMON = { + query = { 'player': 'monet_html5', 'referer': url, 'uuid': '', 'service': 'kakao_tv', 'section': '', 'dteType': 'PC', + 'fields': ','.join([ + '-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title', + 'description', 'channelId', 'createTime', 'duration', 'playCount', + 'likeCount', 'commentCount', 'tagList', 'channel', 'name', + 'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault']) } - query = QUERY_COMMON.copy() - query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList' impress = self._download_json( - '%s/%s/impress' % (self._API_BASE, video_id), - video_id, 'Downloading video info', + api_base + 'impress', display_id, 'Downloading video info', query=query, headers=player_header) clip_link = impress['clipLink'] @@ -78,30 +83,27 @@ class KakaoIE(InfoExtractor): tid = impress.get('tid', '') - query = QUERY_COMMON.copy() query.update({ + 'fields': '-*,outputList,profile,width,height,label,filesize', 'tid': tid, 'profile': 'HIGH', }) raw = self._download_json( - '%s/%s/raw' % (self._API_BASE, video_id), - video_id, 'Downloading video formats info', + api_base + 'raw', display_id, 'Downloading video formats info', query=query, headers=player_header) formats = [] for fmt in raw.get('outputList', []): try: profile_name = fmt['profile'] + query.update({ + 'profile': profile_name, + 'fields': '-*,url', + }) fmt_url_json = 
self._download_json( - '%s/%s/raw/videolocation' % (self._API_BASE, video_id), - video_id, + api_base + 'raw/videolocation', display_id, 'Downloading video URL for profile %s' % profile_name, - query={ - 'service': 'kakao_tv', - 'section': '', - 'tid': tid, - 'profile': profile_name - }, headers=player_header, fatal=False) + query=query, headers=player_header, fatal=False) if fmt_url_json is None: continue @@ -134,9 +136,9 @@ class KakaoIE(InfoExtractor): }) return { - 'id': video_id, + 'id': display_id, 'title': title, - 'description': clip.get('description'), + 'description': strip_or_none(clip.get('description')), 'uploader': clip_link.get('channel', {}).get('name'), 'uploader_id': clip_link.get('channelId'), 'thumbnails': thumbs, @@ -146,4 +148,5 @@ class KakaoIE(InfoExtractor): 'like_count': int_or_none(clip.get('likeCount')), 'comment_count': int_or_none(clip.get('commentCount')), 'formats': formats, + 'tags': clip.get('tagList'), } From d439989215fcb1672bc2ac18d4fb6206e12c387a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 1 Nov 2019 11:43:18 +0100 Subject: [PATCH 639/785] [daum] fix VOD and Clip extracton(closes #15015) --- youtube_dl/extractor/daum.py | 106 +++++++++++------------------------ 1 file changed, 32 insertions(+), 74 deletions(-) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index 76f021892..137095577 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -2,25 +2,21 @@ from __future__ import unicode_literals -import re import itertools from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_urllib_parse_unquote, - compat_urllib_parse_urlencode, compat_urlparse, ) -from ..utils import ( - int_or_none, - str_to_int, - xpath_text, - unescapeHTML, -) -class DaumIE(InfoExtractor): +class DaumBaseIE(InfoExtractor): + _KAKAO_EMBED_BASE = 'http://tv.kakao.com/embed/player/cliplink/' + + +class DaumIE(DaumBaseIE): _VALID_URL = 
r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P[^?#&]+)' IE_NAME = 'daum.net' @@ -36,6 +32,9 @@ class DaumIE(InfoExtractor): 'duration': 2117, 'view_count': int, 'comment_count': int, + 'uploader_id': 186139, + 'uploader': '콘간지', + 'timestamp': 1387310323, }, }, { 'url': 'http://m.tvpot.daum.net/v/65139429', @@ -44,11 +43,14 @@ class DaumIE(InfoExtractor): 'ext': 'mp4', 'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118', 'description': 'md5:79794514261164ff27e36a21ad229fc5', - 'upload_date': '20150604', + 'upload_date': '20150118', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'duration': 154, 'view_count': int, 'comment_count': int, + 'uploader': 'MBC 예능', + 'uploader_id': 132251, + 'timestamp': 1421604228, }, }, { 'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24', @@ -59,12 +61,15 @@ class DaumIE(InfoExtractor): 'id': 'vwIpVpCQsT8$', 'ext': 'flv', 'title': '01-Korean War ( Trouble on the horizon )', - 'description': '\nKorean War 01\nTrouble on the horizon\n전쟁의 먹구름', + 'description': 'Korean War 01\r\nTrouble on the horizon\r\n전쟁의 먹구름', 'upload_date': '20080223', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'duration': 249, 'view_count': int, 'comment_count': int, + 'uploader': '까칠한 墮落始祖 황비홍님의', + 'uploader_id': 560824, + 'timestamp': 1203770745, }, }, { # Requires dte_type=WEB (#9972) @@ -73,60 +78,24 @@ class DaumIE(InfoExtractor): 'info_dict': { 'id': 's3794Uf1NZeZ1qMpGpeqeRU', 'ext': 'mp4', - 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny) [쇼! 음악중심] 508회 20160611', - 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\n\n[쇼! 음악중심] 20160611, 507회', - 'upload_date': '20160611', + 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)', + 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회', + 'upload_date': '20170129', + 'uploader': '쇼! 
음악중심', + 'uploader_id': 2653210, + 'timestamp': 1485684628, }, }] def _real_extract(self, url): video_id = compat_urllib_parse_unquote(self._match_id(url)) - movie_data = self._download_json( - 'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json', - video_id, 'Downloading video formats info', query={'vid': video_id, 'dte_type': 'WEB'}) - - # For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid - if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id): - return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id) - - info = self._download_xml( - 'http://tvpot.daum.net/clip/ClipInfoXml.do', video_id, - 'Downloading video info', query={'vid': video_id}) - - formats = [] - for format_el in movie_data['output_list']['output_list']: - profile = format_el['profile'] - format_query = compat_urllib_parse_urlencode({ - 'vid': video_id, - 'profile': profile, - }) - url_doc = self._download_xml( - 'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' 
+ format_query, - video_id, note='Downloading video data for %s format' % profile) - format_url = url_doc.find('result/url').text - formats.append({ - 'url': format_url, - 'format_id': profile, - 'width': int_or_none(format_el.get('width')), - 'height': int_or_none(format_el.get('height')), - 'filesize': int_or_none(format_el.get('filesize')), - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': info.find('TITLE').text, - 'formats': formats, - 'thumbnail': xpath_text(info, 'THUMB_URL'), - 'description': xpath_text(info, 'CONTENTS'), - 'duration': int_or_none(xpath_text(info, 'DURATION')), - 'upload_date': info.find('REGDTTM').text[:8], - 'view_count': str_to_int(xpath_text(info, 'PLAY_CNT')), - 'comment_count': str_to_int(xpath_text(info, 'COMMENT_CNT')), - } + if not video_id.isdigit(): + video_id += '@my' + return self.url_result( + self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id) -class DaumClipIE(InfoExtractor): +class DaumClipIE(DaumBaseIE): _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P\d+)' IE_NAME = 'daum.net:clip' _URL_TEMPLATE = 'http://tvpot.daum.net/clip/ClipView.do?clipid=%s' @@ -142,6 +111,9 @@ class DaumClipIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'duration': 3868, 'view_count': int, + 'uploader': 'GOMeXP', + 'uploader_id': 6667, + 'timestamp': 1377911092, }, }, { 'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425', @@ -154,22 +126,8 @@ class DaumClipIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - clip_info = self._download_json( - 'http://tvpot.daum.net/mypot/json/GetClipInfo.do?clipid=%s' % video_id, - video_id, 'Downloading clip info')['clip_bean'] - - return { - '_type': 'url_transparent', - 'id': video_id, - 'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'], - 'title': unescapeHTML(clip_info['title']), - 'thumbnail': clip_info.get('thumb_url'), - 'description': 
clip_info.get('contents'), - 'duration': int_or_none(clip_info.get('duration')), - 'upload_date': clip_info.get('up_date')[:8], - 'view_count': int_or_none(clip_info.get('play_count')), - 'ie_key': 'Daum', - } + return self.url_result( + self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id) class DaumListIE(InfoExtractor): From e987ce4bda476a387937e4af5b46f4a412a67830 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 1 Nov 2019 12:40:41 +0100 Subject: [PATCH 640/785] [kakao] remove raw request and extract format total bitrate --- youtube_dl/extractor/kakao.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/kakao.py b/youtube_dl/extractor/kakao.py index 96f918b75..32935bb28 100644 --- a/youtube_dl/extractor/kakao.py +++ b/youtube_dl/extractor/kakao.py @@ -69,7 +69,8 @@ class KakaoIE(InfoExtractor): '-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title', 'description', 'channelId', 'createTime', 'duration', 'playCount', 'likeCount', 'commentCount', 'tagList', 'channel', 'name', - 'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault']) + 'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault', + 'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label']) } impress = self._download_json( @@ -81,21 +82,14 @@ class KakaoIE(InfoExtractor): title = clip.get('title') or clip_link.get('displayTitle') - tid = impress.get('tid', '') - - query.update({ - 'fields': '-*,outputList,profile,width,height,label,filesize', - 'tid': tid, - 'profile': 'HIGH', - }) - raw = self._download_json( - api_base + 'raw', display_id, 'Downloading video formats info', - query=query, headers=player_header) + query['tid'] = impress.get('tid', '') formats = [] - for fmt in raw.get('outputList', []): + for fmt in clip.get('videoOutputList', []): try: profile_name = fmt['profile'] + if profile_name == 'AUDIO': + continue query.update({ 'profile': profile_name, 'fields': '-*,url', @@ -115,7 +109,8 @@ 
class KakaoIE(InfoExtractor): 'width': int_or_none(fmt.get('width')), 'height': int_or_none(fmt.get('height')), 'format_note': fmt.get('label'), - 'filesize': int_or_none(fmt.get('filesize')) + 'filesize': int_or_none(fmt.get('filesize')), + 'tbr': int_or_none(fmt.get('kbps')), }) except KeyError: pass From 20cc7c082b82e82050a4e1f1bb815fee51f6c1c2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 1 Nov 2019 16:36:35 +0100 Subject: [PATCH 641/785] [go90] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/go90.py | 149 ----------------------------- 2 files changed, 150 deletions(-) delete mode 100644 youtube_dl/extractor/go90.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5d20ba863..e9b59ce52 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -422,7 +422,6 @@ from .globo import ( GloboArticleIE, ) from .go import GoIE -from .go90 import Go90IE from .godtube import GodTubeIE from .golem import GolemIE from .googledrive import GoogleDriveIE diff --git a/youtube_dl/extractor/go90.py b/youtube_dl/extractor/go90.py deleted file mode 100644 index c3ea717bc..000000000 --- a/youtube_dl/extractor/go90.py +++ /dev/null @@ -1,149 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_HTTPError -from ..utils import ( - determine_ext, - ExtractorError, - int_or_none, - parse_age_limit, - parse_iso8601, -) - - -class Go90IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?go90\.com/(?:videos|embed)/(?P[0-9a-zA-Z]+)' - _TESTS = [{ - 'url': 'https://www.go90.com/videos/84BUqjLpf9D', - 'md5': 'efa7670dbbbf21a7b07b360652b24a32', - 'info_dict': { - 'id': '84BUqjLpf9D', - 'ext': 'mp4', - 'title': 'Daily VICE - Inside The Utah Coalition Against Pornography Convention', - 'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn 
stance. Then, VICE Sports explains NFL contracts.', - 'timestamp': 1491868800, - 'upload_date': '20170411', - 'age_limit': 14, - } - }, { - 'url': 'https://www.go90.com/embed/261MflWkD3N', - 'only_matching': True, - }] - _GEO_BYPASS = False - - def _real_extract(self, url): - video_id = self._match_id(url) - - try: - headers = self.geo_verification_headers() - headers.update({ - 'Content-Type': 'application/json; charset=utf-8', - }) - video_data = self._download_json( - 'https://www.go90.com/api/view/items/' + video_id, video_id, - headers=headers, data=b'{"client":"web","device_type":"pc"}') - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: - message = self._parse_json(e.cause.read().decode(), None)['error']['message'] - if 'region unavailable' in message: - self.raise_geo_restricted(countries=['US']) - raise ExtractorError(message, expected=True) - raise - - if video_data.get('requires_drm'): - raise ExtractorError('This video is DRM protected.', expected=True) - main_video_asset = video_data['main_video_asset'] - - episode_number = int_or_none(video_data.get('episode_number')) - series = None - season = None - season_id = None - season_number = None - for metadata in video_data.get('__children', {}).get('Item', {}).values(): - if metadata.get('type') == 'show': - series = metadata.get('title') - elif metadata.get('type') == 'season': - season = metadata.get('title') - season_id = metadata.get('id') - season_number = int_or_none(metadata.get('season_number')) - - title = episode = video_data.get('title') or series - if series and series != title: - title = '%s - %s' % (series, title) - - thumbnails = [] - formats = [] - subtitles = {} - for asset in video_data.get('assets'): - if asset.get('id') == main_video_asset: - for source in asset.get('sources', []): - source_location = source.get('location') - if not source_location: - continue - source_type = source.get('type') - if source_type == 'hls': - m3u8_formats = 
self._extract_m3u8_formats( - source_location, video_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False) - for f in m3u8_formats: - mobj = re.search(r'/hls-(\d+)-(\d+)K', f['url']) - if mobj: - height, tbr = mobj.groups() - height = int_or_none(height) - f.update({ - 'height': f.get('height') or height, - 'width': f.get('width') or int_or_none(height / 9.0 * 16.0 if height else None), - 'tbr': f.get('tbr') or int_or_none(tbr), - }) - formats.extend(m3u8_formats) - elif source_type == 'dash': - formats.extend(self._extract_mpd_formats( - source_location, video_id, mpd_id='dash', fatal=False)) - else: - formats.append({ - 'format_id': source.get('name'), - 'url': source_location, - 'width': int_or_none(source.get('width')), - 'height': int_or_none(source.get('height')), - 'tbr': int_or_none(source.get('bitrate')), - }) - - for caption in asset.get('caption_metadata', []): - caption_url = caption.get('source_url') - if not caption_url: - continue - subtitles.setdefault(caption.get('language', 'en'), []).append({ - 'url': caption_url, - 'ext': determine_ext(caption_url, 'vtt'), - }) - elif asset.get('type') == 'image': - asset_location = asset.get('location') - if not asset_location: - continue - thumbnails.append({ - 'url': asset_location, - 'width': int_or_none(asset.get('width')), - 'height': int_or_none(asset.get('height')), - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnails': thumbnails, - 'description': video_data.get('short_description'), - 'like_count': int_or_none(video_data.get('like_count')), - 'timestamp': parse_iso8601(video_data.get('released_at')), - 'series': series, - 'episode': episode, - 'season': season, - 'season_id': season_id, - 'season_number': season_number, - 'episode_number': episode_number, - 'subtitles': subtitles, - 'age_limit': parse_age_limit(video_data.get('rating')), - } From 152f22920d73bb0dc24fa357d5904a8dd97a5bf6 Mon Sep 17 00:00:00 2001 From: Remita Amine 
Date: Fri, 1 Nov 2019 17:44:34 +0100 Subject: [PATCH 642/785] [wistia] reduce embed extraction false positives and support inline embeds(closes #22931) --- youtube_dl/extractor/wistia.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index fa142b974..0fbc888ec 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -12,7 +12,7 @@ from ..utils import ( class WistiaIE(InfoExtractor): - _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P[a-z0-9]+)' + _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P[a-z0-9]{10})' _API_URL = 'http://fast.wistia.com/embed/medias/%s.json' _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s' @@ -43,25 +43,26 @@ class WistiaIE(InfoExtractor): 'only_matching': True, }] + # https://wistia.com/support/embed-and-share/video-on-your-website @staticmethod def _extract_url(webpage): match = re.search( - r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/iframe/.+?)\1', webpage) + r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage) if match: return unescapeHTML(match.group('url')) - match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P[^"\']+)', webpage) - if match: - return 'wistia:%s' % match.group('id') - match = re.search( r'''(?sx) ]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*? 
- ]+class=(["']).*?\bwistia_async_(?P[a-z0-9]+)\b.*?\2 + ]+class=(["']).*?\bwistia_async_(?P[a-z0-9]{10})\b.*?\2 ''', webpage) if match: return 'wistia:%s' % match.group('id') + match = re.search(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P[a-z0-9]{10})', webpage) + if match: + return 'wistia:%s' % match.group('id') + def _real_extract(self, url): video_id = self._match_id(url) From 4c95fcf9e8fa2ed113698d13df55df4aaecd8433 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 1 Nov 2019 21:16:47 +0100 Subject: [PATCH 643/785] [bambuser] remove extractor https://web.archive.org/web/20190808014227/https://go.bambuser.com/shutdown-announcement --- youtube_dl/extractor/bambuser.py | 142 ----------------------------- youtube_dl/extractor/extractors.py | 1 - 2 files changed, 143 deletions(-) delete mode 100644 youtube_dl/extractor/bambuser.py diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py deleted file mode 100644 index 4400ff9c1..000000000 --- a/youtube_dl/extractor/bambuser.py +++ /dev/null @@ -1,142 +0,0 @@ -from __future__ import unicode_literals - -import re -import itertools - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - ExtractorError, - float_or_none, - int_or_none, - sanitized_Request, - urlencode_postdata, -) - - -class BambuserIE(InfoExtractor): - IE_NAME = 'bambuser' - _VALID_URL = r'https?://bambuser\.com/v/(?P\d+)' - _API_KEY = '005f64509e19a868399060af746a00aa' - _LOGIN_URL = 'https://bambuser.com/user' - _NETRC_MACHINE = 'bambuser' - - _TEST = { - 'url': 'http://bambuser.com/v/4050584', - # MD5 seems to be flaky, see https://travis-ci.org/ytdl-org/youtube-dl/jobs/14051016#L388 - # 'md5': 'fba8f7693e48fd4e8641b3fd5539a641', - 'info_dict': { - 'id': '4050584', - 'ext': 'flv', - 'title': 'Education engineering days - lightning talks', - 'duration': 3741, - 'uploader': 'pixelversity', - 'uploader_id': '344706', - 'timestamp': 1382976692, - 'upload_date': 
'20131028', - 'view_count': int, - }, - 'params': { - # It doesn't respect the 'Range' header, it would download the whole video - # caused the travis builds to fail: https://travis-ci.org/ytdl-org/youtube-dl/jobs/14493845#L59 - 'skip_download': True, - }, - } - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - - login_form = { - 'form_id': 'user_login', - 'op': 'Log in', - 'name': username, - 'pass': password, - } - - request = sanitized_Request( - self._LOGIN_URL, urlencode_postdata(login_form)) - request.add_header('Referer', self._LOGIN_URL) - response = self._download_webpage( - request, None, 'Logging in') - - login_error = self._html_search_regex( - r'(?s)
    (.+?)
    ', - response, 'login error', default=None) - if login_error: - raise ExtractorError( - 'Unable to login: %s' % login_error, expected=True) - - def _real_initialize(self): - self._login() - - def _real_extract(self, url): - video_id = self._match_id(url) - - info = self._download_json( - 'http://player-c.api.bambuser.com/getVideo.json?api_key=%s&vid=%s' - % (self._API_KEY, video_id), video_id) - - error = info.get('error') - if error: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, error), expected=True) - - result = info['result'] - - return { - 'id': video_id, - 'title': result['title'], - 'url': result['url'], - 'thumbnail': result.get('preview'), - 'duration': int_or_none(result.get('length')), - 'uploader': result.get('username'), - 'uploader_id': compat_str(result.get('owner', {}).get('uid')), - 'timestamp': int_or_none(result.get('created')), - 'fps': float_or_none(result.get('framerate')), - 'view_count': int_or_none(result.get('views_total')), - 'comment_count': int_or_none(result.get('comment_count')), - } - - -class BambuserChannelIE(InfoExtractor): - IE_NAME = 'bambuser:channel' - _VALID_URL = r'https?://bambuser\.com/channel/(?P.*?)(?:/|#|\?|$)' - # The maximum number we can get with each request - _STEP = 50 - _TEST = { - 'url': 'http://bambuser.com/channel/pixelversity', - 'info_dict': { - 'title': 'pixelversity', - }, - 'playlist_mincount': 60, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - user = mobj.group('user') - urls = [] - last_id = '' - for i in itertools.count(1): - req_url = ( - 'http://bambuser.com/xhr-api/index.php?username={user}' - '&sort=created&access_mode=0%2C1%2C2&limit={count}' - '&method=broadcast&format=json&vid_older_than={last}' - ).format(user=user, count=self._STEP, last=last_id) - req = sanitized_Request(req_url) - # Without setting this header, we wouldn't get any result - req.add_header('Referer', 'http://bambuser.com/channel/%s' % user) - data = self._download_json( 
- req, user, 'Downloading page %d' % i) - results = data['result'] - if not results: - break - last_id = results[-1]['vid'] - urls.extend(self.url_result(v['page'], 'Bambuser') for v in results) - - return { - '_type': 'playlist', - 'title': user, - 'entries': urls, - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e9b59ce52..af3fff601 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -80,7 +80,6 @@ from .awaan import ( ) from .azmedien import AZMedienIE from .baidu import BaiduVideoIE -from .bambuser import BambuserIE, BambuserChannelIE from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE from .bbc import ( BBCCoUkIE, From 836bfcb54e4d1664815ebffb753a9dc7c9c7d72c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 2 Nov 2019 11:08:51 +0100 Subject: [PATCH 644/785] [flipagram] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/flipagram.py | 115 ----------------------------- 2 files changed, 116 deletions(-) delete mode 100644 youtube_dl/extractor/flipagram.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index af3fff601..33fb461a0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -355,7 +355,6 @@ from .firsttv import FirstTVIE from .fivemin import FiveMinIE from .fivetv import FiveTVIE from .flickr import FlickrIE -from .flipagram import FlipagramIE from .folketinget import FolketingetIE from .footyroom import FootyRoomIE from .formula1 import Formula1IE diff --git a/youtube_dl/extractor/flipagram.py b/youtube_dl/extractor/flipagram.py deleted file mode 100644 index b7be40f1b..000000000 --- a/youtube_dl/extractor/flipagram.py +++ /dev/null @@ -1,115 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - int_or_none, - float_or_none, - try_get, - 
unified_timestamp, -) - - -class FlipagramIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P[^/?#&]+)' - _TEST = { - 'url': 'https://flipagram.com/f/nyvTSJMKId', - 'md5': '888dcf08b7ea671381f00fab74692755', - 'info_dict': { - 'id': 'nyvTSJMKId', - 'ext': 'mp4', - 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction', - 'description': 'md5:d55e32edc55261cae96a41fa85ff630e', - 'duration': 35.571, - 'timestamp': 1461244995, - 'upload_date': '20160421', - 'uploader': 'kitty juria', - 'uploader_id': 'sjuria101', - 'creator': 'kitty juria', - 'view_count': int, - 'like_count': int, - 'repost_count': int, - 'comment_count': int, - 'comments': list, - 'formats': 'mincount:2', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - video_data = self._parse_json( - self._search_regex( - r'window\.reactH2O\s*=\s*({.+});', webpage, 'video data'), - video_id) - - flipagram = video_data['flipagram'] - video = flipagram['video'] - - json_ld = self._search_json_ld(webpage, video_id, default={}) - title = json_ld.get('title') or flipagram['captionText'] - description = json_ld.get('description') or flipagram.get('captionText') - - formats = [{ - 'url': video['url'], - 'width': int_or_none(video.get('width')), - 'height': int_or_none(video.get('height')), - 'filesize': int_or_none(video_data.get('size')), - }] - - preview_url = try_get( - flipagram, lambda x: x['music']['track']['previewUrl'], compat_str) - if preview_url: - formats.append({ - 'url': preview_url, - 'ext': 'm4a', - 'vcodec': 'none', - }) - - self._sort_formats(formats) - - counts = flipagram.get('counts', {}) - user = flipagram.get('user', {}) - video_data = flipagram.get('video', {}) - - thumbnails = [{ - 'url': self._proto_relative_url(cover['url']), - 'width': int_or_none(cover.get('width')), - 'height': int_or_none(cover.get('height')), - 'filesize': int_or_none(cover.get('size')), - } for 
cover in flipagram.get('covers', []) if cover.get('url')] - - # Note that this only retrieves comments that are initially loaded. - # For videos with large amounts of comments, most won't be retrieved. - comments = [] - for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []): - text = comment.get('comment') - if not text or not isinstance(text, list): - continue - comments.append({ - 'author': comment.get('user', {}).get('name'), - 'author_id': comment.get('user', {}).get('username'), - 'id': comment.get('id'), - 'text': text[0], - 'timestamp': unified_timestamp(comment.get('created')), - }) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'duration': float_or_none(flipagram.get('duration'), 1000), - 'thumbnails': thumbnails, - 'timestamp': unified_timestamp(flipagram.get('iso8601Created')), - 'uploader': user.get('name'), - 'uploader_id': user.get('username'), - 'creator': user.get('name'), - 'view_count': int_or_none(counts.get('plays')), - 'like_count': int_or_none(counts.get('likes')), - 'repost_count': int_or_none(counts.get('reflips')), - 'comment_count': int_or_none(counts.get('comments')), - 'comments': comments, - 'formats': formats, - } From 79b35e7c15f4a285525b5ec52035ff0f8fc6150d Mon Sep 17 00:00:00 2001 From: geditorit <52565706+geditorit@users.noreply.github.com> Date: Sat, 2 Nov 2019 18:32:49 +0700 Subject: [PATCH 645/785] [gameone] Remove extractor (#21778) --- youtube_dl/extractor/extractors.py | 4 - youtube_dl/extractor/gameone.py | 134 ----------------------------- 2 files changed, 138 deletions(-) delete mode 100644 youtube_dl/extractor/gameone.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 33fb461a0..dce08e077 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -401,10 +401,6 @@ from .fusion import FusionIE from .fxnetworks import FXNetworksIE from .gaia import GaiaIE from .gameinformer import 
GameInformerIE -from .gameone import ( - GameOneIE, - GameOnePlaylistIE, -) from .gamespot import GameSpotIE from .gamestar import GameStarIE from .gaskrank import GaskrankIE diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py deleted file mode 100644 index a07d69841..000000000 --- a/youtube_dl/extractor/gameone.py +++ /dev/null @@ -1,134 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - xpath_with_ns, - parse_iso8601, - float_or_none, - int_or_none, -) - -NAMESPACE_MAP = { - 'media': 'http://search.yahoo.com/mrss/', -} - -# URL prefix to download the mp4 files directly instead of streaming via rtmp -# Credits go to XBox-Maniac -# http://board.jdownloader.org/showpost.php?p=185835&postcount=31 -RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' - - -class GameOneIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P\d+)' - _TESTS = [ - { - 'url': 'http://www.gameone.de/tv/288', - 'md5': '136656b7fb4c9cb4a8e2d500651c499b', - 'info_dict': { - 'id': '288', - 'ext': 'mp4', - 'title': 'Game One - Folge 288', - 'duration': 1238, - 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', - 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1', - 'age_limit': 16, - 'upload_date': '20140513', - 'timestamp': 1399980122, - } - }, - { - 'url': 'http://gameone.de/tv/220', - 'md5': '5227ca74c4ae6b5f74c0510a7c48839e', - 'info_dict': { - 'id': '220', - 'ext': 'mp4', - 'upload_date': '20120918', - 'description': 'Jet Set Radio HD, Tekken Tag Tournament 2, Source Filmmaker', - 'timestamp': 1347971451, - 'title': 'Game One - Folge 220', - 'duration': 896.62, - 'age_limit': 16, - } - } - - ] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - og_video = 
self._og_search_video_url(webpage, secure=False) - description = self._html_search_meta('description', webpage) - age_limit = int( - self._search_regex( - r'age=(\d+)', - self._html_search_meta( - 'age-de-meta-label', - webpage), - 'age_limit', - '0')) - mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss') - - mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') - title = mrss.find('.//item/title').text - thumbnail = mrss.find('.//item/image').get('url') - timestamp = parse_iso8601(mrss.find('.//pubDate').text, delimiter=' ') - content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) - content_url = content.get('url') - - content = self._download_xml( - content_url, - video_id, - 'Downloading media:content') - rendition_items = content.findall('.//rendition') - duration = float_or_none(rendition_items[0].get('duration')) - formats = [ - { - 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text), - 'width': int_or_none(r.get('width')), - 'height': int_or_none(r.get('height')), - 'tbr': int_or_none(r.get('bitrate')), - } - for r in rendition_items - ] - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - 'description': description, - 'age_limit': age_limit, - 'timestamp': timestamp, - } - - -class GameOnePlaylistIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gameone\.de(?:/tv)?/?$' - IE_NAME = 'gameone:playlist' - _TEST = { - 'url': 'http://www.gameone.de/tv', - 'info_dict': { - 'title': 'GameOne', - }, - 'playlist_mincount': 294, - } - - def _real_extract(self, url): - webpage = self._download_webpage('http://www.gameone.de/tv', 'TV') - max_id = max(map(int, re.findall(r' Date: Sat, 2 Nov 2019 13:09:44 +0100 Subject: [PATCH 646/785] [keek] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/keek.py | 39 ------------------------------ 2 files changed, 40 deletions(-) delete mode 
100644 youtube_dl/extractor/keek.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dce08e077..08facf8d3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -515,7 +515,6 @@ from .ketnet import KetnetIE from .khanacademy import KhanAcademyIE from .kickstarter import KickStarterIE from .kinopoisk import KinoPoiskIE -from .keek import KeekIE from .konserthusetplay import KonserthusetPlayIE from .kontrtube import KontrTubeIE from .krasview import KrasViewIE diff --git a/youtube_dl/extractor/keek.py b/youtube_dl/extractor/keek.py deleted file mode 100644 index 94a03d277..000000000 --- a/youtube_dl/extractor/keek.py +++ /dev/null @@ -1,39 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class KeekIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?keek\.com/keek/(?P\w+)' - IE_NAME = 'keek' - _TEST = { - 'url': 'https://www.keek.com/keek/NODfbab', - 'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83', - 'info_dict': { - 'id': 'NODfbab', - 'ext': 'mp4', - 'title': 'md5:35d42050a3ece241d5ddd7fdcc6fd896', - 'uploader': 'ytdl', - 'uploader_id': 'eGT5bab', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - return { - 'id': video_id, - 'url': self._og_search_video_url(webpage), - 'ext': 'mp4', - 'title': self._og_search_description(webpage).strip(), - 'thumbnail': self._og_search_thumbnail(webpage), - 'uploader': self._search_regex( - r'data-username=(["\'])(?P.+?)\1', webpage, - 'uploader', fatal=False, group='uploader'), - 'uploader_id': self._search_regex( - r'data-user-id=(["\'])(?P.+?)\1', webpage, - 'uploader id', fatal=False, group='uploader_id'), - } From 5e36b63486794750aca0ee6b9b83f27abf6332dc Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 2 Nov 2019 13:25:39 +0100 Subject: [PATCH 647/785] [iconosquare] remove extractor --- 
youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/iconosquare.py | 85 ----------------------------- 2 files changed, 86 deletions(-) delete mode 100644 youtube_dl/extractor/iconosquare.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 08facf8d3..dd5f68ca3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -455,7 +455,6 @@ from .hungama import ( HungamaSongIE, ) from .hypem import HypemIE -from .iconosquare import IconosquareIE from .ign import ( IGNIE, OneUPIE, diff --git a/youtube_dl/extractor/iconosquare.py b/youtube_dl/extractor/iconosquare.py deleted file mode 100644 index a39f422e9..000000000 --- a/youtube_dl/extractor/iconosquare.py +++ /dev/null @@ -1,85 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - get_element_by_id, - remove_end, -) - - -class IconosquareIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P[^/]+)' - _TEST = { - 'url': 'http://statigr.am/p/522207370455279102_24101272', - 'md5': '6eb93b882a3ded7c378ee1d6884b1814', - 'info_dict': { - 'id': '522207370455279102_24101272', - 'ext': 'mp4', - 'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)', - 'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d', - 'timestamp': 1376471991, - 'upload_date': '20130814', - 'uploader': 'aguynamedpatrick', - 'uploader_id': '24101272', - 'comment_count': int, - 'like_count': int, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - media = self._parse_json( - get_element_by_id('mediaJson', webpage), - video_id) - - formats = [{ - 'url': f['url'], - 'format_id': format_id, - 'width': int_or_none(f.get('width')), - 'height': int_or_none(f.get('height')) - } for format_id, f in media['videos'].items()] - self._sort_formats(formats) - - title = 
remove_end(self._og_search_title(webpage), ' - via Iconosquare') - - timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time')) - description = media.get('caption', {}).get('text') - - uploader = media.get('user', {}).get('username') - uploader_id = media.get('user', {}).get('id') - - comment_count = int_or_none(media.get('comments', {}).get('count')) - like_count = int_or_none(media.get('likes', {}).get('count')) - - thumbnails = [{ - 'url': t['url'], - 'id': thumbnail_id, - 'width': int_or_none(t.get('width')), - 'height': int_or_none(t.get('height')) - } for thumbnail_id, t in media.get('images', {}).items()] - - comments = [{ - 'id': comment.get('id'), - 'text': comment['text'], - 'timestamp': int_or_none(comment.get('created_time')), - 'author': comment.get('from', {}).get('full_name'), - 'author_id': comment.get('from', {}).get('username'), - } for comment in media.get('comments', {}).get('data', []) if 'text' in comment] - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnails': thumbnails, - 'timestamp': timestamp, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'comment_count': comment_count, - 'like_count': like_count, - 'formats': formats, - 'comments': comments, - } From e54924c46fac6a9745868424dc14011da2572178 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 2 Nov 2019 18:13:31 +0100 Subject: [PATCH 648/785] [stv] fix extraction(closes #22928) --- youtube_dl/extractor/stv.py | 89 +++++++++++++------------------------ 1 file changed, 31 insertions(+), 58 deletions(-) diff --git a/youtube_dl/extractor/stv.py b/youtube_dl/extractor/stv.py index ccb074cd4..bae8b71f4 100644 --- a/youtube_dl/extractor/stv.py +++ b/youtube_dl/extractor/stv.py @@ -4,15 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse -) from ..utils import ( - extract_attributes, + 
compat_str, float_or_none, int_or_none, - str_or_none, ) @@ -20,20 +15,20 @@ class STVPlayerIE(InfoExtractor): IE_NAME = 'stv:player' _VALID_URL = r'https?://player\.stv\.tv/(?Pepisode|video)/(?P[a-z0-9]{4})' _TEST = { - 'url': 'https://player.stv.tv/video/7srz/victoria/interview-with-the-cast-ahead-of-new-victoria/', - 'md5': '2ad867d4afd641fa14187596e0fbc91b', + 'url': 'https://player.stv.tv/video/4gwd/emmerdale/60-seconds-on-set-with-laura-norton/', + 'md5': '5adf9439c31d554f8be0707c7abe7e0a', 'info_dict': { - 'id': '6016487034001', + 'id': '5333973339001', 'ext': 'mp4', - 'upload_date': '20190321', - 'title': 'Interview with the cast ahead of new Victoria', - 'description': 'Nell Hudson and Lily Travers tell us what to expect in the new season of Victoria.', - 'timestamp': 1553179628, + 'upload_date': '20170301', + 'title': '60 seconds on set with Laura Norton', + 'description': "How many questions can Laura - a.k.a Kerry Wyatt - answer in 60 seconds? Let\'s find out!", + 'timestamp': 1488388054, 'uploader_id': '1486976045', }, 'skip': 'this resource is unavailable outside of the UK', } - _PUBLISHER_ID = '1486976045' + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1486976045/default_default/index.html?videoId=%s' _PTYPE_MAP = { 'episode': 'episodes', 'video': 'shortform', @@ -41,54 +36,32 @@ class STVPlayerIE(InfoExtractor): def _real_extract(self, url): ptype, video_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, video_id) + resp = self._download_json( + 'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], video_id), + video_id) - qs = compat_parse_qs(compat_urllib_parse_urlparse(self._search_regex( - r'itemprop="embedURL"[^>]+href="([^"]+)', - webpage, 'embed URL', default=None)).query) - publisher_id = qs.get('publisherID', [None])[0] or self._PUBLISHER_ID + result = resp['results'] + video = result['video'] + video_id = compat_str(video['id']) - player_attr = extract_attributes(self._search_regex( - 
r'(<[^>]+class="bcplayer"[^>]+>)', webpage, 'player', default=None)) or {} + subtitles = {} + _subtitles = result.get('_subtitles') or {} + for ext, sub_url in _subtitles.items(): + subtitles.setdefault('en', []).append({ + 'ext': 'vtt' if ext == 'webvtt' else ext, + 'url': sub_url, + }) - info = {} - duration = ref_id = series = video_id = None - api_ref_id = player_attr.get('data-player-api-refid') - if api_ref_id: - resp = self._download_json( - 'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], api_ref_id), - api_ref_id, fatal=False) - if resp: - result = resp.get('results') or {} - video = result.get('video') or {} - video_id = str_or_none(video.get('id')) - ref_id = video.get('guid') - duration = video.get('length') - programme = result.get('programme') or {} - series = programme.get('name') or programme.get('shortName') - subtitles = {} - _subtitles = result.get('_subtitles') or {} - for ext, sub_url in _subtitles.items(): - subtitles.setdefault('en', []).append({ - 'ext': 'vtt' if ext == 'webvtt' else ext, - 'url': sub_url, - }) - info.update({ - 'description': result.get('summary'), - 'subtitles': subtitles, - 'view_count': int_or_none(result.get('views')), - }) - if not video_id: - video_id = qs.get('videoId', [None])[0] or self._search_regex( - r' Date: Sat, 2 Nov 2019 22:33:51 +0100 Subject: [PATCH 649/785] [bellmedia] add support for marilyn.ca videos(#22193) --- youtube_dl/extractor/bellmedia.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bellmedia.py b/youtube_dl/extractor/bellmedia.py index f36a2452d..485173774 100644 --- a/youtube_dl/extractor/bellmedia.py +++ b/youtube_dl/extractor/bellmedia.py @@ -22,7 +22,8 @@ class BellMediaIE(InfoExtractor): bravo| mtv| space| - etalk + etalk| + marilyn )\.ca| much\.com )/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P[0-9]{6,})''' @@ -70,6 +71,7 @@ class BellMediaIE(InfoExtractor): 'animalplanet': 'aniplan', 'etalk': 'ctv', 'bnnbloomberg': 'bnn', + 
'marilyn': 'ctv_marilyn', } def _real_extract(self, url): From 564275e26fc963fb920236e37c6c19e8e2b046f0 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 3 Nov 2019 22:04:03 +0100 Subject: [PATCH 650/785] [telegraaf] fix extraction --- youtube_dl/extractor/telegraaf.py | 75 ++++++++++++++++++------------- 1 file changed, 43 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/telegraaf.py b/youtube_dl/extractor/telegraaf.py index 0f576c1ab..2dc020537 100644 --- a/youtube_dl/extractor/telegraaf.py +++ b/youtube_dl/extractor/telegraaf.py @@ -4,21 +4,25 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( determine_ext, - remove_end, + int_or_none, + parse_iso8601, + try_get, ) class TelegraafIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P\d+)/[^/]+\.html' + _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/video/(?P\d+)' _TEST = { - 'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html', + 'url': 'https://www.telegraaf.nl/video/734366489/historisch-scheepswrak-slaat-na-100-jaar-los', 'info_dict': { - 'id': '24353229', + 'id': 'gaMItuoSeUg2', 'ext': 'mp4', - 'title': 'Tikibad ontruimd wegens brand', - 'description': 'md5:05ca046ff47b931f9b04855015e163a4', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 33, + 'title': 'Historisch scheepswrak slaat na 100 jaar los', + 'description': 'md5:6f53b7c4f55596722ac24d6c0ec00cfb', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 55, + 'timestamp': 1572805527, + 'upload_date': '20191103', }, 'params': { # m3u8 download @@ -27,23 +31,30 @@ class TelegraafIE(InfoExtractor): } def _real_extract(self, url): - video_id = self._match_id(url) + article_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + video_id = self._download_json( + 'https://www.telegraaf.nl/graphql', article_id, query={ + 'query': '''{ + article(uid: %s) { + videos { + videoId + } + } 
+}''' % article_id, + })['data']['article']['videos'][0]['videoId'] - player_url = self._html_search_regex( - r']+src="([^"]+")', webpage, 'player URL') - player_page = self._download_webpage( - player_url, video_id, note='Download player webpage') - playlist_url = self._search_regex( - r'playlist\s*:\s*"([^"]+)"', player_page, 'playlist URL') - playlist_data = self._download_json(playlist_url, video_id) + item = self._download_json( + 'https://content.tmgvideo.nl/playlist/item=%s/playlist.json' % video_id, + video_id)['items'][0] + title = item['title'] - item = playlist_data['items'][0] formats = [] - locations = item['locations'] + locations = item.get('locations') or {} for location in locations.get('adaptive', []): - manifest_url = location['src'] + manifest_url = location.get('src') + if not manifest_url: + continue ext = determine_ext(manifest_url) if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( @@ -54,25 +65,25 @@ class TelegraafIE(InfoExtractor): else: self.report_warning('Unknown adaptive format %s' % ext) for location in locations.get('progressive', []): + src = try_get(location, lambda x: x['sources'][0]['src']) + if not src: + continue + label = location.get('label') formats.append({ - 'url': location['sources'][0]['src'], - 'width': location.get('width'), - 'height': location.get('height'), - 'format_id': 'http-%s' % location['label'], + 'url': src, + 'width': int_or_none(location.get('width')), + 'height': int_or_none(location.get('height')), + 'format_id': 'http' + ('-%s' % label if label else ''), }) self._sort_formats(formats) - title = remove_end(self._og_search_title(webpage), ' - VIDEO') - description = self._og_search_description(webpage) - duration = item.get('duration') - thumbnail = item.get('poster') - return { 'id': video_id, 'title': title, - 'description': description, + 'description': item.get('description'), 'formats': formats, - 'duration': duration, - 'thumbnail': thumbnail, + 'duration': 
int_or_none(item.get('duration')), + 'thumbnail': item.get('poster'), + 'timestamp': parse_iso8601(item.get('datecreated'), ' '), } From a6e6673e825f6225c3a316b164ddca03fd20b5d2 Mon Sep 17 00:00:00 2001 From: Manu Cornet Date: Sun, 3 Nov 2019 21:23:27 +0000 Subject: [PATCH 651/785] [README.md] Also read permission to the binary in how to update section (#22903) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c39b13616..01f975958 100644 --- a/README.md +++ b/README.md @@ -752,8 +752,8 @@ As a last resort, you can also uninstall the version installed by your package m Afterwards, simply follow [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html): ``` -sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl -sudo chmod a+x /usr/local/bin/youtube-dl +sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl +sudo chmod a+rx /usr/local/bin/youtube-dl hash -r ``` From ef382405c6dc79d2b7e3f81a527232941e2c0b2d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 4 Nov 2019 02:01:01 +0100 Subject: [PATCH 652/785] [mediaset] extract unprotected M3U and MPD manifests(closes #17204) --- youtube_dl/extractor/mediaset.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py index df3748798..fcbc064ff 100644 --- a/youtube_dl/extractor/mediaset.py +++ b/youtube_dl/extractor/mediaset.py @@ -62,7 +62,6 @@ class MediasetIE(ThePlatformBaseIE): 'uploader': 'Canale 5', 'uploader_id': 'C5', }, - 'expected_warnings': ['HTTP Error 403: Forbidden'], }, { # clip 'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680', @@ -109,6 +108,11 @@ class MediasetIE(ThePlatformBaseIE): entries.append(embed_url) return entries + def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, 
f4m_params=None, transform_rtmp_url=None): + for video in smil.findall(self._xpath_ns('.//video', namespace)): + video.attrib['src'] = re.sub(r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?.mpd)\?.+', r'\1\2', video.attrib['src']) + return super()._parse_smil_formats(smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url) + def _real_extract(self, url): guid = self._match_id(url) tp_path = 'PR1GhC/media/guid/2702976343/' + guid @@ -118,14 +122,15 @@ class MediasetIE(ThePlatformBaseIE): subtitles = {} first_e = None for asset_type in ('SD', 'HD'): - for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'): + # TODO: fixup ISM+none manifest URLs + for f in ('MPEG4', 'MPEG-DASH+none', 'M3U+none'): try: tp_formats, tp_subtitles = self._extract_theplatform_smil( update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), { 'mbr': 'true', 'formats': f, 'assetTypes': asset_type, - }), guid, 'Downloading %s %s SMIL data' % (f, asset_type)) + }), guid, 'Downloading %s %s SMIL data' % (f.split('+')[0], asset_type)) except ExtractorError as e: if not first_e: first_e = e From bf45295c5387d0d90b97ca34d377cdaa07c71bcb Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 4 Nov 2019 11:13:14 +0100 Subject: [PATCH 653/785] [mediaset] relax URL guid matching(closes #18352) --- youtube_dl/extractor/mediaset.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py index fcbc064ff..f976506f4 100644 --- a/youtube_dl/extractor/mediaset.py +++ b/youtube_dl/extractor/mediaset.py @@ -27,7 +27,7 @@ class MediasetIE(ThePlatformBaseIE): (?:video|on-demand)/(?:[^/]+/)+[^/]+_| player/index\.html\?.*?\bprogramGuid= ) - )(?P[0-9A-Z]{16}) + )(?P[0-9A-Z]{16,}) ''' _TESTS = [{ # full episode @@ -77,6 +77,18 @@ class MediasetIE(ThePlatformBaseIE): }, { 'url': 'mediaset:FAFU000000665924', 'only_matching': True, + }, { + 'url': 
'https://www.mediasetplay.mediaset.it/video/mediasethaacuoreilfuturo/palmieri-alicudi-lisola-dei-tre-bambini-felici--un-decreto-per-alicudi-e-tutte-le-microscuole_FD00000000102295', + 'only_matching': True, + }, { + 'url': 'https://www.mediasetplay.mediaset.it/video/cherryseason/anticipazioni-degli-episodi-del-23-ottobre_F306837101005C02', + 'only_matching': True, + }, { + 'url': 'https://www.mediasetplay.mediaset.it/video/tg5/ambiente-onda-umana-per-salvare-il-pianeta_F309453601079D01', + 'only_matching': True, + }, { + 'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135', + 'only_matching': True, }] @staticmethod From e452345fc5cee5e79d2cad6be575da563987a4ff Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 4 Nov 2019 15:43:52 +0100 Subject: [PATCH 654/785] [jamendo] improve extraction - fix album extraction(closes #18564) - improve metadata extraction(closes #18565)(closes #21379) --- youtube_dl/extractor/jamendo.py | 162 +++++++++++++++++++------------- 1 file changed, 99 insertions(+), 63 deletions(-) diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py index c21827618..12e21eb6f 100644 --- a/youtube_dl/extractor/jamendo.py +++ b/youtube_dl/extractor/jamendo.py @@ -1,38 +1,26 @@ # coding: utf-8 from __future__ import unicode_literals -import re +import hashlib +import random -from ..compat import compat_urlparse +from ..compat import compat_str from .common import InfoExtractor -from ..utils import parse_duration +from ..utils import ( + clean_html, + int_or_none, + try_get, +) -class JamendoBaseIE(InfoExtractor): - def _extract_meta(self, webpage, fatal=True): - title = self._og_search_title( - webpage, default=None) or self._search_regex( - r'([^<]+)', webpage, - 'title', default=None) - if title: - title = self._search_regex( - r'(.+?)\s*\|\s*Jamendo Music', title, 'title', default=None) - if not title: - title = self._html_search_meta( - 'name', webpage, 'title', 
fatal=fatal) - mobj = re.search(r'(.+) - (.+)', title or '') - artist, second = mobj.groups() if mobj else [None] * 2 - return title, artist, second - - -class JamendoIE(JamendoBaseIE): +class JamendoIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: licensing\.jamendo\.com/[^/]+| (?:www\.)?jamendo\.com ) - /track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+) + /track/(?P<id>[0-9]+)(?:/(?P<display_id>[^/?#&]+))? ''' _TESTS = [{ 'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i', @@ -45,7 +33,9 @@ class JamendoIE(JamendoBaseIE): 'artist': 'Maya Filipič', 'track': 'Stories from Emona I', 'duration': 210, - 'thumbnail': r're:^https?://.*\.jpg' + 'thumbnail': r're:^https?://.*\.jpg', + 'timestamp': 1217438117, + 'upload_date': '20080730', } }, { 'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock', @@ -53,15 +43,19 @@ class JamendoIE(JamendoBaseIE): }] def _real_extract(self, url): - mobj = self._VALID_URL_RE.match(url) - track_id = mobj.group('id') - display_id = mobj.group('display_id') - - webpage = self._download_webpage( - 'https://www.jamendo.com/track/%s/%s' % (track_id, display_id), - display_id) - - title, artist, track = self._extract_meta(webpage) + track_id, display_id = self._VALID_URL_RE.match(url).groups() + webpage = self._download_webpage(url, track_id) + models = self._parse_json(self._html_search_regex( + r"data-bundled-models='([^']+)", + webpage, 'bundled models'), track_id) + track = models['track']['models'][0] + title = track_name = track['name'] + get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {} + artist = get_model('artist') + artist_name = artist.get('name') + if artist_name: + title = '%s - %s' % (artist_name, title) + album = get_model('album') formats = [{ 'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294' @@ -77,31 +71,58 @@ class JamendoIE(JamendoBaseIE): ))] self._sort_formats(formats) - thumbnail = self._html_search_meta( - 'image', webpage, 'thumbnail', 
fatal=False) - duration = parse_duration(self._search_regex( - r'<span[^>]+itemprop=["\']duration["\'][^>]+content=["\'](.+?)["\']', - webpage, 'duration', fatal=False)) + urls = [] + thumbnails = [] + for _, covers in track.get('cover', {}).items(): + for cover_id, cover_url in covers.items(): + if not cover_url or cover_url in urls: + continue + urls.append(cover_url) + size = int_or_none(cover_id.lstrip('size')) + thumbnails.append({ + 'id': cover_id, + 'url': cover_url, + 'width': size, + 'height': size, + }) + + tags = [] + for tag in track.get('tags', []): + tag_name = tag.get('name') + if not tag_name: + continue + tags.append(tag_name) + + stats = track.get('stats') or {} return { 'id': track_id, 'display_id': display_id, - 'thumbnail': thumbnail, + 'thumbnails': thumbnails, 'title': title, - 'duration': duration, - 'artist': artist, - 'track': track, - 'formats': formats + 'description': track.get('description'), + 'duration': int_or_none(track.get('duration')), + 'artist': artist_name, + 'track': track_name, + 'album': album.get('name'), + 'formats': formats, + 'license': '-'.join(track.get('licenseCC', [])) or None, + 'timestamp': int_or_none(track.get('dateCreated')), + 'view_count': int_or_none(stats.get('listenedAll')), + 'like_count': int_or_none(stats.get('favorited')), + 'average_rating': int_or_none(stats.get('averageNote')), + 'tags': tags, } -class JamendoAlbumIE(JamendoBaseIE): - _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)' +class JamendoAlbumIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)' _TEST = { 'url': 'https://www.jamendo.com/album/121486/duck-on-cover', 'info_dict': { 'id': '121486', - 'title': 'Shearer - Duck On Cover' + 'title': 'Duck On Cover', + 'description': 'md5:c2920eaeef07d7af5b96d7c64daf1239', }, 'playlist': [{ 'md5': 'e1a2fcb42bda30dfac990212924149a8', @@ -111,6 +132,8 @@ class JamendoAlbumIE(JamendoBaseIE): 'title': 'Shearer - Warmachine', 
'artist': 'Shearer', 'track': 'Warmachine', + 'timestamp': 1368089771, + 'upload_date': '20130509', } }, { 'md5': '1f358d7b2f98edfe90fd55dac0799d50', @@ -120,6 +143,8 @@ class JamendoAlbumIE(JamendoBaseIE): 'title': 'Shearer - Without Your Ghost', 'artist': 'Shearer', 'track': 'Without Your Ghost', + 'timestamp': 1368089771, + 'upload_date': '20130509', } }], 'params': { @@ -127,24 +152,35 @@ class JamendoAlbumIE(JamendoBaseIE): } } + def _call_api(self, resource, resource_id): + path = '/api/%ss' % resource + rand = compat_str(random.random()) + return self._download_json( + 'https://www.jamendo.com' + path, resource_id, query={ + 'id[]': resource_id, + }, headers={ + 'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand) + })[0] + def _real_extract(self, url): - mobj = self._VALID_URL_RE.match(url) - album_id = mobj.group('id') + album_id = self._match_id(url) + album = self._call_api('album', album_id) + album_name = album.get('name') - webpage = self._download_webpage(url, mobj.group('display_id')) + entries = [] + for track in album.get('tracks', []): + track_id = track.get('id') + if not track_id: + continue + track_id = compat_str(track_id) + entries.append({ + '_type': 'url_transparent', + 'url': 'https://www.jamendo.com/track/' + track_id, + 'ie_key': JamendoIE.ie_key(), + 'id': track_id, + 'album': album_name, + }) - title, artist, album = self._extract_meta(webpage, fatal=False) - - entries = [{ - '_type': 'url_transparent', - 'url': compat_urlparse.urljoin(url, m.group('path')), - 'ie_key': JamendoIE.ie_key(), - 'id': self._search_regex( - r'/track/(\d+)', m.group('path'), 'track id', default=None), - 'artist': artist, - 'album': album, - } for m in re.finditer( - r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link', - webpage)] - - return self.playlist_result(entries, album_id, title) + return self.playlist_result( + entries, album_id, album_name, + clean_html(try_get(album, lambda x: 
x['description']['en'], compat_str))) From 2349255abdf822e0bb9508d510db926cae777f8c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 4 Nov 2019 15:51:44 +0100 Subject: [PATCH 655/785] [jamendo] restore track url modification --- youtube_dl/extractor/jamendo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py index 12e21eb6f..490efa8fb 100644 --- a/youtube_dl/extractor/jamendo.py +++ b/youtube_dl/extractor/jamendo.py @@ -44,7 +44,8 @@ class JamendoIE(InfoExtractor): def _real_extract(self, url): track_id, display_id = self._VALID_URL_RE.match(url).groups() - webpage = self._download_webpage(url, track_id) + webpage = self._download_webpage( + 'https://www.jamendo.com/track/' + track_id, track_id) models = self._parse_json(self._html_search_regex( r"data-bundled-models='([^']+)", webpage, 'bundled models'), track_id) From 3e4908360417bc29e1446bfa85145193fa2c8462 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 4 Nov 2019 20:05:27 +0100 Subject: [PATCH 656/785] [myspass] fix video URL extraction and improve metadata extraction(closes #22448) --- youtube_dl/extractor/myspass.py | 75 +++++++++++++-------------------- 1 file changed, 29 insertions(+), 46 deletions(-) diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py index 2afe535b5..db7ebc94c 100644 --- a/youtube_dl/extractor/myspass.py +++ b/youtube_dl/extractor/myspass.py @@ -1,73 +1,56 @@ +# coding: utf-8 from __future__ import unicode_literals -import os.path + +import re from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlparse, -) +from ..compat import compat_str from ..utils import ( - ExtractorError, + int_or_none, + parse_duration, + xpath_text, ) class MySpassIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?myspass\.de/.*' + _VALID_URL = r'https?://(?:www\.)?myspass\.de/([^/]+/)*(?P<id>\d+)' _TEST = { 
'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/', 'md5': '0b49f4844a068f8b33f4b7c88405862b', 'info_dict': { 'id': '11741', 'ext': 'mp4', - 'description': 'Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?', - 'title': 'Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2', + 'description': 'Wer kann in die Fußstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?', + 'title': '17.02.2013 - Die Highlights, Teil 2', }, } def _real_extract(self, url): - META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s' + video_id = self._match_id(url) - # video id is the last path element of the URL - # usually there is a trailing slash, so also try the second but last - url_path = compat_urllib_parse_urlparse(url).path - url_parent_path, video_id = os.path.split(url_path) - if not video_id: - _, video_id = os.path.split(url_parent_path) - - # get metadata - metadata_url = META_DATA_URL_TEMPLATE % video_id metadata = self._download_xml( - metadata_url, video_id, transform_source=lambda s: s.strip()) + 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=' + video_id, + video_id) - # extract values from metadata - url_flv_el = metadata.find('url_flv') - if url_flv_el is None: - raise ExtractorError('Unable to extract download url') - video_url = url_flv_el.text - title_el = metadata.find('title') - if title_el is None: - raise ExtractorError('Unable to extract title') - title = title_el.text - format_id_el = metadata.find('format_id') - if format_id_el is None: - format = 'mp4' - else: - format = 
format_id_el.text - description_el = metadata.find('description') - if description_el is not None: - description = description_el.text - else: - description = None - imagePreview_el = metadata.find('imagePreview') - if imagePreview_el is not None: - thumbnail = imagePreview_el.text - else: - thumbnail = None + title = xpath_text(metadata, 'title', fatal=True) + video_url = xpath_text(metadata, 'url_flv', 'download url', True) + video_id_int = int(video_id) + for group in re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url).groups(): + group_int = int(group) + if group_int > video_id_int: + video_url = video_url.replace( + group, compat_str(group_int // video_id_int)) return { 'id': video_id, 'url': video_url, 'title': title, - 'format': format, - 'thumbnail': thumbnail, - 'description': description, + 'thumbnail': xpath_text(metadata, 'imagePreview'), + 'description': xpath_text(metadata, 'description'), + 'duration': parse_duration(xpath_text(metadata, 'duration')), + 'series': xpath_text(metadata, 'format'), + 'season_number': int_or_none(xpath_text(metadata, 'season')), + 'season_id': xpath_text(metadata, 'season_id'), + 'episode': title, + 'episode_number': int_or_none(xpath_text(metadata, 'episode')), } From c69e71733d9619cb1a2bee769b9a381b52901de3 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 4 Nov 2019 22:21:00 +0100 Subject: [PATCH 657/785] [msn] add support for Vidible and AOL embeds(closes #22195)(closes #22227) --- youtube_dl/extractor/msn.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/youtube_dl/extractor/msn.py b/youtube_dl/extractor/msn.py index 0460cf4d5..0c3813dda 100644 --- a/youtube_dl/extractor/msn.py +++ b/youtube_dl/extractor/msn.py @@ -41,6 +41,14 @@ class MSNIE(InfoExtractor): }, { 'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-‘raped-woman’-comment/vi-AAhvzW6', 'only_matching': True, + }, { + # 
Vidible(AOL) Embed + 'url': 'https://www.msn.com/en-us/video/animals/yellowstone-park-staffers-catch-deer-engaged-in-behavior-they-cant-explain/vi-AAGfdg1', + 'only_matching': True, + }, { + # Dailymotion Embed + 'url': 'https://www.msn.com/es-ve/entretenimiento/watch/winston-salem-paire-refait-des-siennes-en-perdant-sa-raquette-au-service/vp-AAG704L', + 'only_matching': True, }] def _real_extract(self, url): @@ -61,6 +69,18 @@ class MSNIE(InfoExtractor): webpage, 'error', group='error')) raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + player_name = video.get('playerName') + if player_name: + provider_id = video.get('providerId') + if provider_id: + if player_name == 'AOL': + return self.url_result( + 'aol-video:' + provider_id, 'Aol', provider_id) + elif player_name == 'Dailymotion': + return self.url_result( + 'https://www.dailymotion.com/video/' + provider_id, + 'Dailymotion', provider_id) + title = video['title'] formats = [] From 20218040db2b1e063191cc470ce403d35d394e2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Nov 2019 05:21:16 +0700 Subject: [PATCH 658/785] [scte] Add extractor (closes #22975) --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/scte.py | 144 +++++++++++++++++++++++++++++ 2 files changed, 148 insertions(+) create mode 100644 youtube_dl/extractor/scte.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dd5f68ca3..9f43b284d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -980,6 +980,10 @@ from .sbs import SBSIE from .screencast import ScreencastIE from .screencastomatic import ScreencastOMaticIE from .scrippsnetworks import ScrippsNetworksWatchIE +from .scte import ( + SCTEIE, + SCTECourseIE, +) from .seeker import SeekerIE from .senateisvp import SenateISVPIE from .sendtonews import SendtoNewsIE diff --git a/youtube_dl/extractor/scte.py 
b/youtube_dl/extractor/scte.py new file mode 100644 index 000000000..ca1de63b6 --- /dev/null +++ b/youtube_dl/extractor/scte.py @@ -0,0 +1,144 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + decode_packed_codes, + ExtractorError, + urlencode_postdata, +) + + +class SCTEBaseIE(InfoExtractor): + _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx' + _NETRC_MACHINE = 'scte' + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_popup = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login popup') + + def is_logged(webpage): + return any(re.search(p, webpage) for p in ( + r'class=["\']welcome\b', r'>Sign Out<')) + + # already logged in + if is_logged(login_popup): + return + + login_form = self._hidden_inputs(login_popup) + + login_form.update({ + 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username, + 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password, + 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on', + }) + + response = self._download_webpage( + self._LOGIN_URL, None, 'Logging in', + data=urlencode_postdata(login_form)) + + if '|pageRedirect|' not in response and not is_logged(response): + error = self._html_search_regex( + r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</', + response, 'error message', default=None) + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError('Unable to log in') + + +class SCTEIE(SCTEBaseIE): + _VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php?.*?\bid=(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484', + 'info_dict': { + 'title': 'Introduction to DOCSIS Engineering 
Professional', + 'id': '31484', + }, + 'playlist_count': 5, + 'skip': 'Requires account credentials', + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title') + + context_id = self._search_regex(r'context-(\d+)', webpage, video_id) + content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id + context = decode_packed_codes(self._download_webpage( + '%smobile/data.js' % content_base, video_id)) + + data = self._parse_xml( + self._search_regex( + r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"), + video_id) + + entries = [] + for asset in data.findall('.//asset'): + asset_url = asset.get('url') + if not asset_url or not asset_url.endswith('.mp4'): + continue + asset_id = self._search_regex( + r'video_([^_]+)_', asset_url, 'asset id', default=None) + if not asset_id: + continue + entries.append({ + 'id': asset_id, + 'title': title, + 'url': content_base + asset_url, + }) + + return self.playlist_result(entries, video_id, title) + + +class SCTECourseIE(SCTEBaseIE): + _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491', + 'only_matching': True, + }, { + 'url': 'https://learning.scte.org/course/view.php?id=3639', + 'only_matching': True, + }, { + 'url': 'https://learning.scte.org/course/view.php?id=3073', + 'only_matching': True, + }] + + def _real_extract(self, url): + course_id = self._match_id(url) + + webpage = self._download_webpage(url, course_id) + + title = self._search_regex( + r'<h1>(.+?)</h1>', webpage, 'title', default=None) + + entries = [] + for mobj in re.finditer( + r'''(?x) + <a[^>]+ + href=(["\']) + (?P<url> + https?://learning\.scte\.org/mod/ + (?P<kind>scorm|subcourse)/view\.php?(?:(?!\1).)*? 
+ \bid=\d+ + ) + ''', + webpage): + item_url = mobj.group('url') + if item_url == url: + continue + ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm' + else SCTECourseIE.ie_key()) + entries.append(self.url_result(item_url, ie=ie)) + + return self.playlist_result(entries, course_id, title) From 1a4e4b0bfeb83b24755f80630d1e7f3427a5bf48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Nov 2019 05:31:40 +0700 Subject: [PATCH 659/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/ChangeLog b/ChangeLog index fcab1102c..338dd456b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,47 @@ +version <unreleased> + +Extractors ++ [scte] Add support for learning.scte.org (#22975) ++ [msn] Add support for Vidible and AOL embeds (#22195, #22227) +* [myspass] Fix video URL extraction and improve metadata extraction (#22448) +* [jamendo] Improve extraction + * Fix album extraction (#18564) + * Improve metadata extraction (#18565, #21379) +* [mediaset] Relax URL guid matching (#18352) ++ [mediaset] Extract unprotected M3U and MPD manifests (#17204) +* [telegraaf] Fix extraction ++ [bellmedia] Add support for marilyn.ca videos (#22193) +* [stv] Fix extraction (#22928) +- [iconosquare] Remove extractor +- [keek] Remove extractor +- [gameone] Remove extractor (#21778) +- [flipagram] Remove extractor +- [bambuser] Remove extractor +* [wistia] Reduce embed extraction false positives ++ [wistia] Add support for inline embeds (#22931) +- [go90] Remove extractor +* [kakao] Remove raw request ++ [kakao] Extract format total bitrate +* [daum] Fix VOD and Clip extracton (#15015) +* [kakao] Improve extraction + + Add support for embed URLs + + Add support for Kakao Legacy vid based embed URLs + * Only extract fields used for extraction + * Strip description and extract tags +* [mixcloud] Fix cloudcast data extraction (#22821) +* [yahoo] Improve 
extraction + + Add support for live streams (#3597, #3779, #22178) + * Bypass cookie consent page for european domains (#16948, #22576) + + Add generic support for embeds (#20332) +* [tv2] Fix and improve extraction (#22787) ++ [tv2dk] Add support for TV2 DK sites +* [onet] Improve extraction … + + Add support for onet100.vod.pl + + Extract m3u8 formats + * Correct audio only format info +* [fox9] Fix extraction + + version 2019.10.29 Core From ea07412ebf6fff7c17bcac9960cfe4e92ed62f12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Nov 2019 05:32:56 +0700 Subject: [PATCH 660/785] release 2019.11.05 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 12 ++++-------- youtube_dl/version.py | 2 +- 8 files changed, 18 insertions(+), 22 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index f82502bd1..12de9add2 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.10.29** +- [ ] I've verified that I'm running youtube-dl version **2019.11.05** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.10.29 + [debug] youtube-dl version 2019.11.05 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 5ef983d43..8a6202cf6 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. 
If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.10.29** +- [ ] I've verified that I'm running youtube-dl version **2019.11.05** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 8f05aa79f..83f91d5fe 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. 
Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.10.29** +- [ ] I've verified that I'm running youtube-dl version **2019.11.05** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index e90900d8d..be8e70f1e 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.10.29** +- [ ] I've verified that I'm running youtube-dl version **2019.11.05** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.10.29 + [debug] youtube-dl version 2019.11.05 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 7021d7397..7544d171c 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.10.29** +- [ ] I've verified that I'm running youtube-dl version **2019.11.05** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 338dd456b..d46d20082 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.11.05 Extractors + [scte] Add support for learning.scte.org (#22975) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index af905db5a..536b87479 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -76,8 +76,6 @@ - **awaan:video** - **AZMedien**: AZ Medien videos - **BaiduVideo**: 百度视频 - - **bambuser** - - **bambuser:channel** - **Bandcamp** - **Bandcamp:album** - **Bandcamp:weekly** @@ -284,12 +282,12 @@ - **FiveThirtyEight** - **FiveTV** - **Flickr** - - **Flipagram** - **Folketinget**: Folketinget (ft.dk; Danish parliament) - **FootyRoom** - **Formula1** - **FOX** - **FOX9** + - **FOX9News** - **Foxgay** - **foxnews**: Fox News and Fox Business Video - **foxnews:article** @@ -315,8 +313,6 @@ - **FXNetworks** - **Gaia** - **GameInformer** - - **GameOne** - - **gameone:playlist** - **GameSpot** - **GameStar** - **Gaskrank** @@ -331,7 +327,6 @@ - **Globo** - **GloboArticle** - **Go** - - **Go90** - **GodTube** - **Golem** - **GoogleDrive** @@ -366,7 +361,6 @@ - **Hungama** - **HungamaSong** - **Hypem** - - **Iconosquare** - **ign.com** - **imdb**: Internet Movie Database trailers - **imdb:list**: Internet Movie Database lists @@ -406,7 +400,6 @@ - **Kankan** - **Karaoketv** - **KarriereVideos** - - **keek** - **KeezMovies** - **Ketnet** - **KhanAcademy** @@ -777,6 +770,8 @@ - **Screencast** - **ScreencastOMatic** - **scrippsnetworks:watch** + - **SCTE** + - 
**SCTECourse** - **Seeker** - **SenateISVP** - **SendtoNews** @@ -926,6 +921,7 @@ - **TV2** - **tv2.hu** - **TV2Article** + - **TV2DK** - **TV4**: tv4.se and tv4play.se - **TV5MondePlus**: TV5MONDE+ - **TVA** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 924f26ca8..8012a66db 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.10.29' +__version__ = '2019.11.05' From e9b95167af3f9cacd16e379a40bacb27999840b9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 5 Nov 2019 10:03:38 +0100 Subject: [PATCH 661/785] [roosterteeth] fix login request(closes #16094)(closes #22689) --- youtube_dl/extractor/roosterteeth.py | 55 +++++++++++----------------- 1 file changed, 22 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py index 8d88ee499..8883639b2 100644 --- a/youtube_dl/extractor/roosterteeth.py +++ b/youtube_dl/extractor/roosterteeth.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..compat import ( compat_HTTPError, @@ -18,7 +16,6 @@ from ..utils import ( class RoosterTeethIE(InfoExtractor): _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)' - _LOGIN_URL = 'https://roosterteeth.com/login' _NETRC_MACHINE = 'roosterteeth' _TESTS = [{ 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', @@ -53,48 +50,40 @@ class RoosterTeethIE(InfoExtractor): 'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'only_matching': True, }] + _EPISODE_BASE_URL = 'https://svod-be.roosterteeth.com/api/v1/episodes/' def _login(self): username, password = self._get_login_info() if username is None: return - login_page = self._download_webpage( - self._LOGIN_URL, None, - 
note='Downloading login page', - errnote='Unable to download login page') - - login_form = self._hidden_inputs(login_page) - - login_form.update({ - 'username': username, - 'password': password, - }) - - login_request = self._download_webpage( - self._LOGIN_URL, None, - note='Logging in', - data=urlencode_postdata(login_form), - headers={ - 'Referer': self._LOGIN_URL, - }) - - if not any(re.search(p, login_request) for p in ( - r'href=["\']https?://(?:www\.)?roosterteeth\.com/logout"', - r'>Sign Out<')): - error = self._html_search_regex( - r'(?s)<div[^>]+class=(["\']).*?\balert-danger\b.*?\1[^>]*>(?:\s*<button[^>]*>.*?</button>)?(?P<error>.+?)</div>', - login_request, 'alert', default=None, group='error') - if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) - raise ExtractorError('Unable to log in') + try: + self._download_json( + 'https://auth.roosterteeth.com/oauth/token', + None, 'Logging in', data=urlencode_postdata({ + 'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5', + 'grant_type': 'password', + 'username': username, + 'password': password, + })) + except ExtractorError as e: + msg = 'Unable to login' + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + resp = self._parse_json(e.cause.read().decode(), None, fatal=False) + if resp: + error = resp.get('extra_info') or resp.get('error_description') or resp.get('error') + if error: + msg += ': ' + error + self.report_warning(msg) def _real_initialize(self): + if self._get_cookies(self._EPISODE_BASE_URL).get('rt_access_token'): + return self._login() def _real_extract(self, url): display_id = self._match_id(url) - api_episode_url = 'https://svod-be.roosterteeth.com/api/v1/episodes/%s' % display_id + api_episode_url = self._EPISODE_BASE_URL + display_id try: m3u8_url = self._download_json( From b77c3949e899902de78b140f6e444dc55bac824f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 5 Nov 2019 14:04:17 +0100 
Subject: [PATCH 662/785] [patreon] minimize reponse size and extract uploader_id and filesize --- youtube_dl/extractor/patreon.py | 52 +++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index 426dd8121..761a4b1de 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -6,7 +6,11 @@ from ..utils import ( clean_html, determine_ext, int_or_none, + KNOWN_EXTENSIONS, + mimetype2ext, parse_iso8601, + str_or_none, + try_get, ) @@ -24,6 +28,7 @@ class PatreonIE(InfoExtractor): 'thumbnail': 're:^https?://.*$', 'timestamp': 1406473987, 'upload_date': '20140727', + 'uploader_id': '87145', }, }, { 'url': 'http://www.patreon.com/creation?hid=754133', @@ -90,7 +95,13 @@ class PatreonIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) post = self._download_json( - 'https://www.patreon.com/api/posts/' + video_id, video_id) + 'https://www.patreon.com/api/posts/' + video_id, video_id, query={ + 'fields[media]': 'download_url,mimetype,size_bytes', + 'fields[post]': 'comment_count,content,embed,image,like_count,post_file,published_at,title', + 'fields[user]': 'full_name,url', + 'json-api-use-default-includes': 'false', + 'include': 'media,user', + }) attributes = post['data']['attributes'] title = attributes['title'].strip() image = attributes.get('image') or {} @@ -104,33 +115,42 @@ class PatreonIE(InfoExtractor): 'comment_count': int_or_none(attributes.get('comment_count')), } - def add_file(file_data): - file_url = file_data.get('url') - if file_url: - info.update({ - 'url': file_url, - 'ext': determine_ext(file_data.get('name'), 'mp3'), - }) - for i in post.get('included', []): i_type = i.get('type') - if i_type == 'attachment': - add_file(i.get('attributes') or {}) + if i_type == 'media': + media_attributes = i.get('attributes') or {} + download_url = media_attributes.get('download_url') + ext = 
mimetype2ext(media_attributes.get('mimetype')) + if download_url and ext in KNOWN_EXTENSIONS: + info.update({ + 'ext': ext, + 'filesize': int_or_none(media_attributes.get('size_bytes')), + 'url': download_url, + }) elif i_type == 'user': user_attributes = i.get('attributes') if user_attributes: info.update({ 'uploader': user_attributes.get('full_name'), + 'uploader_id': str_or_none(i.get('id')), 'uploader_url': user_attributes.get('url'), }) if not info.get('url'): - add_file(attributes.get('post_file') or {}) + embed_url = try_get(attributes, lambda x: x['embed']['url']) + if embed_url: + info.update({ + '_type': 'url', + 'url': embed_url, + }) if not info.get('url'): - info.update({ - '_type': 'url', - 'url': attributes['embed']['url'], - }) + post_file = attributes['post_file'] + ext = determine_ext(post_file.get('name')) + if ext in KNOWN_EXTENSIONS: + info.update({ + 'ext': ext, + 'url': post_file['url'], + }) return info From 2318629b2b79cad5fcab743bce86233a7592ed46 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 5 Nov 2019 14:04:50 +0100 Subject: [PATCH 663/785] [dplay] minimize response size --- youtube_dl/extractor/dplay.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index d9c3d59cd..a7b9db568 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -146,6 +146,11 @@ class DPlayIE(InfoExtractor): video = self._download_json( disco_base + 'content/videos/' + display_id, display_id, headers=headers, query={ + 'fields[channel]': 'name', + 'fields[image]': 'height,src,width', + 'fields[show]': 'name', + 'fields[tag]': 'name', + 'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration', 'include': 'images,primaryChannel,show,tags' }) video_id = video['data']['id'] @@ -226,7 +231,6 @@ class DPlayIE(InfoExtractor): 'series': series, 'season_number': int_or_none(info.get('seasonNumber')), 
'episode_number': int_or_none(info.get('episodeNumber')), - 'age_limit': int_or_none(info.get('minimum_age')), 'creator': creator, 'tags': tags, 'thumbnails': thumbnails, From b6139cb0c3635eb96e39973ab288c17a9f104067 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 5 Nov 2019 22:56:25 +0100 Subject: [PATCH 664/785] [common] pass headers to _extract_(m3u8|mpd)_formats methods --- youtube_dl/extractor/common.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 50d48c40d..2688b19e4 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1586,12 +1586,12 @@ class InfoExtractor(object): def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, entry_protocol='m3u8', preference=None, m3u8_id=None, note=None, errnote=None, - fatal=True, live=False): + fatal=True, live=False, headers=None): res = self._download_webpage_handle( m3u8_url, video_id, note=note or 'Downloading m3u8 information', errnote=errnote or 'Failed to download m3u8 information', - fatal=fatal) + fatal=fatal, headers=headers) if res is False: return [] @@ -2009,12 +2009,12 @@ class InfoExtractor(object): }) return entries - def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}): + def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, headers=None): res = self._download_xml_handle( mpd_url, video_id, note=note or 'Downloading MPD manifest', errnote=errnote or 'Failed to download MPD manifest', - fatal=fatal) + fatal=fatal, headers=None) if res is False: return [] mpd_doc, urlh = res From d7def23d0539430f5d816f1cfd733e436f62c257 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 5 Nov 2019 23:08:42 +0100 Subject: [PATCH 665/785] [hotstar] pass Referer header to format requests(closes #22836) --- 
youtube_dl/extractor/hotstar.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index f9f7c5a64..f97eefa3d 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -118,6 +118,7 @@ class HotStarIE(HotStarBaseIE): if video_data.get('drmProtected'): raise ExtractorError('This video is DRM protected.', expected=True) + headers = {'Referer': url} formats = [] geo_restricted = False playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets'] @@ -137,10 +138,11 @@ class HotStarIE(HotStarBaseIE): if 'package:hls' in tags or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( format_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls')) + entry_protocol='m3u8_native', + m3u8_id='hls', headers=headers)) elif 'package:dash' in tags or ext == 'mpd': formats.extend(self._extract_mpd_formats( - format_url, video_id, mpd_id='dash')) + format_url, video_id, mpd_id='dash', headers=headers)) elif ext == 'f4m': # produce broken files pass @@ -158,6 +160,9 @@ class HotStarIE(HotStarBaseIE): self.raise_geo_restricted(countries=['IN']) self._sort_formats(formats) + for f in formats: + f.setdefault('http_headers', {}).update(headers) + return { 'id': video_id, 'title': title, From 57033e35e58e1d57ab3be5ffe5df5a80a5dbcf83 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 5 Nov 2019 23:41:57 +0100 Subject: [PATCH 666/785] [common] fix typo --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2688b19e4..1e6b66d25 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2014,7 +2014,7 @@ class InfoExtractor(object): mpd_url, video_id, note=note or 'Downloading MPD manifest', errnote=errnote or 'Failed to download MPD manifest', - fatal=fatal, headers=None) + 
fatal=fatal, headers=headers) if res is False: return [] mpd_doc, urlh = res From 3ec86619e33a3d1e29c14ec053d7e420ac8b62ae Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 6 Nov 2019 07:18:29 +0100 Subject: [PATCH 667/785] [common] initialize headers param with empty dict --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1e6b66d25..4a683f6d6 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1586,7 +1586,7 @@ class InfoExtractor(object): def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, entry_protocol='m3u8', preference=None, m3u8_id=None, note=None, errnote=None, - fatal=True, live=False, headers=None): + fatal=True, live=False, headers={}): res = self._download_webpage_handle( m3u8_url, video_id, note=note or 'Downloading m3u8 information', @@ -2009,7 +2009,7 @@ class InfoExtractor(object): }) return entries - def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, headers=None): + def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, headers={}): res = self._download_xml_handle( mpd_url, video_id, note=note or 'Downloading MPD manifest', From d64ec1242e9dec03ea2aa86b6e913db78c8619e0 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 6 Nov 2019 10:44:19 +0100 Subject: [PATCH 668/785] [onionstudios] fix extraction --- youtube_dl/extractor/onionstudios.py | 78 ++++++++++++++++------------ 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/onionstudios.py b/youtube_dl/extractor/onionstudios.py index c6e3d5640..7f8c6f0d3 100644 --- a/youtube_dl/extractor/onionstudios.py +++ b/youtube_dl/extractor/onionstudios.py @@ -5,10 +5,11 @@ import re from .common import InfoExtractor from ..utils 
import ( - determine_ext, + compat_str, int_or_none, - float_or_none, - mimetype2ext, + js_to_json, + parse_iso8601, + try_get, ) @@ -17,14 +18,16 @@ class OnionStudiosIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937', - 'md5': '719d1f8c32094b8c33902c17bcae5e34', + 'md5': '5a118d466d62b5cd03647cf2c593977f', 'info_dict': { 'id': '2937', 'ext': 'mp4', 'title': 'Hannibal charges forward, stops for a cocktail', + 'description': 'md5:545299bda6abf87e5ec666548c6a9448', 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'The A.V. Club', - 'uploader_id': 'the-av-club', + 'uploader': 'a.v. club', + 'upload_date': '20150619', + 'timestamp': 1434728546, }, }, { 'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true', @@ -44,38 +47,49 @@ class OnionStudiosIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage( + 'http://onionstudios.com/embed/dc94dc2899fe644c0e7241fa04c1b732.js', + video_id) + mcp_id = compat_str(self._parse_json(self._search_regex( + r'window\.mcpMapping\s*=\s*({.+?});', webpage, + 'MCP Mapping'), video_id, js_to_json)[video_id]['mcp_id']) video_data = self._download_json( - 'http://www.onionstudios.com/video/%s.json' % video_id, video_id) - - title = video_data['title'] - + 'https://api.vmh.univision.com/metadata/v1/content/' + mcp_id, + mcp_id)['videoMetadata'] + iptc = video_data['photoVideoMetadataIPTC'] + title = iptc['title']['en'] + fmg = video_data.get('photoVideoMetadata_fmg') or {} + tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com' + data = self._download_json( + tvss_domain + '/api/v3/video-auth/url-signature-tokens', + mcp_id, query={'mcpids': mcp_id})['data'][0] formats = [] - for source in video_data.get('sources', []): - source_url = source.get('url') - if not source_url: - continue - ext = mimetype2ext(source.get('content_type')) or determine_ext(source_url) - if ext == 'm3u8': - 
formats.extend(self._extract_m3u8_formats( - source_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - else: - tbr = int_or_none(source.get('bitrate')) - formats.append({ - 'format_id': ext + ('-%d' % tbr if tbr else ''), - 'url': source_url, - 'width': int_or_none(source.get('width')), - 'tbr': tbr, - 'ext': ext, - }) + + rendition_url = data.get('renditionUrl') + if rendition_url: + formats = self._extract_m3u8_formats( + rendition_url, mcp_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False) + + fallback_rendition_url = data.get('fallbackRenditionUrl') + if fallback_rendition_url: + formats.append({ + 'format_id': 'fallback', + 'tbr': int_or_none(self._search_regex( + r'_(\d+)\.mp4', fallback_rendition_url, + 'bitrate', default=None)), + 'url': fallback_rendition_url, + }) + self._sort_formats(formats) return { 'id': video_id, 'title': title, - 'thumbnail': video_data.get('poster_url'), - 'uploader': video_data.get('channel_name'), - 'uploader_id': video_data.get('channel_slug'), - 'duration': float_or_none(video_data.get('duration', 1000)), - 'tags': video_data.get('tags'), + 'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str), + 'uploader': fmg.get('network'), + 'duration': int_or_none(iptc.get('fileDuration')), 'formats': formats, + 'description': try_get(iptc, lambda x: x['description']['en'], compat_str), + 'timestamp': parse_iso8601(iptc.get('dateReleased')), } From 55adb63e5412fa5556be22e97d61b8d27c7a5e67 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 6 Nov 2019 19:56:10 +0100 Subject: [PATCH 669/785] [kinja] add support for Kinja embeds closes #5756 closes #11282 closes #22237 closes #22384 --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 17 ++- youtube_dl/extractor/kinja.py | 221 +++++++++++++++++++++++++++ youtube_dl/extractor/onionstudios.py | 54 +------ 4 files changed, 241 insertions(+), 52 deletions(-) create mode 100644 
youtube_dl/extractor/kinja.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9f43b284d..9e3b554fa 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -513,6 +513,7 @@ from .keezmovies import KeezMoviesIE from .ketnet import KetnetIE from .khanacademy import KhanAcademyIE from .kickstarter import KickStarterIE +from .kinja import KinjaEmbedIE from .kinopoisk import KinoPoiskIE from .konserthusetplay import KonserthusetPlayIE from .kontrtube import KontrTubeIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1c0780e98..3d919f656 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -119,6 +119,7 @@ from .viqeo import ViqeoIE from .expressen import ExpressenIE from .zype import ZypeIE from .odnoklassniki import OdnoklassnikiIE +from .kinja import KinjaEmbedIE class GenericIE(InfoExtractor): @@ -1487,16 +1488,18 @@ class GenericIE(InfoExtractor): 'timestamp': 1432570283, }, }, - # OnionStudios embed + # Kinja embed { 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537', 'info_dict': { - 'id': '2855', + 'id': '106351', 'ext': 'mp4', 'title': 'Don’t Understand Bitcoin? 
This Man Will Mumble An Explanation At You', + 'description': 'Migrated from OnionStudios', 'thumbnail': r're:^https?://.*\.jpe?g$', - 'uploader': 'ClickHole', - 'uploader_id': 'clickhole', + 'uploader': 'clickhole', + 'upload_date': '20150527', + 'timestamp': 1432744860, } }, # SnagFilms embed @@ -2894,6 +2897,12 @@ class GenericIE(InfoExtractor): if senate_isvp_url: return self.url_result(senate_isvp_url, 'SenateISVP') + # Look for Kinja embeds + kinja_embed_urls = KinjaEmbedIE._extract_urls(webpage, url) + if kinja_embed_urls: + return self.playlist_from_matches( + kinja_embed_urls, video_id, video_title) + # Look for OnionStudios embeds onionstudios_url = OnionStudiosIE._extract_url(webpage) if onionstudios_url: diff --git a/youtube_dl/extractor/kinja.py b/youtube_dl/extractor/kinja.py new file mode 100644 index 000000000..79e3026d2 --- /dev/null +++ b/youtube_dl/extractor/kinja.py @@ -0,0 +1,221 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urllib_parse_unquote, +) +from ..utils import ( + int_or_none, + parse_iso8601, + strip_or_none, + try_get, + unescapeHTML, + urljoin, +) + + +class KinjaEmbedIE(InfoExtractor): + IENAME = 'kinja:embed' + _DOMAIN_REGEX = r'''(?:[^.]+\.)? 
+ (?: + avclub| + clickhole| + deadspin| + gizmodo| + jalopnik| + jezebel| + kinja| + kotaku| + lifehacker| + splinternews| + the(?:inventory|onion|root|takeout) + )\.com''' + _COMMON_REGEX = r'''/ + (?: + ajax/inset| + embed/video + )/iframe\?.*?\bid=''' + _VALID_URL = r'''(?x)https?://%s%s + (?P<type> + fb| + imgur| + instagram| + jwp(?:layer)?-video| + kinjavideo| + mcp| + megaphone| + ooyala| + soundcloud(?:-playlist)?| + tumblr-post| + twitch-stream| + twitter| + ustream-channel| + vimeo| + vine| + youtube-(?:list|video) + )-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX) + _TESTS = [{ + 'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=ooyala-xzMXhleDpopuT0u1ijt_qZj3Va-34pEX%2FZTIxYmJjZDM2NWYzZDViZGRiOWJjYzc5', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD', + 'only_matching': True, + }, { + 'url': 
'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE', + 'only_matching': True, + }] + _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform') + _PROVIDER_MAP = { + 'fb': ('facebook.com/video.php?v=', 'Facebook'), + 'imgur': ('imgur.com/', 'Imgur'), + 'instagram': ('instagram.com/p/', 'Instagram'), + 'jwplayer-video': _JWPLATFORM_PROVIDER, + 'jwp-video': _JWPLATFORM_PROVIDER, + 'megaphone': ('player.megaphone.fm/', 'Generic'), + 'ooyala': ('player.ooyala.com/player.js?embedCode=', 'Ooyala'), + 'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'), + 'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'), + 'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'), + 'twitch-stream': ('twitch.tv/', 'TwitchStream'), + 'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'), + 'ustream-channel': ('ustream.tv/embed/', 'Ustream'), + 'vimeo': ('vimeo.com/', 'Vimeo'), + 'vine': ('vine.co/v/', 'Vine'), + 'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'), + 'youtube-video': ('youtube.com/embed/', 'Youtube'), + } + + @staticmethod + def _extract_urls(webpage, url): + return [urljoin(url, unescapeHTML(mobj.group('url'))) for mobj in re.finditer( + r'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//%s)?%s(?:(?!\1).)+)\1' % (KinjaEmbedIE._DOMAIN_REGEX, KinjaEmbedIE._COMMON_REGEX), + webpage)] + + def _real_extract(self, url): + video_type, video_id = re.match(self._VALID_URL, url).groups() + + provider = self._PROVIDER_MAP.get(video_type) + if provider: + video_id = compat_urllib_parse_unquote(video_id) + if video_type == 'tumblr-post': + video_id, blog = video_id.split('-', 1) + result_url = provider[0] % (blog, video_id) + elif video_type == 'youtube-list': + video_id, playlist_id = video_id.split('/') + result_url = provider[0] % (video_id, playlist_id) + else: + if 
video_type == 'ooyala': + video_id = video_id.split('/')[0] + result_url = provider[0] + video_id + return self.url_result('http://' + result_url, provider[1]) + + if video_type == 'kinjavideo': + data = self._download_json( + 'https://kinja.com/api/core/video/views/videoById', + video_id, query={'videoId': video_id})['data'] + title = data['title'] + + formats = [] + for k in ('signedPlaylist', 'streaming'): + m3u8_url = data.get(k + 'Url') + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + self._sort_formats(formats) + + thumbnail = None + poster = data.get('poster') or {} + poster_id = poster.get('id') + if poster_id: + thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/%s.%s' % (poster_id, poster.get('format') or 'jpg') + + return { + 'id': video_id, + 'title': title, + 'description': strip_or_none(data.get('description')), + 'formats': formats, + 'tags': data.get('tags'), + 'timestamp': int_or_none(try_get( + data, lambda x: x['postInfo']['publishTimeMillis']), 1000), + 'thumbnail': thumbnail, + 'uploader': data.get('network'), + } + else: + video_data = self._download_json( + 'https://api.vmh.univision.com/metadata/v1/content/' + video_id, + video_id)['videoMetadata'] + iptc = video_data['photoVideoMetadataIPTC'] + title = iptc['title']['en'] + fmg = video_data.get('photoVideoMetadata_fmg') or {} + tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com' + data = self._download_json( + tvss_domain + '/api/v3/video-auth/url-signature-tokens', + video_id, query={'mcpids': video_id})['data'][0] + formats = [] + + rendition_url = data.get('renditionUrl') + if rendition_url: + formats = self._extract_m3u8_formats( + rendition_url, video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False) + + fallback_rendition_url = data.get('fallbackRenditionUrl') + if fallback_rendition_url: + formats.append({ + 'format_id': 'fallback', + 'tbr': 
int_or_none(self._search_regex( + r'_(\d+)\.mp4', fallback_rendition_url, + 'bitrate', default=None)), + 'url': fallback_rendition_url, + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str), + 'uploader': fmg.get('network'), + 'duration': int_or_none(iptc.get('fileDuration')), + 'formats': formats, + 'description': try_get(iptc, lambda x: x['description']['en'], compat_str), + 'timestamp': parse_iso8601(iptc.get('dateReleased')), + } diff --git a/youtube_dl/extractor/onionstudios.py b/youtube_dl/extractor/onionstudios.py index 7f8c6f0d3..cf5c39e66 100644 --- a/youtube_dl/extractor/onionstudios.py +++ b/youtube_dl/extractor/onionstudios.py @@ -4,13 +4,8 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - compat_str, - int_or_none, - js_to_json, - parse_iso8601, - try_get, -) +from ..compat import compat_str +from ..utils import js_to_json class OnionStudiosIE(InfoExtractor): @@ -20,7 +15,7 @@ class OnionStudiosIE(InfoExtractor): 'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937', 'md5': '5a118d466d62b5cd03647cf2c593977f', 'info_dict': { - 'id': '2937', + 'id': '3459881', 'ext': 'mp4', 'title': 'Hannibal charges forward, stops for a cocktail', 'description': 'md5:545299bda6abf87e5ec666548c6a9448', @@ -53,43 +48,6 @@ class OnionStudiosIE(InfoExtractor): mcp_id = compat_str(self._parse_json(self._search_regex( r'window\.mcpMapping\s*=\s*({.+?});', webpage, 'MCP Mapping'), video_id, js_to_json)[video_id]['mcp_id']) - video_data = self._download_json( - 'https://api.vmh.univision.com/metadata/v1/content/' + mcp_id, - mcp_id)['videoMetadata'] - iptc = video_data['photoVideoMetadataIPTC'] - title = iptc['title']['en'] - fmg = video_data.get('photoVideoMetadata_fmg') or {} - tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com' - data = 
self._download_json( - tvss_domain + '/api/v3/video-auth/url-signature-tokens', - mcp_id, query={'mcpids': mcp_id})['data'][0] - formats = [] - - rendition_url = data.get('renditionUrl') - if rendition_url: - formats = self._extract_m3u8_formats( - rendition_url, mcp_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False) - - fallback_rendition_url = data.get('fallbackRenditionUrl') - if fallback_rendition_url: - formats.append({ - 'format_id': 'fallback', - 'tbr': int_or_none(self._search_regex( - r'_(\d+)\.mp4', fallback_rendition_url, - 'bitrate', default=None)), - 'url': fallback_rendition_url, - }) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str), - 'uploader': fmg.get('network'), - 'duration': int_or_none(iptc.get('fileDuration')), - 'formats': formats, - 'description': try_get(iptc, lambda x: x['description']['en'], compat_str), - 'timestamp': parse_iso8601(iptc.get('dateReleased')), - } + return self.url_result( + 'http://kinja.com/ajax/inset/iframe?id=mcp-' + mcp_id, + 'KinjaEmbed', mcp_id) From 5d92b407e0ea856e3dbadfef35e5258e94e0bb23 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 6 Nov 2019 20:41:49 +0100 Subject: [PATCH 670/785] [mixcloud] improve extraction - improve metadata extraction(closes #11721) - fix playlist extraction(closes #22378) - fix user mixes extraction(closes #15197)(closes #17865) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/mixcloud.py | 498 +++++++++++++---------------- 2 files changed, 225 insertions(+), 274 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9e3b554fa..2f9ba6893 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -639,7 +639,6 @@ from .mixcloud import ( MixcloudIE, MixcloudUserIE, MixcloudPlaylistIE, - MixcloudStreamIE, ) from .mlb import MLBIE from .mnet import 
MnetIE diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index e5f631506..9759560f1 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import functools import itertools import re @@ -11,28 +10,37 @@ from ..compat import ( compat_ord, compat_str, compat_urllib_parse_unquote, - compat_urlparse, compat_zip ) from ..utils import ( - clean_html, - ExtractorError, int_or_none, - OnDemandPagedList, - str_to_int, + parse_iso8601, + strip_or_none, try_get, - urljoin, ) -class MixcloudIE(InfoExtractor): +class MixcloudBaseIE(InfoExtractor): + def _call_api(self, object_type, object_fields, display_id, username, slug=None): + lookup_key = object_type + 'Lookup' + return self._download_json( + 'https://www.mixcloud.com/graphql', display_id, query={ + 'query': '''{ + %s(lookup: {username: "%s"%s}) { + %s + } +}''' % (lookup_key, username, ', slug: "%s"' % slug if slug else '', object_fields) + })['data'][lookup_key] + + +class MixcloudIE(MixcloudBaseIE): _VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)' IE_NAME = 'mixcloud' _TESTS = [{ 'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/', 'info_dict': { - 'id': 'dholbach-cryptkeeper', + 'id': 'dholbach_cryptkeeper', 'ext': 'm4a', 'title': 'Cryptkeeper', 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', @@ -40,11 +48,13 @@ class MixcloudIE(InfoExtractor): 'uploader_id': 'dholbach', 'thumbnail': r're:https?://.*\.jpg', 'view_count': int, + 'timestamp': 1321359578, + 'upload_date': '20111115', }, }, { 'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/', 'info_dict': { - 'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat', + 'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat', 'ext': 'mp3', 'title': 'Caribou 7 inch Vinyl 
Mix & Chat', 'description': 'md5:2b8aec6adce69f9d41724647c65875e8', @@ -52,11 +62,14 @@ class MixcloudIE(InfoExtractor): 'uploader_id': 'gillespeterson', 'thumbnail': 're:https?://.*', 'view_count': int, + 'timestamp': 1422987057, + 'upload_date': '20150203', }, }, { 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/', 'only_matching': True, }] + _DECRYPTION_KEY = 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD' @staticmethod def _decrypt_xor_cipher(key, ciphertext): @@ -66,177 +79,193 @@ class MixcloudIE(InfoExtractor): for ch, k in compat_zip(ciphertext, itertools.cycle(key))]) def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - uploader = mobj.group(1) - cloudcast_name = mobj.group(2) - track_id = compat_urllib_parse_unquote('-'.join((uploader, cloudcast_name))) + username, slug = re.match(self._VALID_URL, url).groups() + username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug) + track_id = '%s_%s' % (username, slug) - webpage = self._download_webpage(url, track_id) + cloudcast = self._call_api('cloudcast', '''audioLength + comments(first: 100) { + edges { + node { + comment + created + user { + displayName + username + } + } + } + totalCount + } + description + favorites { + totalCount + } + featuringArtistList + isExclusive + name + owner { + displayName + url + username + } + picture(width: 1024, height: 1024) { + url + } + plays + publishDate + reposts { + totalCount + } + streamInfo { + dashUrl + hlsUrl + url + } + tags { + tag { + name + } + }''', track_id, username, slug) - # Legacy path - encrypted_play_info = self._search_regex( - r'm-play-info="([^"]+)"', webpage, 'play info', default=None) + title = cloudcast['name'] - if encrypted_play_info is not None: - # Decode - encrypted_play_info = compat_b64decode(encrypted_play_info) - else: - # New path - full_info_json = self._parse_json(self._html_search_regex( - r'<script id="relay-data" 
type="text/x-mixcloud">([^<]+)</script>', - webpage, 'play info'), 'play info') - for item in full_info_json: - item_data = try_get(item, [ - lambda x: x['cloudcast']['data']['cloudcastLookup'], - lambda x: x['cloudcastLookup']['data']['cloudcastLookup'], - ], dict) - if try_get(item_data, lambda x: x['streamInfo']['url']): - info_json = item_data - break - else: - raise ExtractorError('Failed to extract matching stream info') + stream_info = cloudcast['streamInfo'] + formats = [] - message = self._html_search_regex( - r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)', - webpage, 'error message', default=None) - - js_url = self._search_regex( - r'<script[^>]+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/(?:js2/www_js_4|js/www)\.[^>]+\.js)', - webpage, 'js url') - js = self._download_webpage(js_url, track_id, 'Downloading JS') - # Known plaintext attack - if encrypted_play_info: - kps = ['{"stream_url":'] - kpa_target = encrypted_play_info - else: - kps = ['https://', 'http://'] - kpa_target = compat_b64decode(info_json['streamInfo']['url']) - for kp in kps: - partial_key = self._decrypt_xor_cipher(kpa_target, kp) - for quote in ["'", '"']: - key = self._search_regex( - r'{0}({1}[^{0}]*){0}'.format(quote, re.escape(partial_key)), - js, 'encryption key', default=None) - if key is not None: - break - else: + for url_key in ('url', 'hlsUrl', 'dashUrl'): + format_url = stream_info.get(url_key) + if not format_url: continue - break - else: - raise ExtractorError('Failed to extract encryption key') + decrypted = self._decrypt_xor_cipher( + self._DECRYPTION_KEY, compat_b64decode(format_url)) + if url_key == 'hlsUrl': + formats.extend(self._extract_m3u8_formats( + decrypted, track_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif url_key == 'dashUrl': + formats.extend(self._extract_mpd_formats( + decrypted, track_id, mpd_id='dash', fatal=False)) + else: + formats.append({ + 'format_id': 'http', + 
'url': decrypted, + 'downloader_options': { + # Mixcloud starts throttling at >~5M + 'http_chunk_size': 5242880, + }, + }) - if encrypted_play_info is not None: - play_info = self._parse_json(self._decrypt_xor_cipher(key, encrypted_play_info), 'play info') - if message and 'stream_url' not in play_info: - raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) - song_url = play_info['stream_url'] - formats = [{ - 'format_id': 'normal', - 'url': song_url - }] + if not formats and cloudcast.get('isExclusive'): + self.raise_login_required() - title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title') - thumbnail = self._proto_relative_url(self._html_search_regex( - r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False)) - uploader = self._html_search_regex( - r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False) - uploader_id = self._search_regex( - r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) - description = self._og_search_description(webpage) - view_count = str_to_int(self._search_regex( - [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', - r'/listeners/?">([0-9,.]+)</a>', - r'(?:m|data)-tooltip=["\']([\d,.]+) plays'], - webpage, 'play count', default=None)) + self._sort_formats(formats) - else: - title = info_json['name'] - thumbnail = urljoin( - 'https://thumbnailer.mixcloud.com/unsafe/600x600/', - try_get(info_json, lambda x: x['picture']['urlRoot'], compat_str)) - uploader = try_get(info_json, lambda x: x['owner']['displayName']) - uploader_id = try_get(info_json, lambda x: x['owner']['username']) - description = try_get(info_json, lambda x: x['description']) - view_count = int_or_none(try_get(info_json, lambda x: x['plays'])) + comments = [] + for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []): + node = edge.get('node') or {} + text = strip_or_none(node.get('comment')) + if not text: + continue + user = node.get('user') or {} + comments.append({ + 
'author': user.get('displayName'), + 'author_id': user.get('username'), + 'text': text, + 'timestamp': parse_iso8601(node.get('created')), + }) - stream_info = info_json['streamInfo'] - formats = [] + tags = [] + for t in cloudcast.get('tags'): + tag = try_get(t, lambda x: x['tag']['name'], compat_str) + if not tag: + tags.append(tag) - def decrypt_url(f_url): - for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'): - decrypted_url = self._decrypt_xor_cipher(k, f_url) - if re.search(r'^https?://[0-9A-Za-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url): - return decrypted_url + get_count = lambda x: int_or_none(try_get(cloudcast, lambda y: y[x]['totalCount'])) - for url_key in ('url', 'hlsUrl', 'dashUrl'): - format_url = stream_info.get(url_key) - if not format_url: - continue - decrypted = decrypt_url(compat_b64decode(format_url)) - if not decrypted: - continue - if url_key == 'hlsUrl': - formats.extend(self._extract_m3u8_formats( - decrypted, track_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif url_key == 'dashUrl': - formats.extend(self._extract_mpd_formats( - decrypted, track_id, mpd_id='dash', fatal=False)) - else: - formats.append({ - 'format_id': 'http', - 'url': decrypted, - 'downloader_options': { - # Mixcloud starts throttling at >~5M - 'http_chunk_size': 5242880, - }, - }) - self._sort_formats(formats) + owner = cloudcast.get('owner') or {} return { 'id': track_id, 'title': title, 'formats': formats, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'view_count': view_count, + 'description': cloudcast.get('description'), + 'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], compat_str), + 'uploader': owner.get('displayName'), + 'timestamp': parse_iso8601(cloudcast.get('publishDate')), + 'uploader_id': owner.get('username'), + 'uploader_url': owner.get('url'), + 'duration': int_or_none(cloudcast.get('audioLength')), + 'view_count': 
int_or_none(cloudcast.get('plays')), + 'like_count': get_count('favorites'), + 'repost_count': get_count('reposts'), + 'comment_count': get_count('comments'), + 'comments': comments, + 'tags': tags, + 'artist': ', '.join(cloudcast.get('featuringArtistList') or []) or None, } -class MixcloudPlaylistBaseIE(InfoExtractor): - _PAGE_SIZE = 24 +class MixcloudPlaylistBaseIE(MixcloudBaseIE): + def _get_cloudcast(self, node): + return node - def _find_urls_in_page(self, page): - for url in re.findall(r'm-play-button m-url="(?P<url>[^"]+)"', page): - yield self.url_result( - compat_urlparse.urljoin('https://www.mixcloud.com', clean_html(url)), - MixcloudIE.ie_key()) + def _get_playlist_title(self, title, slug): + return title - def _fetch_tracks_page(self, path, video_id, page_name, current_page, real_page_number=None): - real_page_number = real_page_number or current_page + 1 - return self._download_webpage( - 'https://www.mixcloud.com/%s/' % path, video_id, - note='Download %s (page %d)' % (page_name, current_page + 1), - errnote='Unable to download %s' % page_name, - query={'page': real_page_number, 'list': 'main', '_ajax': '1'}, - headers={'X-Requested-With': 'XMLHttpRequest'}) + def _real_extract(self, url): + username, slug = re.match(self._VALID_URL, url).groups() + username = compat_urllib_parse_unquote(username) + if not slug: + slug = 'uploads' + else: + slug = compat_urllib_parse_unquote(slug) + playlist_id = '%s_%s' % (username, slug) - def _tracks_page_func(self, page, video_id, page_name, current_page): - resp = self._fetch_tracks_page(page, video_id, page_name, current_page) + is_playlist_type = self._ROOT_TYPE == 'playlist' + playlist_type = 'items' if is_playlist_type else slug + list_filter = '' - for item in self._find_urls_in_page(resp): - yield item + has_next_page = True + entries = [] + while has_next_page: + playlist = self._call_api( + self._ROOT_TYPE, '''%s + %s + %s(first: 100%s) { + edges { + node { + %s + } + } + pageInfo { + endCursor + 
hasNextPage + } + }''' % (self._TITLE_KEY, self._DESCRIPTION_KEY, playlist_type, list_filter, self._NODE_TEMPLATE), + playlist_id, username, slug if is_playlist_type else None) - def _get_user_description(self, page_content): - return self._html_search_regex( - r'<div[^>]+class="profile-bio"[^>]*>(.+?)</div>', - page_content, 'user description', fatal=False) + items = playlist.get(playlist_type) or {} + for edge in items.get('edges', []): + cloudcast = self._get_cloudcast(edge.get('node') or {}) + cloudcast_url = cloudcast.get('url') + if not cloudcast_url: + continue + entries.append(self.url_result( + cloudcast_url, MixcloudIE.ie_key(), cloudcast.get('slug'))) + + page_info = items['pageInfo'] + has_next_page = page_info['hasNextPage'] + list_filter = ', after: "%s"' % page_info['endCursor'] + + return self.playlist_result( + entries, playlist_id, + self._get_playlist_title(playlist[self._TITLE_KEY], slug), + playlist.get(self._DESCRIPTION_KEY)) class MixcloudUserIE(MixcloudPlaylistBaseIE): - _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$' + _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/(?P<type>uploads|favorites|listens|stream)?/?$' IE_NAME = 'mixcloud:user' _TESTS = [{ @@ -244,68 +273,58 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): 'info_dict': { 'id': 'dholbach_uploads', 'title': 'Daniel Holbach (uploads)', - 'description': 'md5:def36060ac8747b3aabca54924897e47', + 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789', }, - 'playlist_mincount': 11, + 'playlist_mincount': 36, }, { 'url': 'http://www.mixcloud.com/dholbach/uploads/', 'info_dict': { 'id': 'dholbach_uploads', 'title': 'Daniel Holbach (uploads)', - 'description': 'md5:def36060ac8747b3aabca54924897e47', + 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789', }, - 'playlist_mincount': 11, + 'playlist_mincount': 36, }, { 'url': 'http://www.mixcloud.com/dholbach/favorites/', 'info_dict': { 'id': 'dholbach_favorites', 'title': 
'Daniel Holbach (favorites)', - 'description': 'md5:def36060ac8747b3aabca54924897e47', + 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789', }, - 'params': { - 'playlist_items': '1-100', - }, - 'playlist_mincount': 100, + # 'params': { + # 'playlist_items': '1-100', + # }, + 'playlist_mincount': 396, }, { 'url': 'http://www.mixcloud.com/dholbach/listens/', 'info_dict': { 'id': 'dholbach_listens', 'title': 'Daniel Holbach (listens)', - 'description': 'md5:def36060ac8747b3aabca54924897e47', + 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789', }, - 'params': { - 'playlist_items': '1-100', + # 'params': { + # 'playlist_items': '1-100', + # }, + 'playlist_mincount': 1623, + 'skip': 'Large list', + }, { + 'url': 'https://www.mixcloud.com/FirstEar/stream/', + 'info_dict': { + 'id': 'FirstEar_stream', + 'title': 'First Ear (stream)', + 'description': 'Curators of good music\r\n\r\nfirstearmusic.com', }, - 'playlist_mincount': 100, + 'playlist_mincount': 271, }] - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - user_id = mobj.group('user') - list_type = mobj.group('type') + _TITLE_KEY = 'displayName' + _DESCRIPTION_KEY = 'biog' + _ROOT_TYPE = 'user' + _NODE_TEMPLATE = '''slug + url''' - # if only a profile URL was supplied, default to download all uploads - if list_type is None: - list_type = 'uploads' - - video_id = '%s_%s' % (user_id, list_type) - - profile = self._download_webpage( - 'https://www.mixcloud.com/%s/' % user_id, video_id, - note='Downloading user profile', - errnote='Unable to download user profile') - - username = self._og_search_title(profile) - description = self._get_user_description(profile) - - entries = OnDemandPagedList( - functools.partial( - self._tracks_page_func, - '%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type), - self._PAGE_SIZE) - - return self.playlist_result( - entries, video_id, '%s (%s)' % (username, list_type), description) + def _get_playlist_title(self, title, slug): + return '%s (%s)' 
% (title, slug) class MixcloudPlaylistIE(MixcloudPlaylistBaseIE): @@ -313,87 +332,20 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE): IE_NAME = 'mixcloud:playlist' _TESTS = [{ - 'url': 'https://www.mixcloud.com/RedBullThre3style/playlists/tokyo-finalists-2015/', - 'info_dict': { - 'id': 'RedBullThre3style_tokyo-finalists-2015', - 'title': 'National Champions 2015', - 'description': 'md5:6ff5fb01ac76a31abc9b3939c16243a3', - }, - 'playlist_mincount': 16, - }, { 'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - user_id = mobj.group('user') - playlist_id = mobj.group('playlist') - video_id = '%s_%s' % (user_id, playlist_id) - - webpage = self._download_webpage( - url, user_id, - note='Downloading playlist page', - errnote='Unable to download playlist page') - - title = self._html_search_regex( - r'<a[^>]+class="parent active"[^>]*><b>\d+</b><span[^>]*>([^<]+)', - webpage, 'playlist title', - default=None) or self._og_search_title(webpage, fatal=False) - description = self._get_user_description(webpage) - - entries = OnDemandPagedList( - functools.partial( - self._tracks_page_func, - '%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'), - self._PAGE_SIZE) - - return self.playlist_result(entries, video_id, title, description) - - -class MixcloudStreamIE(MixcloudPlaylistBaseIE): - _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$' - IE_NAME = 'mixcloud:stream' - - _TEST = { - 'url': 'https://www.mixcloud.com/FirstEar/stream/', 'info_dict': { - 'id': 'FirstEar', - 'title': 'First Ear', - 'description': 'Curators of good music\nfirstearmusic.com', + 'id': 'maxvibes_jazzcat-on-ness-radio', + 'title': 'Ness Radio sessions', }, - 'playlist_mincount': 192, - } + 'playlist_mincount': 59, + }] + _TITLE_KEY = 'name' + _DESCRIPTION_KEY = 'description' + _ROOT_TYPE = 'playlist' + _NODE_TEMPLATE = '''cloudcast { + 
slug + url + }''' - def _real_extract(self, url): - user_id = self._match_id(url) - - webpage = self._download_webpage(url, user_id) - - entries = [] - prev_page_url = None - - def _handle_page(page): - entries.extend(self._find_urls_in_page(page)) - return self._search_regex( - r'm-next-page-url="([^"]+)"', page, - 'next page URL', default=None) - - next_page_url = _handle_page(webpage) - - for idx in itertools.count(0): - if not next_page_url or prev_page_url == next_page_url: - break - - prev_page_url = next_page_url - current_page = int(self._search_regex( - r'\?page=(\d+)', next_page_url, 'next page number')) - - next_page_url = _handle_page(self._fetch_tracks_page( - '%s/stream' % user_id, user_id, 'stream', idx, - real_page_number=current_page)) - - username = self._og_search_title(webpage) - description = self._get_user_description(webpage) - - return self.playlist_result(entries, user_id, username, description) + def _get_cloudcast(self, node): + return node.get('cloudcast') or {} From d4f53af482cc47b0473a3576da7ad902bea4ac39 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 6 Nov 2019 23:14:26 +0100 Subject: [PATCH 671/785] [lnkgo] fix extraction(closes #16834) --- youtube_dl/extractor/lnkgo.py | 100 ++++++++++++---------------------- 1 file changed, 36 insertions(+), 64 deletions(-) diff --git a/youtube_dl/extractor/lnkgo.py b/youtube_dl/extractor/lnkgo.py index cfec0d3d0..3e71852aa 100644 --- a/youtube_dl/extractor/lnkgo.py +++ b/youtube_dl/extractor/lnkgo.py @@ -5,24 +5,27 @@ import re from .common import InfoExtractor from ..utils import ( + clean_html, + compat_str, int_or_none, - unified_strdate, + parse_iso8601, ) class LnkGoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?lnkgo\.(?:alfa\.)?lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)' + _VALID_URL = r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P<id>[A-Za-z0-9-]+)(?:/(?P<episode_id>\d+))?' 
_TESTS = [{ - 'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162', + 'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai', 'info_dict': { - 'id': '46712', + 'id': '10809', 'ext': 'mp4', - 'title': 'Yra kaip yra', - 'upload_date': '20150107', - 'description': 'md5:d82a5e36b775b7048617f263a0e3475e', - 'age_limit': 7, - 'duration': 3019, - 'thumbnail': r're:^https?://.*\.jpg$' + 'title': "Put'ka: Trys Klausimai", + 'upload_date': '20161216', + 'description': 'Seniai matytas Put’ka užduoda tris klausimėlius. Pabandykime surasti atsakymus.', + 'age_limit': 18, + 'duration': 117, + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1481904000, }, 'params': { 'skip_download': True, # HLS download @@ -30,20 +33,21 @@ class LnkGoIE(InfoExtractor): }, { 'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2', 'info_dict': { - 'id': '47289', + 'id': '10467', 'ext': 'mp4', 'title': 'Nėrdas: Kompiuterio Valymas', 'upload_date': '20150113', 'description': 'md5:7352d113a242a808676ff17e69db6a69', 'age_limit': 18, 'duration': 346, - 'thumbnail': r're:^https?://.*\.jpg$' + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1421164800, }, 'params': { 'skip_download': True, # HLS download }, }, { - 'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai', + 'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413', 'only_matching': True, }] _AGE_LIMITS = { @@ -51,66 +55,34 @@ class LnkGoIE(InfoExtractor): 'N-14': 14, 'S': 18, } + _M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s' def _real_extract(self, url): - display_id = self._match_id(url) + display_id, video_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage( - url, display_id, 'Downloading player webpage') - - video_id = self._search_regex( - r'data-ep="([^"]+)"', webpage, 'video ID') - title = self._og_search_title(webpage) - description = 
self._og_search_description(webpage) - upload_date = unified_strdate(self._search_regex( - r'class="[^"]*meta-item[^"]*air-time[^"]*">.*?<strong>([^<]+)</strong>', webpage, 'upload date', fatal=False)) - - thumbnail_w = int_or_none( - self._og_search_property('image:width', webpage, 'thumbnail width', fatal=False)) - thumbnail_h = int_or_none( - self._og_search_property('image:height', webpage, 'thumbnail height', fatal=False)) - thumbnail = { - 'url': self._og_search_thumbnail(webpage), - } - if thumbnail_w and thumbnail_h: - thumbnail.update({ - 'width': thumbnail_w, - 'height': thumbnail_h, - }) - - config = self._parse_json(self._search_regex( - r'episodePlayer\((\{.*?\}),\s*\{', webpage, 'sources'), video_id) - - if config.get('pGeo'): - self.report_warning( - 'This content might not be available in your country due to copyright reasons') - - formats = [{ - 'format_id': 'hls', - 'ext': 'mp4', - 'url': config['EpisodeVideoLink_HLS'], - }] - - m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', config['EpisodeVideoLink']) - if m: - formats.append({ - 'format_id': 'rtmp', - 'ext': 'flv', - 'url': m.group('url'), - 'play_path': m.group('play_path'), - 'page_url': url, - }) + video_info = self._download_json( + 'https://lnk.lt/api/main/video-page/%s/%s/false' % (display_id, video_id or '0'), + display_id)['videoConfig']['videoInfo'] + video_id = compat_str(video_info['id']) + title = video_info['title'] + prefix = 'smil' if video_info.get('isQualityChangeAvailable') else 'mp4' + formats = self._extract_m3u8_formats( + self._M3U8_TEMPL % (prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or ''), + video_id, 'mp4', 'm3u8_native') self._sort_formats(formats) + poster_image = video_info.get('posterImage') + return { 'id': video_id, 'display_id': display_id, 'title': title, 'formats': formats, - 'thumbnails': [thumbnail], - 'duration': int_or_none(config.get('VideoTime')), - 'description': description, - 'age_limit': 
self._AGE_LIMITS.get(config.get('PGRating'), 0), - 'upload_date': upload_date, + 'thumbnail': 'https://lnk.lt/all-images/' + poster_image if poster_image else None, + 'duration': int_or_none(video_info.get('duration')), + 'description': clean_html(video_info.get('htmlDescription')), + 'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0), + 'timestamp': parse_iso8601(video_info.get('airDate')), + 'view_count': int_or_none(video_info.get('viewsCount')), } From 0b16b3c2d35d1706ec5c55e5b06352c753127368 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 9 Nov 2019 09:22:24 +0100 Subject: [PATCH 672/785] [twitch] add support for Clip embed URLs --- youtube_dl/extractor/twitch.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index ca7676fe2..a5681409c 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -644,7 +644,7 @@ class TwitchStreamIE(TwitchBaseIE): class TwitchClipsIE(TwitchBaseIE): IE_NAME = 'twitch:clips' - _VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:[^/]+/)*|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat', @@ -667,6 +667,9 @@ class TwitchClipsIE(TwitchBaseIE): }, { 'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan', 'only_matching': True, + }, { + 'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited', + 'only_matching': True, }] def _real_extract(self, url): From 18ca61c5e153d1c1cb8b9a2de3c8b9dfdaa69b0e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 9 Nov 2019 09:23:20 +0100 Subject: [PATCH 673/785] [twitter] improve extraction - add support for generic embeds(closes #22168) - always extract http formats for native 
videos(closes #14934) - add support for Twitter Broadcasts(closes #21369) - extract more metadata - improve VMap format extraction - unify extraction code for both twitter statuses and cards --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/periscope.py | 80 ++-- youtube_dl/extractor/twitter.py | 570 +++++++++++++++-------------- 3 files changed, 344 insertions(+), 307 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2f9ba6893..598006061 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1241,6 +1241,7 @@ from .twitter import ( TwitterCardIE, TwitterIE, TwitterAmplifyIE, + TwitterBroadcastIE, ) from .udemy import ( UdemyIE, diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index b337a56c0..c02e34aba 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -17,12 +17,54 @@ class PeriscopeBaseIE(InfoExtractor): 'https://api.periscope.tv/api/v2/%s' % method, item_id, query=query) + def _parse_broadcast_data(self, broadcast, video_id): + title = broadcast['status'] + uploader = broadcast.get('user_display_name') or broadcast.get('username') + title = '%s - %s' % (uploader, title) if uploader else title + is_live = broadcast.get('state').lower() == 'running' + + thumbnails = [{ + 'url': broadcast[image], + } for image in ('image_url', 'image_url_small') if broadcast.get(image)] + + return { + 'id': broadcast.get('id') or video_id, + 'title': self._live_title(title) if is_live else title, + 'timestamp': parse_iso8601(broadcast.get('created_at')), + 'uploader': uploader, + 'uploader_id': broadcast.get('user_id') or broadcast.get('username'), + 'thumbnails': thumbnails, + 'view_count': int_or_none(broadcast.get('total_watched')), + 'tags': broadcast.get('tags'), + 'is_live': is_live, + } + + @staticmethod + def _extract_common_format_info(broadcast): + return broadcast.get('state').lower(), 
int_or_none(broadcast.get('width')), int_or_none(broadcast.get('height')) + + @staticmethod + def _add_width_and_height(f, width, height): + for key, val in (('width', width), ('height', height)): + if not f.get(key): + f[key] = val + + def _extract_pscp_m3u8_formats(self, m3u8_url, video_id, format_id, state, width, height, fatal=True): + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', + entry_protocol='m3u8_native' + if state in ('ended', 'timed_out') else 'm3u8', + m3u8_id=format_id, fatal=fatal) + if len(m3u8_formats) == 1: + self._add_width_and_height(m3u8_formats[0], width, height) + return m3u8_formats + class PeriscopeIE(PeriscopeBaseIE): IE_DESC = 'Periscope' IE_NAME = 'periscope' _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)' - # Alive example URLs can be found here http://onperiscope.com/ + # Alive example URLs can be found here https://www.periscope.tv/ _TESTS = [{ 'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==', 'md5': '65b57957972e503fcbbaeed8f4fa04ca', @@ -61,21 +103,9 @@ class PeriscopeIE(PeriscopeBaseIE): 'accessVideoPublic', {'broadcast_id': token}, token) broadcast = stream['broadcast'] - title = broadcast['status'] + info = self._parse_broadcast_data(broadcast, token) - uploader = broadcast.get('user_display_name') or broadcast.get('username') - uploader_id = (broadcast.get('user_id') or broadcast.get('username')) - - title = '%s - %s' % (uploader, title) if uploader else title state = broadcast.get('state').lower() - if state == 'running': - title = self._live_title(title) - timestamp = parse_iso8601(broadcast.get('created_at')) - - thumbnails = [{ - 'url': broadcast[image], - } for image in ('image_url', 'image_url_small') if broadcast.get(image)] - width = int_or_none(broadcast.get('width')) height = int_or_none(broadcast.get('height')) @@ -92,32 +122,20 @@ class PeriscopeIE(PeriscopeBaseIE): continue video_urls.add(video_url) 
if format_id != 'rtmp': - m3u8_formats = self._extract_m3u8_formats( - video_url, token, 'mp4', - entry_protocol='m3u8_native' - if state in ('ended', 'timed_out') else 'm3u8', - m3u8_id=format_id, fatal=False) - if len(m3u8_formats) == 1: - add_width_and_height(m3u8_formats[0]) + m3u8_formats = self._extract_pscp_m3u8_formats( + video_url, token, format_id, state, width, height, False) formats.extend(m3u8_formats) continue rtmp_format = { 'url': video_url, 'ext': 'flv' if format_id == 'rtmp' else 'mp4', } - add_width_and_height(rtmp_format) + self._add_width_and_height(rtmp_format) formats.append(rtmp_format) self._sort_formats(formats) - return { - 'id': broadcast.get('id') or token, - 'title': title, - 'timestamp': timestamp, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'thumbnails': thumbnails, - 'formats': formats, - } + info['formats'] = formats + return info class PeriscopeUserIE(PeriscopeBaseIE): diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index cebb6238c..5f8d90fb4 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -4,32 +4,67 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_HTTPError, + compat_parse_qs, + compat_urllib_parse_unquote, + compat_urllib_parse_urlparse, +) from ..utils import ( - determine_ext, dict_get, ExtractorError, float_or_none, int_or_none, - remove_end, try_get, + strip_or_none, + unified_timestamp, + update_url_query, xpath_text, ) -from .periscope import PeriscopeIE +from .periscope import ( + PeriscopeBaseIE, + PeriscopeIE, +) class TwitterBaseIE(InfoExtractor): + _API_BASE = 'https://api.twitter.com/1.1/' + _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?twitter\.com/' + _GUEST_TOKEN = None + + def _extract_variant_formats(self, variant, video_id): + variant_url = variant.get('url') + if not variant_url: + return [] + elif '.m3u8' in 
variant_url: + return self._extract_m3u8_formats( + variant_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False) + else: + tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None + f = { + 'url': variant_url, + 'format_id': 'http' + ('-%d' % tbr if tbr else ''), + 'tbr': tbr, + } + self._search_dimensions_in_video_url(f, variant_url) + return [f] + def _extract_formats_from_vmap_url(self, vmap_url, video_id): vmap_data = self._download_xml(vmap_url, video_id) - video_url = xpath_text(vmap_data, './/MediaFile').strip() - if determine_ext(video_url) == 'm3u8': - return self._extract_m3u8_formats( - video_url, video_id, ext='mp4', m3u8_id='hls', - entry_protocol='m3u8_native') - return [{ - 'url': video_url, - }] + formats = [] + urls = [] + for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'): + video_variant.attrib['url'] = compat_urllib_parse_unquote( + video_variant.attrib['url']) + urls.append(video_variant.attrib['url']) + formats.extend(self._extract_variant_formats( + video_variant.attrib, video_id)) + video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile')) + if video_url not in urls: + formats.extend(self._extract_variant_formats({'url': video_url}, video_id)) + return formats @staticmethod def _search_dimensions_in_video_url(a_format, video_url): @@ -40,10 +75,30 @@ class TwitterBaseIE(InfoExtractor): 'height': int(m.group('height')), }) + def _call_api(self, path, video_id, query={}): + headers = { + 'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw', + } + if not self._GUEST_TOKEN: + self._GUEST_TOKEN = self._download_json( + self._API_BASE + 'guest/activate.json', video_id, + 'Downloading guest token', data=b'', + headers=headers)['guest_token'] + headers['x-guest-token'] = self._GUEST_TOKEN + try: + return self._download_json( + self._API_BASE + path, video_id, 
headers=headers, query=query) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + raise ExtractorError(self._parse_json( + e.cause.read().decode(), + video_id)['errors'][0]['message'], expected=True) + raise -class TwitterCardIE(TwitterBaseIE): + +class TwitterCardIE(InfoExtractor): IE_NAME = 'twitter:card' - _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?P<path>cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)' + _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)' _TESTS = [ { 'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889', @@ -51,19 +106,28 @@ class TwitterCardIE(TwitterBaseIE): 'info_dict': { 'id': '560070183650213889', 'ext': 'mp4', - 'title': 'Twitter web player', - 'thumbnail': r're:^https?://.*\.jpg$', + 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.", + 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96', + 'uploader': 'Twitter', + 'uploader_id': 'Twitter', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 30.033, + 'timestamp': 1422366112, + 'upload_date': '20150127', }, }, { 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768', - 'md5': '7ee2a553b63d1bccba97fbed97d9e1c8', + 'md5': '7137eca597f72b9abbe61e5ae0161399', 'info_dict': { 'id': '623160978427936768', 'ext': 'mp4', - 'title': 'Twitter web player', - 'thumbnail': r're:^https?://.*$', + 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.", + 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. 
https://t.co/BJYgOjSeGA", + 'uploader': 'NASA', + 'uploader_id': 'NASA', + 'timestamp': 1437408129, + 'upload_date': '20150720', }, }, { @@ -75,7 +139,7 @@ class TwitterCardIE(TwitterBaseIE): 'title': 'Ubuntu 11.10 Overview', 'description': 'md5:a831e97fa384863d6e26ce48d1c43376', 'upload_date': '20111013', - 'uploader': 'OMG! Ubuntu!', + 'uploader': 'OMG! UBUNTU!', 'uploader_id': 'omgubuntu', }, 'add_ie': ['Youtube'], @@ -99,190 +163,30 @@ class TwitterCardIE(TwitterBaseIE): 'info_dict': { 'id': '705235433198714880', 'ext': 'mp4', - 'title': 'Twitter web player', - 'thumbnail': r're:^https?://.*', + 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.", + 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns", + 'uploader': 'Brent Yarina', + 'uploader_id': 'BTNBrentYarina', + 'timestamp': 1456976204, + 'upload_date': '20160303', }, + 'skip': 'This content is no longer available.', }, { 'url': 'https://twitter.com/i/videos/752274308186120192', 'only_matching': True, }, ] - _API_BASE = 'https://api.twitter.com/1.1' - - def _parse_media_info(self, media_info, video_id): - formats = [] - for media_variant in media_info.get('variants', []): - media_url = media_variant['url'] - if media_url.endswith('.m3u8'): - formats.extend(self._extract_m3u8_formats(media_url, video_id, ext='mp4', m3u8_id='hls')) - elif media_url.endswith('.mpd'): - formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash')) - else: - tbr = int_or_none(dict_get(media_variant, ('bitRate', 'bitrate')), scale=1000) - a_format = { - 'url': media_url, - 'format_id': 'http-%d' % tbr if tbr else 'http', - 'tbr': tbr, - } - # Reported bitRate may be zero - if not a_format['tbr']: - del a_format['tbr'] - - self._search_dimensions_in_video_url(a_format, media_url) - - formats.append(a_format) - return formats - - def _extract_mobile_formats(self, username, video_id): - 
webpage = self._download_webpage( - 'https://mobile.twitter.com/%s/status/%s' % (username, video_id), - video_id, 'Downloading mobile webpage', - headers={ - # A recent mobile UA is necessary for `gt` cookie - 'User-Agent': 'Mozilla/5.0 (Android 6.0.1; Mobile; rv:54.0) Gecko/54.0 Firefox/54.0', - }) - main_script_url = self._html_search_regex( - r'<script[^>]+src="([^"]+main\.[^"]+)"', webpage, 'main script URL') - main_script = self._download_webpage( - main_script_url, video_id, 'Downloading main script') - bearer_token = self._search_regex( - r'BEARER_TOKEN\s*:\s*"([^"]+)"', - main_script, 'bearer token') - # https://developer.twitter.com/en/docs/tweets/post-and-engage/api-reference/get-statuses-show-id - api_data = self._download_json( - '%s/statuses/show/%s.json' % (self._API_BASE, video_id), - video_id, 'Downloading API data', - headers={ - 'Authorization': 'Bearer ' + bearer_token, - }) - media_info = try_get(api_data, lambda o: o['extended_entities']['media'][0]['video_info']) or {} - return self._parse_media_info(media_info, video_id) - def _real_extract(self, url): - path, video_id = re.search(self._VALID_URL, url).groups() - - config = None - formats = [] - duration = None - - urls = [url] - if path.startswith('cards/'): - urls.append('https://twitter.com/i/videos/' + video_id) - - for u in urls: - webpage = self._download_webpage( - u, video_id, headers={'Referer': 'https://twitter.com/'}) - - iframe_url = self._html_search_regex( - r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"', - webpage, 'video iframe', default=None) - if iframe_url: - return self.url_result(iframe_url) - - config = self._parse_json(self._html_search_regex( - r'data-(?:player-)?config="([^"]+)"', webpage, - 'data player config', default='{}'), - video_id) - - if config.get('source_type') == 'vine': - return self.url_result(config['player_url'], 'Vine') - - periscope_url = PeriscopeIE._extract_url(webpage) - if periscope_url: - 
return self.url_result(periscope_url, PeriscopeIE.ie_key()) - - video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source') - - if video_url: - if determine_ext(video_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls')) - else: - f = { - 'url': video_url, - } - - self._search_dimensions_in_video_url(f, video_url) - - formats.append(f) - - vmap_url = config.get('vmapUrl') or config.get('vmap_url') - if vmap_url: - formats.extend( - self._extract_formats_from_vmap_url(vmap_url, video_id)) - - media_info = None - - for entity in config.get('status', {}).get('entities', []): - if 'mediaInfo' in entity: - media_info = entity['mediaInfo'] - - if media_info: - formats.extend(self._parse_media_info(media_info, video_id)) - duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9) - - username = config.get('user', {}).get('screen_name') - if username: - formats.extend(self._extract_mobile_formats(username, video_id)) - - if formats: - title = self._search_regex(r'<title>([^<]+)', webpage, 'title') - thumbnail = config.get('posterImageUrl') or config.get('image_src') - duration = float_or_none(config.get('duration'), scale=1000) or duration - break - - if not formats: - headers = { - 'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw', - 'Referer': url, - } - ct0 = self._get_cookies(url).get('ct0') - if ct0: - headers['csrf_token'] = ct0.value - guest_token = self._download_json( - '%s/guest/activate.json' % self._API_BASE, video_id, - 'Downloading guest token', data=b'', - headers=headers)['guest_token'] - headers['x-guest-token'] = guest_token - self._set_cookie('api.twitter.com', 'gt', guest_token) - config = self._download_json( - '%s/videos/tweet/config/%s.json' % (self._API_BASE, video_id), - video_id, headers=headers) - track = config['track'] - vmap_url = track.get('vmapUrl') - 
if vmap_url: - formats = self._extract_formats_from_vmap_url(vmap_url, video_id) - else: - playback_url = track['playbackUrl'] - if determine_ext(playback_url) == 'm3u8': - formats = self._extract_m3u8_formats( - playback_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls') - else: - formats = [{ - 'url': playback_url, - }] - title = 'Twitter web player' - thumbnail = config.get('posterImage') - duration = float_or_none(track.get('durationMs'), scale=1000) - - self._remove_duplicate_formats(formats) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - } + status_id = self._match_id(url) + return self.url_result( + 'https://twitter.com/statuses/' + status_id, + TwitterIE.ie_key(), status_id) -class TwitterIE(InfoExtractor): +class TwitterIE(TwitterBaseIE): IE_NAME = 'twitter' - _VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?:i/web|(?P[^/]+))/status/(?P\d+)' - _TEMPLATE_URL = 'https://twitter.com/%s/status/%s' - _TEMPLATE_STATUSES_URL = 'https://twitter.com/statuses/%s' + _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P\d+)' _TESTS = [{ 'url': 'https://twitter.com/freethenipple/status/643211948184596480', @@ -291,10 +195,13 @@ class TwitterIE(InfoExtractor): 'ext': 'mp4', 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!', 'thumbnail': r're:^https?://.*\.jpg', - 'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"', + 'description': 'FTN supporters on Hollywood Blvd today! 
http://t.co/c7jHH749xJ', 'uploader': 'FREE THE NIPPLE', 'uploader_id': 'freethenipple', 'duration': 12.922, + 'timestamp': 1442188653, + 'upload_date': '20150913', + 'age_limit': 18, }, }, { 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1', @@ -316,19 +223,23 @@ class TwitterIE(InfoExtractor): 'id': '665052190608723968', 'ext': 'mp4', 'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.', - 'description': 'Star Wars on Twitter: "A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens."', + 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ', 'uploader_id': 'starwars', 'uploader': 'Star Wars', + 'timestamp': 1447395772, + 'upload_date': '20151113', }, }, { 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880', 'info_dict': { 'id': '705235433198714880', 'ext': 'mp4', - 'title': 'Brent Yarina - Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight.', - 'description': 'Brent Yarina on Twitter: "Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight."', + 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.", + 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. 
https://t.co/OrxcJ28Bns", 'uploader_id': 'BTNBrentYarina', 'uploader': 'Brent Yarina', + 'timestamp': 1456976204, + 'upload_date': '20160303', }, 'params': { # The same video as https://twitter.com/i/videos/tweet/705235433198714880 @@ -340,12 +251,14 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': '700207533655363584', 'ext': 'mp4', - 'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel', - 'description': 'JG on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', + 'title': 'Simon Vertugo - BEAT PROD: @suhmeduh #Damndaniel', + 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ', 'thumbnail': r're:^https?://.*\.jpg', - 'uploader': 'JG', - 'uploader_id': 'jaydingeer', + 'uploader': 'Simon Vertugo', + 'uploader_id': 'simonvertugo', 'duration': 30.0, + 'timestamp': 1455777459, + 'upload_date': '20160218', }, }, { 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609', @@ -353,10 +266,9 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': 'MIOxnrUteUd', 'ext': 'mp4', - 'title': 'Vince Mancini - Vine of the day', - 'description': 'Vince Mancini on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"', - 'uploader': 'Vince Mancini', - 'uploader_id': 'Filmdrunk', + 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン', + 'uploader': 'TAKUMA', + 'uploader_id': '1004126642786242560', 'timestamp': 1402826626, 'upload_date': '20140615', }, @@ -367,21 +279,22 @@ class TwitterIE(InfoExtractor): 'id': '719944021058060289', 'ext': 'mp4', 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.', - 'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"', - 'uploader_id': 'captainamerica', + 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. 
https://t.co/GpgYi9xMJI', + 'uploader_id': 'CaptainAmerica', 'uploader': 'Captain America', 'duration': 3.17, + 'timestamp': 1460483005, + 'upload_date': '20160412', }, }, { 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384', 'info_dict': { 'id': '1zqKVVlkqLaKB', 'ext': 'mp4', - 'title': 'Sgt Kerry Schmidt - LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence', - 'description': 'Sgt Kerry Schmidt on Twitter: "LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence https://t.co/EKrVgIXF3s"', + 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence', 'upload_date': '20160923', - 'uploader_id': 'OPP_HSD', - 'uploader': 'Sgt Kerry Schmidt', + 'uploader_id': '1PmKqpJdOJQoY', + 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police', 'timestamp': 1474613214, }, 'add_ie': ['Periscope'], @@ -392,10 +305,12 @@ class TwitterIE(InfoExtractor): 'id': '852138619213144067', 'ext': 'mp4', 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة', - 'description': 'عالم الأخبار on Twitter: "كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN"', + 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN', 'uploader': 'عالم الأخبار', 'uploader_id': 'news_al3alm', 'duration': 277.4, + 'timestamp': 1492000653, + 'upload_date': '20170412', }, }, { 'url': 'https://twitter.com/i/web/status/910031516746514432', @@ -404,10 +319,12 @@ class TwitterIE(InfoExtractor): 'ext': 'mp4', 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. 
Réfugiez-vous dans la pièce la + sûre.', 'thumbnail': r're:^https?://.*\.jpg', - 'description': 'Préfet de Guadeloupe on Twitter: "[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo"', + 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo', 'uploader': 'Préfet de Guadeloupe', 'uploader_id': 'Prefet971', 'duration': 47.48, + 'timestamp': 1505803395, + 'upload_date': '20170919', }, 'params': { 'skip_download': True, # requires ffmpeg @@ -420,10 +337,12 @@ class TwitterIE(InfoExtractor): 'ext': 'mp4', 'title': 're:.*?Shep is on a roll today.*?', 'thumbnail': r're:^https?://.*\.jpg', - 'description': 'md5:63b036c228772523ae1924d5f8e5ed6b', + 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09', 'uploader': 'Lis Power', 'uploader_id': 'LisPower1', 'duration': 111.278, + 'timestamp': 1527623489, + 'upload_date': '20180529', }, 'params': { 'skip_download': True, # requires ffmpeg @@ -435,88 +354,163 @@ class TwitterIE(InfoExtractor): 'ext': 'mp4', 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. 
Let us know your thoughts!', 'thumbnail': r're:^https?://.*\.jpg', - 'description': 'md5:66d493500c013e3e2d434195746a7f78', + 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976', 'uploader': 'Twitter', 'uploader_id': 'Twitter', 'duration': 61.567, + 'timestamp': 1548184644, + 'upload_date': '20190122', }, + }, { + # not available in Periscope + 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656', + 'info_dict': { + 'id': '1vOGwqejwoWxB', + 'ext': 'mp4', + 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019', + 'uploader': 'Vivi', + 'uploader_id': '1eVjYOLGkGrQL', + }, + 'add_ie': ['TwitterBroadcast'], + }, { + # Twitch Clip Embed + 'url': 'https://twitter.com/GunB1g/status/1163218564784017422', + 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - twid = mobj.group('id') - - webpage, urlh = self._download_webpage_handle( - self._TEMPLATE_STATUSES_URL % twid, twid) - - if 'twitter.com/account/suspended' in urlh.geturl(): - raise ExtractorError('Account suspended by Twitter.', expected=True) - - user_id = None - - redirect_mobj = re.match(self._VALID_URL, urlh.geturl()) - if redirect_mobj: - user_id = redirect_mobj.group('user_id') - - if not user_id: - user_id = mobj.group('user_id') - - username = remove_end(self._og_search_title(webpage), ' on Twitter') - - title = description = self._og_search_description(webpage).strip('').replace('\n', ' ').strip('“”') + twid = self._match_id(url) + status = self._call_api( + 'statuses/show/%s.json' % twid, twid, { + 'cards_platform': 'Web-12', + 'include_cards': 1, + 'include_reply_count': 1, + 'include_user_entities': 0, + 'tweet_mode': 'extended', + }) + title = description = status['full_text'].replace('\n', ' ') # strip 'https -_t.co_BJYgOjSeGA' junk from filenames title = re.sub(r'\s+(https?://[^ ]+)', '', title) + user = status.get('user') or {} + uploader = user.get('name') + if uploader: + 
title = '%s - %s' % (uploader, title) + uploader_id = user.get('screen_name') + + tags = [] + for hashtag in (try_get(status, lambda x: x['entities']['hashtags'], list) or []): + hashtag_text = hashtag.get('text') + if not hashtag_text: + continue + tags.append(hashtag_text) info = { - 'uploader_id': user_id, - 'uploader': username, - 'webpage_url': url, - 'description': '%s on Twitter: "%s"' % (username, description), - 'title': username + ' - ' + title, + 'id': twid, + 'title': title, + 'description': description, + 'uploader': uploader, + 'timestamp': unified_timestamp(status.get('created_at')), + 'uploader_id': uploader_id, + 'uploader_url': 'https://twitter.com/' + uploader_id if uploader_id else None, + 'like_count': int_or_none(status.get('favorite_count')), + 'repost_count': int_or_none(status.get('retweet_count')), + 'comment_count': int_or_none(status.get('reply_count')), + 'age_limit': 18 if status.get('possibly_sensitive') else 0, + 'tags': tags, } - mobj = re.search(r'''(?x) - ]+class="animated-gif"(?P[^>]+)>\s* - ]+video-src="(?P[^"]+)" - ''', webpage) + media = try_get(status, lambda x: x['extended_entities']['media'][0]) + if media and media.get('type') != 'photo': + video_info = media.get('video_info') or {} + + formats = [] + for variant in video_info.get('variants', []): + formats.extend(self._extract_variant_formats(variant, twid)) + self._sort_formats(formats) + + thumbnails = [] + media_url = media.get('media_url_https') or media.get('media_url') + if media_url: + def add_thumbnail(name, size): + thumbnails.append({ + 'id': name, + 'url': update_url_query(media_url, {'name': name}), + 'width': int_or_none(size.get('w') or size.get('width')), + 'height': int_or_none(size.get('h') or size.get('height')), + }) + for name, size in media.get('sizes', {}).items(): + add_thumbnail(name, size) + add_thumbnail('orig', media.get('original_info') or {}) - if mobj: - more_info = mobj.group('more_info') - height = int_or_none(self._search_regex( - 
r'data-height="(\d+)"', more_info, 'height', fatal=False)) - width = int_or_none(self._search_regex( - r'data-width="(\d+)"', more_info, 'width', fatal=False)) - thumbnail = self._search_regex( - r'poster="([^"]+)"', more_info, 'poster', fatal=False) info.update({ - 'id': twid, - 'url': mobj.group('url'), - 'height': height, - 'width': width, - 'thumbnail': thumbnail, + 'formats': formats, + 'thumbnails': thumbnails, + 'duration': float_or_none(video_info.get('duration_millis'), 1000), }) - return info - - twitter_card_url = None - if 'class="PlayableMedia' in webpage: - twitter_card_url = '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid) else: - twitter_card_iframe_url = self._search_regex( - r'data-full-card-iframe-url=([\'"])(?P(?:(?!\1).)+)\1', - webpage, 'Twitter card iframe URL', default=None, group='url') - if twitter_card_iframe_url: - twitter_card_url = compat_urlparse.urljoin(url, twitter_card_iframe_url) + card = status.get('card') + if card: + binding_values = card['binding_values'] - if twitter_card_url: - info.update({ - '_type': 'url_transparent', - 'ie_key': 'TwitterCard', - 'url': twitter_card_url, - }) - return info + def get_binding_value(k): + o = binding_values.get(k) or {} + return try_get(o, lambda x: x[x['type'].lower() + '_value']) - raise ExtractorError('There\'s no video in this tweet.') + card_name = card['name'].split(':')[-1] + if card_name == 'amplify': + formats = self._extract_formats_from_vmap_url( + get_binding_value('amplify_url_vmap'), + get_binding_value('amplify_content_id') or twid) + self._sort_formats(formats) + + thumbnails = [] + for suffix in ('_small', '', '_large', '_x_large', '_original'): + image = get_binding_value('player_image' + suffix) or {} + image_url = image.get('url') + if not image_url or '/player-placeholder' in image_url: + continue + thumbnails.append({ + 'id': suffix[1:] if suffix else 'medium', + 'url': image_url, + 'width': int_or_none(image.get('width')), + 'height': 
int_or_none(image.get('height')), + }) + + info.update({ + 'formats': formats, + 'thumbnails': thumbnails, + 'duration': int_or_none(get_binding_value( + 'content_duration_seconds')), + }) + elif card_name == 'player': + info.update({ + '_type': 'url', + 'url': get_binding_value('player_url'), + }) + elif card_name == 'periscope_broadcast': + info.update({ + '_type': 'url', + 'url': get_binding_value('url') or get_binding_value('player_url'), + 'ie_key': PeriscopeIE.ie_key(), + }) + elif card_name == 'broadcast': + info.update({ + '_type': 'url', + 'url': get_binding_value('broadcast_url'), + 'ie_key': TwitterBroadcastIE.ie_key(), + }) + else: + raise ExtractorError('Unsupported Twitter Card.') + else: + expanded_url = try_get(status, lambda x: x['entities']['urls'][0]['expanded_url']) + if not expanded_url: + raise ExtractorError("There's no video in this tweet.") + info.update({ + '_type': 'url', + 'url': expanded_url, + }) + return info class TwitterAmplifyIE(TwitterBaseIE): @@ -573,3 +567,27 @@ class TwitterAmplifyIE(TwitterBaseIE): 'formats': formats, 'thumbnails': thumbnails, } + + +class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE): + IE_NAME = 'twitter:broadcast' + _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P[0-9a-zA-Z]{13})' + + def _real_extract(self, url): + broadcast_id = self._match_id(url) + broadcast = self._call_api( + 'broadcasts/show.json', broadcast_id, + {'ids': broadcast_id})['broadcasts'][broadcast_id] + info = self._parse_broadcast_data(broadcast, broadcast_id) + media_key = broadcast['media_key'] + source = self._call_api( + 'live_video_stream/status/' + media_key, media_key)['source'] + m3u8_url = source.get('noRedirectPlaybackUrl') or source['location'] + if '/live_video_stream/geoblocked/' in m3u8_url: + self.raise_geo_restricted() + m3u8_id = compat_parse_qs(compat_urllib_parse_urlparse( + m3u8_url).query).get('type', [None])[0] + state, width, height = self._extract_common_format_info(broadcast) + info['formats'] = 
self._extract_pscp_m3u8_formats( + m3u8_url, broadcast_id, m3u8_id, state, width, height) + return info From ce112a8c19ebcc9d401ff26a5cdcf58ba565901c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 9 Nov 2019 11:01:07 +0100 Subject: [PATCH 674/785] [twitch] fix video comments URL(#18593)(closes #15828) --- youtube_dl/extractor/twitch.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index a5681409c..8c0d70010 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -344,9 +344,8 @@ class TwitchVodIE(TwitchItemBaseIE): info['subtitles'] = { 'rechat': [{ 'url': update_url_query( - 'https://rechat.twitch.tv/rechat-messages', { - 'video_id': 'v%s' % item_id, - 'start': info['timestamp'], + 'https://api.twitch.tv/v5/videos/%s/comments' % item_id, { + 'client_id': self._CLIENT_ID, }), 'ext': 'json', }], From f81dd65ba2c1e7be549e5c8cfe6cbf0f0829edfe Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 9 Nov 2019 13:11:59 +0100 Subject: [PATCH 675/785] [extractor/common] clean jwplayer description HTML tags --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4a683f6d6..4c2f9303e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2689,7 +2689,7 @@ class InfoExtractor(object): entry = { 'id': this_video_id, 'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')), - 'description': video_data.get('description'), + 'description': clean_html(video_data.get('description')), 'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))), 'timestamp': int_or_none(video_data.get('pubdate')), 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), From 8fbf5d2f87fbfe0441bc20cf69d506109b2810bc Mon Sep 17 00:00:00 2001 From: Remita 
Amine Date: Sat, 9 Nov 2019 13:14:23 +0100 Subject: [PATCH 676/785] [seeker] remove Revision3 extractors and fix extraction --- youtube_dl/extractor/extractors.py | 4 - youtube_dl/extractor/revision3.py | 170 ----------------------------- youtube_dl/extractor/seeker.py | 45 ++++---- 3 files changed, 23 insertions(+), 196 deletions(-) delete mode 100644 youtube_dl/extractor/revision3.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 598006061..8df9d95b1 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -932,10 +932,6 @@ from .rentv import ( from .restudy import RestudyIE from .reuters import ReutersIE from .reverbnation import ReverbNationIE -from .revision3 import ( - Revision3EmbedIE, - Revision3IE, -) from .rice import RICEIE from .rmcdecouverte import RMCDecouverteIE from .ro220 import Ro220IE diff --git a/youtube_dl/extractor/revision3.py b/youtube_dl/extractor/revision3.py deleted file mode 100644 index 833d8a2f0..000000000 --- a/youtube_dl/extractor/revision3.py +++ /dev/null @@ -1,170 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - int_or_none, - parse_iso8601, - unescapeHTML, - qualities, -) - - -class Revision3EmbedIE(InfoExtractor): - IE_NAME = 'revision3:embed' - _VALID_URL = r'(?:revision3:(?:(?P[^:]+):)?|https?://(?:(?:(?:www|embed)\.)?(?:revision3|animalist)|(?:(?:api|embed)\.)?seekernetwork)\.com/player/embed\?videoId=)(?P\d+)' - _TEST = { - 'url': 'http://api.seekernetwork.com/player/embed?videoId=67558', - 'md5': '83bcd157cab89ad7318dd7b8c9cf1306', - 'info_dict': { - 'id': '67558', - 'ext': 'mp4', - 'title': 'The Pros & Cons Of Zoos', - 'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?', - 'uploader_id': 'dnews', - 'uploader': 'DNews', - } - } - _API_KEY = 
'ba9c741bce1b9d8e3defcc22193f3651b8867e62' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('playlist_id') - playlist_type = mobj.group('playlist_type') or 'video_id' - video_data = self._download_json( - 'http://revision3.com/api/getPlaylist.json', playlist_id, query={ - 'api_key': self._API_KEY, - 'codecs': 'h264,vp8,theora', - playlist_type: playlist_id, - })['items'][0] - - formats = [] - for vcodec, media in video_data['media'].items(): - for quality_id, quality in media.items(): - if quality_id == 'hls': - formats.extend(self._extract_m3u8_formats( - quality['url'], playlist_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False)) - else: - formats.append({ - 'url': quality['url'], - 'format_id': '%s-%s' % (vcodec, quality_id), - 'tbr': int_or_none(quality.get('bitrate')), - 'vcodec': vcodec, - }) - self._sort_formats(formats) - - return { - 'id': playlist_id, - 'title': unescapeHTML(video_data['title']), - 'description': unescapeHTML(video_data.get('summary')), - 'uploader': video_data.get('show', {}).get('name'), - 'uploader_id': video_data.get('show', {}).get('slug'), - 'duration': int_or_none(video_data.get('duration')), - 'formats': formats, - } - - -class Revision3IE(InfoExtractor): - IE_NAME = 'revision' - _VALID_URL = r'https?://(?:www\.)?(?P(?:revision3|animalist)\.com)/(?P[^/]+(?:/[^/?#]+)?)' - _TESTS = [{ - 'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016', - 'md5': 'd94a72d85d0a829766de4deb8daaf7df', - 'info_dict': { - 'id': '71089', - 'display_id': 'technobuffalo/5-google-predictions-for-2016', - 'ext': 'webm', - 'title': '5 Google Predictions for 2016', - 'description': 'Google had a great 2015, but it\'s already time to look ahead. 
Here are our five predictions for 2016.', - 'upload_date': '20151228', - 'timestamp': 1451325600, - 'duration': 187, - 'uploader': 'TechnoBuffalo', - 'uploader_id': 'technobuffalo', - } - }, { - # Show - 'url': 'http://revision3.com/variant', - 'only_matching': True, - }, { - # Tag - 'url': 'http://revision3.com/vr', - 'only_matching': True, - }] - _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s' - - def _real_extract(self, url): - domain, display_id = re.match(self._VALID_URL, url).groups() - site = domain.split('.')[0] - page_info = self._download_json( - self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id) - - page_data = page_info['data'] - page_type = page_data['type'] - if page_type in ('episode', 'embed'): - show_data = page_data['show']['data'] - page_id = compat_str(page_data['id']) - video_id = compat_str(page_data['video']['data']['id']) - - preference = qualities(['mini', 'small', 'medium', 'large']) - thumbnails = [{ - 'url': image_url, - 'id': image_id, - 'preference': preference(image_id) - } for image_id, image_url in page_data.get('images', {}).items()] - - info = { - 'id': page_id, - 'display_id': display_id, - 'title': unescapeHTML(page_data['name']), - 'description': unescapeHTML(page_data.get('summary')), - 'timestamp': parse_iso8601(page_data.get('publishTime'), ' '), - 'author': page_data.get('author'), - 'uploader': show_data.get('name'), - 'uploader_id': show_data.get('slug'), - 'thumbnails': thumbnails, - 'extractor_key': site, - } - - if page_type == 'embed': - info.update({ - '_type': 'url_transparent', - 'url': page_data['video']['data']['embed'], - }) - return info - - info.update({ - '_type': 'url_transparent', - 'url': 'revision3:%s' % video_id, - }) - return info - else: - list_data = page_info[page_type]['data'] - episodes_data = page_info['episodes']['data'] - num_episodes = page_info['meta']['totalEpisodes'] - processed_episodes = 0 - entries = [] - page_num = 1 - while True: - entries.extend([{ - 
'_type': 'url', - 'url': 'http://%s%s' % (domain, episode['path']), - 'id': compat_str(episode['id']), - 'ie_key': 'Revision3', - 'extractor_key': site, - } for episode in episodes_data]) - processed_episodes += len(episodes_data) - if processed_episodes == num_episodes: - break - page_num += 1 - episodes_data = self._download_json(self._PAGE_DATA_TEMPLATE % ( - domain, display_id + '/' + compat_str(page_num), domain), - display_id)['episodes']['data'] - - return self.playlist_result( - entries, compat_str(list_data['id']), - list_data.get('name'), list_data.get('summary')) diff --git a/youtube_dl/extractor/seeker.py b/youtube_dl/extractor/seeker.py index 3b9c65e7e..7872dc80d 100644 --- a/youtube_dl/extractor/seeker.py +++ b/youtube_dl/extractor/seeker.py @@ -4,34 +4,37 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ( + get_element_by_class, + strip_or_none, +) class SeekerIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P.*)-(?P\d+)\.html' _TESTS = [{ - # player.loadRevision3Item 'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html', - 'md5': '30c1dc4030cc715cf05b423d0947ac18', + 'md5': '897d44bbe0d8986a2ead96de565a92db', 'info_dict': { - 'id': '76243', - 'ext': 'webm', + 'id': 'Elrn3gnY', + 'ext': 'mp4', 'title': 'Should Trump Be Required To Release His Tax Returns?', - 'description': 'Donald Trump has been secretive about his "big," "beautiful" tax returns. 
So what can we learn if he decides to release them?', - 'uploader': 'Seeker Daily', - 'uploader_id': 'seekerdaily', + 'description': 'md5:41efa8cfa8d627841045eec7b018eb45', + 'timestamp': 1490090165, + 'upload_date': '20170321', } }, { 'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html', 'playlist': [ { - 'md5': '83bcd157cab89ad7318dd7b8c9cf1306', + 'md5': '0497b9f20495174be73ae136949707d2', 'info_dict': { - 'id': '67558', + 'id': 'FihYQ8AE', 'ext': 'mp4', 'title': 'The Pros & Cons Of Zoos', - 'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?', - 'uploader': 'DNews', - 'uploader_id': 'dnews', + 'description': 'md5:d88f99a8ea8e7d25e6ff77f271b1271c', + 'timestamp': 1490039133, + 'upload_date': '20170320', }, } ], @@ -45,13 +48,11 @@ class SeekerIE(InfoExtractor): def _real_extract(self, url): display_id, article_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage(url, display_id) - mobj = re.search(r"player\.loadRevision3Item\('([^']+)'\s*,\s*(\d+)\);", webpage) - if mobj: - playlist_type, playlist_id = mobj.groups() - return self.url_result( - 'revision3:%s:%s' % (playlist_type, playlist_id), 'Revision3Embed', playlist_id) - else: - entries = [self.url_result('revision3:video_id:%s' % video_id, 'Revision3Embed', video_id) for video_id in re.findall( - r']+src=[\'"](?:https?:)?//api\.seekernetwork\.com/player/embed\?videoId=(\d+)', webpage)] - return self.playlist_result( - entries, article_id, self._og_search_title(webpage), self._og_search_description(webpage)) + entries = [] + for jwp_id in re.findall(r'data-video-id="([a-zA-Z0-9]{8})"', webpage): + entries.append(self.url_result( + 'jwplatform:' + jwp_id, 'JWPlatform', jwp_id)) + return self.playlist_result( + entries, article_id, + self._og_search_title(webpage), + strip_or_none(get_element_by_class('subtitle__text', webpage)) or 
self._og_search_description(webpage)) From 20baa17c0180c7254644abea968792abcf0743cb Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 9 Nov 2019 16:00:12 +0100 Subject: [PATCH 677/785] [daisuki] remove extractor --- youtube_dl/extractor/daisuki.py | 154 ----------------------------- youtube_dl/extractor/extractors.py | 4 - 2 files changed, 158 deletions(-) delete mode 100644 youtube_dl/extractor/daisuki.py diff --git a/youtube_dl/extractor/daisuki.py b/youtube_dl/extractor/daisuki.py deleted file mode 100644 index dbc1aa5d4..000000000 --- a/youtube_dl/extractor/daisuki.py +++ /dev/null @@ -1,154 +0,0 @@ -from __future__ import unicode_literals - -import base64 -import json -import random -import re - -from .common import InfoExtractor -from ..aes import ( - aes_cbc_decrypt, - aes_cbc_encrypt, -) -from ..compat import compat_b64decode -from ..utils import ( - bytes_to_intlist, - bytes_to_long, - extract_attributes, - ExtractorError, - intlist_to_bytes, - js_to_json, - int_or_none, - long_to_bytes, - pkcs1pad, -) - - -class DaisukiMottoIE(InfoExtractor): - _VALID_URL = r'https?://motto\.daisuki\.net/framewatch/embed/[^/]+/(?P[0-9a-zA-Z]{3})' - - _TEST = { - 'url': 'http://motto.daisuki.net/framewatch/embed/embedDRAGONBALLSUPERUniverseSurvivalsaga/V2e/760/428', - 'info_dict': { - 'id': 'V2e', - 'ext': 'mp4', - 'title': '#117 SHOWDOWN OF LOVE! 
ANDROIDS VS UNIVERSE 2!!', - 'subtitles': { - 'mul': [{ - 'ext': 'ttml', - }], - }, - }, - 'params': { - 'skip_download': True, # AES-encrypted HLS stream - }, - } - - # The public key in PEM format can be found in clientlibs_anime_watch.min.js - _RSA_KEY = (0xc5524c25e8e14b366b3754940beeb6f96cb7e2feef0b932c7659a0c5c3bf173d602464c2df73d693b513ae06ff1be8f367529ab30bf969c5640522181f2a0c51ea546ae120d3d8d908595e4eff765b389cde080a1ef7f1bbfb07411cc568db73b7f521cedf270cbfbe0ddbc29b1ac9d0f2d8f4359098caffee6d07915020077d, 65537) - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - flashvars = self._parse_json(self._search_regex( - r'(?s)var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'), - video_id, transform_source=js_to_json) - - iv = [0] * 16 - - data = {} - for key in ('device_cd', 'mv_id', 'ss1_prm', 'ss2_prm', 'ss3_prm', 'ss_id'): - data[key] = flashvars.get(key, '') - - encrypted_rtn = None - - # Some AES keys are rejected. 
Try it with different AES keys - for idx in range(5): - aes_key = [random.randint(0, 254) for _ in range(32)] - padded_aeskey = intlist_to_bytes(pkcs1pad(aes_key, 128)) - - n, e = self._RSA_KEY - encrypted_aeskey = long_to_bytes(pow(bytes_to_long(padded_aeskey), e, n)) - init_data = self._download_json( - 'http://motto.daisuki.net/fastAPI/bgn/init/', - video_id, query={ - 's': flashvars.get('s', ''), - 'c': flashvars.get('ss3_prm', ''), - 'e': url, - 'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt( - bytes_to_intlist(json.dumps(data)), - aes_key, iv))).decode('ascii'), - 'a': base64.b64encode(encrypted_aeskey).decode('ascii'), - }, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else '')) - - if 'rtn' in init_data: - encrypted_rtn = init_data['rtn'] - break - - self._sleep(5, video_id) - - if encrypted_rtn is None: - raise ExtractorError('Failed to fetch init data') - - rtn = self._parse_json( - intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist( - compat_b64decode(encrypted_rtn)), - aes_key, iv)).decode('utf-8').rstrip('\0'), - video_id) - - title = rtn['title_str'] - - formats = self._extract_m3u8_formats( - rtn['play_url'], video_id, ext='mp4', entry_protocol='m3u8_native') - - subtitles = {} - caption_url = rtn.get('caption_url') - if caption_url: - # mul: multiple languages - subtitles['mul'] = [{ - 'url': caption_url, - 'ext': 'ttml', - }] - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'subtitles': subtitles, - } - - -class DaisukiMottoPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://motto\.daisuki\.net/(?Pinformation)/' - - _TEST = { - 'url': 'http://motto.daisuki.net/information/', - 'info_dict': { - 'title': 'DRAGON BALL SUPER', - }, - 'playlist_mincount': 117, - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) - - entries = [] - for li in re.findall(r'(]+?data-product_id="[a-zA-Z0-9]{3}"[^>]+>)', webpage): - attr = 
extract_attributes(li) - ad_id = attr.get('data-ad_id') - product_id = attr.get('data-product_id') - if ad_id and product_id: - episode_id = attr.get('data-chapter') - entries.append({ - '_type': 'url_transparent', - 'url': 'http://motto.daisuki.net/framewatch/embed/%s/%s/760/428' % (ad_id, product_id), - 'episode_id': episode_id, - 'episode_number': int_or_none(episode_id), - 'ie_key': 'DaisukiMotto', - }) - - return self.playlist_result(entries, playlist_title='DRAGON BALL SUPER') diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8df9d95b1..e2ebe8f95 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -254,10 +254,6 @@ from .dailymotion import ( DailymotionPlaylistIE, DailymotionUserIE, ) -from .daisuki import ( - DaisukiMottoIE, - DaisukiMottoPlaylistIE, -) from .daum import ( DaumIE, DaumClipIE, From 88b87b08b1ed06940053ee018547de051bf8d986 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 9 Nov 2019 17:01:21 +0100 Subject: [PATCH 678/785] [minhateca] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/minhateca.py | 70 ------------------------------ 2 files changed, 71 deletions(-) delete mode 100644 youtube_dl/extractor/minhateca.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e2ebe8f95..dfd0ef198 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -625,7 +625,6 @@ from .microsoftvirtualacademy import ( MicrosoftVirtualAcademyIE, MicrosoftVirtualAcademyCourseIE, ) -from .minhateca import MinhatecaIE from .ministrygrid import MinistryGridIE from .minoto import MinotoIE from .miomio import MioMioIE diff --git a/youtube_dl/extractor/minhateca.py b/youtube_dl/extractor/minhateca.py deleted file mode 100644 index dccc54249..000000000 --- a/youtube_dl/extractor/minhateca.py +++ /dev/null @@ -1,70 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - 
-from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, - parse_filesize, - sanitized_Request, - urlencode_postdata, -) - - -class MinhatecaIE(InfoExtractor): - _VALID_URL = r'https?://minhateca\.com\.br/[^?#]+,(?P[0-9]+)\.' - _TEST = { - 'url': 'http://minhateca.com.br/pereba/misc/youtube-dl+test+video,125848331.mp4(video)', - 'info_dict': { - 'id': '125848331', - 'ext': 'mp4', - 'title': 'youtube-dl test video', - 'thumbnail': r're:^https?://.*\.jpg$', - 'filesize_approx': 1530000, - 'duration': 9, - 'view_count': int, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - token = self._html_search_regex( - r'(.*?)', webpage, 'title') - title, _, ext = title_str.rpartition('.') - filesize_approx = parse_filesize(self._html_search_regex( - r'

    (.*?)

    ', - webpage, 'file size approximation', fatal=False)) - duration = parse_duration(self._html_search_regex( - r'(?s)

    .*?class="bold">(.*?)<', - webpage, 'duration', fatal=False)) - view_count = int_or_none(self._html_search_regex( - r'

    ([0-9]+)

    ', - webpage, 'view count', fatal=False)) - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'ext': ext, - 'filesize_approx': filesize_approx, - 'duration': duration, - 'view_count': view_count, - 'thumbnail': self._og_search_thumbnail(webpage), - } From 9e46d1f8aadd38f6de7c2b921b294e67ed2267eb Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 9 Nov 2019 17:15:15 +0100 Subject: [PATCH 679/785] [addanime] remove extractor --- youtube_dl/extractor/addanime.py | 95 ------------------------------ youtube_dl/extractor/extractors.py | 1 - 2 files changed, 96 deletions(-) delete mode 100644 youtube_dl/extractor/addanime.py diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py deleted file mode 100644 index 5e7c0724e..000000000 --- a/youtube_dl/extractor/addanime.py +++ /dev/null @@ -1,95 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_str, - compat_urllib_parse_urlencode, - compat_urllib_parse_urlparse, -) -from ..utils import ( - ExtractorError, - qualities, -) - - -class AddAnimeIE(InfoExtractor): - _VALID_URL = r'https?://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P[\w_]+)' - _TESTS = [{ - 'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', - 'md5': '72954ea10bc979ab5e2eb288b21425a0', - 'info_dict': { - 'id': '24MR3YO5SAS9', - 'ext': 'mp4', - 'description': 'One Piece 606', - 'title': 'One Piece 606', - }, - 'skip': 'Video is gone', - }, { - 'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - try: - webpage = self._download_webpage(url, video_id) - except ExtractorError as ee: - if not isinstance(ee.cause, compat_HTTPError) or \ - ee.cause.code != 503: - raise - - redir_webpage = ee.cause.read().decode('utf-8') - action = self._search_regex( - r'
    ', - redir_webpage, 'redirect vc value') - av = re.search( - r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);', - redir_webpage) - if av is None: - raise ExtractorError('Cannot find redirect math task') - av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3)) - - parsed_url = compat_urllib_parse_urlparse(url) - av_val = av_res + len(parsed_url.netloc) - confirm_url = ( - parsed_url.scheme + '://' + parsed_url.netloc - + action + '?' - + compat_urllib_parse_urlencode({ - 'jschl_vc': vc, 'jschl_answer': compat_str(av_val)})) - self._download_webpage( - confirm_url, video_id, - note='Confirming after redirect') - webpage = self._download_webpage(url, video_id) - - FORMATS = ('normal', 'hq') - quality = qualities(FORMATS) - formats = [] - for format_id in FORMATS: - rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id) - video_url = self._search_regex(rex, webpage, 'video file URLx', - fatal=False) - if not video_url: - continue - formats.append({ - 'format_id': format_id, - 'url': video_url, - 'quality': quality(format_id), - }) - self._sort_formats(formats) - video_title = self._og_search_title(webpage) - video_description = self._og_search_description(webpage) - - return { - '_type': 'video', - 'id': video_id, - 'formats': formats, - 'title': video_title, - 'description': video_description - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dfd0ef198..d96f0d284 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -18,7 +18,6 @@ from .acast import ( ACastIE, ACastChannelIE, ) -from .addanime import AddAnimeIE from .adn import ADNIE from .adobeconnect import AdobeConnectIE from .adobetv import ( From 433e0710585e2414697cff6d444204e1db950bd7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 10 Nov 2019 17:02:47 +0100 Subject: [PATCH 680/785] [facebook] fix posts video data extraction(closes #22473) --- youtube_dl/extractor/facebook.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index c723726b7..ce64e2683 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -334,7 +334,7 @@ class FacebookIE(InfoExtractor): if not video_data: server_js_data = self._parse_json( self._search_regex( - r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)', + r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_\d+)', webpage, 'js data', default='{}'), video_id, transform_source=js_to_json, fatal=False) video_data = extract_from_jsmods_instances(server_js_data) From 2e9ad59a4d6dfd82b34a965cfc5b8c5a647d1598 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 11 Nov 2019 09:53:04 +0100 Subject: [PATCH 681/785] [soundcloud] check if the soundtrack has downloads left(closes #23045) --- youtube_dl/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 875b9d887..e8ffb2cbe 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -276,7 +276,7 @@ class SoundcloudIE(InfoExtractor): if secret_token: query['secret_token'] = secret_token - if info.get('downloadable'): + if info.get('downloadable') and info.get('has_downloads_left'): format_url = update_url_query( info.get('download_url') or track_base_url + '/download', query) format_urls.add(format_url) From 48970d5cc8838ac404a64462d175b248401e2bd2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 12 Nov 2019 10:51:54 +0100 Subject: [PATCH 682/785] [teamcoco] add support for new videos(closes #23054) --- youtube_dl/extractor/teamcoco.py | 68 +++++++++++++++++--------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git 
a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 7640cf00a..5793b711f 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -84,6 +84,19 @@ class TeamcocoIE(TurnerBaseIE): 'only_matching': True, } ] + _RECORD_TEMPL = '''id + title + teaser + publishOn + thumb { + preview + } + tags { + name + } + duration + turnerMediaId + turnerMediaAuthToken''' def _graphql_call(self, query_template, object_type, object_id): find_object = 'find' + object_type @@ -98,36 +111,36 @@ class TeamcocoIE(TurnerBaseIE): display_id = self._match_id(url) response = self._graphql_call('''{ - %s(slug: "%s") { + %%s(slug: "%%s") { ... on RecordSlug { record { + %s + } + } + ... on PageSlug { + child { id - title - teaser - publishOn - thumb { - preview - } - file { - url - } - tags { - name - } - duration - turnerMediaId - turnerMediaAuthToken } } ... on NotFoundSlug { status } } -}''', 'Slug', display_id) +}''' % self._RECORD_TEMPL, 'Slug', display_id) if response.get('status'): raise ExtractorError('This video is no longer available.', expected=True) - record = response['record'] + child = response.get('child') + if child: + record = self._graphql_call('''{ + %%s(id: "%%s") { + ... 
on Video { + %s + } + } +}''' % self._RECORD_TEMPL, 'Record', child['id']) + else: + record = response['record'] video_id = record['id'] info = { @@ -150,25 +163,21 @@ class TeamcocoIE(TurnerBaseIE): 'accessTokenType': 'jws', })) else: - d = self._download_json( + video_sources = self._download_json( 'https://teamcoco.com/_truman/d/' + video_id, - video_id, fatal=False) or {} - video_sources = d.get('meta') or {} - if not video_sources: - video_sources = self._graphql_call('''{ - %s(id: "%s") { - src - } -}''', 'RecordVideoSource', video_id) or {} + video_id)['meta']['src'] + if isinstance(video_sources, dict): + video_sources = video_sources.values() formats = [] get_quality = qualities(['low', 'sd', 'hd', 'uhd']) - for format_id, src in video_sources.get('src', {}).items(): + for src in video_sources: if not isinstance(src, dict): continue src_url = src.get('src') if not src_url: continue + format_id = src.get('label') ext = determine_ext(src_url, mimetype2ext(src.get('type'))) if format_id == 'hls' or ext == 'm3u8': # compat_urllib_parse.urljoin does not work here @@ -190,9 +199,6 @@ class TeamcocoIE(TurnerBaseIE): 'format_id': format_id, 'quality': get_quality(format_id), }) - if not formats: - formats = self._extract_m3u8_formats( - record['file']['url'], video_id, 'mp4', fatal=False) self._sort_formats(formats) info['formats'] = formats From eb22d1b55744b69d5ec3556529868acfba6c217f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 13 Nov 2019 19:09:32 +0100 Subject: [PATCH 683/785] [nexx] Add support for Multi Player JS Setup(closes #23052) --- youtube_dl/extractor/nexx.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py index f9aad83c4..586c1b7eb 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -108,7 +108,7 @@ class NexxIE(InfoExtractor): @staticmethod def _extract_domain_id(webpage): mobj = re.search( - 
r']+\bsrc=["\'](?:https?:)?//require\.nexx(?:\.cloud|cdn\.com)/(?P\d+)', + r']+\bsrc=["\'](?:https?:)?//(?:require|arc)\.nexx(?:\.cloud|cdn\.com)/(?:sdk/)?(?P\d+)', webpage) return mobj.group('id') if mobj else None @@ -123,7 +123,7 @@ class NexxIE(InfoExtractor): domain_id = NexxIE._extract_domain_id(webpage) if domain_id: for video_id in re.findall( - r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)', + r'(?is)onPLAYReady.+?_play\.(?:init|(?:control\.)?addPlayer)\s*\(.+?\s*,\s*["\']?(\d+)', webpage): entries.append( 'https://api.nexx.cloud/v3/%s/videos/byid/%s' @@ -410,8 +410,8 @@ class NexxIE(InfoExtractor): class NexxEmbedIE(InfoExtractor): - _VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?P[^/?#&]+)' - _TEST = { + _VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:video/)?(?P[^/?#&]+)' + _TESTS = [{ 'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1', 'md5': '16746bfc28c42049492385c989b26c4a', 'info_dict': { @@ -420,7 +420,6 @@ class NexxEmbedIE(InfoExtractor): 'title': 'Nervenkitzel Achterbahn', 'alt_title': 'Karussellbauer in Deutschland', 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc', - 'release_year': 2005, 'creator': 'SPIEGEL TV', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 2761, @@ -431,7 +430,10 @@ class NexxEmbedIE(InfoExtractor): 'format': 'bestvideo', 'skip_download': True, }, - } + }, { + 'url': 'https://embed.nexx.cloud/11888/video/DSRTO7UVOX06S7', + 'only_matching': True, + }] @staticmethod def _extract_urls(webpage): From 5709d661a2509fab0c9f3412239ecbe7a621f45b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Nov 2019 01:45:04 +0700 Subject: [PATCH 684/785] [drtv] Add support for new URL schema (closes #23059) --- youtube_dl/extractor/drtv.py | 57 ++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 218f10209..390e79f8c 100644 --- 
a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -17,6 +17,7 @@ from ..utils import ( float_or_none, mimetype2ext, str_or_none, + try_get, unified_timestamp, update_url_query, url_or_none, @@ -24,7 +25,14 @@ from ..utils import ( class DRTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*| + (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode)/ + ) + (?P[\da-z_-]+) + ''' _GEO_BYPASS = False _GEO_COUNTRIES = ['DK'] IE_NAME = 'drtv' @@ -83,6 +91,26 @@ class DRTVIE(InfoExtractor): }, { 'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9', 'only_matching': True, + }, { + 'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769', + 'info_dict': { + 'id': '00951930010', + 'ext': 'mp4', + 'title': 'Bonderøven (1:8)', + 'description': 'md5:3cf18fc0d3b205745d4505f896af8121', + 'timestamp': 1546542000, + 'upload_date': '20190103', + 'duration': 2576.6, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.dr.dk/drtv/episode/bonderoeven_71769', + 'only_matching': True, + }, { + 'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769', + 'only_matching': True, }] def _real_extract(self, url): @@ -100,13 +128,32 @@ class DRTVIE(InfoExtractor): webpage, 'video id', default=None) if not video_id: - video_id = compat_urllib_parse_unquote(self._search_regex( + video_id = self._search_regex( r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)', - webpage, 'urn')) + webpage, 'urn', default=None) + if video_id: + video_id = compat_urllib_parse_unquote(video_id) + + _PROGRAMCARD_BASE = 'https://www.dr.dk/mu-online/api/1.4/programcard' + query = {'expanded': 'true'} + + if video_id: + programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id) + else: + programcard_url = _PROGRAMCARD_BASE + page = 
self._parse_json( + self._search_regex( + r'data\s*=\s*({.+?})\s*(?:;| Date: Thu, 14 Nov 2019 06:38:55 +0100 Subject: [PATCH 685/785] [comcarcoff] remove extractor --- youtube_dl/extractor/comcarcoff.py | 74 ------------------------------ youtube_dl/extractor/extractors.py | 1 - 2 files changed, 75 deletions(-) delete mode 100644 youtube_dl/extractor/comcarcoff.py diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py deleted file mode 100644 index 588aad0d9..000000000 --- a/youtube_dl/extractor/comcarcoff.py +++ /dev/null @@ -1,74 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - int_or_none, - parse_duration, - parse_iso8601, -) - - -class ComCarCoffIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P[a-z0-9\-]*)' - _TESTS = [{ - 'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/', - 'info_dict': { - 'id': '2494164', - 'ext': 'mp4', - 'upload_date': '20141127', - 'timestamp': 1417107600, - 'duration': 1232, - 'title': 'Happy Thanksgiving Miranda', - 'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.', - }, - 'params': { - 'skip_download': 'requires ffmpeg', - } - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - if not display_id: - display_id = 'comediansincarsgettingcoffee.com' - webpage = self._download_webpage(url, display_id) - - full_data = self._parse_json( - self._search_regex( - r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'), - display_id)['videoData'] - - display_id = full_data['activeVideo']['video'] - video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id] - - video_id = compat_str(video_data['mediaId']) - title = video_data['title'] - formats = 
self._extract_m3u8_formats( - video_data['mediaUrl'], video_id, 'mp4') - self._sort_formats(formats) - - thumbnails = [{ - 'url': video_data['images']['thumb'], - }, { - 'url': video_data['images']['poster'], - }] - - timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601( - video_data.get('pubDate')) - duration = int_or_none(video_data.get('durationSeconds')) or parse_duration( - video_data.get('duration')) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': video_data.get('description'), - 'timestamp': timestamp, - 'duration': duration, - 'thumbnails': thumbnails, - 'formats': formats, - 'season_number': int_or_none(video_data.get('season')), - 'episode_number': int_or_none(video_data.get('episode')), - 'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))), - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d96f0d284..cf4bb8f20 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -222,7 +222,6 @@ from .comedycentral import ( ComedyCentralTVIE, ToshIE, ) -from .comcarcoff import ComCarCoffIE from .commonmistakes import CommonMistakesIE, UnicodeBOMIE from .commonprotocols import ( MmsIE, From 656c20010f53851c1b01e839744f7fe48497c03f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 15 Nov 2019 21:17:47 +0100 Subject: [PATCH 686/785] [ivi] fix format extraction(closes #21991) --- youtube_dl/extractor/ivi.py | 56 ++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 86c014b07..efdc3cc98 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -18,6 +18,8 @@ class IviIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P\d+)' _GEO_BYPASS = False _GEO_COUNTRIES = ['RU'] + 
_LIGHT_KEY = b'\xf1\x02\x32\xb7\xbc\x5c\x7a\xe8\xf7\x96\xc1\x33\x2b\x27\xa1\x8c' + _LIGHT_URL = 'https://api.ivi.ru/light/' _TESTS = [ # Single movie @@ -78,25 +80,41 @@ class IviIE(InfoExtractor): 'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080') def _real_extract(self, url): + try: + from Crypto.Cipher import Blowfish + from Crypto.Hash import CMAC + except ImportError: + raise ExtractorError('pycrypto not found. Please install it.', expected=True) + video_id = self._match_id(url) - data = { + timestamp = self._download_json( + self._LIGHT_URL, video_id, + 'Downloading timestamp JSON', data=json.dumps({ + 'method': 'da.timestamp.get', + 'params': [] + }).encode())['result'] + + data = json.dumps({ 'method': 'da.content.get', 'params': [ video_id, { - 'site': 's183', + 'site': 's353', 'referrer': 'http://www.ivi.ru/watch/%s' % video_id, 'contentid': video_id } ] - } + }).encode() video_json = self._download_json( - 'http://api.digitalaccess.ru/api/json/', video_id, - 'Downloading video JSON', data=json.dumps(data)) + self._LIGHT_URL, video_id, + 'Downloading video JSON', data=data, query={ + 'ts': timestamp, + 'sign': CMAC.new(self._LIGHT_KEY, timestamp.encode() + data, Blowfish).hexdigest(), + }) - if 'error' in video_json: - error = video_json['error'] + error = video_json.get('error') + if error: origin = error['origin'] if origin == 'NotAllowedForLocation': self.raise_geo_restricted( @@ -108,20 +126,24 @@ class IviIE(InfoExtractor): expected=True) result = video_json['result'] + title = result['title'] quality = qualities(self._KNOWN_FORMATS) - formats = [{ - 'url': x['url'], - 'format_id': x.get('content_format'), - 'quality': quality(x.get('content_format')), - } for x in result['files'] if x.get('url')] - + formats = [] + for f in result.get('files', []): + f_url = f.get('url') + content_format = f.get('content_format') + if not f_url or '-MDRM-' in content_format or '-FPS-' in content_format: + continue + formats.append({ + 'url': f_url, + 'format_id': content_format, 
+ 'quality': quality(content_format), + 'filesize': int_or_none(f.get('size_in_bytes')), + }) self._sort_formats(formats) - title = result['title'] - - duration = int_or_none(result.get('duration')) compilation = result.get('compilation') episode = title if compilation else None @@ -158,7 +180,7 @@ class IviIE(InfoExtractor): 'episode_number': episode_number, 'thumbnails': thumbnails, 'description': description, - 'duration': duration, + 'duration': int_or_none(result.get('duration')), 'formats': formats, } From 1bba88efc7e1f82095f7ae38348e56026db4bf3c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 15 Nov 2019 23:46:31 +0100 Subject: [PATCH 687/785] [ivi] sign content request only when pycryptodome is available --- youtube_dl/extractor/ivi.py | 42 +++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index efdc3cc98..1dcb17c9b 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -80,38 +80,42 @@ class IviIE(InfoExtractor): 'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080') def _real_extract(self, url): - try: - from Crypto.Cipher import Blowfish - from Crypto.Hash import CMAC - except ImportError: - raise ExtractorError('pycrypto not found. 
Please install it.', expected=True) - video_id = self._match_id(url) - timestamp = self._download_json( - self._LIGHT_URL, video_id, - 'Downloading timestamp JSON', data=json.dumps({ - 'method': 'da.timestamp.get', - 'params': [] - }).encode())['result'] - data = json.dumps({ 'method': 'da.content.get', 'params': [ video_id, { - 'site': 's353', + 'site': 's%d', 'referrer': 'http://www.ivi.ru/watch/%s' % video_id, 'contentid': video_id } ] }).encode() - video_json = self._download_json( - self._LIGHT_URL, video_id, - 'Downloading video JSON', data=data, query={ + try: + from Crypto.Cipher import Blowfish + from Crypto.Hash import CMAC + + timestamp = self._download_json( + self._LIGHT_URL, video_id, + 'Downloading timestamp JSON', data=json.dumps({ + 'method': 'da.timestamp.get', + 'params': [] + }).encode())['result'] + + data = data % 353 + query = { 'ts': timestamp, 'sign': CMAC.new(self._LIGHT_KEY, timestamp.encode() + data, Blowfish).hexdigest(), - }) + } + except ImportError: + data = data % 183 + query = {} + + video_json = self._download_json( + self._LIGHT_URL, video_id, + 'Downloading video JSON', data=data, query=query) error = video_json.get('error') if error: @@ -121,6 +125,8 @@ class IviIE(InfoExtractor): msg=error['message'], countries=self._GEO_COUNTRIES) elif origin == 'NoRedisValidData': raise ExtractorError('Video %s does not exist' % video_id, expected=True) + elif origin == 'NotAllowedError': + raise ExtractorError('pycryptodome not found. 
Please install it.', expected=True) raise ExtractorError( 'Unable to download video %s: %s' % (video_id, error['message']), expected=True) From 7360c06facfd96ee603ad4fc27f5903d3f8f6694 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 16 Nov 2019 05:44:14 +0700 Subject: [PATCH 688/785] [extractor/common] Add data, headers and query to all major extract methods preserving standard order for potential future use --- youtube_dl/extractor/common.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4c2f9303e..04d676378 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1455,14 +1455,14 @@ class InfoExtractor(object): def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True, m3u8_id=None): + fatal=True, m3u8_id=None, data=None, headers={}, query={}): manifest = self._download_xml( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', # Some manifests may be malformed, e.g. 
prosiebensat1 generated manifests # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244) transform_source=transform_source, - fatal=fatal) + fatal=fatal, data=data, headers=headers, query=query) if manifest is False: return [] @@ -1586,12 +1586,13 @@ class InfoExtractor(object): def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, entry_protocol='m3u8', preference=None, m3u8_id=None, note=None, errnote=None, - fatal=True, live=False, headers={}): + fatal=True, live=False, data=None, headers={}, + query={}): res = self._download_webpage_handle( m3u8_url, video_id, note=note or 'Downloading m3u8 information', errnote=errnote or 'Failed to download m3u8 information', - fatal=fatal, headers=headers) + fatal=fatal, data=data, headers=headers, query=query) if res is False: return [] @@ -2009,12 +2010,12 @@ class InfoExtractor(object): }) return entries - def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, headers={}): + def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, data=None, headers={}, query={}): res = self._download_xml_handle( mpd_url, video_id, note=note or 'Downloading MPD manifest', errnote=errnote or 'Failed to download MPD manifest', - fatal=fatal, headers=headers) + fatal=fatal, data=data, headers=headers, query=query) if res is False: return [] mpd_doc, urlh = res @@ -2317,12 +2318,12 @@ class InfoExtractor(object): self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) return formats - def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True): + def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): res = self._download_xml_handle( ism_url, video_id, note=note or 'Downloading ISM manifest', errnote=errnote or 'Failed to download ISM manifest', - 
fatal=fatal) + fatal=fatal, data=data, headers=headers, query=query) if res is False: return [] ism_doc, urlh = res From 6c79785bb0c96d6fc22d942946196f0842d70a93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 16 Nov 2019 07:47:23 +0700 Subject: [PATCH 689/785] [travis] Add python 3.8 build --- .travis.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.travis.yml b/.travis.yml index 6d16c2955..14d95fa84 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,6 +21,12 @@ matrix: - python: 3.7 dist: xenial env: YTDL_TEST_SET=download + - python: 3.8 + dist: xenial + env: YTDL_TEST_SET=core + - python: 3.8 + dist: xenial + env: YTDL_TEST_SET=download - python: 3.8-dev dist: xenial env: YTDL_TEST_SET=core From 9e4e864639bf606a1931a684f130e219e869adfd Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 16 Nov 2019 01:51:31 +0100 Subject: [PATCH 690/785] [ivi] improve error detection --- youtube_dl/extractor/ivi.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 1dcb17c9b..7f1146d95 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -119,17 +119,20 @@ class IviIE(InfoExtractor): error = video_json.get('error') if error: - origin = error['origin'] + origin = error.get('origin') + message = error.get('message') or error.get('user_message') + extractor_msg = 'Unable to download video %s' if origin == 'NotAllowedForLocation': - self.raise_geo_restricted( - msg=error['message'], countries=self._GEO_COUNTRIES) + self.raise_geo_restricted(message, self._GEO_COUNTRIES) elif origin == 'NoRedisValidData': - raise ExtractorError('Video %s does not exist' % video_id, expected=True) - elif origin == 'NotAllowedError': - raise ExtractorError('pycryptodome not found. 
Please install it.', expected=True) - raise ExtractorError( - 'Unable to download video %s: %s' % (video_id, error['message']), - expected=True) + extractor_msg = 'Video %s does not exist' + elif message: + if 'недоступен для просмотра на площадке s183' in message: + raise ExtractorError( + 'pycryptodome not found. Please install it.', + expected=True) + extractor_msg += ': ' + message + raise ExtractorError(extractor_msg % video_id, expected=True) result = video_json['result'] title = result['title'] From 7e70620a342c57746812d4a8fae6f436bd90cf57 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 18 Nov 2019 12:51:25 +0100 Subject: [PATCH 691/785] [vk] fix wall audio thumbnails extraction(closes #23135) --- youtube_dl/extractor/vk.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 4c8ca4f41..195875938 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -634,14 +634,15 @@ class VKWallPostIE(VKBaseIE): if not a.url: continue title = unescapeHTML(a.title) + performer = unescapeHTML(a.performer) entries.append({ 'id': '%s_%s' % (a.owner_id, a.id), 'url': self._unmask_url(a.url, a.ads['vk_id']), - 'title': '%s - %s' % (a.performer, title) if a.performer else title, - 'thumbnail': a.cover_url.split(',') if a.cover_url else None, - 'duration': a.duration, + 'title': '%s - %s' % (performer, title) if performer else title, + 'thumbnails': [{'url': c_url} for c_url in a.cover_url.split(',')] if a.cover_url else None, + 'duration': int_or_none(a.duration), 'uploader': uploader, - 'artist': a.performer, + 'artist': performer, 'track': title, 'ext': 'mp4', 'protocol': 'm3u8', From f9c4a4521068a02c583803ea422c6fedfa7598e3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 18 Nov 2019 21:40:53 +0100 Subject: [PATCH 692/785] [ntvru] add support for non relative file URLs(closes #23140) --- youtube_dl/extractor/ntvru.py | 49 +++++++++++++++++------------------ 1 
file changed, 24 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/ntvru.py b/youtube_dl/extractor/ntvru.py index 4f9cedb84..c47d1dfa4 100644 --- a/youtube_dl/extractor/ntvru.py +++ b/youtube_dl/extractor/ntvru.py @@ -3,9 +3,10 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( - clean_html, - xpath_text, int_or_none, + strip_or_none, + unescapeHTML, + xpath_text, ) @@ -47,10 +48,10 @@ class NTVRuIE(InfoExtractor): 'duration': 1496, }, }, { - 'url': 'http://www.ntv.ru/kino/Koma_film', - 'md5': 'f825770930937aa7e5aca0dc0d29319a', + 'url': 'https://www.ntv.ru/kino/Koma_film/m70281/o336036/video/', + 'md5': 'e9c7cde24d9d3eaed545911a04e6d4f4', 'info_dict': { - 'id': '1007609', + 'id': '1126480', 'ext': 'mp4', 'title': 'Остросюжетный фильм «Кома»', 'description': 'Остросюжетный фильм «Кома»', @@ -68,6 +69,10 @@ class NTVRuIE(InfoExtractor): 'thumbnail': r're:^http://.*\.jpg', 'duration': 2590, }, + }, { + # Schemeless file URL + 'url': 'https://www.ntv.ru/video/1797442', + 'only_matching': True, }] _VIDEO_ID_REGEXES = [ @@ -96,37 +101,31 @@ class NTVRuIE(InfoExtractor): 'http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML') - title = clean_html(xpath_text(player, './data/title', 'title', fatal=True)) - description = clean_html(xpath_text(player, './data/description', 'description')) + title = strip_or_none(unescapeHTML(xpath_text(player, './data/title', 'title', fatal=True))) video = player.find('./data/video') - video_id = xpath_text(video, './id', 'video id') - thumbnail = xpath_text(video, './splash', 'thumbnail') - duration = int_or_none(xpath_text(video, './totaltime', 'duration')) - view_count = int_or_none(xpath_text(video, './views', 'view count')) - - token = self._download_webpage( - 'http://stat.ntv.ru/services/access/token', - video_id, 'Downloading access token') formats = [] for format_id in ['', 'hi', 'webm']: - file_ = video.find('./%sfile' % format_id) - if file_ is None: + 
file_ = xpath_text(video, './%sfile' % format_id) + if not file_: continue - size = video.find('./%ssize' % format_id) + if file_.startswith('//'): + file_ = self._proto_relative_url(file_) + elif not file_.startswith('http'): + file_ = 'http://media.ntv.ru/vod/' + file_ formats.append({ - 'url': 'http://media2.ntv.ru/vod/%s&tok=%s' % (file_.text, token), - 'filesize': int_or_none(size.text if size is not None else None), + 'url': file_, + 'filesize': int_or_none(xpath_text(video, './%ssize' % format_id)), }) self._sort_formats(formats) return { - 'id': video_id, + 'id': xpath_text(video, './id'), 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'view_count': view_count, + 'description': strip_or_none(unescapeHTML(xpath_text(player, './data/description'))), + 'thumbnail': xpath_text(video, './splash'), + 'duration': int_or_none(xpath_text(video, './totaltime')), + 'view_count': int_or_none(xpath_text(video, './views')), 'formats': formats, } From 76d9eca43dd4fd7698d138b90ab6b2dd159559e0 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 19 Nov 2019 20:16:31 +0100 Subject: [PATCH 693/785] [ivi] fallback to old extraction method for unknown error codes --- youtube_dl/extractor/ivi.py | 79 +++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 7f1146d95..0db023622 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -93,46 +93,57 @@ class IviIE(InfoExtractor): ] }).encode() - try: - from Crypto.Cipher import Blowfish - from Crypto.Hash import CMAC + for site in (353, 183): + content_data = data % site + if site == 353: + try: + from Cryptodome.Cipher import Blowfish + from Cryptodome.Hash import CMAC + pycryptodomex_found = True + except ImportError: + pycryptodomex_found = False + continue - timestamp = self._download_json( + timestamp = (self._download_json( + self._LIGHT_URL, 
video_id, + 'Downloading timestamp JSON', data=json.dumps({ + 'method': 'da.timestamp.get', + 'params': [] + }).encode(), fatal=False) or {}).get('result') + if not timestamp: + continue + + query = { + 'ts': timestamp, + 'sign': CMAC.new(self._LIGHT_KEY, timestamp.encode() + content_data, Blowfish).hexdigest(), + } + else: + query = {} + + video_json = self._download_json( self._LIGHT_URL, video_id, - 'Downloading timestamp JSON', data=json.dumps({ - 'method': 'da.timestamp.get', - 'params': [] - }).encode())['result'] + 'Downloading video JSON', data=content_data, query=query) - data = data % 353 - query = { - 'ts': timestamp, - 'sign': CMAC.new(self._LIGHT_KEY, timestamp.encode() + data, Blowfish).hexdigest(), - } - except ImportError: - data = data % 183 - query = {} - - video_json = self._download_json( - self._LIGHT_URL, video_id, - 'Downloading video JSON', data=data, query=query) - - error = video_json.get('error') - if error: - origin = error.get('origin') - message = error.get('message') or error.get('user_message') - extractor_msg = 'Unable to download video %s' - if origin == 'NotAllowedForLocation': - self.raise_geo_restricted(message, self._GEO_COUNTRIES) - elif origin == 'NoRedisValidData': - extractor_msg = 'Video %s does not exist' - elif message: - if 'недоступен для просмотра на площадке s183' in message: + error = video_json.get('error') + if error: + origin = error.get('origin') + message = error.get('message') or error.get('user_message') + extractor_msg = 'Unable to download video %s' + if origin == 'NotAllowedForLocation': + self.raise_geo_restricted(message, self._GEO_COUNTRIES) + elif origin == 'NoRedisValidData': + extractor_msg = 'Video %s does not exist' + elif site == 353: + continue + elif not pycryptodomex_found: raise ExtractorError( 'pycryptodome not found. 
Please install it.', expected=True) - extractor_msg += ': ' + message - raise ExtractorError(extractor_msg % video_id, expected=True) + elif message: + extractor_msg += ': ' + message + raise ExtractorError(extractor_msg % video_id, expected=True) + else: + break result = video_json['result'] title = result['title'] From f0f6a7e73f55b6227c40af17c6fcab44b5a2df79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 Nov 2019 23:21:03 +0700 Subject: [PATCH 694/785] [chaturbate] Fix extraction (closes #23010, closes #23012) --- youtube_dl/extractor/chaturbate.py | 42 +++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py index 656e715ae..a459dcb8d 100644 --- a/youtube_dl/extractor/chaturbate.py +++ b/youtube_dl/extractor/chaturbate.py @@ -3,7 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + lowercase_escape, + url_or_none, +) class ChaturbateIE(InfoExtractor): @@ -38,12 +42,31 @@ class ChaturbateIE(InfoExtractor): 'https://chaturbate.com/%s/' % video_id, video_id, headers=self.geo_verification_headers()) - m3u8_urls = [] + found_m3u8_urls = [] - for m in re.finditer( - r'(["\'])(?Phttp.+?\.m3u8.*?)\1', webpage): - m3u8_fast_url, m3u8_no_fast_url = m.group('url'), m.group( - 'url').replace('_fast', '') + data = self._parse_json( + self._search_regex( + r'initialRoomDossier\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'data', default='{}', group='value'), + video_id, transform_source=lowercase_escape, fatal=False) + if data: + m3u8_url = url_or_none(data.get('hls_source')) + if m3u8_url: + found_m3u8_urls.append(m3u8_url) + + if not found_m3u8_urls: + for m in re.finditer( + r'(\\u002[27])(?Phttp.+?\.m3u8.*?)\1', webpage): + found_m3u8_urls.append(lowercase_escape(m.group('url'))) + + if not found_m3u8_urls: + for m in 
re.finditer( + r'(["\'])(?Phttp.+?\.m3u8.*?)\1', webpage): + found_m3u8_urls.append(m.group('url')) + + m3u8_urls = [] + for found_m3u8_url in found_m3u8_urls: + m3u8_fast_url, m3u8_no_fast_url = found_m3u8_url, found_m3u8_url.replace('_fast', '') for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url): if m3u8_url not in m3u8_urls: m3u8_urls.append(m3u8_url) @@ -63,7 +86,12 @@ class ChaturbateIE(InfoExtractor): formats = [] for m3u8_url in m3u8_urls: - m3u8_id = 'fast' if '_fast' in m3u8_url else 'slow' + for known_id in ('fast', 'slow'): + if '_%s' % known_id in m3u8_url: + m3u8_id = known_id + break + else: + m3u8_id = None formats.extend(self._extract_m3u8_formats( m3u8_url, video_id, ext='mp4', # ffmpeg skips segments for fast m3u8 From 25d3f770e6ef518a4230ad41bd4ea69dd2e851af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 Nov 2019 23:22:59 +0700 Subject: [PATCH 695/785] [ivi] Ask for pycryptodomex instead of pycryptodome See discussion at https://github.com/ytdl-org/youtube-dl/commit/1bba88efc7e1f82095f7ae38348e56026db4bf3c#r35982110 --- youtube_dl/extractor/ivi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 0db023622..52b53bfeb 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -137,7 +137,7 @@ class IviIE(InfoExtractor): continue elif not pycryptodomex_found: raise ExtractorError( - 'pycryptodome not found. Please install it.', + 'pycryptodomex not found. 
Please install it.', expected=True) elif message: extractor_msg += ': ' + message From f8015c15746e83394ecc395c6a13823d20971772 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 Nov 2019 23:38:39 +0700 Subject: [PATCH 696/785] [ivi] Fix python 3.4 support --- youtube_dl/extractor/ivi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 52b53bfeb..315ea03fa 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -91,10 +91,10 @@ class IviIE(InfoExtractor): 'contentid': video_id } ] - }).encode() + }) for site in (353, 183): - content_data = data % site + content_data = (data % site).encode() if site == 353: try: from Cryptodome.Cipher import Blowfish From 80a51fc2ef3ebb7d3e3d5fd0b6e9942bb4be6f4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 22 Nov 2019 01:10:24 +0700 Subject: [PATCH 697/785] [ivi] Skip s353 for bundled exe See https://github.com/Legrandin/pycryptodome/issues/228 --- youtube_dl/extractor/ivi.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 315ea03fa..a502e8806 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -1,8 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals -import re import json +import re +import sys from .common import InfoExtractor from ..utils import ( @@ -93,9 +94,13 @@ class IviIE(InfoExtractor): ] }) + bundled = hasattr(sys, 'frozen') + for site in (353, 183): content_data = (data % site).encode() if site == 353: + if bundled: + continue try: from Cryptodome.Cipher import Blowfish from Cryptodome.Hash import CMAC @@ -135,6 +140,10 @@ class IviIE(InfoExtractor): extractor_msg = 'Video %s does not exist' elif site == 353: continue + elif bundled: + raise ExtractorError( + 'This feature does not work from bundled exe. 
Run youtube-dl from sources.', + expected=True) elif not pycryptodomex_found: raise ExtractorError( 'pycryptodomex not found. Please install it.', From fb8dfc5a2772ca35dd65bad7b7565ad6ec1ad4dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 22 Nov 2019 01:21:00 +0700 Subject: [PATCH 698/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/ChangeLog b/ChangeLog index d46d20082..acee2a75a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,49 @@ +version + +Core ++ [extractor/common] Clean jwplayer description HTML tags ++ [extractor/common] Add data, headers and query to all major extract formats + methods + +Extractors +* [chaturbate] Fix extraction (#23010, #23012) ++ [ntvru] Add support for non relative file URLs (#23140) +* [vk] Fix wall audio thumbnails extraction (#23135) +* [ivi] Fix format extraction (#21991) +- [comcarcoff] Remove extractor ++ [drtv] Add support for new URL schema (#23059) ++ [nexx] Add support for Multi Player JS Setup (#23052) ++ [teamcoco] Add support for new videos (#23054) +* [soundcloud] Check if the soundtrack has downloads left (#23045) +* [facebook] Fix posts video data extraction (#22473) +- [addanime] Remove extractor +- [minhateca] Remove extractor +- [daisuki] Remove extractor +* [seeker] Fix extraction +- [revision3] Remove extractors +* [twitch] Fix video comments URL (#18593, #15828) +* [twitter] Improve extraction + + Add support for generic embeds (#22168) + * Always extract http formats for native videos (#14934) + + Add support for Twitter Broadcasts (#21369) + + Extract more metadata + * Improve VMap format extraction + * Unify extraction code for both twitter statuses and cards ++ [twitch] Add support for Clip embed URLs +* [lnkgo] Fix extraction (#16834) +* [mixcloud] Improve extraction + * Improve metadata extraction (#11721) + * Fix playlist extraction (#22378) + * Fix user mixes extraction 
(#15197, #17865) ++ [kinja] Add support for Kinja embeds (#5756, #11282, #22237, #22384) +* [onionstudios] Fix extraction ++ [hotstar] Pass Referer header to format requests (#22836) +* [dplay] Minimize response size ++ [patreon] Extract uploader_id and filesize +* [patreon] Minimize response size +* [roosterteeth] Fix login request (#16094, #22689) + + version 2019.11.05 Extractors From 0de9fd24dc8723c78a90cb546e4a05818304521e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 22 Nov 2019 01:24:27 +0700 Subject: [PATCH 699/785] release 2019.11.22 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 10 ++-------- youtube_dl/version.py | 2 +- 8 files changed, 16 insertions(+), 22 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 12de9add2..d3e11cdcf 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.11.05** +- [ ] I've verified that I'm running youtube-dl version **2019.11.22** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.11.05 + [debug] 
youtube-dl version 2019.11.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 8a6202cf6..51bf4db3b 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.11.05** +- [ ] I've verified that I'm running youtube-dl version **2019.11.22** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 83f91d5fe..19025ff25 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.11.05** +- [ ] I've verified that I'm running youtube-dl version **2019.11.22** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index be8e70f1e..a381b6979 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.11.05** +- [ ] I've verified that I'm running youtube-dl version **2019.11.22** - [ ] I've checked that all provided URLs are alive and 
playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.11.05 + [debug] youtube-dl version 2019.11.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 7544d171c..9c945d5ec 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.11.05** +- [ ] I've verified that I'm running youtube-dl version **2019.11.22** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index acee2a75a..daaff3eef 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.11.22 Core + [extractor/common] Clean jwplayer description HTML tags diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 536b87479..3dcb026c5 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -26,7 +26,6 @@ - **AcademicEarth:Course** - **acast** - **acast:channel** - - **AddAnime** - **ADN**: Anime Digital Network - **AdobeConnect** - **AdobeTV** @@ -175,7 +174,6 @@ - **CNN** - **CNNArticle** - **CNNBlogs** - - **ComCarCoff** - **ComedyCentral** - **ComedyCentralFullEpisodes** - **ComedyCentralShortname** @@ -203,8 +201,6 @@ - **dailymotion** - 
**dailymotion:playlist** - **dailymotion:user** - - **DaisukiMotto** - - **DaisukiMottoPlaylist** - **daum.net** - **daum.net:clip** - **daum.net:playlist** @@ -404,6 +400,7 @@ - **Ketnet** - **KhanAcademy** - **KickStarter** + - **KinjaEmbed** - **KinoPoisk** - **KonserthusetPlay** - **kontrtube**: KontrTube.ru - Труба зовёт @@ -485,14 +482,12 @@ - **Mgoon** - **MGTV**: 芒果TV - **MiaoPai** - - **Minhateca** - **MinistryGrid** - **Minoto** - **miomio.tv** - **MiTele**: mitele.es - **mixcloud** - **mixcloud:playlist** - - **mixcloud:stream** - **mixcloud:user** - **Mixer:live** - **Mixer:vod** @@ -723,8 +718,6 @@ - **Restudy** - **Reuters** - **ReverbNation** - - **revision** - - **revision3:embed** - **RICE** - **RMCDecouverte** - **RockstarGames** @@ -958,6 +951,7 @@ - **twitch:vod** - **twitter** - **twitter:amplify** + - **twitter:broadcast** - **twitter:card** - **udemy** - **udemy:course** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8012a66db..361809681 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.11.05' +__version__ = '2019.11.22' From cf3c9eafad5e6b83788e15a605aa6804b1ab307c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 27 Nov 2019 00:03:51 +0700 Subject: [PATCH 700/785] [soundcloud] Update client id (closes #23214) --- youtube_dl/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index e8ffb2cbe..988dec4fa 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -245,7 +245,7 @@ class SoundcloudIE(InfoExtractor): _API_BASE = 'https://api.soundcloud.com/' _API_V2_BASE = 'https://api-v2.soundcloud.com/' _BASE_URL = 'https://soundcloud.com/' - _CLIENT_ID = 'BeGVhOrGmfboy1LtiHTQF6Ejpt9ULJCI' + _CLIENT_ID = 'UW9ajvMgVdMMW3cdeBi8lPfN6dvOVGji' _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg' 
_ARTWORK_MAP = { From 9d30c2132acf2d12bfa8e559987c341c76d9cd24 Mon Sep 17 00:00:00 2001 From: InfernalUnderling <42065091+InfernalUnderling@users.noreply.github.com> Date: Tue, 26 Nov 2019 17:08:37 +0000 Subject: [PATCH 701/785] [utils] Handle rd-suffixed day parts in unified_strdate (#23199) --- test/test_utils.py | 2 ++ youtube_dl/utils.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 3920542bb..0db37d9d8 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -340,6 +340,8 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_strdate('July 15th, 2013'), '20130715') self.assertEqual(unified_strdate('September 1st, 2013'), '20130901') self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902') + self.assertEqual(unified_strdate('November 3rd, 2019'), '20191103') + self.assertEqual(unified_strdate('October 23rd, 2005'), '20051023') def test_unified_timestamps(self): self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index aed988b88..0d30075aa 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1718,13 +1718,16 @@ DATE_FORMATS = ( '%B %d %Y', '%B %dst %Y', '%B %dnd %Y', + '%B %drd %Y', '%B %dth %Y', '%b %d %Y', '%b %dst %Y', '%b %dnd %Y', + '%b %drd %Y', '%b %dth %Y', '%b %dst %Y %I:%M', '%b %dnd %Y %I:%M', + '%b %drd %Y %I:%M', '%b %dth %Y %I:%M', '%Y %m %d', '%Y-%m-%d', From 6ddd4bf6ac04ae0b8ba39fb4124e844afc49b5a9 Mon Sep 17 00:00:00 2001 From: InfernalUnderling <42065091+InfernalUnderling@users.noreply.github.com> Date: Tue, 26 Nov 2019 17:20:39 +0000 Subject: [PATCH 702/785] [bitchute] Extract upload date (closes #22990) (#23193) --- youtube_dl/extractor/bitchute.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/bitchute.py b/youtube_dl/extractor/bitchute.py index 430663fbf..0c773e66e 100644 --- a/youtube_dl/extractor/bitchute.py +++ b/youtube_dl/extractor/bitchute.py @@ -7,6 
+7,7 @@ import re from .common import InfoExtractor from ..utils import ( orderedSet, + unified_strdate, urlencode_postdata, ) @@ -23,6 +24,7 @@ class BitChuteIE(InfoExtractor): 'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Victoria X Rave', + 'upload_date': '20170813', }, }, { 'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/', @@ -74,12 +76,17 @@ class BitChuteIE(InfoExtractor): r'(?s)]+\bclass=["\']video-author[^>]+>(.+?)

    '), webpage, 'uploader', fatal=False) + upload_date = unified_strdate(self._search_regex( + r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.', + webpage, 'upload date', fatal=False)) + return { 'id': video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, + 'upload_date': upload_date, 'formats': formats, } From 1ced222120c00854865c5b16e89838235ed549ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 27 Nov 2019 02:26:42 +0700 Subject: [PATCH 703/785] [utils] Add generic caesar cipher and rot47 --- test/test_utils.py | 16 ++++++++++++++++ youtube_dl/utils.py | 13 +++++++++++++ 2 files changed, 29 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 0db37d9d8..e83c8ea11 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -19,6 +19,7 @@ from youtube_dl.utils import ( age_restricted, args_to_str, encode_base_n, + caesar, clean_html, date_from_str, DateRange, @@ -69,6 +70,7 @@ from youtube_dl.utils import ( remove_start, remove_end, remove_quotes, + rot47, shell_quote, smuggle_url, str_to_int, @@ -1369,6 +1371,20 @@ Line 1 self.assertRaises(ValueError, encode_base_n, 0, 70) self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table) + def test_caesar(self): + self.assertEqual(caesar('ace', 'abcdef', 2), 'cea') + self.assertEqual(caesar('cea', 'abcdef', -2), 'ace') + self.assertEqual(caesar('ace', 'abcdef', -2), 'eac') + self.assertEqual(caesar('eac', 'abcdef', 2), 'ace') + self.assertEqual(caesar('ace', 'abcdef', 0), 'ace') + self.assertEqual(caesar('xyz', 'abcdef', 2), 'xyz') + self.assertEqual(caesar('abc', 'acegik', 2), 'ebg') + self.assertEqual(caesar('ebg', 'acegik', -2), 'abc') + + def test_rot47(self): + self.assertEqual(rot47('youtube-dl'), r'J@FEF36\5=') + self.assertEqual(rot47('YOUTUBE-DL'), r'*~&%&qt\s{') + def test_urshift(self): self.assertEqual(urshift(3, 1), 1) self.assertEqual(urshift(-3, 1), 2147483646) diff --git 
a/youtube_dl/utils.py b/youtube_dl/utils.py index 0d30075aa..b14603d8a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -5383,6 +5383,19 @@ def decode_packed_codes(code): obfucasted_code) +def caesar(s, alphabet, shift): + if shift == 0: + return s + l = len(alphabet) + return ''.join( + alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c + for c in s) + + +def rot47(s): + return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47) + + def parse_m3u8_attributes(attrib): info = {} for (key, val) in re.findall(r'(?P[A-Z0-9-]+)=(?P"[^"]+"|[^",]+)(?:,|$)', attrib): From edc2a1f68b267abc6b4c94991da4ad83fd8374bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 27 Nov 2019 02:28:06 +0700 Subject: [PATCH 704/785] [vivo] Fix extraction (closes #22328, closes #22279) --- youtube_dl/extractor/shared.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index ff575f592..02295d1a4 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -1,13 +1,18 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_b64decode +from ..compat import ( + compat_b64decode, + compat_urllib_parse_unquote_plus, +) from ..utils import ( determine_ext, ExtractorError, int_or_none, + js_to_json, KNOWN_EXTENSIONS, parse_filesize, + rot47, url_or_none, urlencode_postdata, ) @@ -112,16 +117,22 @@ class VivoIE(SharedBaseIE): webpage, 'filesize', fatal=False)) def _extract_video_url(self, webpage, video_id, url): - def decode_url(encoded_url): + def decode_url_old(encoded_url): return compat_b64decode(encoded_url).decode('utf-8') - stream_url = url_or_none(decode_url(self._search_regex( + stream_url = self._search_regex( r'data-stream\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, - 'stream url', default=None, 
group='url'))) + 'stream url', default=None, group='url') + if stream_url: + stream_url = url_or_none(decode_url_old(stream_url)) if stream_url: return stream_url - return self._parse_json( + + def decode_url(encoded_url): + return rot47(compat_urllib_parse_unquote_plus(encoded_url)) + + return decode_url(self._parse_json( self._search_regex( - r'InitializeStream\s*\(\s*(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'stream', group='url'), - video_id, transform_source=decode_url)[0] + r'(?s)InitializeStream\s*\(\s*({.+?})\s*\)\s*;', webpage, + 'stream'), + video_id, transform_source=js_to_json)['source']) From df65a4a1ed3096b8210c097c77d00f0391f78503 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 26 Nov 2019 21:53:51 +0100 Subject: [PATCH 705/785] [corus] improve extraction - add support for Series Plus, W Network, YTV, ABC Spark, disneychannel.com and disneylachaine.ca(closes #20861) - add support for self hosted videos(closes #22075) - detect DRM protection(closes #14910)(closes #9164) --- youtube_dl/extractor/corus.py | 169 ++++++++++++++++++++++------------ 1 file changed, 112 insertions(+), 57 deletions(-) diff --git a/youtube_dl/extractor/corus.py b/youtube_dl/extractor/corus.py index a1b251804..e11aadf14 100644 --- a/youtube_dl/extractor/corus.py +++ b/youtube_dl/extractor/corus.py @@ -4,7 +4,12 @@ from __future__ import unicode_literals import re from .theplatform import ThePlatformFeedIE -from ..utils import int_or_none +from ..utils import ( + dict_get, + ExtractorError, + float_or_none, + int_or_none, +) class CorusIE(ThePlatformFeedIE): @@ -12,24 +17,49 @@ class CorusIE(ThePlatformFeedIE): https?:// (?:www\.)? 
(?P - (?:globaltv|etcanada)\.com| - (?:hgtv|foodnetwork|slice|history|showcase|bigbrothercanada)\.ca + (?: + globaltv| + etcanada| + seriesplus| + wnetwork| + ytv + )\.com| + (?: + hgtv| + foodnetwork| + slice| + history| + showcase| + bigbrothercanada| + abcspark| + disney(?:channel|lachaine) + )\.ca + ) + /(?:[^/]+/)* + (?: + video\.html\?.*?\bv=| + videos?/(?:[^/]+/)*(?:[a-z0-9-]+-)? + ) + (?P + [\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}| + (?:[A-Z]{4})?\d{12,20} ) - /(?:video/(?:[^/]+/)?|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=)) - (?P\d+) ''' _TESTS = [{ 'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/', - 'md5': '05dcbca777bf1e58c2acbb57168ad3a6', 'info_dict': { 'id': '870923331648', 'ext': 'mp4', 'title': 'Movie Night Popcorn with Bryan', 'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.', - 'uploader': 'SHWM-NEW', 'upload_date': '20170206', 'timestamp': 1486392197, }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + 'expected_warnings': ['Failed to parse JSON'], }, { 'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753', 'only_matching': True, @@ -48,58 +78,83 @@ class CorusIE(ThePlatformFeedIE): }, { 'url': 'https://www.bigbrothercanada.ca/video/big-brother-canada-704/1457812035894/', 'only_matching': True + }, { + 'url': 'https://www.seriesplus.com/emissions/dre-mary-mort-sur-ordonnance/videos/deux-coeurs-battant/SERP0055626330000200/', + 'only_matching': True + }, { + 'url': 'https://www.disneychannel.ca/shows/gabby-duran-the-unsittables/video/crybaby-duran-clip/2f557eec-0588-11ea-ae2b-e2c6776b770e/', + 'only_matching': True }] - - _TP_FEEDS = { - 'globaltv': { - 'feed_id': 'ChQqrem0lNUp', - 'account_id': 2269680845, - }, - 'etcanada': { - 'feed_id': 'ChQqrem0lNUp', - 'account_id': 2269680845, - }, - 'hgtv': { - 'feed_id': 'L0BMHXi2no43', - 'account_id': 2414428465, - 
}, - 'foodnetwork': { - 'feed_id': 'ukK8o58zbRmJ', - 'account_id': 2414429569, - }, - 'slice': { - 'feed_id': '5tUJLgV2YNJ5', - 'account_id': 2414427935, - }, - 'history': { - 'feed_id': 'tQFx_TyyEq4J', - 'account_id': 2369613659, - }, - 'showcase': { - 'feed_id': '9H6qyshBZU3E', - 'account_id': 2414426607, - }, - 'bigbrothercanada': { - 'feed_id': 'ChQqrem0lNUp', - 'account_id': 2269680845, - }, + _GEO_BYPASS = False + _SITE_MAP = { + 'globaltv': 'series', + 'etcanada': 'series', + 'foodnetwork': 'food', + 'bigbrothercanada': 'series', + 'disneychannel': 'disneyen', + 'disneylachaine': 'disneyfr', } def _real_extract(self, url): domain, video_id = re.match(self._VALID_URL, url).groups() - feed_info = self._TP_FEEDS[domain.split('.')[0]] - return self._extract_feed_info('dtjsEC', feed_info['feed_id'], 'byId=' + video_id, video_id, lambda e: { - 'episode_number': int_or_none(e.get('pl1$episode')), - 'season_number': int_or_none(e.get('pl1$season')), - 'series': e.get('pl1$show'), - }, { - 'HLS': { - 'manifest': 'm3u', - }, - 'DesktopHLS Default': { - 'manifest': 'm3u', - }, - 'MP4 MBR': { - 'manifest': 'm3u', - }, - }, feed_info['account_id']) + site = domain.split('.')[0] + path = self._SITE_MAP.get(site, site) + if path != 'series': + path = 'migration/' + path + video = self._download_json( + 'https://globalcontent.corusappservices.com/templates/%s/playlist/' % path, + video_id, query={'byId': video_id}, + headers={'Accept': 'application/json'})[0] + title = video['title'] + + formats = [] + for source in video.get('sources', []): + smil_url = source.get('file') + if not smil_url: + continue + source_type = source.get('type') + note = 'Downloading%s smil file' % (' ' + source_type if source_type else '') + resp = self._download_webpage( + smil_url, video_id, note, fatal=False, + headers=self.geo_verification_headers()) + if not resp: + continue + error = self._parse_json(resp, video_id, fatal=False) + if error: + if error.get('exception') == 'GeoLocationBlocked': 
+ self.raise_geo_restricted(countries=['CA']) + raise ExtractorError(error['description']) + smil = self._parse_xml(resp, video_id, fatal=False) + if smil is None: + continue + namespace = self._parse_smil_namespace(smil) + formats.extend(self._parse_smil_formats( + smil, smil_url, video_id, namespace)) + if not formats and video.get('drm'): + raise ExtractorError('This video is DRM protected.', expected=True) + self._sort_formats(formats) + + subtitles = {} + for track in video.get('tracks', []): + track_url = track.get('file') + if not track_url: + continue + lang = 'fr' if site in ('disneylachaine', 'seriesplus') else 'en' + subtitles.setdefault(lang, []).append({'url': track_url}) + + metadata = video.get('metadata') or {} + get_number = lambda x: int_or_none(video.get('pl1$' + x) or metadata.get(x + 'Number')) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': dict_get(video, ('defaultThumbnailUrl', 'thumbnail', 'image')), + 'description': video.get('description'), + 'timestamp': int_or_none(video.get('availableDate'), 1000), + 'subtitles': subtitles, + 'duration': float_or_none(metadata.get('duration')), + 'series': dict_get(video, ('show', 'pl1$show')), + 'season_number': get_number('season'), + 'episode_number': get_number('episode'), + } From 5ef62fc4ce1f255343d67b70f3cee2f2240cdfba Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 26 Nov 2019 22:01:34 +0100 Subject: [PATCH 706/785] [dailymotion] improve extraction - extract http formats included in m3u8 manifest - fix user extraction(closes #3553)(closes #21415) - add suport for User Authentication(closes #11491) - fix password protected videos extraction(closes #23176) - respect age limit option and family filter cookie value(closes #18437) - handle video url playlist query param - report alowed countries for geo-restricted videos --- youtube_dl/extractor/common.py | 13 + youtube_dl/extractor/dailymotion.py | 559 +++++++++++----------------- 
youtube_dl/extractor/vk.py | 3 +- 3 files changed, 234 insertions(+), 341 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 04d676378..eaae5e484 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1766,6 +1766,19 @@ class InfoExtractor(object): # the same GROUP-ID f['acodec'] = 'none' formats.append(f) + + # for DailyMotion + progressive_uri = last_stream_inf.get('PROGRESSIVE-URI') + if progressive_uri: + http_f = f.copy() + del http_f['manifest_url'] + http_f.update({ + 'format_id': f['format_id'].replace('hls-', 'http-'), + 'protocol': 'http', + 'url': progressive_uri, + }) + formats.append(http_f) + last_stream_inf = {} return formats diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 745971900..327fdb04a 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -1,50 +1,93 @@ # coding: utf-8 from __future__ import unicode_literals -import base64 import functools -import hashlib -import itertools import json -import random import re -import string from .common import InfoExtractor -from ..compat import compat_struct_pack +from ..compat import compat_HTTPError from ..utils import ( - determine_ext, - error_to_compat_str, + age_restricted, + clean_html, ExtractorError, int_or_none, - mimetype2ext, OnDemandPagedList, - parse_iso8601, - sanitized_Request, - str_to_int, try_get, unescapeHTML, - update_url_query, - url_or_none, urlencode_postdata, ) class DailymotionBaseInfoExtractor(InfoExtractor): + _FAMILY_FILTER = None + _HEADERS = { + 'Content-Type': 'application/json', + 'Origin': 'https://www.dailymotion.com', + } + _NETRC_MACHINE = 'dailymotion' + + def _get_dailymotion_cookies(self): + return self._get_cookies('https://www.dailymotion.com/') + @staticmethod - def _build_request(url): - """Build a request with the family filter disabled""" - request = sanitized_Request(url) - request.add_header('Cookie', 
'family_filter=off; ff=off') - return request + def _get_cookie_value(cookies, name): + cookie = cookies.get('name') + if cookie: + return cookie.value - def _download_webpage_handle_no_ff(self, url, *args, **kwargs): - request = self._build_request(url) - return self._download_webpage_handle(request, *args, **kwargs) + def _set_dailymotion_cookie(self, name, value): + self._set_cookie('www.dailymotion.com', name, value) - def _download_webpage_no_ff(self, url, *args, **kwargs): - request = self._build_request(url) - return self._download_webpage(request, *args, **kwargs) + def _real_initialize(self): + cookies = self._get_dailymotion_cookies() + ff = self._get_cookie_value(cookies, 'ff') + self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self._downloader.params.get('age_limit')) + self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off') + + def _call_api(self, object_type, xid, object_fields, note, filter_extra=None): + if not self._HEADERS.get('Authorization'): + cookies = self._get_dailymotion_cookies() + token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token') + if not token: + data = { + 'client_id': 'f1a362d288c1b98099c7', + 'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5', + } + username, password = self._get_login_info() + if username: + data.update({ + 'grant_type': 'password', + 'password': password, + 'username': username, + }) + else: + data['grant_type'] = 'client_credentials' + try: + token = self._download_json( + 'https://graphql.api.dailymotion.com/oauth/token', + None, 'Downloading Access Token', + data=urlencode_postdata(data))['access_token'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + raise ExtractorError(self._parse_json( + e.cause.read().decode(), xid)['error_description'], expected=True) + raise + self._set_dailymotion_cookie('access_token' if username else 'client_token', token) + 
self._HEADERS['Authorization'] = 'Bearer ' + token + + resp = self._download_json( + 'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({ + 'query': '''{ + %s(xid: "%s"%s) { + %s + } +}''' % (object_type, xid, ', ' + filter_extra if filter_extra else '', object_fields), + }).encode(), headers=self._HEADERS) + obj = resp['data'][object_type] + if not obj: + raise ExtractorError(resp['errors'][0]['message'], expected=True) + return obj class DailymotionIE(DailymotionBaseInfoExtractor): @@ -54,18 +97,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor): (?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)| (?:www\.)?lequipe\.fr/video ) - /(?P[^/?_]+) + /(?P[^/?_]+)(?:.+?\bplaylist=(?Px[0-9a-z]+))? ''' IE_NAME = 'dailymotion' - - _FORMATS = [ - ('stream_h264_ld_url', 'ld'), - ('stream_h264_url', 'standard'), - ('stream_h264_hq_url', 'hq'), - ('stream_h264_hd_url', 'hd'), - ('stream_h264_hd1080_url', 'hd180'), - ] - _TESTS = [{ 'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news', 'md5': '074b95bdee76b9e3654137aee9c79dfe', @@ -74,7 +108,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'ext': 'mp4', 'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller', 'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. 
Miller', - 'thumbnail': r're:^https?:.*\.(?:jpg|png)$', 'duration': 187, 'timestamp': 1493651285, 'upload_date': '20170501', @@ -146,7 +179,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor): }, { 'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2', 'only_matching': True, + }, { + 'url': 'https://www.dailymotion.com/video/x3z49k?playlist=xv4bw', + 'only_matching': True, }] + _GEO_BYPASS = False + _COMMON_MEDIA_FIELDS = '''description + geoblockedCountries { + allowed + } + xid''' @staticmethod def _extract_urls(webpage): @@ -162,264 +204,140 @@ class DailymotionIE(DailymotionBaseInfoExtractor): return urls def _real_extract(self, url): - video_id = self._match_id(url) + video_id, playlist_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage_no_ff( - 'https://www.dailymotion.com/video/%s' % video_id, video_id) + if playlist_id: + if not self._downloader.params.get('noplaylist'): + self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id) + return self.url_result( + 'http://www.dailymotion.com/playlist/' + playlist_id, + 'DailymotionPlaylist', playlist_id) + self.to_screen('Downloading just video %s because of --no-playlist' % video_id) - age_limit = self._rta_search(webpage) - - description = self._og_search_description( - webpage, default=None) or self._html_search_meta( - 'description', webpage, 'description') - - view_count_str = self._search_regex( - (r']+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"', - r'video_views_count[^>]+>\s+([\s\d\,.]+)'), - webpage, 'view count', default=None) - if view_count_str: - view_count_str = re.sub(r'\s', '', view_count_str) - view_count = str_to_int(view_count_str) - comment_count = int_or_none(self._search_regex( - r']+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"', - webpage, 'comment count', default=None)) - - player_v5 = self._search_regex( - [r'buildPlayer\(({.+?})\);\n', # See 
https://github.com/ytdl-org/youtube-dl/issues/7826 - r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);', - r'buildPlayer\(({.+?})\);', - r'var\s+config\s*=\s*({.+?});', - # New layout regex (see https://github.com/ytdl-org/youtube-dl/issues/13580) - r'__PLAYER_CONFIG__\s*=\s*({.+?});'], - webpage, 'player v5', default=None) - if player_v5: - player = self._parse_json(player_v5, video_id, fatal=False) or {} - metadata = try_get(player, lambda x: x['metadata'], dict) - if not metadata: - metadata_url = url_or_none(try_get( - player, lambda x: x['context']['metadata_template_url1'])) - if metadata_url: - metadata_url = metadata_url.replace(':videoId', video_id) - else: - metadata_url = update_url_query( - 'https://www.dailymotion.com/player/metadata/video/%s' - % video_id, { - 'embedder': url, - 'integration': 'inline', - 'GK_PV5_NEON': '1', - }) - metadata = self._download_json( - metadata_url, video_id, 'Downloading metadata JSON') - - if try_get(metadata, lambda x: x['error']['type']) == 'password_protected': - password = self._downloader.params.get('videopassword') - if password: - r = int(metadata['id'][1:], 36) - us64e = lambda x: base64.urlsafe_b64encode(x).decode().strip('=') - t = ''.join(random.choice(string.ascii_letters) for i in range(10)) - n = us64e(compat_struct_pack('I', r)) - i = us64e(hashlib.md5(('%s%d%s' % (password, r, t)).encode()).digest()) - metadata = self._download_json( - 'http://www.dailymotion.com/player/metadata/video/p' + i + t + n, video_id) - - self._check_error(metadata) - - formats = [] - for quality, media_list in metadata['qualities'].items(): - for media in media_list: - media_url = media.get('url') - if not media_url: - continue - type_ = media.get('type') - if type_ == 'application/vnd.lumberjack.manifest': - continue - ext = mimetype2ext(type_) or determine_ext(media_url) - if ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( - media_url, video_id, 'mp4', preference=-1, - m3u8_id='hls', fatal=False) - for f in 
m3u8_formats: - f['url'] = f['url'].split('#')[0] - formats.append(f) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - media_url, video_id, preference=-1, f4m_id='hds', fatal=False)) - else: - f = { - 'url': media_url, - 'format_id': 'http-%s' % quality, - 'ext': ext, - } - m = re.search(r'H264-(?P\d+)x(?P\d+)', media_url) - if m: - f.update({ - 'width': int(m.group('width')), - 'height': int(m.group('height')), - }) - formats.append(f) - self._sort_formats(formats) - - title = metadata['title'] - duration = int_or_none(metadata.get('duration')) - timestamp = int_or_none(metadata.get('created_time')) - thumbnail = metadata.get('poster_url') - uploader = metadata.get('owner', {}).get('screenname') - uploader_id = metadata.get('owner', {}).get('id') - - subtitles = {} - subtitles_data = metadata.get('subtitles', {}).get('data', {}) - if subtitles_data and isinstance(subtitles_data, dict): - for subtitle_lang, subtitle in subtitles_data.items(): - subtitles[subtitle_lang] = [{ - 'ext': determine_ext(subtitle_url), - 'url': subtitle_url, - } for subtitle_url in subtitle.get('urls', [])] - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'age_limit': age_limit, - 'view_count': view_count, - 'comment_count': comment_count, - 'formats': formats, - 'subtitles': subtitles, - } - - # vevo embed - vevo_id = self._search_regex( - r'[\w]*)', - webpage, 'vevo embed', default=None) - if vevo_id: - return self.url_result('vevo:%s' % vevo_id, 'Vevo') - - # fallback old player - embed_page = self._download_webpage_no_ff( - 'https://www.dailymotion.com/embed/video/%s' % video_id, - video_id, 'Downloading embed page') - - timestamp = parse_iso8601(self._html_search_meta( - 'video:release_date', webpage, 'upload date')) - - info = self._parse_json( - self._search_regex( - r'var info = ({.*?}),$', embed_page, - 
'video info', flags=re.MULTILINE), - video_id) - - self._check_error(info) - - formats = [] - for (key, format_id) in self._FORMATS: - video_url = info.get(key) - if video_url is not None: - m_size = re.search(r'H264-(\d+)x(\d+)', video_url) - if m_size is not None: - width, height = map(int_or_none, (m_size.group(1), m_size.group(2))) - else: - width, height = None, None - formats.append({ - 'url': video_url, - 'ext': 'mp4', - 'format_id': format_id, - 'width': width, - 'height': height, - }) - self._sort_formats(formats) - - # subtitles - video_subtitles = self.extract_subtitles(video_id, webpage) - - title = self._og_search_title(webpage, default=None) - if title is None: - title = self._html_search_regex( - r'(?s)]*>(.*?)', webpage, - 'title') - - return { - 'id': video_id, - 'formats': formats, - 'uploader': info['owner.screenname'], - 'timestamp': timestamp, - 'title': title, - 'description': description, - 'subtitles': video_subtitles, - 'thumbnail': info['thumbnail_url'], - 'age_limit': age_limit, - 'view_count': view_count, - 'duration': info['duration'] + password = self._downloader.params.get('videopassword') + media = self._call_api( + 'media', video_id, '''... on Video { + %s + stats { + likes { + total } + views { + total + } + } + } + ... 
on Live { + %s + audienceCount + isOnAir + }''' % (self._COMMON_MEDIA_FIELDS, self._COMMON_MEDIA_FIELDS), 'Downloading media JSON metadata', + 'password: "%s"' % self._downloader.params.get('videopassword') if password else None) + xid = media['xid'] - def _check_error(self, info): - error = info.get('error') + metadata = self._download_json( + 'https://www.dailymotion.com/player/metadata/video/' + xid, + xid, 'Downloading metadata JSON', + query={'app': 'com.dailymotion.neon'}) + + error = metadata.get('error') if error: - title = error.get('title') or error['message'] + title = error.get('title') or error['raw_message'] # See https://developer.dailymotion.com/api#access-error if error.get('code') == 'DM007': - self.raise_geo_restricted(msg=title) + allowed_countries = try_get(media, lambda x: x['geoblockedCountries']['allowed'], list) + self.raise_geo_restricted(msg=title, countries=allowed_countries) raise ExtractorError( '%s said: %s' % (self.IE_NAME, title), expected=True) - def _get_subtitles(self, video_id, webpage): - try: - sub_list = self._download_webpage( - 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, - video_id, note=False) - except ExtractorError as err: - self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err)) - return {} - info = json.loads(sub_list) - if (info['total'] > 0): - sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list']) - return sub_lang_list - self._downloader.report_warning('video doesn\'t have subtitles') - return {} + title = metadata['title'] + is_live = media.get('isOnAir') + formats = [] + for quality, media_list in metadata['qualities'].items(): + for m in media_list: + media_url = m.get('url') + media_type = m.get('type') + if not media_url or media_type == 'application/vnd.lumberjack.manifest': + continue + if media_type == 'application/x-mpegURL': + formats.extend(self._extract_m3u8_formats( + media_url, 
video_id, 'mp4', + 'm3u8' if is_live else 'm3u8_native', + m3u8_id='hls', fatal=False)) + else: + f = { + 'url': media_url, + 'format_id': 'http-' + quality, + } + m = re.search(r'/H264-(\d+)x(\d+)(?:-(60)/)?', media_url) + if m: + width, height, fps = map(int_or_none, m.groups()) + f.update({ + 'fps': fps, + 'height': height, + 'width': width, + }) + formats.append(f) + for f in formats: + f['url'] = f['url'].split('#')[0] + if not f.get('fps') and f['format_id'].endswith('@60'): + f['fps'] = 60 + self._sort_formats(formats) + + subtitles = {} + subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {} + for subtitle_lang, subtitle in subtitles_data.items(): + subtitles[subtitle_lang] = [{ + 'url': subtitle_url, + } for subtitle_url in subtitle.get('urls', [])] + + thumbnails = [] + for height, poster_url in metadata.get('posters', {}).items(): + thumbnails.append({ + 'height': int_or_none(height), + 'id': height, + 'url': poster_url, + }) + + owner = metadata.get('owner') or {} + stats = media.get('stats') or {} + get_count = lambda x: int_or_none(try_get(stats, lambda y: y[x + 's']['total'])) + + return { + 'id': video_id, + 'title': self._live_title(title) if is_live else title, + 'description': clean_html(media.get('description')), + 'thumbnails': thumbnails, + 'duration': int_or_none(metadata.get('duration')) or None, + 'timestamp': int_or_none(metadata.get('created_time')), + 'uploader': owner.get('screenname'), + 'uploader_id': owner.get('id') or metadata.get('screenname'), + 'age_limit': 18 if metadata.get('explicit') else 0, + 'tags': metadata.get('tags'), + 'view_count': get_count('view') or int_or_none(media.get('audienceCount')), + 'like_count': get_count('like'), + 'formats': formats, + 'subtitles': subtitles, + 'is_live': is_live, + } -class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): - IE_NAME = 'dailymotion:playlist' - _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?Px[0-9a-z]+)' - _TESTS = 
[{ - 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q', - 'info_dict': { - 'title': 'SPORT', - 'id': 'xv4bw', - }, - 'playlist_mincount': 20, - }] +class DailymotionPlaylistBaseIE(DailymotionBaseInfoExtractor): _PAGE_SIZE = 100 - def _fetch_page(self, playlist_id, authorizaion, page): + def _fetch_page(self, playlist_id, page): page += 1 - videos = self._download_json( - 'https://graphql.api.dailymotion.com', - playlist_id, 'Downloading page %d' % page, - data=json.dumps({ - 'query': '''{ - collection(xid: "%s") { - videos(first: %d, page: %d) { - pageInfo { - hasNextPage - nextPage - } + videos = self._call_api( + self._OBJECT_TYPE, playlist_id, + '''videos(allowExplicit: %s, first: %d, page: %d) { edges { node { xid url } } - } - } -}''' % (playlist_id, self._PAGE_SIZE, page) - }).encode(), headers={ - 'Authorization': authorizaion, - 'Origin': 'https://www.dailymotion.com', - })['data']['collection']['videos'] + }''' % ('false' if self._FAMILY_FILTER else 'true', self._PAGE_SIZE, page), + 'Downloading page %d' % page)['videos'] for edge in videos['edges']: node = edge['node'] yield self.url_result( @@ -427,86 +345,49 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - api = self._parse_json(self._search_regex( - r'__PLAYER_CONFIG__\s*=\s*({.+?});', - webpage, 'player config'), playlist_id)['context']['api'] - auth = self._download_json( - api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'), - playlist_id, data=urlencode_postdata({ - 'client_id': api.get('client_id', 'f1a362d288c1b98099c7'), - 'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'), - 'grant_type': 'client_credentials', - })) - authorizaion = '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token']) entries = OnDemandPagedList(functools.partial( - self._fetch_page, playlist_id, 
authorizaion), self._PAGE_SIZE) + self._fetch_page, playlist_id), self._PAGE_SIZE) return self.playlist_result( - entries, playlist_id, - self._og_search_title(webpage)) + entries, playlist_id) -class DailymotionUserIE(DailymotionBaseInfoExtractor): +class DailymotionPlaylistIE(DailymotionPlaylistBaseIE): + IE_NAME = 'dailymotion:playlist' + _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?Px[0-9a-z]+)' + _TESTS = [{ + 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q', + 'info_dict': { + 'id': 'xv4bw', + }, + 'playlist_mincount': 20, + }] + _OBJECT_TYPE = 'collection' + + +class DailymotionUserIE(DailymotionPlaylistBaseIE): IE_NAME = 'dailymotion:user' - _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P[^/]+)' - _MORE_PAGES_INDICATOR = r'(?s)
    .*?[^/]+)' _TESTS = [{ 'url': 'https://www.dailymotion.com/user/nqtv', 'info_dict': { 'id': 'nqtv', - 'title': 'Rémi Gaillard', }, - 'playlist_mincount': 100, + 'playlist_mincount': 152, }, { 'url': 'http://www.dailymotion.com/user/UnderProject', 'info_dict': { 'id': 'UnderProject', - 'title': 'UnderProject', }, - 'playlist_mincount': 1800, - 'expected_warnings': [ - 'Stopped at duplicated page', - ], + 'playlist_mincount': 1000, 'skip': 'Takes too long time', + }, { + 'url': 'https://www.dailymotion.com/user/nqtv', + 'info_dict': { + 'id': 'nqtv', + }, + 'playlist_mincount': 148, + 'params': { + 'age_limit': 0, + }, }] - - def _extract_entries(self, id): - video_ids = set() - processed_urls = set() - for pagenum in itertools.count(1): - page_url = self._PAGE_TEMPLATE % (id, pagenum) - webpage, urlh = self._download_webpage_handle_no_ff( - page_url, id, 'Downloading page %s' % pagenum) - if urlh.geturl() in processed_urls: - self.report_warning('Stopped at duplicated page %s, which is the same as %s' % ( - page_url, urlh.geturl()), id) - break - - processed_urls.add(urlh.geturl()) - - for video_id in re.findall(r'data-xid="(.+?)"', webpage): - if video_id not in video_ids: - yield self.url_result( - 'http://www.dailymotion.com/video/%s' % video_id, - DailymotionIE.ie_key(), video_id) - video_ids.add(video_id) - - if re.search(self._MORE_PAGES_INDICATOR, webpage) is None: - break - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - user = mobj.group('user') - webpage = self._download_webpage( - 'https://www.dailymotion.com/user/%s' % user, user) - full_user = unescapeHTML(self._html_search_regex( - r'' % re.escape(user), - webpage, 'user')) - - return { - '_type': 'playlist', - 'id': user, - 'title': full_user, - 'entries': self._extract_entries(user), - } + _OBJECT_TYPE = 'channel' diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 195875938..a5e4a3e67 100644 --- a/youtube_dl/extractor/vk.py +++ 
b/youtube_dl/extractor/vk.py @@ -216,8 +216,7 @@ class VKIE(VKBaseIE): 'id': 'k3lz2cmXyRuJQSjGHUv', 'ext': 'mp4', 'title': 'md5:d52606645c20b0ddbb21655adaa4f56f', - # TODO: fix test by fixing dailymotion description extraction - 'description': 'md5:c651358f03c56f1150b555c26d90a0fd', + 'description': 'md5:424b8e88cc873217f520e582ba28bb36', 'uploader': 'AniLibria.Tv', 'upload_date': '20160914', 'uploader_id': 'x1p5vl5', From 6471d0d3b8086b282622c84a9eea968d4edfcf9b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 26 Nov 2019 23:57:37 +0100 Subject: [PATCH 707/785] [openload] remove OpenLoad related extractors(closes #11999)(closes #15406) --- youtube_dl/extractor/extractors.py | 5 - youtube_dl/extractor/generic.py | 16 -- youtube_dl/extractor/openload.py | 263 ----------------------------- youtube_dl/extractor/streamango.py | 128 -------------- 4 files changed, 412 deletions(-) delete mode 100644 youtube_dl/extractor/streamango.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index cf4bb8f20..0e349b778 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -796,10 +796,6 @@ from .ooyala import ( OoyalaIE, OoyalaExternalIE, ) -from .openload import ( - OpenloadIE, - VerystreamIE, -) from .ora import OraTVIE from .orf import ( ORFTVthekIE, @@ -1060,7 +1056,6 @@ from .srmediathek import SRMediathekIE from .stanfordoc import StanfordOpenClassroomIE from .steam import SteamIE from .streamable import StreamableIE -from .streamango import StreamangoIE from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE from .streetvoice import StreetVoiceIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3d919f656..743ef47db 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -88,10 +88,6 @@ from .piksel import PikselIE from .videa import VideaIE from .twentymin import TwentyMinutenIE from .ustream import UstreamIE -from 
.openload import ( - OpenloadIE, - VerystreamIE, -) from .videopress import VideoPressIE from .rutube import RutubeIE from .limelight import LimelightBaseIE @@ -3048,18 +3044,6 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key()) - # Look for Openload embeds - openload_urls = OpenloadIE._extract_urls(webpage) - if openload_urls: - return self.playlist_from_matches( - openload_urls, video_id, video_title, ie=OpenloadIE.ie_key()) - - # Look for Verystream embeds - verystream_urls = VerystreamIE._extract_urls(webpage) - if verystream_urls: - return self.playlist_from_matches( - verystream_urls, video_id, video_title, ie=VerystreamIE.ie_key()) - # Look for VideoPress embeds videopress_urls = VideoPressIE._extract_urls(webpage) if videopress_urls: diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 66e38cdb4..0c20d0177 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -3,21 +3,17 @@ from __future__ import unicode_literals import json import os -import re import subprocess import tempfile -from .common import InfoExtractor from ..compat import ( compat_urlparse, compat_kwargs, ) from ..utils import ( check_executable, - determine_ext, encodeArgument, ExtractorError, - get_element_by_id, get_exe_version, is_outdated_version, std_headers, @@ -240,262 +236,3 @@ class PhantomJSwrapper(object): self._load_cookies() return (html, encodeArgument(out)) - - -class OpenloadIE(InfoExtractor): - _DOMAINS = r''' - (?: - openload\.(?:co|io|link|pw)| - oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|online|monster|press|pw|life|live|space|services|website|vip)| - oladblock\.(?:services|xyz|me)|openloed\.co - ) - ''' - _VALID_URL = r'''(?x) - https?:// - (?P - (?:www\.)? 
- %s - )/ - (?:f|embed)/ - (?P[a-zA-Z0-9-_]+) - ''' % _DOMAINS - _EMBED_WORD = 'embed' - _STREAM_WORD = 'f' - _REDIR_WORD = 'stream' - _URL_IDS = ('streamurl', 'streamuri', 'streamurj') - _TESTS = [{ - 'url': 'https://openload.co/f/kUEfGclsU9o', - 'md5': 'bf1c059b004ebc7a256f89408e65c36e', - 'info_dict': { - 'id': 'kUEfGclsU9o', - 'ext': 'mp4', - 'title': 'skyrim_no-audio_1080.mp4', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - }, { - 'url': 'https://openload.co/embed/rjC09fkPLYs', - 'info_dict': { - 'id': 'rjC09fkPLYs', - 'ext': 'mp4', - 'title': 'movie.mp4', - 'thumbnail': r're:^https?://.*\.jpg$', - 'subtitles': { - 'en': [{ - 'ext': 'vtt', - }], - }, - }, - 'params': { - 'skip_download': True, # test subtitles only - }, - }, { - 'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4', - 'only_matching': True, - }, { - 'url': 'https://openload.io/f/ZAn6oz-VZGE/', - 'only_matching': True, - }, { - 'url': 'https://openload.co/f/_-ztPaZtMhM/', - 'only_matching': True, - }, { - # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout - # for title and ext - 'url': 'https://openload.co/embed/Sxz5sADo82g/', - 'only_matching': True, - }, { - # unavailable via https://openload.co/embed/e-Ixz9ZR5L0/ but available - # via https://openload.co/f/e-Ixz9ZR5L0/ - 'url': 'https://openload.co/f/e-Ixz9ZR5L0/', - 'only_matching': True, - }, { - 'url': 'https://oload.tv/embed/KnG-kKZdcfY/', - 'only_matching': True, - }, { - 'url': 'http://www.openload.link/f/KnG-kKZdcfY', - 'only_matching': True, - }, { - 'url': 'https://oload.stream/f/KnG-kKZdcfY', - 'only_matching': True, - }, { - 'url': 'https://oload.xyz/f/WwRBpzW8Wtk', - 'only_matching': True, - }, { - 'url': 'https://oload.win/f/kUEfGclsU9o', - 'only_matching': True, - }, { - 'url': 'https://oload.download/f/kUEfGclsU9o', - 'only_matching': True, - }, { - 'url': 'https://oload.cloud/f/4ZDnBXRWiB8', - 'only_matching': True, - }, { - # Its title has not got its extension but url has it - 'url': 
'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4', - 'only_matching': True, - }, { - 'url': 'https://oload.cc/embed/5NEAbI2BDSk', - 'only_matching': True, - }, { - 'url': 'https://oload.icu/f/-_i4y_F_Hs8', - 'only_matching': True, - }, { - 'url': 'https://oload.fun/f/gb6G1H4sHXY', - 'only_matching': True, - }, { - 'url': 'https://oload.club/f/Nr1L-aZ2dbQ', - 'only_matching': True, - }, { - 'url': 'https://oload.info/f/5NEAbI2BDSk', - 'only_matching': True, - }, { - 'url': 'https://openload.pw/f/WyKgK8s94N0', - 'only_matching': True, - }, { - 'url': 'https://oload.pw/f/WyKgK8s94N0', - 'only_matching': True, - }, { - 'url': 'https://oload.live/f/-Z58UZ-GR4M', - 'only_matching': True, - }, { - 'url': 'https://oload.space/f/IY4eZSst3u8/', - 'only_matching': True, - }, { - 'url': 'https://oload.services/embed/bs1NWj1dCag/', - 'only_matching': True, - }, { - 'url': 'https://oload.online/f/W8o2UfN1vNY/', - 'only_matching': True, - }, { - 'url': 'https://oload.monster/f/W8o2UfN1vNY/', - 'only_matching': True, - }, { - 'url': 'https://oload.press/embed/drTBl1aOTvk/', - 'only_matching': True, - }, { - 'url': 'https://oload.website/embed/drTBl1aOTvk/', - 'only_matching': True, - }, { - 'url': 'https://oload.life/embed/oOzZjNPw9Dc/', - 'only_matching': True, - }, { - 'url': 'https://oload.biz/f/bEk3Gp8ARr4/', - 'only_matching': True, - }, { - 'url': 'https://oload.best/embed/kkz9JgVZeWc/', - 'only_matching': True, - }, { - 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', - 'only_matching': True, - }, { - 'url': 'https://oladblock.xyz/f/b8NWEgkqNLI/', - 'only_matching': True, - }, { - 'url': 'https://oladblock.me/f/b8NWEgkqNLI/', - 'only_matching': True, - }, { - 'url': 'https://openloed.co/f/b8NWEgkqNLI/', - 'only_matching': True, - }, { - 'url': 'https://oload.vip/f/kUEfGclsU9o', - 'only_matching': True, - }] - - @classmethod - def _extract_urls(cls, webpage): - return re.findall( - r'(?x)]+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)' - % 
(cls._DOMAINS, cls._EMBED_WORD), webpage) - - def _extract_decrypted_page(self, page_url, webpage, video_id): - phantom = PhantomJSwrapper(self, required_version='2.0') - webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id) - return webpage - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - host = mobj.group('host') - video_id = mobj.group('id') - - url_pattern = 'https://%s/%%s/%s/' % (host, video_id) - - for path in (self._EMBED_WORD, self._STREAM_WORD): - page_url = url_pattern % path - last = path == self._STREAM_WORD - webpage = self._download_webpage( - page_url, video_id, 'Downloading %s webpage' % path, - fatal=last) - if not webpage: - continue - if 'File not found' in webpage or 'deleted by the owner' in webpage: - if not last: - continue - raise ExtractorError('File not found', expected=True, video_id=video_id) - break - - webpage = self._extract_decrypted_page(page_url, webpage, video_id) - for element_id in self._URL_IDS: - decoded_id = get_element_by_id(element_id, webpage) - if decoded_id: - break - if not decoded_id: - decoded_id = self._search_regex( - (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<', - r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)', - r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<', - r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<', - r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage, - 'stream URL') - video_url = 'https://%s/%s/%s?mime=true' % (host, self._REDIR_WORD, decoded_id) - - title = self._og_search_title(webpage, default=None) or self._search_regex( - r']+class=["\']title["\'][^>]*>([^<]+)', webpage, - 'title', default=None) or self._html_search_meta( - 'description', webpage, 'title', fatal=True) - - entries = self._parse_html5_media_entries(page_url, webpage, video_id) - entry = entries[0] if entries else {} - subtitles = entry.get('subtitles') - - return { - 'id': video_id, - 'title': title, - 'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None), - 
'url': video_url, - 'ext': determine_ext(title, None) or determine_ext(url, 'mp4'), - 'subtitles': subtitles, - } - - -class VerystreamIE(OpenloadIE): - IE_NAME = 'verystream' - - _DOMAINS = r'(?:verystream\.com|woof\.tube)' - _VALID_URL = r'''(?x) - https?:// - (?P - (?:www\.)? - %s - )/ - (?:stream|e)/ - (?P[a-zA-Z0-9-_]+) - ''' % _DOMAINS - _EMBED_WORD = 'e' - _STREAM_WORD = 'stream' - _REDIR_WORD = 'gettoken' - _URL_IDS = ('videolink', ) - _TESTS = [{ - 'url': 'https://verystream.com/stream/c1GWQ9ngBBx/', - 'md5': 'd3e8c5628ccb9970b65fd65269886795', - 'info_dict': { - 'id': 'c1GWQ9ngBBx', - 'ext': 'mp4', - 'title': 'Big Buck Bunny.mp4', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - }, { - 'url': 'https://verystream.com/e/c1GWQ9ngBBx/', - 'only_matching': True, - }] - - def _extract_decrypted_page(self, page_url, webpage, video_id): - return webpage # for Verystream, the webpage is already decrypted diff --git a/youtube_dl/extractor/streamango.py b/youtube_dl/extractor/streamango.py deleted file mode 100644 index f1e17dd88..000000000 --- a/youtube_dl/extractor/streamango.py +++ /dev/null @@ -1,128 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_chr -from ..utils import ( - determine_ext, - ExtractorError, - int_or_none, - js_to_json, -) - - -class StreamangoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net|streamcherry\.com)/(?:f|embed)/(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4', - 'md5': 'e992787515a182f55e38fc97588d802a', - 'info_dict': { - 'id': 'clapasobsptpkdfe', - 'ext': 'mp4', - 'title': '20170315_150006.mp4', - } - }, { - # no og:title - 'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4', - 'info_dict': { - 'id': 'foqebrpftarclpob', - 'ext': 'mp4', - 'title': 'foqebrpftarclpob', - }, - 'params': { - 'skip_download': True, - }, - 'skip': 
'gone', - }, { - 'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4', - 'only_matching': True, - }, { - 'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4', - 'only_matching': True, - }, { - 'url': 'https://streamcherry.com/f/clapasobsptpkdfe/', - 'only_matching': True, - }] - - def _real_extract(self, url): - def decrypt_src(encoded, val): - ALPHABET = '=/+9876543210zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA' - encoded = re.sub(r'[^A-Za-z0-9+/=]', '', encoded) - decoded = '' - sm = [None] * 4 - i = 0 - str_len = len(encoded) - while i < str_len: - for j in range(4): - sm[j % 4] = ALPHABET.index(encoded[i]) - i += 1 - char_code = ((sm[0] << 0x2) | (sm[1] >> 0x4)) ^ val - decoded += compat_chr(char_code) - if sm[2] != 0x40: - char_code = ((sm[1] & 0xf) << 0x4) | (sm[2] >> 0x2) - decoded += compat_chr(char_code) - if sm[3] != 0x40: - char_code = ((sm[2] & 0x3) << 0x6) | sm[3] - decoded += compat_chr(char_code) - return decoded - - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - title = self._og_search_title(webpage, default=video_id) - - formats = [] - for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage): - mobj = re.search(r'(src\s*:\s*[^(]+\(([^)]*)\)[\s,]*)', format_) - if mobj is None: - continue - - format_ = format_.replace(mobj.group(0), '') - - video = self._parse_json( - format_, video_id, transform_source=js_to_json, - fatal=False) or {} - - mobj = re.search( - r'([\'"])(?P(?:(?!\1).)+)\1\s*,\s*(?P\d+)', - mobj.group(1)) - if mobj is None: - continue - - src = decrypt_src(mobj.group('src'), int_or_none(mobj.group('val'))) - if not src: - continue - - ext = determine_ext(src, default_ext=None) - if video.get('type') == 'application/dash+xml' or ext == 'mpd': - formats.extend(self._extract_mpd_formats( - src, video_id, mpd_id='dash', fatal=False)) - else: - formats.append({ - 'url': src, - 'ext': ext or 'mp4', - 'width': 
int_or_none(video.get('width')), - 'height': int_or_none(video.get('height')), - 'tbr': int_or_none(video.get('bitrate')), - }) - - if not formats: - error = self._search_regex( - r']+\bclass=["\']lead[^>]+>(.+?)

    ', webpage, - 'error', default=None) - if not error and '>Sorry' in webpage: - error = 'Video %s is not available' % video_id - if error: - raise ExtractorError(error, expected=True) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'url': url, - 'title': title, - 'formats': formats, - } From 681ac7c92abbbd55be9796de86c2cc0d1d70a4c9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 27 Nov 2019 13:57:30 +0100 Subject: [PATCH 708/785] [vimeo] improve extraction - fix review extraction - fix ondemand extraction - make password protected player case as an expected error(closes #22896) - simplify channel based extractors code --- youtube_dl/extractor/vimeo.py | 177 +++++++++++++++++----------------- 1 file changed, 87 insertions(+), 90 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 9abd59d98..baa46d5f3 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -15,18 +15,20 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + clean_html, determine_ext, + dict_get, ExtractorError, js_to_json, int_or_none, merge_dicts, - NO_DEFAULT, OnDemandPagedList, parse_filesize, RegexNotFoundError, sanitized_Request, smuggle_url, std_headers, + str_or_none, try_get, unified_timestamp, unsmuggle_url, @@ -210,7 +212,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): video_uploader_url = owner.get('url') return { - 'id': video_id, + 'id': str_or_none(video_data.get('id')) or video_id, 'title': self._live_title(video_title) if is_live else video_title, 'uploader': owner.get('name'), 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None, @@ -258,11 +260,11 @@ class VimeoIE(VimeoBaseInfoExtractor): (?: (?: www| - (?Pplayer) + player ) \. )? - vimeo(?Ppro)?\.com/ + vimeo(?:pro)?\.com/ (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) (?:.*?/)? 
(?: @@ -284,7 +286,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'id': '56015672', 'ext': 'mp4', 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", - 'description': 'md5:509a9ad5c9bf97c60faee9203aca4479', + 'description': 'md5:2d3305bad981a06ff79f027f19865021', 'timestamp': 1355990239, 'upload_date': '20121220', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434', @@ -293,6 +295,9 @@ class VimeoIE(VimeoBaseInfoExtractor): 'duration': 10, 'license': 'by-sa', }, + 'params': { + 'format': 'best[protocol=https]', + }, }, { 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', @@ -305,8 +310,13 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_id': 'openstreetmapus', 'uploader': 'OpenStreetMap US', 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', - 'description': 'md5:fd69a7b8d8c34a4e1d2ec2e4afd6ec30', + 'description': 'md5:2c362968038d4499f4d79f88458590c1', 'duration': 1595, + 'upload_date': '20130610', + 'timestamp': 1370893156, + }, + 'params': { + 'format': 'best[protocol=https]', }, }, { @@ -323,6 +333,10 @@ class VimeoIE(VimeoBaseInfoExtractor): 'duration': 3610, 'description': None, }, + 'params': { + 'format': 'best[protocol=https]', + }, + 'expected_warnings': ['Unable to download JSON metadata'], }, { 'url': 'http://vimeo.com/68375962', @@ -341,6 +355,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f', }, 'params': { + 'format': 'best[protocol=https]', 'videopassword': 'youtube-dl', }, }, @@ -441,10 +456,14 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader': '10Ft Films', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/tenfootfilms', 'uploader_id': 'tenfootfilms', + 'description': 'md5:0fa704e05b04f91f40b7f3ca2e801384', + 'upload_date': '20130830', + 'timestamp': 1377853339, }, 'params': { 'skip_download': True, }, + 'expected_warnings': ['Unable to download JSON metadata'], }, { 'url': 
'http://player.vimeo.com/video/68375962', @@ -459,6 +478,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'duration': 10, }, 'params': { + 'format': 'best[protocol=https]', 'videopassword': 'youtube-dl', }, }, @@ -523,7 +543,7 @@ class VimeoIE(VimeoBaseInfoExtractor): def _verify_player_video_password(self, url, video_id, headers): password = self._downloader.params.get('videopassword') if password is None: - raise ExtractorError('This video is protected by a password, use the --video-password option') + raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) data = urlencode_postdata({ 'password': base64.b64encode(password.encode()), }) @@ -552,28 +572,26 @@ class VimeoIE(VimeoBaseInfoExtractor): r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None) # Extract ID from URL - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) orig_url = url - if mobj.group('pro'): + is_pro = 'vimeopro.com/' in url + is_player = '://player.vimeo.com/video/' in url + if is_pro: # some videos require portfolio_id to be present in player url # https://github.com/ytdl-org/youtube-dl/issues/20070 url = self._extract_url(url, self._download_webpage(url, video_id)) - elif mobj.group('player'): + if not url: + url = 'https://vimeo.com/' + video_id + elif is_player: url = 'https://player.vimeo.com/video/' + video_id elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): url = 'https://vimeo.com/' + video_id - # Retrieve video webpage to extract further information - request = sanitized_Request(url, headers=headers) try: - webpage, urlh = self._download_webpage_handle(request, video_id) + # Retrieve video webpage to extract further information + webpage, urlh = self._download_webpage_handle( + url, video_id, headers=headers) redirect_url = compat_str(urlh.geturl()) - # Some URLs redirect to ondemand can't be extracted with - # this extractor right away thus should be passed 
through - # ondemand extractor (e.g. https://vimeo.com/73445910) - if VimeoOndemandIE.suitable(redirect_url): - return self.url_result(redirect_url, VimeoOndemandIE.ie_key()) except ExtractorError as ee: if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: errmsg = ee.cause.read() @@ -600,6 +618,7 @@ class VimeoIE(VimeoBaseInfoExtractor): cc_license = None timestamp = None + video_description = None # Extract the config JSON try: @@ -611,17 +630,17 @@ class VimeoIE(VimeoBaseInfoExtractor): # Sometimes new react-based page is served instead of old one that require # different config URL extraction approach (see # https://github.com/ytdl-org/youtube-dl/pull/7209) - vimeo_clip_page_config = self._search_regex( - r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage, - 'vimeo clip page config') - page_config = self._parse_json(vimeo_clip_page_config, video_id) + page_config = self._parse_json(self._search_regex( + r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});', + webpage, 'page config'), video_id) config_url = page_config['player']['config_url'] cc_license = page_config.get('cc_license') timestamp = try_get( page_config, lambda x: x['clip']['uploaded_on'], compat_str) - config_json = self._download_webpage(config_url, video_id) - config = json.loads(config_json) + video_description = clean_html(dict_get( + page_config, ('description', 'description_html_escaped'))) + config = self._download_json(config_url, video_id) except RegexNotFoundError: # For pro videos or player.vimeo.com urls # We try to find out to which variable is assigned the config dic @@ -675,14 +694,14 @@ class VimeoIE(VimeoBaseInfoExtractor): {'force_feature_id': True}), 'Vimeo') # Extract video description - - video_description = self._html_search_regex( - r'(?s)]*>(.*?)
    ', - webpage, 'description', default=None) + if not video_description: + video_description = self._html_search_regex( + r'(?s)]*>(.*?)
    ', + webpage, 'description', default=None) if not video_description: video_description = self._html_search_meta( 'description', webpage, default=None) - if not video_description and mobj.group('pro'): + if not video_description and is_pro: orig_webpage = self._download_webpage( orig_url, video_id, note='Downloading webpage for description', @@ -690,7 +709,7 @@ class VimeoIE(VimeoBaseInfoExtractor): if orig_webpage: video_description = self._html_search_meta( 'description', orig_webpage, default=None) - if not video_description and not mobj.group('player'): + if not video_description and not is_player: self._downloader.report_warning('Cannot find video description') # Extract upload date @@ -747,9 +766,9 @@ class VimeoIE(VimeoBaseInfoExtractor): return info_dict -class VimeoOndemandIE(VimeoBaseInfoExtractor): +class VimeoOndemandIE(VimeoIE): IE_NAME = 'vimeo:ondemand' - _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/([^/]+/)?(?P[^/?#&]+)' _TESTS = [{ # ondemand video not available via https://vimeo.com/id 'url': 'https://vimeo.com/ondemand/20704', @@ -761,24 +780,32 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor): 'uploader': 'גם סרטים', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms', 'uploader_id': 'gumfilms', + 'description': 'md5:4c027c965e439de4baab621e48b60791', + 'upload_date': '20140906', + 'timestamp': 1410032453, }, 'params': { 'format': 'best[protocol=https]', }, + 'expected_warnings': ['Unable to download JSON metadata'], }, { # requires Referer to be passed along with og:video:url 'url': 'https://vimeo.com/ondemand/36938/126682985', 'info_dict': { - 'id': '126682985', + 'id': '126584684', 'ext': 'mp4', 'title': 'Rävlock, rätt läte på rätt plats', 'uploader': 'Lindroth & Norin', - 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user14430847', - 'uploader_id': 'user14430847', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/lindrothnorin', + 
'uploader_id': 'lindrothnorin', + 'description': 'md5:c3c46a90529612c8279fb6af803fc0df', + 'upload_date': '20150502', + 'timestamp': 1430586422, }, 'params': { 'skip_download': True, }, + 'expected_warnings': ['Unable to download JSON metadata'], }, { 'url': 'https://vimeo.com/ondemand/nazmaalik', 'only_matching': True, @@ -790,16 +817,6 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor): 'only_matching': True, }] - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - return self.url_result( - # Some videos require Referer to be passed along with og:video:url - # similarly to generic vimeo embeds (e.g. - # https://vimeo.com/ondemand/36938/126682985). - VimeoIE._smuggle_referrer(self._og_search_video_url(webpage), url), - VimeoIE.ie_key()) - class VimeoChannelIE(VimeoBaseInfoExtractor): IE_NAME = 'vimeo:channel' @@ -815,6 +832,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): }, 'playlist_mincount': 25, }] + _BASE_URL_TEMPL = 'https://vimeo.com/channels/%s' def _page_url(self, base_url, pagenum): return '%s/videos/page:%d/' % (base_url, pagenum) @@ -886,14 +904,13 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): return self.playlist_result(title_and_entries, list_id, list_title) def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - channel_id = mobj.group('id') - return self._extract_videos(channel_id, 'https://vimeo.com/channels/%s' % channel_id) + channel_id = self._match_id(url) + return self._extract_videos(channel_id, self._BASE_URL_TEMPL % channel_id) class VimeoUserIE(VimeoChannelIE): IE_NAME = 'vimeo:user' - _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P[^/]+)(?:/videos|[#?]|$)' + _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P[^/]+)(?:/videos|[#?]|$)' _TITLE_RE = r']+?class="user">([^<>]+?)' _TESTS = [{ 'url': 'https://vimeo.com/nkistudio/videos', @@ -903,11 +920,7 @@ class VimeoUserIE(VimeoChannelIE): }, 'playlist_mincount': 
66, }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - name = mobj.group('name') - return self._extract_videos(name, 'https://vimeo.com/%s' % name) + _BASE_URL_TEMPL = 'https://vimeo.com/%s' class VimeoAlbumIE(VimeoChannelIE): @@ -969,25 +982,18 @@ class VimeoAlbumIE(VimeoChannelIE): r'\s*(.+?)(?:\s+on Vimeo)?', webpage, 'title', fatal=False)) -class VimeoGroupsIE(VimeoAlbumIE): +class VimeoGroupsIE(VimeoChannelIE): IE_NAME = 'vimeo:group' - _VALID_URL = r'https://vimeo\.com/groups/(?P[^/]+)(?:/(?!videos?/\d+)|$)' + _VALID_URL = r'https://vimeo\.com/groups/(?P[^/]+)(?:/(?!videos?/\d+)|$)' _TESTS = [{ - 'url': 'https://vimeo.com/groups/rolexawards', + 'url': 'https://vimeo.com/groups/kattykay', 'info_dict': { - 'id': 'rolexawards', - 'title': 'Rolex Awards for Enterprise', + 'id': 'kattykay', + 'title': 'Katty Kay', }, - 'playlist_mincount': 73, + 'playlist_mincount': 27, }] - - def _extract_list_title(self, webpage): - return self._og_search_title(webpage, fatal=False) - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - name = mobj.group('name') - return self._extract_videos(name, 'https://vimeo.com/groups/%s' % name) + _BASE_URL_TEMPL = 'https://vimeo.com/groups/%s' class VimeoReviewIE(VimeoBaseInfoExtractor): @@ -1003,7 +1009,9 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): 'title': "DICK HARDWICK 'Comedian'", 'uploader': 'Richard Hardwick', 'uploader_id': 'user21297594', - } + 'description': "Comedian Dick Hardwick's five minute demo filmed in front of a live theater audience.\nEdit by Doug Mattocks", + }, + 'expected_warnings': ['Unable to download JSON metadata'], }, { 'note': 'video player needs Referer', 'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053', @@ -1016,7 +1024,8 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): 'duration': 2773, 'thumbnail': r're:^https?://.*\.jpg$', 'uploader_id': 'user22258446', - } + }, + 'skip': 'video gone', }, { 'note': 'Password protected', 'url': 
'https://vimeo.com/user37284429/review/138823582/c4d865efde', @@ -1036,32 +1045,20 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): def _real_initialize(self): self._login() - def _get_config_url(self, webpage_url, video_id, video_password_verified=False): - webpage = self._download_webpage(webpage_url, video_id) - config_url = self._html_search_regex( - r'data-config-url=(["\'])(?P(?:(?!\1).)+)\1', webpage, - 'config URL', default=None, group='url') - if not config_url: - data = self._parse_json(self._search_regex( - r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data', - default=NO_DEFAULT if video_password_verified else '{}'), video_id) - config = data.get('vimeo_esi', {}).get('config', {}) - config_url = config.get('configUrl') or try_get(config, lambda x: x['clipData']['configUrl']) - if config_url is None: - self._verify_video_password(webpage_url, video_id, webpage) - config_url = self._get_config_url( - webpage_url, video_id, video_password_verified=True) - return config_url - def _real_extract(self, url): page_url, video_id = re.match(self._VALID_URL, url).groups() - config_url = self._get_config_url(url, video_id) + clip_data = self._download_json( + page_url.replace('/review/', '/review/data/'), + video_id)['clipData'] + config_url = clip_data['configUrl'] config = self._download_json(config_url, video_id) info_dict = self._parse_config(config, video_id) - source_format = self._extract_original_format(page_url, video_id) + source_format = self._extract_original_format( + page_url + '/action', video_id) if source_format: info_dict['formats'].append(source_format) self._vimeo_sort_formats(info_dict['formats']) + info_dict['description'] = clean_html(clip_data.get('description')) return info_dict From e3f00f139fc227217325c8e84e0b340e12ee9bb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 28 Nov 2019 23:09:48 +0700 Subject: [PATCH 709/785] [ChangeLog] Actualize [ci skip] --- ChangeLog | 33 +++++++++++++++++++++++++++++++++ 
1 file changed, 33 insertions(+) diff --git a/ChangeLog b/ChangeLog index daaff3eef..d724d75ce 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,36 @@ +version + +Core ++ [utils] Add generic caesar cipher and rot47 +* [utils] Handle rd-suffixed day parts in unified_strdate (#23199) + +Extractors +* [vimeo] Improve extraction + * Fix review extraction + * Fix ondemand extraction + * Make password protected player case as an expected error (#22896) + * Simplify channel based extractors code +- [openload] Remove extractor (#11999) +- [verystream] Remove extractor +- [streamango] Remove extractor (#15406) +* [dailymotion] Improve extraction + * Extract http formats included in m3u8 manifest + * Fix user extraction (#3553, #21415) + + Add suport for User Authentication (#11491) + * Fix password protected videos extraction (#23176) + * Respect age limit option and family filter cookie value (#18437) + * Handle video url playlist query param + * Report allowed countries for geo-restricted videos +* [corus] Improve extraction + + Add support for Series Plus, W Network, YTV, ABC Spark, disneychannel.com + and disneylachaine.ca (#20861) + + Add support for self hosted videos (#22075) + * Detect DRM protection (#14910, #9164) +* [vivo] Fix extraction (#22328, #22279) ++ [bitchute] Extract upload date (#22990, #23193) +* [soundcloud] Update client id (#23214) + + version 2019.11.22 Core From b568561eba6f4aceb87419e21aba11567c5de7da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 28 Nov 2019 23:25:25 +0700 Subject: [PATCH 710/785] release 2019.11.28 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 3 --- youtube_dl/version.py | 2 +- 8 files changed, 14 insertions(+), 17 deletions(-) diff 
--git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index d3e11cdcf..3a94bd621 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.11.22** +- [ ] I've verified that I'm running youtube-dl version **2019.11.28** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.11.22 + [debug] youtube-dl version 2019.11.28 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 51bf4db3b..72bee12aa 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.11.22** +- [ ] I've verified that I'm running youtube-dl version **2019.11.28** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git 
a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 19025ff25..ddf67e951 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.11.22** +- [ ] I've verified that I'm running youtube-dl version **2019.11.28** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index a381b6979..7122e2714 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.11.22** +- [ ] I've verified that I'm running youtube-dl version **2019.11.28** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.11.22 + [debug] youtube-dl version 2019.11.28 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 9c945d5ec..a93882b39 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ 
b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.11.22** +- [ ] I've verified that I'm running youtube-dl version **2019.11.28** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index d724d75ce..d4f809fc6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.11.28 Core + [utils] Add generic caesar cipher and rot47 diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 3dcb026c5..2744dfca8 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -618,7 +618,6 @@ - **OnionStudios** - **Ooyala** - **OoyalaExternal** - - **Openload** - **OraTV** - **orf:fm4**: radio FM4 - **orf:fm4:story**: fm4.orf.at stories @@ -825,7 +824,6 @@ - **Steam** - **Stitcher** - **Streamable** - - **Streamango** - **streamcloud.eu** - **StreamCZ** - **StreetVoice** @@ -976,7 +974,6 @@ - **Vbox7** - **VeeHD** - **Veoh** - - **verystream** - **Vesti**: Вести.Ru - **Vevo** - **VevoPlaylist** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 361809681..1227abc0a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.11.22' +__version__ = '2019.11.28' From 348c6bf1c1a00eec323d6e21ff7b9b12699afe04 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 29 Nov 2019 17:05:06 +0100 Subject: [PATCH 711/785] [utils] handle int values passed to str_to_int --- test/test_utils.py | 1 + youtube_dl/utils.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index e83c8ea11..fed94a906 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -499,6 +499,7 @@ class TestUtil(unittest.TestCase): def test_str_to_int(self): self.assertEqual(str_to_int('123,456'), 123456) 
self.assertEqual(str_to_int('123.456'), 123456) + self.assertEqual(str_to_int(523), 523) def test_url_basename(self): self.assertEqual(url_basename('http://foo.de/'), '') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b14603d8a..328f037a8 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3519,8 +3519,8 @@ def str_or_none(v, default=None): def str_to_int(int_str): """ A more relaxed version of int_or_none """ - if int_str is None: - return None + if not isinstance(int_str, compat_str): + return int_str int_str = re.sub(r'[,\.\+]', '', int_str) return int(int_str) From 7f641d2c7a68b70d6c1e273af108741e5779bc28 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 29 Nov 2019 17:06:34 +0100 Subject: [PATCH 712/785] [adobetv] improve extaction - use OnDemandPagedList for list extractors - reduce show extraction requests - extract original video format and subtitles - add support for adobe tv embeds --- youtube_dl/extractor/adobetv.py | 239 ++++++++++++++++++++--------- youtube_dl/extractor/extractors.py | 1 + 2 files changed, 166 insertions(+), 74 deletions(-) diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py index 008c98e51..80060f037 100644 --- a/youtube_dl/extractor/adobetv.py +++ b/youtube_dl/extractor/adobetv.py @@ -1,25 +1,119 @@ from __future__ import unicode_literals +import functools import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - parse_duration, - unified_strdate, - str_to_int, - int_or_none, float_or_none, + int_or_none, ISO639Utils, - determine_ext, + OnDemandPagedList, + parse_duration, + str_or_none, + str_to_int, + unified_strdate, ) class AdobeTVBaseIE(InfoExtractor): - _API_BASE_URL = 'http://tv.adobe.com/api/v4/' + def _call_api(self, path, video_id, query, note=None): + return self._download_json( + 'http://tv.adobe.com/api/v4/' + path, + video_id, note, query=query)['data'] + + def _parse_subtitles(self, video_data, url_key): + subtitles = {} 
+ for translation in video_data.get('translations', []): + vtt_path = translation.get(url_key) + if not vtt_path: + continue + lang = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium']) + subtitles.setdefault(lang, []).append({ + 'ext': 'vtt', + 'url': vtt_path, + }) + return subtitles + + def _parse_video_data(self, video_data): + video_id = compat_str(video_data['id']) + title = video_data['title'] + + s3_extracted = False + formats = [] + for source in video_data.get('videos', []): + source_url = source.get('url') + if not source_url: + continue + f = { + 'format_id': source.get('quality_level'), + 'fps': int_or_none(source.get('frame_rate')), + 'height': int_or_none(source.get('height')), + 'tbr': int_or_none(source.get('video_data_rate')), + 'width': int_or_none(source.get('width')), + 'url': source_url, + } + original_filename = source.get('original_filename') + if original_filename: + if not (f.get('height') and f.get('width')): + mobj = re.search(r'_(\d+)x(\d+)', original_filename) + if mobj: + f.update({ + 'height': int(mobj.group(2)), + 'width': int(mobj.group(1)), + }) + if original_filename.startswith('s3://') and not s3_extracted: + formats.append({ + 'format_id': 'original', + 'preference': 1, + 'url': original_filename.replace('s3://', 'https://s3.amazonaws.com/'), + }) + s3_extracted = True + formats.append(f) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'thumbnail': video_data.get('thumbnail'), + 'upload_date': unified_strdate(video_data.get('start_date')), + 'duration': parse_duration(video_data.get('duration')), + 'view_count': str_to_int(video_data.get('playcount')), + 'formats': formats, + 'subtitles': self._parse_subtitles(video_data, 'vtt'), + } + + +class AdobeTVEmbedIE(AdobeTVBaseIE): + IE_NAME = 'adobetv:embed' + _VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P\d+)' + _TEST = { + 'url': 
'https://tv.adobe.com/embed/22/4153', + 'md5': 'c8c0461bf04d54574fc2b4d07ac6783a', + 'info_dict': { + 'id': '4153', + 'ext': 'flv', + 'title': 'Creating Graphics Optimized for BlackBerry', + 'description': 'md5:eac6e8dced38bdaae51cd94447927459', + 'thumbnail': r're:https?://.*\.jpg$', + 'upload_date': '20091109', + 'duration': 377, + 'view_count': int, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + video_data = self._call_api( + 'episode/' + video_id, video_id, {'disclosure': 'standard'})[0] + return self._parse_video_data(video_data) class AdobeTVIE(AdobeTVBaseIE): + IE_NAME = 'adobetv' _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?watch/(?P[^/]+)/(?P[^/]+)' _TEST = { @@ -42,45 +136,33 @@ class AdobeTVIE(AdobeTVBaseIE): if not language: language = 'en' - video_data = self._download_json( - self._API_BASE_URL + 'episode/get/?language=%s&show_urlname=%s&urlname=%s&disclosure=standard' % (language, show_urlname, urlname), - urlname)['data'][0] - - formats = [{ - 'url': source['url'], - 'format_id': source.get('quality_level') or source['url'].split('-')[-1].split('.')[0] or None, - 'width': int_or_none(source.get('width')), - 'height': int_or_none(source.get('height')), - 'tbr': int_or_none(source.get('video_data_rate')), - } for source in video_data['videos']] - self._sort_formats(formats) - - return { - 'id': compat_str(video_data['id']), - 'title': video_data['title'], - 'description': video_data.get('description'), - 'thumbnail': video_data.get('thumbnail'), - 'upload_date': unified_strdate(video_data.get('start_date')), - 'duration': parse_duration(video_data.get('duration')), - 'view_count': str_to_int(video_data.get('playcount')), - 'formats': formats, - } + video_data = self._call_api( + 'episode/get', urlname, { + 'disclosure': 'standard', + 'language': language, + 'show_urlname': show_urlname, + 'urlname': urlname, + })[0] + return self._parse_video_data(video_data) class AdobeTVPlaylistBaseIE(AdobeTVBaseIE): - 
def _parse_page_data(self, page_data): - return [self.url_result(self._get_element_url(element_data)) for element_data in page_data] + _PAGE_SIZE = 25 - def _extract_playlist_entries(self, url, display_id): - page = self._download_json(url, display_id) - entries = self._parse_page_data(page['data']) - for page_num in range(2, page['paging']['pages'] + 1): - entries.extend(self._parse_page_data( - self._download_json(url + '&page=%d' % page_num, display_id)['data'])) - return entries + def _fetch_page(self, display_id, query, page): + page += 1 + query['page'] = page + for element_data in self._call_api( + self._RESOURCE, display_id, query, 'Download Page %d' % page): + yield self._process_data(element_data) + + def _extract_playlist_entries(self, display_id, query): + return OnDemandPagedList(functools.partial( + self._fetch_page, display_id, query), self._PAGE_SIZE) class AdobeTVShowIE(AdobeTVPlaylistBaseIE): + IE_NAME = 'adobetv:show' _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?show/(?P[^/]+)' _TEST = { @@ -92,26 +174,31 @@ class AdobeTVShowIE(AdobeTVPlaylistBaseIE): }, 'playlist_mincount': 136, } - - def _get_element_url(self, element_data): - return element_data['urls'][0] + _RESOURCE = 'episode' + _process_data = AdobeTVBaseIE._parse_video_data def _real_extract(self, url): language, show_urlname = re.match(self._VALID_URL, url).groups() if not language: language = 'en' - query = 'language=%s&show_urlname=%s' % (language, show_urlname) + query = { + 'disclosure': 'standard', + 'language': language, + 'show_urlname': show_urlname, + } - show_data = self._download_json(self._API_BASE_URL + 'show/get/?%s' % query, show_urlname)['data'][0] + show_data = self._call_api( + 'show/get', show_urlname, query)[0] return self.playlist_result( - self._extract_playlist_entries(self._API_BASE_URL + 'episode/?%s' % query, show_urlname), - compat_str(show_data['id']), - show_data['show_name'], - show_data['show_description']) + 
self._extract_playlist_entries(show_urlname, query), + str_or_none(show_data.get('id')), + show_data.get('show_name'), + show_data.get('show_description')) class AdobeTVChannelIE(AdobeTVPlaylistBaseIE): + IE_NAME = 'adobetv:channel' _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?channel/(?P[^/]+)(?:/(?P[^/]+))?' _TEST = { @@ -121,24 +208,30 @@ class AdobeTVChannelIE(AdobeTVPlaylistBaseIE): }, 'playlist_mincount': 96, } + _RESOURCE = 'show' - def _get_element_url(self, element_data): - return element_data['url'] + def _process_data(self, show_data): + return self.url_result( + show_data['url'], 'AdobeTVShow', str_or_none(show_data.get('id'))) def _real_extract(self, url): language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups() if not language: language = 'en' - query = 'language=%s&channel_urlname=%s' % (language, channel_urlname) + query = { + 'channel_urlname': channel_urlname, + 'language': language, + } if category_urlname: - query += '&category_urlname=%s' % category_urlname + query['category_urlname'] = category_urlname return self.playlist_result( - self._extract_playlist_entries(self._API_BASE_URL + 'show/?%s' % query, channel_urlname), + self._extract_playlist_entries(channel_urlname, query), channel_urlname) -class AdobeTVVideoIE(InfoExtractor): +class AdobeTVVideoIE(AdobeTVBaseIE): + IE_NAME = 'adobetv:video' _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P\d+)' _TEST = { @@ -160,38 +253,36 @@ class AdobeTVVideoIE(InfoExtractor): video_data = self._parse_json(self._search_regex( r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id) + title = video_data['title'] - formats = [{ - 'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')), - 'url': source['src'], - 'width': int_or_none(source.get('width')), - 'height': int_or_none(source.get('height')), - 'tbr': int_or_none(source.get('bitrate')), - } for source in video_data['sources']] + formats = [] + sources = 
video_data.get('sources') or [] + for source in sources: + source_src = source.get('src') + if not source_src: + continue + formats.append({ + 'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000), + 'format_id': '-'.join(filter(None, [source.get('format'), source.get('label')])), + 'height': int_or_none(source.get('height') or None), + 'tbr': int_or_none(source.get('bitrate') or None), + 'width': int_or_none(source.get('width') or None), + 'url': source_src, + }) self._sort_formats(formats) # For both metadata and downloaded files the duration varies among # formats. I just pick the max one duration = max(filter(None, [ float_or_none(source.get('duration'), scale=1000) - for source in video_data['sources']])) - - subtitles = {} - for translation in video_data.get('translations', []): - lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium']) - if lang_id not in subtitles: - subtitles[lang_id] = [] - subtitles[lang_id].append({ - 'url': translation['vttPath'], - 'ext': 'vtt', - }) + for source in sources])) return { 'id': video_id, 'formats': formats, - 'title': video_data['title'], + 'title': title, 'description': video_data.get('description'), - 'thumbnail': video_data['video'].get('poster'), + 'thumbnail': video_data.get('video', {}).get('poster'), 'duration': duration, - 'subtitles': subtitles, + 'subtitles': self._parse_subtitles(video_data, 'vttPath'), } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0e349b778..0f27c9678 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -21,6 +21,7 @@ from .acast import ( from .adn import ADNIE from .adobeconnect import AdobeConnectIE from .adobetv import ( + AdobeTVEmbedIE, AdobeTVIE, AdobeTVShowIE, AdobeTVChannelIE, From a15adbe461584e2e631d1be97805e81c17cfd3fe Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 29 Nov 2019 17:12:55 +0100 Subject: [PATCH 713/785] [channel9] 
reduce response size and update tests --- youtube_dl/extractor/channel9.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index 81108e704..09cacf6d3 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -32,7 +32,7 @@ class Channel9IE(InfoExtractor): 'upload_date': '20130828', 'session_code': 'KOS002', 'session_room': 'Arena 1A', - 'session_speakers': ['Andrew Coates', 'Brady Gaster', 'Mads Kristensen', 'Ed Blankenship', 'Patrick Klug'], + 'session_speakers': 'count:5', }, }, { 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', @@ -64,15 +64,15 @@ class Channel9IE(InfoExtractor): 'params': { 'skip_download': True, }, - }, { - 'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS', - 'info_dict': { - 'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b', - 'title': 'Channel 9', - }, - 'playlist_mincount': 100, }, { 'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS', + 'info_dict': { + 'id': 'Events/DEVintersection/DEVintersection-2016', + 'title': 'DEVintersection 2016 Orlando Sessions', + }, + 'playlist_mincount': 14, + }, { + 'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS', 'only_matching': True, }, { 'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman', @@ -112,11 +112,11 @@ class Channel9IE(InfoExtractor): episode_data), content_path) content_id = episode_data['contentId'] is_session = '/Sessions(' in episode_data['api'] - content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + 
'?$select=Captions,CommentCount,MediaLengthInSeconds,PublishedDate,Rating,RatingCount,Title,VideoMP4High,VideoMP4Low,VideoMP4Medium,VideoPlayerPreviewImage,VideoWMV,VideoWMVHQ,Views,' if is_session: - content_url += '?$expand=Speakers' + content_url += 'Code,Description,Room,Slides,Speakers,ZipFile&$expand=Speakers' else: - content_url += '?$expand=Authors' + content_url += 'Authors,Body&$expand=Authors' content_data = self._download_json(content_url, content_id) title = content_data['Title'] @@ -210,7 +210,7 @@ class Channel9IE(InfoExtractor): 'id': content_id, 'title': title, 'description': clean_html(content_data.get('Description') or content_data.get('Body')), - 'thumbnail': content_data.get('Thumbnail') or content_data.get('VideoPlayerPreviewImage'), + 'thumbnail': content_data.get('VideoPlayerPreviewImage'), 'duration': int_or_none(content_data.get('MediaLengthInSeconds')), 'timestamp': parse_iso8601(content_data.get('PublishedDate')), 'avg_rating': int_or_none(content_data.get('Rating')), From 88a7a9089a0f3ccdd5e0e6f10b529652a24cbc7e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 29 Nov 2019 17:22:54 +0100 Subject: [PATCH 714/785] [abcotvs] relax _VALID_URL regex and improve metadata extraction(closes #18014) --- youtube_dl/extractor/abcotvs.py | 79 ++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/abcotvs.py b/youtube_dl/extractor/abcotvs.py index 03b92a39c..0bc69a64f 100644 --- a/youtube_dl/extractor/abcotvs.py +++ b/youtube_dl/extractor/abcotvs.py @@ -4,29 +4,30 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( + dict_get, int_or_none, - parse_iso8601, + try_get, ) class ABCOTVSIE(InfoExtractor): IE_NAME = 'abcotvs' IE_DESC = 'ABC Owned Television Stations' - _VALID_URL = r'https?://(?:abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:/[^/]+/(?P[^/]+))?/(?P\d+)' + _VALID_URL = 
r'https?://(?Pabc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:(?:/[^/]+)*/(?P[^/]+))?/(?P\d+)' _TESTS = [ { 'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/', 'info_dict': { - 'id': '472581', + 'id': '472548', 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers', 'ext': 'mp4', - 'title': 'East Bay museum celebrates vintage synthesizers', + 'title': 'East Bay museum celebrates synthesized music', 'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3', 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1421123075, + 'timestamp': 1421118520, 'upload_date': '20150113', - 'uploader': 'Jonathan Bloom', }, 'params': { # m3u8 download @@ -37,39 +38,63 @@ class ABCOTVSIE(InfoExtractor): 'url': 'http://abc7news.com/472581', 'only_matching': True, }, + { + 'url': 'https://6abc.com/man-75-killed-after-being-struck-by-vehicle-in-chester/5725182/', + 'only_matching': True, + }, ] + _SITE_MAP = { + '6abc': 'wpvi', + 'abc11': 'wtvd', + 'abc13': 'ktrk', + 'abc30': 'kfsn', + 'abc7': 'kabc', + 'abc7chicago': 'wls', + 'abc7news': 'kgo', + 'abc7ny': 'wabc', + } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id + site, display_id, video_id = re.match(self._VALID_URL, url).groups() + display_id = display_id or video_id + station = self._SITE_MAP[site] - webpage = self._download_webpage(url, display_id) + data = self._download_json( + 'https://api.abcotvs.com/v2/content', display_id, query={ + 'id': video_id, + 'key': 'otv.web.%s.story' % station, + 'station': station, + })['data'] + video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data + video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id)) + title = video.get('title') or video['linkText'] - m3u8 = self._html_search_meta( - 'contentURL', webpage, 'm3u8 url', fatal=True).split('?')[0] - - formats = self._extract_m3u8_formats(m3u8, 
display_id, 'mp4') + formats = [] + m3u8_url = video.get('m3u8') + if m3u8_url: + formats = self._extract_m3u8_formats( + video['m3u8'].split('?')[0], display_id, 'mp4', m3u8_id='hls', fatal=False) + mp4_url = video.get('mp4') + if mp4_url: + formats.append({ + 'abr': 128, + 'format_id': 'https', + 'height': 360, + 'url': mp4_url, + 'width': 640, + }) self._sort_formats(formats) - title = self._og_search_title(webpage).strip() - description = self._og_search_description(webpage).strip() - thumbnail = self._og_search_thumbnail(webpage) - timestamp = parse_iso8601(self._search_regex( - r'
    \s*