C-Span links support improvements

This commit is contained in:
Grabien 2021-09-27 22:22:39 +03:00
parent a803582717
commit f8f3b6f2bf
2 changed files with 40 additions and 4 deletions

View file

@ -1,11 +1,14 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import os
import re import re
from .common import InfoExtractor from .common import InfoExtractor, compat_str, compat_urllib_parse_unquote
from .senateisvp import SenateISVPIE
from .ustream import UstreamIE
from ..utils import ( from ..utils import (
determine_ext,
ExtractorError, ExtractorError,
determine_ext,
extract_attributes, extract_attributes,
find_xpath_attr, find_xpath_attr,
get_element_by_attribute, get_element_by_attribute,
@ -18,11 +21,10 @@ from ..utils import (
str_to_int, str_to_int,
unescapeHTML, unescapeHTML,
) )
from .senateisvp import SenateISVPIE
from .ustream import UstreamIE
class CSpanIE(InfoExtractor): class CSpanIE(InfoExtractor):
_BASIC_URL = r'https?://(?:www\.)?c-span\.org/'
_VALID_URL = r'https?://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)' _VALID_URL = r'https?://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
IE_DESC = 'C-SPAN' IE_DESC = 'C-SPAN'
_TESTS = [{ _TESTS = [{
@ -81,6 +83,29 @@ class CSpanIE(InfoExtractor):
}] }]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
@classmethod
def is_basic_url(cls, url):
    """Tell whether *url* matches this class's ``_BASIC_URL`` pattern.

    The compiled pattern is stored on the class as ``_BASIC_URL_RE`` the
    first time it is needed. The cache is looked up in ``cls.__dict__``
    only, so a pattern cached on a parent class is not reused.

    Returns True when the pattern matches at the start of *url*,
    False otherwise.
    """
    try:
        pattern = cls.__dict__['_BASIC_URL_RE']
    except KeyError:
        # First call for this class: compile once and remember it.
        pattern = cls._BASIC_URL_RE = re.compile(cls._BASIC_URL)
    return bool(pattern.match(url))
@classmethod
def get_basic_url(cls, url):
    """Return the first three ``/``-separated pieces of *url*, re-joined.

    Trailing slashes are stripped before splitting, so for an absolute
    URL such as ``https://host/path`` the result is ``https://host``.
    """
    pieces = url.rstrip('/').split('/')
    head = pieces[:3]
    return "/".join(head)
@classmethod
def _get_id(cls, url):
    """Derive an identifier string from *url*.

    If the class's ID pattern matches at the start of *url*, the ``id``
    group is returned (converted with ``compat_str``). Otherwise the last
    path segment of *url* is taken, its file extension removed, and the
    result percent-decoded with ``compat_urllib_parse_unquote``.

    The compiled pattern is cached on the class as ``_ID_URL_RE``.
    """
    if '_ID_URL_RE' not in cls.__dict__:
        # NOTE(review): no `_ID_URL` attribute is defined in the visible
        # part of this class, so reading it directly would raise
        # AttributeError. Fall back to `_VALID_URL`, which also exposes an
        # `id` group -- confirm this is the intended pattern.
        cls._ID_URL_RE = re.compile(getattr(cls, '_ID_URL', cls._VALID_URL))

    # Match once and reuse the result; no assert is needed as a guard.
    m = cls._ID_URL_RE.match(url)
    if m is not None:
        return compat_str(m.group('id'))

    last_segment = url.rstrip('/').split('/')[-1]
    return compat_urllib_parse_unquote(os.path.splitext(last_segment)[0])
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_type = None video_type = None

View file

@ -48,6 +48,7 @@ from .nexx import (
NexxIE, NexxIE,
NexxEmbedIE, NexxEmbedIE,
) )
from .cspan import CSpanIE
from .nbc import NBCSportsVPlayerIE from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .rutv import RUTVIE from .rutv import RUTVIE
@ -3551,6 +3552,16 @@ class GenericIE(InfoExtractor):
if entry_info_dict.get('formats'): if entry_info_dict.get('formats'):
self._sort_formats(entry_info_dict['formats']) self._sort_formats(entry_info_dict['formats'])
if CSpanIE.is_basic_url(url):
basic_url = CSpanIE.get_basic_url(url)
for f in entry_info_dict['formats']:
f.setdefault('http_headers', {})['referer'] = basic_url + "/"
f.setdefault('http_headers', {})['origin'] = basic_url
f.setdefault('http_headers', {})['accept'] = "*/*"
f.setdefault('http_headers', {})['sec-fetch-dest'] = "empty"
f.setdefault('http_headers', {})['sec-fetch-mode'] = "cors"
f.setdefault('http_headers', {})['sec-fetch-site'] = "cross-site"
entries.append(entry_info_dict) entries.append(entry_info_dict)
if len(entries) == 1: if len(entries) == 1: