[UNO] Add extractor for United Nations

This commit is contained in:
dirkf 2022-02-07 20:13:59 +00:00
parent 47b0c8697a
commit 09476ecdde
2 changed files with 51 additions and 0 deletions

View file

@ -1388,6 +1388,7 @@ from .dlive import (
from .umg import UMGDeIE from .umg import UMGDeIE
from .unistra import UnistraIE from .unistra import UnistraIE
from .unity import UnityIE from .unity import UnityIE
from .uno import UNOIE
from .uol import UOLIE from .uol import UOLIE
from .uplynk import ( from .uplynk import (
UplynkIE, UplynkIE,

View file

@ -0,0 +1,50 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
unified_timestamp,
url_or_none,
)
class UNOIE(InfoExtractor):
_VALID_URL = r'https?://media\.un\.org/(?:\w+/)+(?P<id>k\d[\w]+)'
_TESTS = [{
'url': 'https://media.un.org/en/asset/k1r/k1r3vy9ikk',
# 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
'info_dict': {
'id': '1_r3vy9ikk',
'ext': 'mp4',
'title': 'md5:abde2a46d396051535e5e6fd6f627a19',
'description': 'md5:2cba11ee153ae3e6ae2c629e7c4e39b0',
'thumbnail': 're:https?://.+/thumbnail/.+',
'duration': 5768,
'timestamp': 1625216872,
'upload_date': '20210702',
'uploader_id': 'UNWebTV_New_York',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
video_id = video_id[1:2] + '_' + video_id[2:]
webpage = self._download_webpage(url, video_id)
title = (
self._html_search_meta(('title', 'og:title'), webpage)
or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title\b', webpage, 'title').rsplit('|', 1)[0]).strip()
partner_id = self._search_regex(r'partnerId\s*:\s*(\d+)\b', webpage, 'Partner ID')
result = self.url_result(
'kaltura:%s:%s' % (partner_id, video_id), 'Kaltura',
video_title=title,
video_id=video_id)
if result:
result.update({
'_type': 'url_transparent',
'description': self._html_search_meta(('description', 'og:description'), webpage, 'description'),
'creator': self._html_search_meta('author', webpage),
'upoader_id': self._html_search_meta('publisher', webpage),
'thumbnail': url_or_none(self._og_search_thumbnail(webpage)),
'timestamp': unified_timestamp(self._og_search_property('updated_time', webpage)),
})
return result