[webofstories] Tolerate malformed og:title (Closes #8417)

This commit is contained in:
Sergey M․ 2016-02-28 03:37:48 +06:00
parent 7a0e7779fe
commit 8870bb4653

View file

@ -12,38 +12,52 @@ class WebOfStoriesIE(InfoExtractor):
_VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/' _VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/'
_GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/' _GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/'
_USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/' _USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/'
_TESTS = [ _TESTS = [{
{ 'url': 'http://www.webofstories.com/play/hans.bethe/71',
'url': 'http://www.webofstories.com/play/hans.bethe/71', 'md5': '373e4dd915f60cfe3116322642ddf364',
'md5': '373e4dd915f60cfe3116322642ddf364', 'info_dict': {
'info_dict': { 'id': '4536',
'id': '4536', 'ext': 'mp4',
'ext': 'mp4', 'title': 'The temperature of the sun',
'title': 'The temperature of the sun', 'thumbnail': 're:^https?://.*\.jpg$',
'thumbnail': 're:^https?://.*\.jpg$', 'description': 'Hans Bethe talks about calculating the temperature of the sun',
'description': 'Hans Bethe talks about calculating the temperature of the sun', 'duration': 238,
'duration': 238, }
} }, {
'url': 'http://www.webofstories.com/play/55908',
'md5': '2985a698e1fe3211022422c4b5ed962c',
'info_dict': {
'id': '55908',
'ext': 'mp4',
'title': 'The story of Gemmata obscuriglobus',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
'duration': 169,
}, },
{ 'skip': 'notfound',
'url': 'http://www.webofstories.com/play/55908', }, {
'md5': '2985a698e1fe3211022422c4b5ed962c', # malformed og:title meta
'info_dict': { 'url': 'http://www.webofstories.com/play/54215?o=MS',
'id': '55908', 'info_dict': {
'ext': 'mp4', 'id': '54215',
'title': 'The story of Gemmata obscuriglobus', 'ext': 'mp4',
'thumbnail': 're:^https?://.*\.jpg$', 'title': '"A Leg to Stand On"',
'description': 'Planctomycete talks about The story of Gemmata obscuriglobus', 'thumbnail': 're:^https?://.*\.jpg$',
'duration': 169, 'description': 'Oliver Sacks talks about the death and resurrection of a limb',
} 'duration': 97,
}, },
] 'params': {
'skip_download': True,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage) # Sometimes og:title meta is malformed
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
r'(?s)<strong>Title:\s*</strong>(.+?)<', webpage, 'title')
description = self._html_search_meta('description', webpage) description = self._html_search_meta('description', webpage)
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)