[youtube] Skip WebVTT in DASH manifest (#5297)

This commit is contained in:
Yen Chi Hsuan 2015-04-08 03:45:02 +08:00
parent a35099bd33
commit de5c545648

View file

@@ -788,33 +788,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
errnote='Could not download DASH manifest') errnote='Could not download DASH manifest')
formats = [] formats = []
for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') mime_type = a.attrib.get('mimeType')
if url_el is None: for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
continue url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
format_id = r.attrib['id'] if url_el is None:
video_url = url_el.text continue
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) if mime_type == 'text/vtt':
f = { # TODO implement WebVTT downloading
'format_id': format_id, pass
'url': video_url, elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
'width': int_or_none(r.attrib.get('width')), format_id = r.attrib['id']
'height': int_or_none(r.attrib.get('height')), video_url = url_el.text
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
'asr': int_or_none(r.attrib.get('audioSamplingRate')), f = {
'filesize': filesize, 'format_id': format_id,
'fps': int_or_none(r.attrib.get('frameRate')), 'url': video_url,
} 'width': int_or_none(r.attrib.get('width')),
try: 'height': int_or_none(r.attrib.get('height')),
existing_format = next( 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
fo for fo in formats 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
if fo['format_id'] == format_id) 'filesize': filesize,
except StopIteration: 'fps': int_or_none(r.attrib.get('frameRate')),
full_info = self._formats.get(format_id, {}).copy() }
full_info.update(f) try:
formats.append(full_info) existing_format = next(
else: fo for fo in formats
existing_format.update(f) if fo['format_id'] == format_id)
except StopIteration:
full_info = self._formats.get(format_id, {}).copy()
full_info.update(f)
formats.append(full_info)
else:
existing_format.update(f)
else:
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats return formats
def _real_extract(self, url): def _real_extract(self, url):