[channel9] Add low quality formats and modernize

This commit is contained in:
Sergey M․ 2015-10-15 01:52:25 +06:00
parent cb8961eeed
commit 1db82381e3

View file

@ -3,7 +3,11 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..utils import (
ExtractorError,
parse_filesize,
qualities,
)
class Channel9IE(InfoExtractor): class Channel9IE(InfoExtractor):
@ -52,23 +56,6 @@ class Channel9IE(InfoExtractor):
_RSS_URL = 'http://channel9.msdn.com/%s/RSS' _RSS_URL = 'http://channel9.msdn.com/%s/RSS'
# Sorted by quality
_known_formats = ['MP3', 'MP4', 'Mid Quality WMV', 'Mid Quality MP4', 'High Quality WMV', 'High Quality MP4']
def _restore_bytes(self, formatted_size):
if not formatted_size:
return 0
m = re.match(r'^(?P<size>\d+(?:\.\d+)?)\s+(?P<units>[a-zA-Z]+)', formatted_size)
if not m:
return 0
units = m.group('units')
try:
exponent = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'].index(units.upper())
except ValueError:
return 0
size = float(m.group('size'))
return int(size * (1024 ** exponent))
def _formats_from_html(self, html): def _formats_from_html(self, html):
FORMAT_REGEX = r''' FORMAT_REGEX = r'''
(?x) (?x)
@ -78,16 +65,20 @@ class Channel9IE(InfoExtractor):
<h3>File\s+size</h3>\s*(?P<filesize>.*?)\s* <h3>File\s+size</h3>\s*(?P<filesize>.*?)\s*
</div>)? # File size part may be missing </div>)? # File size part may be missing
''' '''
# Extract known formats quality = qualities((
'MP3', 'MP4',
'Low Quality WMV', 'Low Quality MP4',
'Mid Quality WMV', 'Mid Quality MP4',
'High Quality WMV', 'High Quality MP4'))
formats = [{ formats = [{
'url': x.group('url'), 'url': x.group('url'),
'format_id': x.group('quality'), 'format_id': x.group('quality'),
'format_note': x.group('note'), 'format_note': x.group('note'),
'format': '%s (%s)' % (x.group('quality'), x.group('note')), 'format': '%s (%s)' % (x.group('quality'), x.group('note')),
'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate 'filesize_approx': parse_filesize(x.group('filesize')),
'preference': self._known_formats.index(x.group('quality')), 'quality': quality(x.group('quality')),
'vcodec': 'none' if x.group('note') == 'Audio only' else None, 'vcodec': 'none' if x.group('note') == 'Audio only' else None,
} for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats] } for x in list(re.finditer(FORMAT_REGEX, html))]
self._sort_formats(formats) self._sort_formats(formats)