[subtitles] Use self._download_webpage for extracting the subtitles

self._download_webpage raises ExtractorError for the same exceptions we previously had to catch by hand (URLError, HTTPException, socket.error).
This commit is contained in:
Jaime Marquínez Ferrándiz 2013-09-11 16:24:47 +02:00
parent d82134c339
commit 7fad1c6328
3 changed files with 11 additions and 18 deletions

View file

@ -1,14 +1,11 @@
import re import re
import json import json
import itertools import itertools
import socket
from .common import InfoExtractor from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor from .subtitles import SubtitlesInfoExtractor
from ..utils import ( from ..utils import (
compat_http_client,
compat_urllib_error,
compat_urllib_request, compat_urllib_request,
compat_str, compat_str,
get_element_by_attribute, get_element_by_attribute,
@ -98,10 +95,11 @@ class DailymotionIE(SubtitlesInfoExtractor):
}] }]
def _get_available_subtitles(self, video_id): def _get_available_subtitles(self, video_id):
request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id)
try: try:
sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') sub_list = self._download_webpage(
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
video_id, note=False)
except ExtractorError as err:
self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
return {} return {}
info = json.loads(sub_list) info = json.loads(sub_list)

View file

@ -1,12 +1,8 @@
import socket
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_http_client,
compat_urllib_error,
compat_urllib_request,
compat_str, compat_str,
ExtractorError,
) )
@ -52,8 +48,8 @@ class SubtitlesInfoExtractor(InfoExtractor):
def _request_subtitle_url(self, sub_lang, url): def _request_subtitle_url(self, sub_lang, url):
""" makes the http request for the subtitle """ """ makes the http request for the subtitle """
try: try:
sub = compat_urllib_request.urlopen(url).read().decode('utf-8') sub = self._download_webpage(url, None, note=False)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except ExtractorError as err:
self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err))) self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
return return
if not sub: if not sub:
@ -88,5 +84,3 @@ class SubtitlesInfoExtractor(InfoExtractor):
elif self._downloader.params.get('writeautomaticsub', False): elif self._downloader.params.get('writeautomaticsub', False):
video_subtitles = self._request_automatic_caption(video_id, video_webpage) video_subtitles = self._request_automatic_caption(video_id, video_webpage)
return video_subtitles return video_subtitles

View file

@ -454,10 +454,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return self._decrypt_signature(s) return self._decrypt_signature(s)
def _get_available_subtitles(self, video_id): def _get_available_subtitles(self, video_id):
request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
try: try:
sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') sub_list = self._download_webpage(
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
video_id, note=False)
except ExtractorError as err:
self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
return {} return {}
lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)