From e4a3df6b42898a4c462ffef3655b0d8bea75223e Mon Sep 17 00:00:00 2001
From: Aditya Kumar Mishra
Date: Wed, 15 Apr 2026 17:44:12 +0530
Subject: [PATCH] core: Show [extractor] and video ID in ERROR messages (Fixes #33254)

---
 test/test_YoutubeDL.py          | 106 ++++++++++++++++++++++++++++++++
 youtube_dl/extractor/common.py  |   5 +-
 youtube_dl/extractor/youtube.py |   2 +-
 youtube_dl/utils.py             |  33 +++++++---
 4 files changed, 135 insertions(+), 11 deletions(-)

diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index d994682b2..66e7b407a 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -1187,5 +1187,111 @@ class TestYoutubeDLCookies(unittest.TestCase):
         self.assertFalse(ydl.cookiejar.get_cookie_header(fmt['url']), msg='Cookies set in cookiejar for wrong domain')
 
 
+class TestExtractorErrorFormatting(unittest.TestCase):
+    """Tests for Issue #33254: [extractor] and video ID in ERROR messages."""
+
+    def test_no_prefix_when_bare(self):
+        """Without ie/video_id the message is unchanged."""
+        e = ExtractorError('test error', expected=True)
+        self.assertEqual(e.orig_msg, 'test error')
+        self.assertIn('test error', str(e))
+        # No bracket prefix before the message
+        self.assertFalse(str(e).startswith('['))
+
+    def test_video_id_only_prefix(self):
+        """Providing video_id prefixes with 'id: msg'."""
+        e = ExtractorError('test error', expected=True, video_id='abc123')
+        self.assertTrue(str(e).startswith('abc123: test error'), str(e))
+        self.assertEqual(e.orig_msg, 'test error')
+        self.assertEqual(e.video_id, 'abc123')
+
+    def test_ie_only_prefix(self):
+        """Providing ie prefixes with '[extractor] msg'."""
+        e = ExtractorError('test error', expected=True, ie='youtube')
+        self.assertTrue(str(e).startswith('[youtube] test error'), str(e))
+        self.assertEqual(e.orig_msg, 'test error')
+        self.assertEqual(e.ie, 'youtube')
+
+    def test_full_prefix(self):
+        """Both ie and video_id produce '[extractor] id: msg'."""
+        e = ExtractorError(
+            'Premieres in 7 hours',
+            expected=True,
+            video_id='-Nb_os-NV5o',
+            ie='youtube',
+        )
+        self.assertEqual(str(e), '[youtube] -Nb_os-NV5o: Premieres in 7 hours')
+        self.assertEqual(e.orig_msg, 'Premieres in 7 hours')
+        self.assertEqual(e.video_id, '-Nb_os-NV5o')
+        self.assertEqual(e.ie, 'youtube')
+
+    def test_setattr_recomputes_msg(self):
+        """Assigning ie/video_id post-construction recomputes str(e)."""
+        e = ExtractorError('test error', expected=True)
+        self.assertNotIn('[youtube]', str(e))
+
+        e.ie = 'youtube'
+        self.assertIn('[youtube]', str(e))
+
+        e.video_id = 'abc123'
+        self.assertIn('abc123', str(e))
+        self.assertIn('[youtube]', str(e))
+        # orig_msg is always preserved
+        self.assertEqual(e.orig_msg, 'test error')
+
+    def test_generic_handler_backfills_ie_and_video_id(self):
+        """InfoExtractor.extract() injects ie/video_id on bare ExtractorError raises."""
+        from youtube_dl.extractor.common import InfoExtractor
+
+        class BareErrorIE(InfoExtractor):
+            IE_NAME = 'bare_test'
+            _VALID_URL = r'http://bare\.test/(?P<id>[^/]+)'
+
+            def _real_extract(self, url):
+                raise ExtractorError('bare error', expected=True)
+
+        ydl = FakeYDL()
+        ie = BareErrorIE(ydl)
+        try:
+            ie.extract('http://bare.test/myvideo')
+            self.fail('ExtractorError not raised')
+        except ExtractorError as e:
+            self.assertEqual(e.ie, 'bare_test')
+            # Falls back to sentinel since no video_id was set
+            self.assertEqual(e.video_id, '**???**')
+            self.assertIn('[bare_test]', str(e))
+
+    def test_raise_no_formats_injects_context(self):
+        """YoutubeDL.raise_no_formats passes extractor and video_id to ExtractorError."""
+        info = {
+            'id': 'testid',
+            'title': 'Test',
+            'extractor': 'testex',
+            'formats': [],
+        }
+        ydl = YDL()
+        try:
+            ydl.raise_no_formats(info, msg='requested format not available', expected=True)
+            self.fail('ExtractorError not raised')
+        except ExtractorError as e:
+            self.assertEqual(e.orig_msg, 'requested format not available')
+            self.assertEqual(e.video_id, 'testid')
+            self.assertEqual(e.ie, 'testex')
+            self.assertIn('[testex]', str(e))
+            self.assertIn('testid', str(e))
+
+    def test_raise_no_formats_missing_id_no_keyerror(self):
+        """raise_no_formats does not raise KeyError when 'id' is absent."""
+        info = {'extractor': 'testex'}  # deliberately missing 'id'
+        ydl = YDL()
+        try:
+            ydl.raise_no_formats(info, msg='Missing \"id\" field in extractor result', forced=True)
+            self.fail('ExtractorError not raised')
+        except ExtractorError as e:
+            self.assertEqual(e.orig_msg, 'Missing "id" field in extractor result')
+            # video_id is None, so it is omitted from the formatted string
+            self.assertIsNone(e.video_id)
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a0901dab5..4a86d09db 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -579,7 +579,10 @@ class InfoExtractor(object):
                     if self.__maybe_fake_ip_and_retry(e.countries):
                         continue
                     raise
-        except ExtractorError:
+        except ExtractorError as e:
+            e.video_id = getattr(e, 'video_id', None) or '**???**'
+            e.ie = getattr(e, 'ie', None) or self.IE_NAME
+            e.traceback = e.traceback or sys.exc_info()[2]
             raise
         except compat_http_client.IncompleteRead as e:
             raise ExtractorError('A network error has occurred.', cause=e, expected=True)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 81a019143..1248adc65 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2804,7 +2804,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     self.raise_login_required(remove_end(reason, 'This helps protect our community. Learn more'))
                 elif traverse_obj(playability_status, ('errorScreen', 'playerCaptchaViewModel', T(dict))):
                     reason += '. YouTube is requiring a captcha challenge before playback'
-                raise ExtractorError(reason, expected=True)
+                raise ExtractorError(reason, expected=True, video_id=video_id)
 
         self._sort_formats(formats)
 
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 02a49ff49..5ef741ad9 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2416,37 +2416,52 @@ def bug_reports_message(before=';'):
 
 class YoutubeDLError(Exception):
     """Base exception for YoutubeDL errors."""
-    pass
+    msg = None
+
+    def __init__(self, msg=None):
+        self.msg = type(self).__name__ if msg is None else msg
+        super(YoutubeDLError, self).__init__(self.msg)
 
 
 class ExtractorError(YoutubeDLError):
     """Error during info extraction."""
 
-    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
+    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
         """ tb, if given, is the original traceback (so that it can be printed out).
         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
         """
         self.orig_msg = msg
         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
             expected = True
-        if video_id is not None:
-            msg = video_id + ': ' + msg
-        if cause:
-            msg += ' (caused by %r)' % cause
-        if not expected:
-            msg += bug_reports_message()
-        super(ExtractorError, self).__init__(msg)
 
         self.traceback = tb
         self.exc_info = sys.exc_info()  # preserve original exception
         self.cause = cause
+        self.ie = ie
         self.video_id = video_id
+        self.expected = expected
+        super(ExtractorError, self).__init__(self.__msg())
+
+    def __msg(self):
+        msg = '[%s] ' % (self.ie,) if self.ie else ''
+        msg += join_nonempty(self.video_id, self.orig_msg, delim=': ')
+        if self.cause:
+            msg += ' (caused by %r)' % self.cause
+        if not self.expected:
+            msg += bug_reports_message()
+        return msg
 
     def format_traceback(self):
         if self.traceback is None:
             return None
         return ''.join(traceback.format_tb(self.traceback))
 
+    def __setattr__(self, name, value):
+        super(ExtractorError, self).__setattr__(name, value)
+        if getattr(self, 'msg', None) and name not in ('msg', 'args'):
+            self.msg = self.__msg() or type(self).__name__
+            self.args = (self.msg, )  # Cannot be property
+
 
 class UnsupportedError(ExtractorError):
     def __init__(self, url):