Redesigned link extraction process

2024-07-29 23:55:50 +00:00 · 2017-02-05 21:13:51 +01:00 · 2017-02-05 21:13:51 +01:00 · 63cf1124fa
parent ee32d5ae0f
commit 63cf1124fa
1 changed file with 10 additions and 5 deletions
--- a/youtube_dl/extractor/iqiyi.py
+++ b/youtube_dl/extractor/iqiyi.py
@@ -19,6 +19,7 @@ from ..utils import (
    ExtractorError,
    ohdave_rsa_encrypt,
    remove_start,
    extract_attributes,
 )
@@ -301,10 +302,14 @@ class IqiyiIE(InfoExtractor):
    def _extract_playlist(self, webpage):
        PAGE_SIZE = 50
-        links = re.findall(
+        links = []
-            r'<a[^>]+href="(http://www\.iqiyi\.com/.+\.html)"[^>]+class="site-piclist_pic_link".*>',
+        for link in re.findall(r'<a[^>]+class="[^"]*site-piclist_pic_link[^"]*"[^>]*>', webpage):
-            webpage)
+            attribs = extract_attributes(link)
-        if not links:
+            # It must be a valid url, and links on the playlist page have NO title-Attribute in them
            # (links to other videos on the video page have, so beware of that!)
            if attribs['href'].startswith('http') and 'title' not in attribs:
                links.append(attribs['href'])
        if len(links) == 0:
            return
        album_id = self._search_regex(
@@ -331,7 +336,7 @@ class IqiyiIE(InfoExtractor):
                    break
            else:
                break
-
+        
        return self.playlist_result(entries, album_id, album_title)
    def _real_extract(self, url):