Merge branch 'master' into captcha-cheat

2025-11-07 18:34:47 +00:00 · 2017-09-30 18:06:03 -07:00 · 2017-09-30 18:06:03 -07:00 · 979201b66f
parent 18ae50b484 d21ac18059
commit 979201b66f
1 changed file with 64 additions and 27 deletions
--- a/irs/ripper.py
+++ b/irs/ripper.py
@ -133,7 +133,7 @@ class Ripper:

        return locations

-    def find_yt_url(self, song=None, artist=None, additional_search=None, caught_by_google=False):
+    def find_yt_url(self, song=None, artist=None, additional_search=None, caught_by_google=False, first=False):
        if additional_search is None:
            additional_search = Config.parse_search_terms(self)
            print(str(self.args["hook-text"].get("youtube")))
@ -160,11 +160,14 @@ init, or in method arguments.")
            soup = BeautifulSoup(CaptchaCheat.cheat_it(link), 'html.parser')

        # print(soup.prettify())
+        # with open("index.html", "w") as f:
+        #     f.write(soup.prettify().encode('utf-8'))

        def find_link(link):
            try:
                if "yt-simple-endpoint style-scope ytd-video-renderer" in str(" ".join(link["class"])) or \
-                   "yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link" in str(" ".join(link["class"])):
+                   "yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link " in str(" ".join(link["class"])):
+                    
                    if "&list=" not in link["href"]:
                        return link
            except KeyError:
@ -173,7 +176,7 @@ init, or in method arguments.")
        results = list(filter(None, map(find_link, soup.find_all("a"))))

        garbage_phrases = "cover  album  live  clean  rare version  full  full \
-album".split("  ")
+album  row  at  @  session".split("  ")

        self.code = None
        counter = 0
@ -181,11 +184,14 @@ album".split("  ")
        while self.code is None and counter <= 10:
            counter += 1
            for link in results:
+                if first == True:
+                    self.code = link
+                    break
+                if ObjManip.check_garbage_phrases(garbage_phrases,
+                                                  link["title"], song):
+                    continue
                if ObjManip.blank_include(link["title"], song) and \
                        ObjManip.blank_include(link["title"], artist):
-                    if ObjManip.check_garbage_phrases(garbage_phrases,
-                                                      link["title"], song):
-                        continue
                    self.code = link
                    break

@ -200,20 +206,33 @@ album".split("  ")
                        self.code = link
                        break

+            if self.code is None:
+                for link in results:
+                    if ObjManip.check_garbage_phrases(garbage_phrases,
+                                                      link["title"], song):
+                        continue
+                    if ObjManip.blank_include(link["title"], song):
+                        self.code = link
+                        break
+
            if self.code is None:
                song = ObjManip.limit_song_name(song)

-        if self.code is None:
+        if self.code is None and first is not True:
            if additional_search == "lyrics":
-                return self.find_yt_url(song, artist, "")
+                return self.find_yt_url(song, artist, additional_search, caught_by_google, first)

        try:
            return ("https://youtube.com" + self.code["href"], self.code["title"])
        except TypeError:
-            # Assuming Google catches you trying to search youtube for music ;)
-            return self.find_yt_url(song=song, artist=artist, additional_search=additional_search, caught_by_google=True)
-
-
+            if caught_by_google is not True:
+                # Assuming Google catches you trying to search youtube for music ;)
+                print("Trying to bypass google captcha.")
+                return self.find_yt_url(song=song, artist=artist, additional_search=additional_search, caught_by_google=True)
+            elif caught_by_google is True and first is not True: 
+                return self.find_yt_url(song, artist, additional_search, caught_by_google, first=True)
+            
+          
    def album(self, title, artist=None):  # Alias for spotify_list("album", ..)
        """Shorthand for ``self.spotify_list("album", ...)``.

        Forwards ``title`` and ``artist`` as keyword arguments and returns
        whatever ``spotify_list`` returns.
        """
        return self.spotify_list("album", title=title, artist=artist)

@ -252,20 +271,37 @@ with init, or in method arguments.")
        if len(list_of_lists) > 0:
            the_list = None
            for list_ in list_of_lists:
-                if ObjManip.blank_include(list_["name"], title):
-                    if Config.parse_artist(self):
-                        if ObjManip.blank_include(list_["artists"][0]["name"],
-                                                  Config.parse_artist(self)):
-                            the_list = self.spotify.album(list_["uri"])
-                            break
-                    else:
-                        if type == "album":
-                            the_list = self.spotify.album(list_["uri"])
+                if Config.parse_exact(self) == True:
+                    if list_["name"].encode("utf-8") == title.encode("utf-8"):
+                        if Config.parse_artist(self):
+                            if list_["artists"][0]["name"].encode("utf-8") == \
+                                    Config.parse_artist(self).encode('utf-8'):
+                                the_list = self.spotify.album(list_["uri"])
+                                break
                        else:
-                            the_list = self.spotify.user_playlist(
-                                list_["owner"]["id"], list_["uri"])
-                            the_list["artists"] = [{"name": username}]
-                        break
+                            if type == "album":
+                                the_list = self.spotify.album(list_["uri"])
+                            else:
+                                the_list = self.spotify.user_playlist(
+                                    list_["owner"]["id"], list_["uri"])
+                                the_list["artists"] = [{"name": username}]
+                            break
+
+                else:
+                    if ObjManip.blank_include(list_["name"], title):
+                        if Config.parse_artist(self):
+                            if ObjManip.blank_include(list_["artists"][0]["name"],
+                                    Config.parse_artist(self)):
+                                the_list = self.spotify.album(list_["uri"])
+                                break
+                        else:
+                            if type == "album":
+                                the_list = self.spotify.album(list_["uri"])
+                            else:
+                                the_list = self.spotify.user_playlist(
+                                    list_["owner"]["id"], list_["uri"])
+                                the_list["artists"] = [{"name": username}]
+                            break
            if the_list is not None:
                YdlUtils.clear_line()

@ -397,7 +433,6 @@ init, or in method arguments.")
            print(self.args["hook-text"].get("song").format(song, artist))

        file_name = data["file_prefix"] + ObjManip.blank(song, False) + ".mp3"
-
        ydl_opts = {
            'format': 'bestaudio/best',
            'postprocessors': [{
@ -409,9 +444,11 @@ init, or in method arguments.")
            'progress_hooks': [YdlUtils.my_hook],
            'output': "tmp_file",
            'prefer-ffmpeg': True,
-            'ffmpeg_location': os.path.expanduser("~/.irs/bin/")
        }

+        if Config.check_ffmpeg() is False:
+            ydl_opts.update({'ffmpeg_location': os.path.expanduser("~/.irs/bin/")})
+
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([video_url])