diff --git a/irs/ripper.py b/irs/ripper.py index 8dd73fa..54c86da 100644 --- a/irs/ripper.py +++ b/irs/ripper.py @@ -18,7 +18,7 @@ from spotipy.oauth2 import SpotifyClientCredentials # Local utilities -from .utils import YdlUtils, ObjManip, Config +from .utils import YdlUtils, ObjManip, Config, CaptchaCheat from .metadata import Metadata from .metadata import find_album_and_track, parse_genre @@ -58,6 +58,9 @@ class Ripper: client_credentials_manager = SpotifyClientCredentials(CLIENT_ID, CLIENT_SECRET # Stupid lint + # and stupid + # long var + # names ) self.spotify = spotipy.Spotify( @@ -130,7 +133,7 @@ class Ripper: return locations - def find_yt_url(self, song=None, artist=None, additional_search=None): + def find_yt_url(self, song=None, artist=None, additional_search=None, caught_by_google=False): if additional_search is None: additional_search = Config.parse_search_terms(self) print(str(self.args["hook-text"].get("youtube"))) @@ -150,19 +153,24 @@ init, or in method arguments.") search_terms.encode('utf-8'))}) link = "http://www.youtube.com/results?" + query_string - html_content = urlopen(link).read() - soup = BeautifulSoup(html_content, 'html.parser') # .prettify() + if not caught_by_google: + html_content = urlopen(link).read() + soup = BeautifulSoup(html_content, 'html.parser') + else: + soup = BeautifulSoup(CaptchaCheat.cheat_it(link), 'html.parser') + + # print(soup.prettify()) def find_link(link): try: - if "yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-\ -sessionlink spf-link" in str(" ".join(link["class"])): + if "yt-simple-endpoint style-scope ytd-video-renderer" in str(" ".join(link["class"])) or \ + "yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link" in str(" ".join(link["class"])): if "&list=" not in link["href"]: return link except KeyError: pass - results = list(filter(None, (map(find_link, soup.find_all("a"))))) + results = list(filter(None, map(find_link, soup.find_all("a")))) garbage_phrases = "cover album live clean rare version full full \ album".split(" ") @@ -198,10 +206,13 @@ album".split(" ") if self.code is None: if additional_search == "lyrics": return self.find_yt_url(song, artist, "") - else: - self.code = results[0] - return ("https://youtube.com" + self.code["href"], self.code["title"]) + try: + return ("https://youtube.com" + self.code["href"], self.code["title"]) + except TypeError: + # Assuming Google catches you trying to search youtube for music ;) + return self.find_yt_url(song=song, artist=artist, additional_search=additional_search, caught_by_google=True) + def album(self, title, artist=None): # Alias for spotify_list("album", ..) return self.spotify_list("album", title=title, artist=artist) diff --git a/irs/utils.py b/irs/utils.py index ff13995..caef49f 100644 --- a/irs/utils.py +++ b/irs/utils.py @@ -442,3 +442,30 @@ class Config: where="post_processors") else: return True + + +#============== +# Captcha Cheat +#============== +# I basically consider myself a genius for this snippet. + +from splinter import Browser +from time import sleep + + +@staticmethods +class CaptchaCheat: + def cheat_it(url, t=1): + executable_path = {'executable_path': '/usr/local/bin/chromedriver'} + with Browser('chrome', **executable_path) as b: + b.visit(url) + sleep(t) + while CaptchaCheat.strip_it(b.evaluate_script("document.URL")) != CaptchaCheat.strip_it(url): + sleep(t) + return b.evaluate_script("document.getElementsByTagName('html')[0].innerHTML") + + def strip_it(s): + s = s.encode("utf-8") + s = s.strip("http://") + s = s.strip("https://") + return s