mirror of
https://github.com/cooperhammond/irs.git
synced 2025-07-03 15:18:14 +00:00
Fixed to be compatible with both variants of YouTube's HTML/CSS markup
This commit is contained in:
parent
13c71da9c4
commit
a4915cf85d
|
@ -18,7 +18,7 @@ from spotipy.oauth2 import SpotifyClientCredentials
|
||||||
|
|
||||||
|
|
||||||
# Local utilities
|
# Local utilities
|
||||||
from .utils import YdlUtils, ObjManip, Config
|
from .utils import YdlUtils, ObjManip, Config, CaptchaCheat
|
||||||
from .metadata import Metadata
|
from .metadata import Metadata
|
||||||
from .metadata import find_album_and_track, parse_genre
|
from .metadata import find_album_and_track, parse_genre
|
||||||
|
|
||||||
|
@ -58,6 +58,9 @@ class Ripper:
|
||||||
client_credentials_manager = SpotifyClientCredentials(CLIENT_ID,
|
client_credentials_manager = SpotifyClientCredentials(CLIENT_ID,
|
||||||
CLIENT_SECRET
|
CLIENT_SECRET
|
||||||
# Stupid lint
|
# Stupid lint
|
||||||
|
# and stupid
|
||||||
|
# long var
|
||||||
|
# names
|
||||||
)
|
)
|
||||||
|
|
||||||
self.spotify = spotipy.Spotify(
|
self.spotify = spotipy.Spotify(
|
||||||
|
@ -130,7 +133,7 @@ class Ripper:
|
||||||
|
|
||||||
return locations
|
return locations
|
||||||
|
|
||||||
def find_yt_url(self, song=None, artist=None, additional_search=None):
|
def find_yt_url(self, song=None, artist=None, additional_search=None, caught_by_google=False):
|
||||||
if additional_search is None:
|
if additional_search is None:
|
||||||
additional_search = Config.parse_search_terms(self)
|
additional_search = Config.parse_search_terms(self)
|
||||||
print(str(self.args["hook-text"].get("youtube")))
|
print(str(self.args["hook-text"].get("youtube")))
|
||||||
|
@ -150,19 +153,24 @@ init, or in method arguments.")
|
||||||
search_terms.encode('utf-8'))})
|
search_terms.encode('utf-8'))})
|
||||||
link = "http://www.youtube.com/results?" + query_string
|
link = "http://www.youtube.com/results?" + query_string
|
||||||
|
|
||||||
html_content = urlopen(link).read()
|
if not caught_by_google:
|
||||||
soup = BeautifulSoup(html_content, 'html.parser') # .prettify()
|
html_content = urlopen(link).read()
|
||||||
|
soup = BeautifulSoup(html_content, 'html.parser')
|
||||||
|
else:
|
||||||
|
soup = BeautifulSoup(CaptchaCheat.cheat_it(link), 'html.parser')
|
||||||
|
|
||||||
|
# print(soup.prettify())
|
||||||
|
|
||||||
def find_link(link):
|
def find_link(link):
|
||||||
try:
|
try:
|
||||||
if "yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-\
|
if "yt-simple-endpoint style-scope ytd-video-renderer" in str(" ".join(link["class"])) or \
|
||||||
sessionlink spf-link" in str(" ".join(link["class"])):
|
"yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link" in str(" ".join(link["class"])):
|
||||||
if "&list=" not in link["href"]:
|
if "&list=" not in link["href"]:
|
||||||
return link
|
return link
|
||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
results = list(filter(None, (map(find_link, soup.find_all("a")))))
|
results = list(filter(None, map(find_link, soup.find_all("a"))))
|
||||||
|
|
||||||
garbage_phrases = "cover album live clean rare version full full \
|
garbage_phrases = "cover album live clean rare version full full \
|
||||||
album".split(" ")
|
album".split(" ")
|
||||||
|
@ -198,10 +206,13 @@ album".split(" ")
|
||||||
if self.code is None:
|
if self.code is None:
|
||||||
if additional_search == "lyrics":
|
if additional_search == "lyrics":
|
||||||
return self.find_yt_url(song, artist, "")
|
return self.find_yt_url(song, artist, "")
|
||||||
else:
|
|
||||||
self.code = results[0]
|
|
||||||
|
|
||||||
return ("https://youtube.com" + self.code["href"], self.code["title"])
|
try:
|
||||||
|
return ("https://youtube.com" + self.code["href"], self.code["title"])
|
||||||
|
except TypeError:
|
||||||
|
# Assuming Google catches you trying to search youtube for music ;)
|
||||||
|
return self.find_yt_url(song=song, artist=artist, additional_search=additional_search, caught_by_google=True)
|
||||||
|
|
||||||
|
|
||||||
def album(self, title, artist=None): # Alias for spotify_list("album", ..)
|
def album(self, title, artist=None): # Alias for spotify_list("album", ..)
|
||||||
return self.spotify_list("album", title=title, artist=artist)
|
return self.spotify_list("album", title=title, artist=artist)
|
||||||
|
|
27
irs/utils.py
27
irs/utils.py
|
@ -442,3 +442,30 @@ class Config:
|
||||||
where="post_processors")
|
where="post_processors")
|
||||||
else:
|
else:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
#==============
|
||||||
|
# Captcha Cheat
|
||||||
|
#==============
|
||||||
|
# I basically consider myself a genius for this snippet.
|
||||||
|
|
||||||
|
from splinter import Browser
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
|
|
||||||
|
class CaptchaCheat:
    """Load a page in a real Chrome window and return its HTML.

    Used as a fallback when a plain HTTP fetch of the search page does not
    yield usable results. The browser window stays open until its URL is
    back on the requested one -- presumably after a human has cleared an
    interstitial/captcha page (NOTE(review): confirm against the caller).
    """

    @staticmethod
    def cheat_it(url, t=1):
        """Open ``url`` in Chrome, wait until the browser is on it, return the HTML.

        :param url: address to visit.
        :param t: seconds to sleep between checks of the browser's URL.
        :return: the ``innerHTML`` of the page's ``<html>`` element.

        There is no timeout: the loop polls until the browser's current URL
        (ignoring the scheme) equals ``url``.
        """
        # Path to the chromedriver binary is hard-coded for a typical
        # /usr/local install.
        executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
        with Browser('chrome', **executable_path) as b:
            b.visit(url)
            sleep(t)
            # Compare scheme-less URLs so an http -> https upgrade by the
            # site does not count as "still on another page".
            wanted = CaptchaCheat.strip_it(url)
            while CaptchaCheat.strip_it(
                    b.evaluate_script("document.URL")) != wanted:
                sleep(t)
            return b.evaluate_script(
                "document.getElementsByTagName('html')[0].innerHTML")

    @staticmethod
    def strip_it(s):
        """Return ``s`` without a leading ``http://`` or ``https://``.

        :param s: URL as text (UTF-8 encoded bytes are decoded first).
        :return: the URL text with the scheme prefix removed; ``s`` itself
            (as text) when it carries neither prefix.

        ``str.strip("http://")`` is deliberately not used: it strips any of
        the characters ``h t p : /`` from *both* ends instead of removing the
        prefix, which corrupts URLs such as ``https://example.com/path``.
        """
        if isinstance(s, bytes):
            s = s.decode("utf-8")
        for scheme in ("http://", "https://"):
            if s.startswith(scheme):
                return s[len(scheme):]
        return s
|
||||||
|
|
Loading…
Reference in a new issue