mirror of
https://github.com/cooperhammond/irs.git
synced 2025-01-17 21:17:07 +00:00
Restarted utils file. Rewriting manager.py -> ripper.py with efficiency additions
This commit is contained in:
parent
abb60534d9
commit
846b2b9cb2
84
irs/ripper.py
Normal file
84
irs/ripper.py
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
# Powered by:
|
||||||
|
import youtube_dl
|
||||||
|
import spotipy
|
||||||
|
from spotipy.oauth2 import SpotifyClientCredentials
|
||||||
|
|
||||||
|
# System
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Parsing
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
if sys.version_info[0] >= 3:
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
from urllib.request import urlopen
|
||||||
|
elif sys.version_info[0] < 3:
|
||||||
|
from urllib import urlencode
|
||||||
|
from urllib import urlopen
|
||||||
|
else:
|
||||||
|
print ("Must be using Python 2 or 3")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
# Local utilities
|
||||||
|
import utils
|
||||||
|
|
||||||
|
class Ripper:
    """Find the YouTube video that best matches a song/artist pair.

    Attributes set by ``__init__``:
        args: dict of options; ``find_yt_url`` falls back to its ``"song"``
            and ``"artist"`` keys when those arguments are not passed.
        locations: list, initialised empty.
        spotify: the ``spotipy.Spotify`` client built in ``__init__``.
        authorized: True when the client was built with client credentials,
            False when the credential-less fallback client is used.

    ``find_yt_url`` additionally stores the chosen result anchor in
    ``self.code``.
    """

    # Value of the ``class`` attribute (space-joined) that marks an anchor
    # on the results page as a result link this class is interested in.
    RESULT_LINK_CLASSES = ("yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 "
                           "yt-uix-sessionlink spf-link")

    # Phrases that disqualify a result title unless the requested song
    # title contains them as well.
    GARBAGE_PHRASES = "cover album live clean rare version".split(" ")

    def __init__(self, args=None):
        """Store ``args`` and build the Spotify client.

        ``args`` defaults to a fresh dict per instance (a ``{}`` default
        would be shared by every instance created without arguments).
        """
        self.args = {} if args is None else args
        self.locations = []
        try:
            client_credentials_manager = SpotifyClientCredentials(
                os.environ["SPOTIFY_CLIENT_ID"],
                os.environ["SPOTIFY_CLIENT_SECRET"])
            # Keep the client on the instance; as a local it was discarded
            # as soon as __init__ returned.
            self.spotify = spotipy.Spotify(
                client_credentials_manager=client_credentials_manager)
            self.authorized = True
        except (KeyError, spotipy.oauth2.SpotifyOauthError):
            # KeyError: one of the environment variables is not set.
            # Either way, fall back to a client without credentials.
            self.spotify = spotipy.Spotify()
            self.authorized = False

    @classmethod
    def _is_result_link(cls, link):
        """Return True if ``link`` is a result anchor whose href has no "&list="."""
        try:
            if cls.RESULT_LINK_CLASSES not in " ".join(link["class"]):
                return False
            # NOTE(review): "&list=" presumably marks playlist links - confirm.
            return "&list=" not in link["href"]
        except KeyError:
            # Anchor without a class or href attribute.
            return False

    def find_yt_url(self, song=None, artist=None, additional_search="lyrics"):
        """Search YouTube and return ``(url, title)`` for the best match.

        Args:
            song: song title; defaults to ``self.args["song"]``.
            artist: artist name; defaults to ``self.args["artist"]``.
            additional_search: extra term appended to the search query.

        Selection happens in passes over the scraped result links:
          1. title contains both the song and the artist;
          2. at least 80% of the song's words appear in the title and the
             title contains the artist;
          3. if the search used ``"lyrics"``, retry with no extra term;
          4. otherwise take the first result.
        Passes 1 and 2 skip titles flagged by
        ``utils.check_garbage_phrases``.

        Raises:
            KeyError: ``song``/``artist`` not given and missing from
                ``self.args``.
            IndexError: the final search produced no result links.
        """
        if not song:
            song = self.args["song"]
        if not artist:
            artist = self.args["artist"]

        search_terms = song + " " + artist + " " + additional_search
        query_string = urlencode({"search_query": search_terms})
        link = "http://www.youtube.com/results?" + query_string

        # try/finally rather than ``with``: the Python 2 urllib response
        # object is not a context manager.
        response = urlopen(link)
        try:
            html_content = response.read()
        finally:
            response.close()
        soup = BeautifulSoup(html_content, 'html.parser')

        results = [anchor for anchor in soup.find_all("a")
                   if self._is_result_link(anchor)]

        def is_garbage(title):
            # The helper must be *called*; testing the bare function object
            # is always truthy and would skip every single result.
            return utils.check_garbage_phrases(
                self.GARBAGE_PHRASES, title, song)

        self.code = None

        # Pass 1: title contains both song and artist.
        for link in results:
            title = link["title"]
            if (utils.blank_include(title, song)
                    and utils.blank_include(title, artist)):
                if is_garbage(title):
                    continue
                self.code = link
                break

        # Pass 2: fuzzy match on the song title's words.
        if self.code is None:
            for link in results:
                title = link["title"]
                if is_garbage(title):
                    continue
                if (utils.individual_word_match(song, title) >= 0.8
                        and utils.blank_include(title, artist)):
                    self.code = link
                    break

        if self.code is None:
            if additional_search == "lyrics":
                # Pass 3: search again without the "lyrics" term.
                return self.find_yt_url(song, artist, "")
            if not results:
                raise IndexError(
                    "No YouTube results found for: " + search_terms)
            # Pass 4: nothing matched, settle for the top result.
            self.code = results[0]

        return ("https://youtube.com" + self.code["href"], self.code["title"])
|
||||||
|
|
30
irs/utils.py
Normal file
30
irs/utils.py
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
def check_garbage_phrases(phrases, string, title):
    """Report whether ``string`` holds a phrase that ``title`` does not.

    Both ``string`` and ``title`` are normalised with ``blank`` before the
    substring tests. Returns True on the first phrase found in ``string``
    but absent from ``title``; False when there is no such phrase.
    """
    return any(
        candidate in blank(string) and candidate not in blank(title)
        for candidate in phrases
    )
|
||||||
|
|
||||||
|
def blank(string, downcase=True):
    """Strip every character that is not an ASCII letter, digit or space.

    Args:
        string: text to normalise.
        downcase: when true (the default) the result is also lower-cased.

    Returns:
        The filtered (and optionally lower-cased) string. Spaces are kept
        as-is, so removing punctuation between two spaces leaves a double
        space behind.
    """
    import re

    # Raw string, and no backslash before the space: "\ " is an invalid
    # escape sequence in a normal string literal and triggers a warning on
    # current Python versions. The character class is unchanged.
    string = re.sub(r'[^a-zA-Z0-9 ]', '', string)
    if downcase:
        string = string.lower()
    return string
|
||||||
|
|
||||||
|
def blank_include(this, includes_this):
    """Return True if ``includes_this`` occurs inside ``this``.

    Both arguments are normalised with ``blank`` first, and the comparison
    is a plain substring test on the normalised values.
    """
    haystack = blank(this)
    needle = blank(includes_this)
    return needle in haystack
|
||||||
|
|
||||||
|
def individual_word_match(match_against, match):
    """Return the fraction of ``match_against``'s words found in ``match``.

    Both strings are normalised with ``blank`` and split on whitespace.
    The result is the number of distinct words of ``match_against`` that
    also occur in ``match``, divided by the total number of words in
    ``match_against``.

    Args:
        match_against: reference text whose words are looked for.
        match: text that is searched.

    Returns:
        A float; 0.0 when ``match_against`` contains no words.
    """
    # split() with no argument drops the empty "words" that repeated spaces
    # would otherwise produce (``blank`` leaves double spaces behind when
    # it removes punctuation).
    wanted = blank(match_against).split()
    if not wanted:
        return 0.0
    available = set(blank(match).split())
    found = set(word for word in wanted if word in available)
    # len()/set() replace the non-existent list attributes .uniq/.size;
    # float() keeps true division on Python 2 as well.
    return float(len(found)) / float(len(wanted))
|
Loading…
Reference in a new issue