diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2b5a509
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,10 @@
+# Compiled python modules.
+*.pyc
+
+# Setuptools distribution folder.
+/dist/
+
+# Python egg metadata, regenerated from source files by setuptools.
+/*.egg-info/
+
+/build/
diff --git a/irs/__init__.py b/irs/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/irs/__main__.py b/irs/__main__.py
new file mode 100644
index 0000000..e98c8c8
--- /dev/null
+++ b/irs/__main__.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+"""Command-line entry point for irs."""
+import argparse
+import sys
+
+from .manage import rip_album, rip_mp3
+
+
+def main():
+    """Parse the command line and download the requested song or album.
+
+    Prints the help or usage text and exits with status 1 unless an artist
+    is given together with either an album or a song.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-a', '--artist', dest="artist", help="Specify the artist name")
+
+    media = parser.add_mutually_exclusive_group()
+    media.add_argument('-A', '--album', dest="album", help="Specify album name of the artist")
+    media.add_argument('-s', '--song', dest="song", help="Specify song name of the artist")
+
+    args = parser.parse_args()
+
+    if not args.artist:
+        # Nothing can be downloaded without an artist, so show the help text.
+        parser.print_help()
+        sys.exit(1)
+
+    if not (args.album or args.song):
+        parser.print_usage()
+        print("error: must specify -A/--album or -s/--song if specifying -a/--artist")
+        sys.exit(1)
+
+    if args.album:
+        rip_album(args.album, args.artist)
+    else:
+        rip_mp3(args.song, args.artist)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/irs/manage.py b/irs/manage.py
new file mode 100644
index 0000000..f84267c
--- /dev/null
+++ b/irs/manage.py
@@ -0,0 +1,134 @@
+"""Find songs on YouTube, download them as MP3 files and sort them into folders."""
+import urllib.request, urllib.parse, re, sys, os, requests
+import youtube_dl
+from bs4 import BeautifulSoup
+from .utils import *
+from .metadata import *
+
+# Number of YouTube search results inspected before giving up on a title match.
+_MAX_CANDIDATES = 10
+
+
+def find_mp3(song, artist):
+    """Return the YouTube video id that best matches ``song`` by ``artist``.
+
+    The first ``_MAX_CANDIDATES`` search results are checked in order and the
+    first one whose page title contains the song name (or any of its
+    "/"-separated parts) is returned.  When none matches, the top search
+    result is returned instead.
+
+    Raises:
+        IndexError: if the search page contains no video ids.
+    """
+    print ("\"%s\" by %s" % (song, artist))
+    query_string = urllib.parse.urlencode({"search_query": song + " " + artist})
+    with urllib.request.urlopen("http://www.youtube.com/results?" + query_string) as response:
+        html_content = response.read().decode("utf-8", "replace")
+    search_results = re.findall(r'href=\"\/watch\?v=(.{11})', html_content)
+    if not search_results:
+        raise IndexError("no YouTube results for \"%s\" by %s" % (song, artist))
+
+    # The name fragments are computed once, up front, so ``song`` is never rebound.
+    wanted = [strip_special_chars(part) for part in song.lower().split("/")]
+    wanted = [part for part in wanted if part]
+    for video_id in search_results[:_MAX_CANDIDATES]:
+        audio_url = "http://www.youtube.com/watch?v=" + video_id
+        with urllib.request.urlopen(audio_url) as page:
+            page_title = BeautifulSoup(page, 'html.parser').title
+        if page_title is None or page_title.string is None:
+            continue
+        title = strip_special_chars(page_title.string.lower())
+        if any(part in title for part in wanted):
+            return video_id
+    return search_results[0]
+
+
+def _parse_track_list(visible_texts):
+    """Return the song titles that follow a 'Songs' heading in ``visible_texts``.
+
+    Scanning starts at the first entry containing 'Songs' that is directly
+    followed by "1".  From there on, every entry that parses as an integer is
+    treated as a track number and the entry after it as that track's title.
+    An empty list is returned when no such heading exists.
+    """
+    for start, text in enumerate(visible_texts[:-1]):
+        if 'Songs' in text and visible_texts[start + 1] == "1":
+            break
+    else:
+        return []
+
+    songs = []
+    for index in range(start, len(visible_texts) - 1):
+        try:
+            int(visible_texts[index])
+        except ValueError:
+            continue
+        songs.append(visible_texts[index + 1])
+    return songs
+
+
+def rip_album(album, artist, tried=False, search="album"):
+    """Download every track of ``album`` by ``artist``.
+
+    The track list is scraped from a Google search for the album.  When no
+    track list is found, the search is retried once without the extra
+    ``search`` term before giving up.
+
+    Args:
+        album: Album title.
+        artist: Artist name.
+        tried: True when this call is already the retry.
+        search: Extra term appended to the Google query.
+    """
+    try:
+        songs = _parse_track_list(search_google(album, artist, search))
+        for number, title in enumerate(songs, start=1):
+            rip_mp3(title, artist, part_of_album=True, album=album, tracknum=number)
+    except Exception as error:
+        # Best-effort boundary: report the failure instead of crashing the CLI.
+        print ("%s There was an error with getting the contents of the album '%s': %s"
+               % (color('[-]','FAIL'), album, error))
+        return
+
+    if songs:
+        return
+    if not tried:
+        print ("%s Trying to find album ..." % color('[*]','OKBLUE'))
+        rip_album(album, artist, tried=True, search="")
+    else:
+        print ("%s Could not find album '%s'" % (color('[-]','FAIL'), album))
+
+
+def rip_mp3(song, artist, part_of_album=False, album="", tracknum=""):
+    """Download ``song`` by ``artist`` as an MP3, file it and tag it.
+
+    The file is stored as ``<artist>/<song>.mp3``, or as
+    ``<artist>/<album>/<song>.mp3`` when ``part_of_album`` is true and
+    ``album`` is not empty.
+    """
+    audio_code = find_mp3(song, artist)
+    filename = strip_special_chars(song) + ".mp3"
+    ydl_opts = {
+        'format': 'bestaudio/best',
+        'postprocessors': [{
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': 'mp3',
+        }],
+    }
+    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+        ydl.download(["http://www.youtube.com/watch?v=" + audio_code])
+
+    # Default to the artist folder so ``location`` is always bound, even when
+    # ``part_of_album`` is set without an album name.
+    location = artist
+    if part_of_album and album != "":
+        location = artist + "/" + album
+    os.makedirs(location, exist_ok=True)
+
+    # The downloaded file is looked up in the working directory by the video
+    # id in its name and moved to its final location.
+    for entry in os.listdir("."):
+        if audio_code in entry:
+            os.rename(entry, location + "/" + filename)
+
+    parse_metadata(song, artist, location, filename, tracknum=tracknum, album=album)
diff --git a/irs/metadata.py b/irs/metadata.py
new file mode 100644
index 0000000..9b77999
--- /dev/null
+++ b/irs/metadata.py
@@ -0,0 +1,153 @@
+"""Scrape song metadata from the web and write it into MP3 tags."""
+import mutagen.id3, mutagen.easyid3, mutagen.mp3
+import urllib.request, urllib.parse
+from bs4 import BeautifulSoup, Comment
+import requests
+from .utils import *
+import re
+
+# Tags whose text content is not displayed on a rendered page.
+_HIDDEN_PARENTS = ('style', 'script', '[document]', 'head', 'title')
+
+
+def search_google(song, artist, search_terms=""):
+    """Return the visible text nodes of a Google search results page.
+
+    The query is "<song> <artist> <search_terms>".  Text inside the tags in
+    ``_HIDDEN_PARENTS`` and HTML comments are left out.
+    """
+    def visible(element):
+        if element.parent.name in _HIDDEN_PARENTS:
+            return False
+        # Comments are returned as text nodes too, but are not page content.
+        return not isinstance(element, Comment)
+
+    string = "%s %s %s" % (song, artist, search_terms)
+    filename = 'http://www.google.com/search?q=' + urllib.parse.quote_plus(string)
+    hdr = {
+        'User-Agent':'Mozilla/5.0',
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+    }
+    request = urllib.request.Request(filename, headers=hdr)
+    with urllib.request.urlopen(request) as response:
+        texts = BeautifulSoup(response.read(), 'html.parser').findAll(text=True)
+    return list(filter(visible, texts))
+
+
+def _last_value_after(texts, label):
+    """Return the entry following the last entry that contains ``label``, or None."""
+    value = None
+    for index, text in enumerate(texts[:-1]):
+        if label in text:
+            value = texts[index + 1]
+    return value
+
+
+def parse_metadata(song, artist, location, filename, tracknum="", album=""):
+    """Tag ``location/filename`` with title, artist, album, date, track and cover.
+
+    The album (when not supplied) and the release date are taken from a
+    Google search for the song; values that cannot be found are stored as
+    empty strings.  Cover art is only attempted when an album is known.
+    """
+    path = "%s/%s" % (location, filename)
+    googled = search_google(song, artist)
+    mp3file = mutagen.mp3.MP3(path, ID3=mutagen.easyid3.EasyID3)
+    print ("%s Metadata parsing:" % color('[+]','OKBLUE'))
+
+    # Song title
+    mp3file['title'] = song
+    print ("\t%s Title parsed: " % color('[+]','OKGREEN') + mp3file['title'][0])
+
+    # Artist
+    mp3file['artist'] = artist
+    print ("\t%s Artist parsed: " % color('[+]','OKGREEN') + mp3file['artist'][0])
+
+    # Album
+    if album == "":
+        album = str(_last_value_after(googled, "Album:") or "")
+    mp3file['album'] = album
+    if album != "":
+        print ("\t%s Album parsed: " % color('[+]','OKGREEN') + album)
+    else:
+        print ("\t%s Album not parsed" % color('[-]','FAIL'))
+
+    # Release date
+    date = _last_value_after(googled, "Released:")
+    if date is None:
+        mp3file['date'] = ""
+    else:
+        mp3file['date'] = str(date)
+        print ("\t%s Release date parsed" % color('[+]','OKGREEN'))
+
+    # Track number
+    if tracknum != "":
+        mp3file['tracknumber'] = str(tracknum)
+
+    # Saved before embedding the cover, which reopens the file from disk.
+    mp3file.save()
+
+    # Album art
+    if album != "":
+        try:
+            embed_mp3(get_albumart_url(album, artist), path)
+            print ("\t%s Album art parsed" % color('[+]','OKGREEN'))
+        except Exception:
+            # Cover art is optional; any download or tagging failure is reported only.
+            print ("\t%s Album art not parsed" % color('[-]','FAIL'))
+
+    print ("\n%s \"%s\" downloaded successfully!\n" % (color('[+]','OKGREEN'), song))
+
+
+def embed_mp3(albumart_url, song_location):
+    """Download the image at ``albumart_url`` and embed it as cover art.
+
+    Raises:
+        ValueError: if ``albumart_url`` is empty or None.
+    """
+    if not albumart_url:
+        raise ValueError("no album art URL given")
+    with urllib.request.urlopen(albumart_url) as img:
+        data = img.read()
+        mime = img.headers.get_content_type()
+    if not mime.startswith('image/'):
+        # Fall back to PNG when the server does not report an image type.
+        mime = 'image/png'
+
+    audio = mutagen.mp3.EasyMP3(song_location, ID3=mutagen.id3.ID3)
+    if audio.tags is None:
+        audio.add_tags()
+    audio.tags.add(
+        mutagen.id3.APIC(
+            encoding = 3, # UTF-8
+            mime = mime,
+            type = 3, # 3 is for album art
+            desc = 'Cover',
+            data = data
+        )
+    )
+    audio.save()
+
+
+def get_albumart_url(album, artist):
+    """Return the cover image URL for ``album`` by ``artist``, or None.
+
+    The album is looked up on seekacover.com and the ``src`` of the first image
+    whose ``title`` contains "<album> <artist>" (case-insensitive) is returned.
+    None is returned when the request fails or no image matches.
+    """
+    search = "%s %s" % (album, artist)
+    url = "http://www.seekacover.com/cd/" + urllib.parse.quote_plus(search)
+    try:
+        page = requests.get(url, timeout=30).text
+    except requests.RequestException:
+        return None
+
+    needle = search.lower()
+    for img in BeautifulSoup(page, 'html.parser').findAll('img'):
+        title = img.get('title')
+        src = img.get('src')
+        if title and src and needle in title.lower():
+            # urljoin leaves absolute URLs untouched and resolves relative ones.
+            return urllib.parse.urljoin(url, src)
+    return None
diff --git a/irs/utils.py b/irs/utils.py
new file mode 100644
index 0000000..e371351
--- /dev/null
+++ b/irs/utils.py
@@ -0,0 +1,22 @@
+"""String helpers shared by the irs modules."""
+
+# Characters that strip_special_chars removes: \ / : * ? " < > | - ( )
+_STRIP_TABLE = str.maketrans("", "", '\\/:*?"<>|-()')
+
+
+def strip_special_chars(string):
+    """Return a copy of ``string`` with the characters in ``_STRIP_TABLE`` removed."""
+    return string.translate(_STRIP_TABLE)
+
+
+def color(text, type):
+    """Return ``text`` wrapped in the ANSI escape code named by ``type``.
+
+    ``type`` must be one of the keys of ``types`` below (for example 'OKBLUE'
+    or 'FAIL'); any other value raises KeyError.  The text is always followed
+    by the reset code.
+    """
+    types = {'HEADER': '\033[95m', 'OKBLUE': '\033[94m', 'OKGREEN': '\033[92m',
+        'WARNING': '\033[93m','FAIL': '\033[91m','ENDC': '\033[0m','BOLD': '\033[1m'
+        ,'UNDERLINE': '\033[4m'}
+    return types[type] + text + types['ENDC']
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..d66e550
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,23 @@
+from setuptools import setup
+
+setup(
+    name='irs',
+    version='0.1',
+    description='A music downloader that gets metadata.',
+    url='http://github.com/kepoorhampond/irs',
+    author='Kepoor Hampond',
+    author_email='kepoorh@gmail.com',
+    license='GNU',
+    packages=['irs'],
+    install_requires=[
+        'youtube-dl',
+        'bs4',
+        'mutagen',
+        'requests',
+    ],
+    entry_points={
+        'console_scripts': [
+            'irs = irs.__main__:main'
+        ]
+    },
+)