Compare commits

...

19 commits

Author SHA1 Message Date
cooperhammond c99e8257e9 updated documentation #85 2022-02-23 10:59:15 -07:00
Cooper Hammond 3bbb0e767a
Merge pull request #84 from imsamuka/master
add option to apply metadata in existing file.

Apologies for the late merge, you sent this pull request right as school was beginning to pick up in earnest and I forgot about it in that rush. Thanks for the great work!
2022-01-27 11:18:58 -07:00
imsamuka 61120f21b0
add option to apply metadata in existing file 2022-01-07 22:15:27 -03:00
Cooper Hammond 390d59b9a0
Merge pull request #83 from imsamuka/fix-options
Fix options and add --ask-skip
2022-01-04 08:46:37 -07:00
imsamuka 3263ff4e07
fix GET requests url encoding 2022-01-03 01:01:58 -03:00
imsamuka 3d4acdeaea
add option to skip tracks on albums/playlists 2022-01-02 20:25:47 -03:00
imsamuka 72938a9b6a
show video title from url 2022-01-02 19:24:54 -03:00
imsamuka f962a0ab75
make youtube url validation safer 2022-01-02 18:04:05 -03:00
imsamuka ac7bc02ec5
fix youtube urls validation 2022-01-02 17:20:37 -03:00
imsamuka bdc63b4c35
fix --url ignoring argument on song.cr 2022-01-02 17:05:00 -03:00
imsamuka 289f1d8c63
fix video selection offset 2022-01-02 15:19:16 -03:00
Cooper Hammond f3776613b4 update version for new binary 2021-07-12 09:10:43 -06:00
Cooper Hammond ff3019e207
Merge pull request #78 from cooperhammond/select-vid-dl
added search terms config option and cli menu
2021-04-15 11:23:38 -06:00
Cooper Hammond fa5f3bb3b7 added search terms config option and cli menu
-S or --select will allow you to choose your song, for playlists or for
albums
2021-04-15 11:22:01 -06:00
Cooper Hammond 8d348031d3 update to 1.3.0 2021-04-15 09:46:55 -06:00
Cooper Hammond 92e8885ae9
Merge pull request #77 from cooperhammond/search-improvement
Search improvement based on song duration
2021-04-15 09:45:27 -06:00
Cooper Hammond 5eaac33345 minor fix to include duration_ms in all song metadata 2021-04-15 09:41:13 -06:00
Cooper Hammond 8c15f7b5e2 song duration now included in ranking 2021-04-14 09:12:08 -06:00
Cooper Hammond 3f12a880e9 minor fix for cross device linking 2021-04-13 22:39:33 -06:00
13 changed files with 374 additions and 229 deletions

3
.gitignore vendored
View file

@ -11,4 +11,5 @@
.ripper.log .ripper.log
ffmpeg ffmpeg
ffprobe ffprobe
youtube-dl youtube-dl
*.temp

View file

@ -124,6 +124,7 @@ Here's what they do:
```yaml ```yaml
binary_directory: ~/.irs/bin binary_directory: ~/.irs/bin
music_directory: ~/Music music_directory: ~/Music
search_terms: "lyrics"
filename_pattern: "{track_number} - {title}" filename_pattern: "{track_number} - {title}"
directory_pattern: "{artist}/{album}" directory_pattern: "{artist}/{album}"
client_key: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX client_key: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
@ -136,6 +137,8 @@ single_folder_playlist:
- `binary_directory`: a path specifying where the downloaded binaries should - `binary_directory`: a path specifying where the downloaded binaries should
be placed be placed
- `music_directory`: a path specifying where downloaded mp3s should be placed. - `music_directory`: a path specifying where downloaded mp3s should be placed.
- `search_terms`: additional search terms appended to the youtube query, which
can help avoid grabbing the wrong audio.
- `filename_pattern`: a pattern for the output filename of the mp3 - `filename_pattern`: a pattern for the output filename of the mp3
- `directory_pattern`: a pattern for the folder structure your mp3s are saved in - `directory_pattern`: a pattern for the folder structure your mp3s are saved in
- `client_key`: a client key from your spotify API application - `client_key`: a client key from your spotify API application

View file

@ -1,5 +1,5 @@
name: irs name: irs
version: 1.2.1 version: 1.4.0
authors: authors:
- Cooper Hammond <kepoorh@gmail.com> - Cooper Hammond <kepoorh@gmail.com>

View file

@ -21,7 +21,9 @@ class CLI
[["-A", "--album"], "album", "string"], [["-A", "--album"], "album", "string"],
[["-p", "--playlist"], "playlist", "string"], [["-p", "--playlist"], "playlist", "string"],
[["-u", "--url"], "url", "string"], [["-u", "--url"], "url", "string"],
[["-g", "--give-url"], "give-url", "bool"], [["-S", "--select"], "select", "bool"],
[["--ask-skip"], "ask_skip", "bool"],
[["--apply"], "apply_file", "string"]
] ]
@args : Hash(String, String) @args : Hash(String, String)
@ -50,8 +52,12 @@ class CLI
#{Style.blue "-s, --song <song>"} Specify song name to download #{Style.blue "-s, --song <song>"} Specify song name to download
#{Style.blue "-A, --album <album>"} Specify the album name to download #{Style.blue "-A, --album <album>"} Specify the album name to download
#{Style.blue "-p, --playlist <playlist>"} Specify the playlist name to download #{Style.blue "-p, --playlist <playlist>"} Specify the playlist name to download
#{Style.blue "-u, --url <url>"} Specify the youtube url to download from (for single songs only) #{Style.blue "-u, --url <url>"} Specify the youtube url to download from
#{Style.blue "-g, --give-url"} Specify the youtube url sources while downloading (for albums or playlists only) #{Style.blue " "} (for albums and playlists, the command-line
#{Style.blue " "} argument is ignored, and it should be '')
#{Style.blue "-S, --select"} Use a menu to choose each song's video source
#{Style.blue "--ask-skip"} Before every playlist/album song, ask to skip
#{Style.blue "--apply <file>"} Apply metadata to a existing file
#{Style.bold "Examples:"} #{Style.bold "Examples:"}
$ #{Style.green %(irs --song "Bohemian Rhapsody" --artist "Queen")} $ #{Style.green %(irs --song "Bohemian Rhapsody" --artist "Queen")}
@ -73,33 +79,32 @@ class CLI
if @args["help"]? || @args.keys.size == 0 if @args["help"]? || @args.keys.size == 0
help help
elsif @args["version"]? elsif @args["version"]?
version version
elsif @args["install"]? elsif @args["install"]?
YdlBinaries.get_both(Config.binary_location) YdlBinaries.get_both(Config.binary_location)
elsif @args["config"]? elsif @args["config"]?
puts ENV["IRS_CONFIG_LOCATION"]? puts ENV["IRS_CONFIG_LOCATION"]?
elsif @args["song"]? && @args["artist"]? elsif @args["song"]? && @args["artist"]?
s = Song.new(@args["song"], @args["artist"]) s = Song.new(@args["song"], @args["artist"])
s.provide_client_keys(Config.client_key, Config.client_secret) s.provide_client_keys(Config.client_key, Config.client_secret)
s.grab_it(@args["url"]?) s.grab_it(flags: @args)
s.organize_it() s.organize_it()
elsif @args["album"]? && @args["artist"]? elsif @args["album"]? && @args["artist"]?
a = Album.new(@args["album"], @args["artist"]) a = Album.new(@args["album"], @args["artist"])
a.provide_client_keys(Config.client_key, Config.client_secret) a.provide_client_keys(Config.client_key, Config.client_secret)
if @args["give-url"]? a.grab_it(flags: @args)
a.grab_it(true)
else
a.grab_it(false)
end
elsif @args["playlist"]? && @args["artist"]? elsif @args["playlist"]? && @args["artist"]?
p = Playlist.new(@args["playlist"], @args["artist"]) p = Playlist.new(@args["playlist"], @args["artist"])
p.provide_client_keys(Config.client_key, Config.client_secret) p.provide_client_keys(Config.client_key, Config.client_secret)
if @args["give-url"]? p.grab_it(flags: @args)
p.grab_it(true)
else
p.grab_it(false)
end
else else
puts Style.red("Those arguments don't do anything when used that way.") puts Style.red("Those arguments don't do anything when used that way.")
puts "Type `irs -h` to see usage." puts "Type `irs -h` to see usage."

View file

@ -7,6 +7,7 @@ require "../search/spotify"
EXAMPLE_CONFIG = <<-EOP EXAMPLE_CONFIG = <<-EOP
#{Style.dim "exampleconfig.yml"} #{Style.dim "exampleconfig.yml"}
#{Style.dim "===="} #{Style.dim "===="}
#{Style.blue "search_terms"}: #{Style.green "\"lyrics\""}
#{Style.blue "binary_directory"}: #{Style.green "~/.irs/bin"} #{Style.blue "binary_directory"}: #{Style.green "~/.irs/bin"}
#{Style.blue "music_directory"}: #{Style.green "~/Music"} #{Style.blue "music_directory"}: #{Style.green "~/Music"}
#{Style.blue "filename_pattern"}: #{Style.green "\"{track_number} - {title}\""} #{Style.blue "filename_pattern"}: #{Style.green "\"{track_number} - {title}\""}
@ -24,6 +25,7 @@ module Config
extend self extend self
@@arguments = [ @@arguments = [
"search_terms",
"binary_directory", "binary_directory",
"music_directory", "music_directory",
"filename_pattern", "filename_pattern",
@ -45,6 +47,10 @@ module Config
exit 1 exit 1
end end
# Returns the value stored under the `search_terms` key of the loaded
# configuration, converted to a `String`.
def search_terms : String
  @@conf["search_terms"].to_s
end
def binary_location : String def binary_location : String
path = @@conf["binary_directory"].to_s path = @@conf["binary_directory"].to_s
return Path[path].expand(home: true).to_s return Path[path].expand(home: true).to_s

View file

@ -1,3 +1,3 @@
module IRS module IRS
VERSION = "0.1.0" VERSION = "1.4.0"
end end

View file

@ -27,7 +27,11 @@ abstract class SpotifyList
end end
# Finds the list, and downloads all of the songs using the `Song` class # Finds the list, and downloads all of the songs using the `Song` class
def grab_it(ask_url : Bool = false) def grab_it(flags = {} of String => String)
ask_url = flags["url"]?
ask_skip = flags["ask_skip"]?
is_playlist = flags["playlist"]?
if !@spotify_searcher.authorized? if !@spotify_searcher.authorized?
raise("Need to call provide_client_keys on Album or Playlist class.") raise("Need to call provide_client_keys on Album or Playlist class.")
end end
@ -43,22 +47,28 @@ abstract class SpotifyList
i = 0 i = 0
contents.each do |datum| contents.each do |datum|
i += 1
if datum["track"]? if datum["track"]?
datum = datum["track"] datum = datum["track"]
end end
data = organize_song_metadata(list, datum) data = organize_song_metadata(list, datum)
song = Song.new(data["name"].to_s, data["artists"][0]["name"].to_s) s_name = data["name"].to_s
s_artist = data["artists"][0]["name"].to_s
song = Song.new(s_name, s_artist)
song.provide_spotify(@spotify_searcher) song.provide_spotify(@spotify_searcher)
song.provide_metadata(data) song.provide_metadata(data)
puts Style.bold("[#{data["track_number"]}/#{contents.size}]") puts Style.bold("[#{i}/#{contents.size}]")
song.grab_it ask_url: ask_url
organize(song) unless ask_skip && skip?(s_name, s_artist, is_playlist)
song.grab_it(flags: flags)
i += 1 organize(song)
else
puts "Skipping..."
end
end end
end end
@ -67,6 +77,13 @@ abstract class SpotifyList
@spotify_searcher.authorize(client_key, client_secret) @spotify_searcher.authorize(client_key, client_secret)
end end
# Prompts the user to decide whether the given song should be skipped.
#
# The prompt always shows *name*; the artist is only appended when
# *is_playlist* is truthy. Returns `true` when the typed answer (ignoring
# leading whitespace and case) starts with "y", `false` for any other answer,
# and `nil` when no line could be read from standard input.
private def skip?(name, artist, is_playlist)
  by_line = is_playlist ? " (by #{Style.green artist})" : ""
  print "Skip #{Style.blue name}" + by_line + "? "

  gets.try do |answer|
    answer.lstrip.downcase.starts_with?("y")
  end
end
private def outputter(key : String, index : Int32) private def outputter(key : String, index : Int32)
text = @outputs[key][index] text = @outputs[key][index]
.gsub("%l", @list_name) .gsub("%l", @list_name)

View file

@ -46,6 +46,7 @@ class TrackMapper
type: Int32, type: Int32,
setter: true setter: true
}, },
duration_ms: Int32,
type: String, type: String,
uri: String uri: String
) )

View file

@ -67,7 +67,8 @@ class Playlist < SpotifyList
FileUtils.mkdir_p(strpath) FileUtils.mkdir_p(strpath)
end end
safe_filename = song.filename.gsub(/[\/]/, "").gsub(" ", " ") safe_filename = song.filename.gsub(/[\/]/, "").gsub(" ", " ")
File.rename("./" + song.filename, (path / safe_filename).to_s) FileUtils.cp("./" + song.filename, (path / safe_filename).to_s)
FileUtils.rm("./" + song.filename)
else else
song.organize_it() song.organize_it()
end end

View file

@ -29,7 +29,7 @@ class Song
Style.green(" + ") + Style.dim("URL found \n"), Style.green(" + ") + Style.dim("URL found \n"),
" Validating URL ...\r", " Validating URL ...\r",
Style.green(" + ") + Style.dim("URL validated \n"), Style.green(" + ") + Style.dim("URL validated \n"),
"URL?: " " URL?: "
], ],
"download" => [ "download" => [
" Downloading video:\n", " Downloading video:\n",
@ -53,11 +53,15 @@ class Song
# Find, downloads, and tags the mp3 song that this class represents. # Find, downloads, and tags the mp3 song that this class represents.
# Optionally takes a youtube URL to download from # Optionally takes a youtube URL to download from
# #
# ``` # ```
# Song.new("Bohemian Rhapsody", "Queen").grab_it # Song.new("Bohemian Rhapsody", "Queen").grab_it
# ``` # ```
def grab_it(url : (String | Nil) = nil, ask_url : Bool = false) def grab_it(url : (String | Nil) = nil, flags = {} of String => String)
passed_url : (String | Nil) = flags["url"]?
passed_file : (String | Nil) = flags["apply_file"]?
select_link = flags["select"]?
outputter("intro", 0) outputter("intro", 0)
if !@spotify_searcher.authorized? && !@metadata if !@spotify_searcher.authorized? && !@metadata
@ -85,63 +89,75 @@ class Song
end end
data = @metadata.as(JSON::Any) data = @metadata.as(JSON::Any)
@song_name = data["name"].as_s
@artist_name = data["artists"][0]["name"].as_s
@filename = "#{Pattern.parse(Config.filename_pattern, data)}.mp3" @filename = "#{Pattern.parse(Config.filename_pattern, data)}.mp3"
if ask_url if passed_file
outputter("url", 4) puts Style.green(" +") + Style.dim(" Moving file: ") + passed_file
url = gets File.rename(passed_file, @filename)
if !url.nil? && url.strip == ""
url = nil
end
end
if !url
outputter("url", 0)
url = Youtube.find_url(@song_name, @artist_name, search_terms: "lyrics")
if !url
raise("There was no url found on youtube for " +
%("#{@song_name}" by "#{@artist_name}. ) +
"Check your input and try again.")
end
outputter("url", 1)
else else
outputter("url", 2) if passed_url
if !Youtube.is_valid_url(url) if passed_url.strip != ""
raise("The url '#{url}' is an invalid youtube URL " + url = passed_url
"Check the URL and try again") else
outputter("url", 4)
url = gets
if !url.nil? && url.strip == ""
url = nil
end
end
end end
outputter("url", 3)
end
outputter("download", 0) if !url
Ripper.download_mp3(url.as(String), @filename) outputter("url", 0)
outputter("download", 1) url = Youtube.find_url(data, flags: flags)
if !url
raise("There was no url found on youtube for " +
%("#{@song_name}" by "#{@artist_name}. ) +
"Check your input and try again.")
end
outputter("url", 1)
else
outputter("url", 2)
url = Youtube.validate_url(url)
if !url
raise("The url is an invalid youtube URL " +
"Check the URL and try again")
end
outputter("url", 3)
end
outputter("download", 0)
Ripper.download_mp3(url.as(String), @filename)
outputter("download", 1)
end
outputter("albumart", 0) outputter("albumart", 0)
temp_albumart_filename = ".tempalbumart.jpg" temp_albumart_filename = ".tempalbumart.jpg"
HTTP::Client.get(data["album"]["images"][0]["url"].to_s) do |response| HTTP::Client.get(data["album"]["images"][0]["url"].as_s) do |response|
File.write(temp_albumart_filename, response.body_io) File.write(temp_albumart_filename, response.body_io)
end end
outputter("albumart", 0) outputter("albumart", 0)
# check if song's metadata has been modded in playlist, update artist accordingly # check if song's metadata has been modded in playlist, update artist accordingly
if data["artists"][-1]["owner"]? if data["artists"][-1]["owner"]?
@artist = data["artists"][-1]["name"].to_s @artist = data["artists"][-1]["name"].as_s
else else
@artist = data["artists"][0]["name"].to_s @artist = data["artists"][0]["name"].as_s
end end
@album = data["album"]["name"].to_s @album = data["album"]["name"].as_s
tagger = Tags.new(@filename) tagger = Tags.new(@filename)
tagger.add_album_art(temp_albumart_filename) tagger.add_album_art(temp_albumart_filename)
tagger.add_text_tag("title", data["name"].to_s) tagger.add_text_tag("title", data["name"].as_s)
tagger.add_text_tag("artist", @artist) tagger.add_text_tag("artist", @artist)
if !@album.empty? if !@album.empty?
tagger.add_text_tag("album", @album) tagger.add_text_tag("album", @album)
end end
if genre = @spotify_searcher.find_genre(data["artists"][0]["id"].to_s) if genre = @spotify_searcher.find_genre(data["artists"][0]["id"].as_s)
tagger.add_text_tag("genre", genre) tagger.add_text_tag("genre", genre)
end end

144
src/search/ranking.cr Normal file
View file

@ -0,0 +1,144 @@
# Type of every value stored in a single video's metadata hash
alias VID_VALUE_CLASS = String
# Metadata of a single youtube video (this module reads the "title" and
# "duration_ms" keys)
alias VID_METADATA_CLASS = Hash(String, VID_VALUE_CLASS)
# Metadata of every video found on a youtube search result page
alias YT_METADATA_CLASS = Array(VID_METADATA_CLASS)

# Scores youtube search results against the spotify metadata of a song so the
# most plausible video can be picked.
module Ranker
  extend self

  # Each phrase found in a video title (and not already in the query) costs
  # the video one point.
  GARBAGE_PHRASES = [
    "cover", "album", "live", "clean", "version", "full", "full album", "row",
    "at", "@", "session", "how to", "npr music", "reimagined", "hr version",
    "trailer",
  ]

  # Each phrase found in a video title (and not already in the query) earns
  # the video one point.
  GOLDEN_PHRASES = [
    "official video", "official music video",
  ]

  # Will rank videos according to their title and the user input, returns a sorted array of hashes
  # of the points a song was assigned and its original index
  # *spotify_metadata* is the metadata (from spotify) of the song that you want
  # *yt_metadata* is an array of hashes with metadata scraped from the youtube search result page
  # *query* is the query that you submitted to youtube for the results you now have
  # ```
  # Ranker.rank_videos(spotify_metadata, yt_metadata, query)
  # => [
  #   {"points" => x, "index" => x},
  #   ...
  # ]
  # ```
  # "index" corresponds to the original index of the song in yt_metadata.
  # The result is sorted by descending points; ties keep their original order.
  def rank_videos(spotify_metadata : JSON::Any, yt_metadata : YT_METADATA_CLASS,
                  query : String) : Array(Hash(String, Int32))
    points = [] of Hash(String, Int32)

    actual_song_name = spotify_metadata["name"].as_s
    actual_artist_name = spotify_metadata["artists"][0]["name"].as_s

    yt_metadata.each_with_index do |vid, index|
      pts = 0
      pts += points_string_compare(actual_song_name, vid["title"])
      pts += points_string_compare(actual_artist_name, vid["title"])
      pts += count_buzzphrases(query, vid["title"])
      pts += compare_timestamps(spotify_metadata, vid)

      points.push({
        "points" => pts,
        "index"  => index,
      })
    end

    # Sort first by points (highest first) and then by original index of the song
    points.sort! do |a, b|
      by_points = b["points"] <=> a["points"]
      by_points == 0 ? a["index"] <=> b["index"] : by_points
    end

    return points
  end

  # SINGULAR COMPONENT OF RANKING ALGORITHM
  # Compares the "duration_ms" of the spotify song with the "duration_ms" of
  # the video. The closer they are, the more points are returned:
  # a difference of at most 1000 gives 3 pts, at most 2000 gives 2 pts,
  # at most 5000 gives 1 pt, anything larger gives 0 pts.
  private def compare_timestamps(spotify_metadata : JSON::Any, node : VID_METADATA_CLASS) : Int32
    actual_time = spotify_metadata["duration_ms"].as_i
    vid_time = node["duration_ms"].to_i

    difference = (actual_time - vid_time).abs

    if difference <= 1000
      return 3
    elsif difference <= 2000
      return 2
    elsif difference <= 5000
      return 1
    else
      return 0
    end
  end

  # SINGULAR COMPONENT OF RANKING ALGORITHM
  # Returns an `Int` based off the number of points worth assigning to the
  # matchiness of the string.
  # If *item2* includes *item1* verbatim, return 3 pts.
  # If *item2* includes *item1* only after both have been downcased and
  # stripped of all nonalphanumeric characters, return 1 pts.
  # Else, return 0 pts.
  private def points_string_compare(item1 : String, item2 : String) : Int32
    return 3 if item2.includes?(item1)

    normalize(item2).includes?(normalize(item1)) ? 1 : 0
  end

  # SINGULAR COMPONENT OF RANKING ALGORITHM
  # Checks if there are any phrases in the title of the video that would
  # indicate audio having what we want.
  # *video_name* is the title of the video, and *query* is what the program
  # searched for. *query* is needed in order to make sure we're not
  # subtracting points from something that's naturally in the title.
  # Returns the number of golden phrases minus the number of garbage phrases.
  private def count_buzzphrases(query : String, video_name : String) : Int32
    # normalize once instead of once per phrase
    normalized_query = normalize(query)
    normalized_title = normalize(video_name)

    good_phrases = count_phrases(GOLDEN_PHRASES, normalized_query, normalized_title)
    bad_phrases = count_phrases(GARBAGE_PHRASES, normalized_query, normalized_title)

    return good_phrases - bad_phrases
  end

  # Counts the *phrases* that show up in *normalized_title* without already
  # being part of *normalized_query*.
  private def count_phrases(phrases : Array(String), normalized_query : String,
                            normalized_title : String) : Int32
    phrases.count do |phrase|
      needle = normalize(phrase)
      # A phrase made only of symbols (e.g. "@") normalizes to "", which every
      # query includes, so it is never counted.
      !normalized_query.includes?(needle) && normalized_title.includes?(needle)
    end
  end

  # Downcases *text* and strips every character that is not a-z or 0-9.
  private def normalize(text : String) : String
    text.downcase.gsub(/[^a-z0-9]/, "")
  end
end

View file

@ -60,9 +60,10 @@ class SpotifySearcher
# ``` # ```
def find_item(item_type : String, item_parameters : Hash, offset = 0, def find_item(item_type : String, item_parameters : Hash, offset = 0,
limit = 20) : JSON::Any? limit = 20) : JSON::Any?
query = generate_query(item_type, item_parameters, offset, limit) query = generate_query(item_type, item_parameters)
url = @root_url.join("search?q=#{query}").to_s url = "search?q=#{query}&type=#{item_type}&limit=#{limit}&offset=#{offset}"
url = @root_url.join(url).to_s
response = HTTP::Client.get(url, headers: @access_header) response = HTTP::Client.get(url, headers: @access_header)
error_check(response) error_check(response)
@ -228,8 +229,7 @@ class SpotifySearcher
# Generates url to run a GET request against to the Spotify open API # Generates url to run a GET request against to the Spotify open API
# Returns a `String.` # Returns a `String.`
private def generate_query(item_type : String, item_parameters : Hash, private def generate_query(item_type : String, item_parameters : Hash) : String
offset : Int32, limit : Int32) : String
query = "" query = ""
# parameter keys to exclude in the api request. These values will be put # parameter keys to exclude in the api request. These values will be put
@ -241,9 +241,9 @@ class SpotifySearcher
if k == "name" if k == "name"
# will remove the "name:<title>" param from the query # will remove the "name:<title>" param from the query
if item_type == "playlist" if item_type == "playlist"
query += item_parameters[k].gsub(" ", "+") + "+" query += item_parameters[k] + "+"
else else
query += param_encode(item_type, item_parameters[k]) query += as_field(item_type, item_parameters[k])
end end
# check if the key is to be excluded # check if the key is to be excluded
@ -254,14 +254,21 @@ class SpotifySearcher
# NOTE: playlist names will be inserted into the query normally, without # NOTE: playlist names will be inserted into the query normally, without
# a parameter. # a parameter.
else else
query += param_encode(k, item_parameters[k]) query += as_field(k, item_parameters[k])
end end
end end
# extra api info return URI.encode(query.rchop("+"))
query += "&type=#{item_type}&limit=#{limit}&offset=#{offset}" end
return query # Returns a `String` encoded for the spotify api
#
# ```
# query_encode("album", "A Night At The Opera")
# => "album:A Night At The Opera+"
# ```
private def as_field(key, value) : String
return "#{key}:#{value}+"
end end
# Ranks the given items based off of the info from parameters. # Ranks the given items based off of the info from parameters.
@ -327,15 +334,6 @@ class SpotifySearcher
end end
end end
# Returns a `String` encoded for the spotify api
#
# ```
# query_encode("album", "A Night At The Opera")
# => "album:A+Night+At+The+Opera"
# ```
private def param_encode(key : String, value : String) : String
return key.gsub(" ", "+") + ":" + value.gsub(" ", "+") + "+"
end
end end
# puts SpotifySearcher.new() # puts SpotifySearcher.new()

View file

@ -3,6 +3,11 @@ require "xml"
require "json" require "json"
require "uri" require "uri"
require "./ranking"
require "../bottle/config"
require "../bottle/styles"
module Youtube module Youtube
extend self extend self
@ -12,188 +17,95 @@ module Youtube
"yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link ", "yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link ",
] ]
GARBAGE_PHRASES = [ # Note that VID_VALUE_CLASS, VID_METADATA_CLASS, and YT_METADATA_CLASS are found in ranking.cr
"cover", "album", "live", "clean", "version", "full", "full album", "row",
"at", "@", "session", "how to", "npr music", "reimagined", "hr version",
"trailer",
]
GOLDEN_PHRASES = [
"official video", "official music video",
]
alias NODES_CLASS = Array(Hash(String, String))
# Checks if the given URL is a valid youtube URL
#
# ```
# Youtube.is_valid_url("https://www.youtube.com/watch?v=NOTANACTUALVIDEOID")
# => false
# ```
def is_valid_url(url : String) : Bool
uri = URI.parse url
# is it a video on youtube, with a query
query = uri.query
if uri.host != "www.youtube.com" || uri.path != "/watch" || !query
return false
end
queries = query.split('&')
# find the video ID
i = 0
while i < queries.size
if queries[i].starts_with?("v=")
vID = queries[i][2..-1]
break
end
i += 1
end
if !vID
return false
end
# this is an internal endpoint to validate the video ID
response = HTTP::Client.get "https://www.youtube.com/get_video_info?video_id=#{vID}"
return response.body.includes?("status=ok")
end
# Finds a youtube url based off of the given information. # Finds a youtube url based off of the given information.
# The query to youtube is constructed like this: # The query to youtube is constructed like this:
# "<song_name> <artist_name> <search terms>" # "<song_name> <artist_name> <search terms>"
# If *download_first* is provided, the first link found will be downloaded. # If *download_first* is provided, the first link found will be downloaded.
# If *select_link* is provided, a menu of options will be shown for the user to choose their poison
# #
# ``` # ```
# Youtube.find_url("Bohemian Rhapsody", "Queen") # Youtube.find_url("Bohemian Rhapsody", "Queen")
# => "https://www.youtube.com/watch?v=dQw4w9WgXcQ" # => "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
# ``` # ```
def find_url(song_name : String, artist_name : String, search_terms = "", def find_url(spotify_metadata : JSON::Any,
download_first = false) : String? flags = {} of String => String) : String?
query = (song_name + " " + artist_name + " " + search_terms).strip.gsub(" ", "+")
url = "https://www.youtube.com/results?search_query=" + query search_terms = Config.search_terms
response = HTTP::Client.get(url) select_link = flags["select"]?
valid_nodes = get_video_link_nodes(response.body) song_name = spotify_metadata["name"].as_s
artist_name = spotify_metadata["artists"][0]["name"].as_s
if valid_nodes.size == 0 human_query = "#{song_name} #{artist_name} #{search_terms.strip}"
puts "There were no results for that query." params = HTTP::Params.encode({"search_query" => human_query})
response = HTTP::Client.get("https://www.youtube.com/results?#{params}")
yt_metadata = get_yt_search_metadata(response.body)
if yt_metadata.size == 0
puts "There were no results for this query on youtube: \"#{human_query}\""
return nil return nil
end end
root = "https://youtube.com" root = "https://youtube.com"
ranked = Ranker.rank_videos(spotify_metadata, yt_metadata, human_query)
return root + valid_nodes[0]["href"] if download_first if select_link
return root + select_link_menu(spotify_metadata, yt_metadata)
ranked = rank_videos(song_name, artist_name, query, valid_nodes) end
begin begin
return root + valid_nodes[ranked[0]["index"]]["href"] puts Style.dim(" Video: ") + yt_metadata[ranked[0]["index"]]["title"]
return root + yt_metadata[ranked[0]["index"]]["href"]
rescue IndexError rescue IndexError
return nil return nil
end end
exit 1
end end
# Will rank videos according to their title and the user input # Presents a menu with song info for the user to choose which url they want to download
# Return: private def select_link_menu(spotify_metadata : JSON::Any,
# [ yt_metadata : YT_METADATA_CLASS) : String
# {"points" => x, "index" => x}, puts Style.dim(" Spotify info: ") +
# ... Style.bold("\"" + spotify_metadata["name"].to_s) + "\" by \"" +
# ] Style.bold(spotify_metadata["artists"][0]["name"].to_s + "\"") +
private def rank_videos(song_name : String, artist_name : String, " @ " + Style.blue((spotify_metadata["duration_ms"].as_i / 1000).to_i.to_s) + "s"
query : String, nodes : Array(Hash(String, String))) : Array(Hash(String, Int32)) puts " Choose video to download:"
points = [] of Hash(String, Int32) index = 1
index = 0 yt_metadata.each do |vid|
print " " + Style.bold(index.to_s + " ")
nodes.each do |node| puts "\"" + vid["title"] + "\" @ " + Style.blue((vid["duration_ms"].to_i / 1000).to_i.to_s) + "s"
pts = 0
pts += points_compare(song_name, node["title"])
pts += points_compare(artist_name, node["title"])
pts += count_buzzphrases(query, node["title"])
points.push({
"points" => pts,
"index" => index,
})
index += 1 index += 1
end if index > 5
break
# Sort first by points and then by original index of the song
points.sort! { |a, b|
if b["points"] == a["points"]
a["index"] <=> b["index"]
else
b["points"] <=> a["points"]
end
}
return points
end
# Returns an `Int` based off the number of points worth assigning to the
# matchiness of the string. First the strings are downcased and then all
# nonalphanumeric characters are stripped.
# If *item1* includes *item2*, return 3 pts.
# If after the items have been blanked, *item1* includes *item2*,
# return 1 pts.
# Else, return 0 pts.
private def points_compare(item1 : String, item2 : String) : Int32
if item2.includes?(item1)
return 3
end
item1 = item1.downcase.gsub(/[^a-z0-9]/, "")
item2 = item2.downcase.gsub(/[^a-z0-9]/, "")
if item2.includes?(item1)
return 1
else
return 0
end
end
# Checks if there are any phrases in the title of the video that would
# indicate audio having what we want.
# *video_name* is the title of the video, and *query* is what the user the
# program searched for. *query* is needed in order to make sure we're not
# subtracting points from something that's naturally in the title
private def count_buzzphrases(query : String, video_name : String) : Int32
good_phrases = 0
bad_phrases = 0
GOLDEN_PHRASES.each do |gold_phrase|
gold_phrase = gold_phrase.downcase.gsub(/[^a-z0-9]/, "")
if query.downcase.gsub(/[^a-z0-9]/, "").includes?(gold_phrase)
next
elsif video_name.downcase.gsub(/[^a-z0-9]/, "").includes?(gold_phrase)
good_phrases += 1
end end
end end
GARBAGE_PHRASES.each do |garbage_phrase| input = 0
garbage_phrase = garbage_phrase.downcase.gsub(/[^a-z0-9]/, "") while true # not between 1 and 5
begin
if query.downcase.gsub(/[^a-z0-9]/, "").includes?(garbage_phrase) print Style.bold(" > ")
next input = gets.not_nil!.chomp.to_i
elsif video_name.downcase.gsub(/[^a-z0-9]/, "").includes?(garbage_phrase) if input < 6 && input > 0
bad_phrases += 1 break
end
rescue
puts Style.red(" Invalid input, try again.")
end end
end end
return good_phrases - bad_phrases return yt_metadata[input-1]["href"]
end end
# Finds valid video links from a `HTTP::Client.get` request # Finds valid video links from a `HTTP::Client.get` request
# Returns an `Array` of `XML::Node` # Returns an `Array` of `NODES_CLASS` containing additional metadata from Youtube
private def get_video_link_nodes(response_body : String) : NODES_CLASS private def get_yt_search_metadata(response_body : String) : YT_METADATA_CLASS
yt_initial_data : JSON::Any = JSON.parse("{}") yt_initial_data : JSON::Any = JSON.parse("{}")
response_body.each_line do |line| response_body.each_line do |line|
@ -221,7 +133,7 @@ module Youtube
# where the vid metadata lives # where the vid metadata lives
yt_initial_data = yt_initial_data["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]["sectionListRenderer"]["contents"] yt_initial_data = yt_initial_data["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]["sectionListRenderer"]["contents"]
video_metadata = [] of Hash(String, String) video_metadata = [] of VID_METADATA_CLASS
i = 0 i = 0
while true while true
@ -229,11 +141,16 @@ module Youtube
# video title # video title
raw_metadata = yt_initial_data[0]["itemSectionRenderer"]["contents"][i]["videoRenderer"] raw_metadata = yt_initial_data[0]["itemSectionRenderer"]["contents"][i]["videoRenderer"]
metadata = {} of String => String metadata = {} of String => VID_VALUE_CLASS
metadata["title"] = raw_metadata["title"]["runs"][0]["text"].as_s metadata["title"] = raw_metadata["title"]["runs"][0]["text"].as_s
metadata["href"] = raw_metadata["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"].as_s metadata["href"] = raw_metadata["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"].as_s
timestamp = raw_metadata["lengthText"]["simpleText"].as_s
metadata["timestamp"] = timestamp
metadata["duration_ms"] = ((timestamp.split(":")[0].to_i * 60 +
timestamp.split(":")[1].to_i) * 1000).to_s
video_metadata.push(metadata) video_metadata.push(metadata)
rescue IndexError rescue IndexError
break break
@ -244,4 +161,40 @@ module Youtube
return video_metadata return video_metadata
end end
# Returns *url* rewritten as a canonical youtube watch URL
# ("https://www.youtube.com/watch?v=<id>") if possible, `nil` otherwise.
#
# `nil` is returned when *url* cannot be parsed, has no query string, has no
# non-empty `v=` parameter, or when youtube's oembed endpoint does not answer
# with a success status for the resulting URL. On success the video title
# reported by that endpoint is printed.
#
# ```
# Youtube.validate_url("https://www.youtube.com/watch?v=NOTANACTUALVIDEOID")
# => nil
# ```
def validate_url(url : String) : String | Nil
  # URI.parse raises on malformed input instead of returning a falsy value
  begin
    uri = URI.parse url
  rescue URI::Error
    return nil
  end

  query = uri.query
  return nil if !query

  # find the video ID; when "v=" is given more than once the last one wins
  id_param = query.split('&').reverse.find { |q| q.starts_with?("v=") }
  return nil if !id_param

  vID = id_param[2..-1]
  # an empty ID can never be valid, no need to ask youtube about it
  return nil if vID.empty?

  url = "https://www.youtube.com/watch?v=#{vID}"

  # ask youtube's oembed endpoint about the video; anything other than a
  # success status means the video ID could not be validated
  params = HTTP::Params.encode({"format" => "json", "url" => url})
  response = HTTP::Client.get "https://www.youtube.com/oembed?#{params}"
  return nil unless response.success?

  res_json = JSON.parse(response.body)
  title = res_json["title"].as_s
  puts Style.dim("  Video: ") + title

  return url
end
end end