mirror of
https://github.com/cooperhammond/irs.git
synced 2025-08-13 23:21:02 +00:00
Compare commits
19 commits
Author | SHA1 | Date | |
---|---|---|---|
|
c99e8257e9 | ||
|
3bbb0e767a | ||
|
61120f21b0 | ||
|
390d59b9a0 | ||
|
3263ff4e07 | ||
|
3d4acdeaea | ||
|
72938a9b6a | ||
|
f962a0ab75 | ||
|
ac7bc02ec5 | ||
|
bdc63b4c35 | ||
|
289f1d8c63 | ||
|
f3776613b4 | ||
|
ff3019e207 | ||
|
fa5f3bb3b7 | ||
|
8d348031d3 | ||
|
92e8885ae9 | ||
|
5eaac33345 | ||
|
8c15f7b5e2 | ||
|
3f12a880e9 |
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -12,3 +12,4 @@
|
|||
ffmpeg
|
||||
ffprobe
|
||||
youtube-dl
|
||||
*.temp
|
|
@ -124,6 +124,7 @@ Here's what they do:
|
|||
```yaml
|
||||
binary_directory: ~/.irs/bin
|
||||
music_directory: ~/Music
|
||||
search_terms: "lyrics"
|
||||
filename_pattern: "{track_number} - {title}"
|
||||
directory_pattern: "{artist}/{album}"
|
||||
client_key: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
||||
|
@ -136,6 +137,8 @@ single_folder_playlist:
|
|||
- `binary_directory`: a path specifying where the downloaded binaries should
|
||||
be placed
|
||||
- `music_directory`: a path specifying where downloaded mp3s should be placed.
|
||||
- `search_terms`: additional search terms to plug into youtube, which can be
|
||||
potentially useful for not grabbing erroneous audio.
|
||||
- `filename_pattern`: a pattern for the output filename of the mp3
|
||||
- `directory_pattern`: a pattern for the folder structure your mp3s are saved in
|
||||
- `client_key`: a client key from your spotify API application
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
name: irs
|
||||
version: 1.2.1
|
||||
version: 1.4.0
|
||||
|
||||
authors:
|
||||
- Cooper Hammond <kepoorh@gmail.com>
|
||||
|
|
|
@ -21,7 +21,9 @@ class CLI
|
|||
[["-A", "--album"], "album", "string"],
|
||||
[["-p", "--playlist"], "playlist", "string"],
|
||||
[["-u", "--url"], "url", "string"],
|
||||
[["-g", "--give-url"], "give-url", "bool"],
|
||||
[["-S", "--select"], "select", "bool"],
|
||||
[["--ask-skip"], "ask_skip", "bool"],
|
||||
[["--apply"], "apply_file", "string"]
|
||||
]
|
||||
|
||||
@args : Hash(String, String)
|
||||
|
@ -50,8 +52,12 @@ class CLI
|
|||
#{Style.blue "-s, --song <song>"} Specify song name to download
|
||||
#{Style.blue "-A, --album <album>"} Specify the album name to download
|
||||
#{Style.blue "-p, --playlist <playlist>"} Specify the playlist name to download
|
||||
#{Style.blue "-u, --url <url>"} Specify the youtube url to download from (for single songs only)
|
||||
#{Style.blue "-g, --give-url"} Specify the youtube url sources while downloading (for albums or playlists only)
|
||||
#{Style.blue "-u, --url <url>"} Specify the youtube url to download from
|
||||
#{Style.blue " "} (for albums and playlists, the command-line
|
||||
#{Style.blue " "} argument is ignored, and it should be '')
|
||||
#{Style.blue "-S, --select"} Use a menu to choose each song's video source
|
||||
#{Style.blue "--ask-skip"} Before every playlist/album song, ask to skip
|
||||
#{Style.blue "--apply <file>"} Apply metadata to a existing file
|
||||
|
||||
#{Style.bold "Examples:"}
|
||||
$ #{Style.green %(irs --song "Bohemian Rhapsody" --artist "Queen")}
|
||||
|
@ -73,33 +79,32 @@ class CLI
|
|||
|
||||
if @args["help"]? || @args.keys.size == 0
|
||||
help
|
||||
|
||||
elsif @args["version"]?
|
||||
version
|
||||
|
||||
elsif @args["install"]?
|
||||
YdlBinaries.get_both(Config.binary_location)
|
||||
|
||||
elsif @args["config"]?
|
||||
puts ENV["IRS_CONFIG_LOCATION"]?
|
||||
|
||||
elsif @args["song"]? && @args["artist"]?
|
||||
s = Song.new(@args["song"], @args["artist"])
|
||||
s.provide_client_keys(Config.client_key, Config.client_secret)
|
||||
s.grab_it(@args["url"]?)
|
||||
s.grab_it(flags: @args)
|
||||
s.organize_it()
|
||||
|
||||
elsif @args["album"]? && @args["artist"]?
|
||||
a = Album.new(@args["album"], @args["artist"])
|
||||
a.provide_client_keys(Config.client_key, Config.client_secret)
|
||||
if @args["give-url"]?
|
||||
a.grab_it(true)
|
||||
else
|
||||
a.grab_it(false)
|
||||
end
|
||||
a.grab_it(flags: @args)
|
||||
|
||||
elsif @args["playlist"]? && @args["artist"]?
|
||||
p = Playlist.new(@args["playlist"], @args["artist"])
|
||||
p.provide_client_keys(Config.client_key, Config.client_secret)
|
||||
if @args["give-url"]?
|
||||
p.grab_it(true)
|
||||
else
|
||||
p.grab_it(false)
|
||||
end
|
||||
p.grab_it(flags: @args)
|
||||
|
||||
else
|
||||
puts Style.red("Those arguments don't do anything when used that way.")
|
||||
puts "Type `irs -h` to see usage."
|
||||
|
|
|
@ -7,6 +7,7 @@ require "../search/spotify"
|
|||
EXAMPLE_CONFIG = <<-EOP
|
||||
#{Style.dim "exampleconfig.yml"}
|
||||
#{Style.dim "===="}
|
||||
#{Style.blue "search_terms"}: #{Style.green "\"lyrics\""}
|
||||
#{Style.blue "binary_directory"}: #{Style.green "~/.irs/bin"}
|
||||
#{Style.blue "music_directory"}: #{Style.green "~/Music"}
|
||||
#{Style.blue "filename_pattern"}: #{Style.green "\"{track_number} - {title}\""}
|
||||
|
@ -24,6 +25,7 @@ module Config
|
|||
extend self
|
||||
|
||||
@@arguments = [
|
||||
"search_terms",
|
||||
"binary_directory",
|
||||
"music_directory",
|
||||
"filename_pattern",
|
||||
|
@ -45,6 +47,10 @@ module Config
|
|||
exit 1
|
||||
end
|
||||
|
||||
def search_terms : String
|
||||
return @@conf["search_terms"].to_s
|
||||
end
|
||||
|
||||
def binary_location : String
|
||||
path = @@conf["binary_directory"].to_s
|
||||
return Path[path].expand(home: true).to_s
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
module IRS
|
||||
VERSION = "0.1.0"
|
||||
VERSION = "1.4.0"
|
||||
end
|
||||
|
|
|
@ -27,7 +27,11 @@ abstract class SpotifyList
|
|||
end
|
||||
|
||||
# Finds the list, and downloads all of the songs using the `Song` class
|
||||
def grab_it(ask_url : Bool = false)
|
||||
def grab_it(flags = {} of String => String)
|
||||
ask_url = flags["url"]?
|
||||
ask_skip = flags["ask_skip"]?
|
||||
is_playlist = flags["playlist"]?
|
||||
|
||||
if !@spotify_searcher.authorized?
|
||||
raise("Need to call provide_client_keys on Album or Playlist class.")
|
||||
end
|
||||
|
@ -43,22 +47,28 @@ abstract class SpotifyList
|
|||
|
||||
i = 0
|
||||
contents.each do |datum|
|
||||
i += 1
|
||||
if datum["track"]?
|
||||
datum = datum["track"]
|
||||
end
|
||||
|
||||
data = organize_song_metadata(list, datum)
|
||||
|
||||
song = Song.new(data["name"].to_s, data["artists"][0]["name"].to_s)
|
||||
s_name = data["name"].to_s
|
||||
s_artist = data["artists"][0]["name"].to_s
|
||||
|
||||
song = Song.new(s_name, s_artist)
|
||||
song.provide_spotify(@spotify_searcher)
|
||||
song.provide_metadata(data)
|
||||
|
||||
puts Style.bold("[#{data["track_number"]}/#{contents.size}]")
|
||||
song.grab_it ask_url: ask_url
|
||||
puts Style.bold("[#{i}/#{contents.size}]")
|
||||
|
||||
unless ask_skip && skip?(s_name, s_artist, is_playlist)
|
||||
song.grab_it(flags: flags)
|
||||
organize(song)
|
||||
|
||||
i += 1
|
||||
else
|
||||
puts "Skipping..."
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -67,6 +77,13 @@ abstract class SpotifyList
|
|||
@spotify_searcher.authorize(client_key, client_secret)
|
||||
end
|
||||
|
||||
private def skip?(name, artist, is_playlist)
|
||||
print "Skip #{Style.blue name}" +
|
||||
(is_playlist ? " (by #{Style.green artist})": "") + "? "
|
||||
response = gets
|
||||
return response && response.lstrip.downcase.starts_with? "y"
|
||||
end
|
||||
|
||||
private def outputter(key : String, index : Int32)
|
||||
text = @outputs[key][index]
|
||||
.gsub("%l", @list_name)
|
||||
|
|
|
@ -46,6 +46,7 @@ class TrackMapper
|
|||
type: Int32,
|
||||
setter: true
|
||||
},
|
||||
duration_ms: Int32,
|
||||
type: String,
|
||||
uri: String
|
||||
)
|
||||
|
|
|
@ -67,7 +67,8 @@ class Playlist < SpotifyList
|
|||
FileUtils.mkdir_p(strpath)
|
||||
end
|
||||
safe_filename = song.filename.gsub(/[\/]/, "").gsub(" ", " ")
|
||||
File.rename("./" + song.filename, (path / safe_filename).to_s)
|
||||
FileUtils.cp("./" + song.filename, (path / safe_filename).to_s)
|
||||
FileUtils.rm("./" + song.filename)
|
||||
else
|
||||
song.organize_it()
|
||||
end
|
||||
|
|
|
@ -57,7 +57,11 @@ class Song
|
|||
# ```
|
||||
# Song.new("Bohemian Rhapsody", "Queen").grab_it
|
||||
# ```
|
||||
def grab_it(url : (String | Nil) = nil, ask_url : Bool = false)
|
||||
def grab_it(url : (String | Nil) = nil, flags = {} of String => String)
|
||||
passed_url : (String | Nil) = flags["url"]?
|
||||
passed_file : (String | Nil) = flags["apply_file"]?
|
||||
select_link = flags["select"]?
|
||||
|
||||
outputter("intro", 0)
|
||||
|
||||
if !@spotify_searcher.authorized? && !@metadata
|
||||
|
@ -85,19 +89,29 @@ class Song
|
|||
end
|
||||
|
||||
data = @metadata.as(JSON::Any)
|
||||
@song_name = data["name"].as_s
|
||||
@artist_name = data["artists"][0]["name"].as_s
|
||||
@filename = "#{Pattern.parse(Config.filename_pattern, data)}.mp3"
|
||||
|
||||
if ask_url
|
||||
if passed_file
|
||||
puts Style.green(" +") + Style.dim(" Moving file: ") + passed_file
|
||||
File.rename(passed_file, @filename)
|
||||
else
|
||||
if passed_url
|
||||
if passed_url.strip != ""
|
||||
url = passed_url
|
||||
else
|
||||
outputter("url", 4)
|
||||
url = gets
|
||||
if !url.nil? && url.strip == ""
|
||||
url = nil
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if !url
|
||||
outputter("url", 0)
|
||||
url = Youtube.find_url(@song_name, @artist_name, search_terms: "lyrics")
|
||||
url = Youtube.find_url(data, flags: flags)
|
||||
if !url
|
||||
raise("There was no url found on youtube for " +
|
||||
%("#{@song_name}" by "#{@artist_name}. ) +
|
||||
|
@ -106,8 +120,9 @@ class Song
|
|||
outputter("url", 1)
|
||||
else
|
||||
outputter("url", 2)
|
||||
if !Youtube.is_valid_url(url)
|
||||
raise("The url '#{url}' is an invalid youtube URL " +
|
||||
url = Youtube.validate_url(url)
|
||||
if !url
|
||||
raise("The url is an invalid youtube URL " +
|
||||
"Check the URL and try again")
|
||||
end
|
||||
outputter("url", 3)
|
||||
|
@ -116,32 +131,33 @@ class Song
|
|||
outputter("download", 0)
|
||||
Ripper.download_mp3(url.as(String), @filename)
|
||||
outputter("download", 1)
|
||||
end
|
||||
|
||||
outputter("albumart", 0)
|
||||
temp_albumart_filename = ".tempalbumart.jpg"
|
||||
HTTP::Client.get(data["album"]["images"][0]["url"].to_s) do |response|
|
||||
HTTP::Client.get(data["album"]["images"][0]["url"].as_s) do |response|
|
||||
File.write(temp_albumart_filename, response.body_io)
|
||||
end
|
||||
outputter("albumart", 0)
|
||||
|
||||
# check if song's metadata has been modded in playlist, update artist accordingly
|
||||
if data["artists"][-1]["owner"]?
|
||||
@artist = data["artists"][-1]["name"].to_s
|
||||
@artist = data["artists"][-1]["name"].as_s
|
||||
else
|
||||
@artist = data["artists"][0]["name"].to_s
|
||||
@artist = data["artists"][0]["name"].as_s
|
||||
end
|
||||
@album = data["album"]["name"].to_s
|
||||
@album = data["album"]["name"].as_s
|
||||
|
||||
tagger = Tags.new(@filename)
|
||||
tagger.add_album_art(temp_albumart_filename)
|
||||
tagger.add_text_tag("title", data["name"].to_s)
|
||||
tagger.add_text_tag("title", data["name"].as_s)
|
||||
tagger.add_text_tag("artist", @artist)
|
||||
|
||||
if !@album.empty?
|
||||
tagger.add_text_tag("album", @album)
|
||||
end
|
||||
|
||||
if genre = @spotify_searcher.find_genre(data["artists"][0]["id"].to_s)
|
||||
if genre = @spotify_searcher.find_genre(data["artists"][0]["id"].as_s)
|
||||
tagger.add_text_tag("genre", genre)
|
||||
end
|
||||
|
||||
|
|
144
src/search/ranking.cr
Normal file
144
src/search/ranking.cr
Normal file
|
@ -0,0 +1,144 @@
|
|||
alias VID_VALUE_CLASS = String
|
||||
alias VID_METADATA_CLASS = Hash(String, VID_VALUE_CLASS)
|
||||
alias YT_METADATA_CLASS = Array(VID_METADATA_CLASS)
|
||||
|
||||
module Ranker
|
||||
extend self
|
||||
|
||||
GARBAGE_PHRASES = [
|
||||
"cover", "album", "live", "clean", "version", "full", "full album", "row",
|
||||
"at", "@", "session", "how to", "npr music", "reimagined", "version",
|
||||
"trailer"
|
||||
]
|
||||
|
||||
GOLDEN_PHRASES = [
|
||||
"official video", "official music video",
|
||||
]
|
||||
|
||||
# Will rank videos according to their title and the user input, returns a sorted array of hashes
|
||||
# of the points a song was assigned and its original index
|
||||
# *spotify_metadata* is the metadata (from spotify) of the song that you want
|
||||
# *yt_metadata* is an array of hashes with metadata scraped from the youtube search result page
|
||||
# *query* is the query that you submitted to youtube for the results you now have
|
||||
# ```
|
||||
# Ranker.rank_videos(spotify_metadata, yt_metadata, query)
|
||||
# => [
|
||||
# {"points" => x, "index" => x},
|
||||
# ...
|
||||
# ]
|
||||
# ```
|
||||
# "index" corresponds to the original index of the song in yt_metadata
|
||||
def rank_videos(spotify_metadata : JSON::Any, yt_metadata : YT_METADATA_CLASS,
|
||||
query : String) : Array(Hash(String, Int32))
|
||||
points = [] of Hash(String, Int32)
|
||||
index = 0
|
||||
|
||||
actual_song_name = spotify_metadata["name"].as_s
|
||||
actual_artist_name = spotify_metadata["artists"][0]["name"].as_s
|
||||
|
||||
yt_metadata.each do |vid|
|
||||
pts = 0
|
||||
|
||||
pts += points_string_compare(actual_song_name, vid["title"])
|
||||
pts += points_string_compare(actual_artist_name, vid["title"])
|
||||
pts += count_buzzphrases(query, vid["title"])
|
||||
pts += compare_timestamps(spotify_metadata, vid)
|
||||
|
||||
points.push({
|
||||
"points" => pts,
|
||||
"index" => index,
|
||||
})
|
||||
index += 1
|
||||
end
|
||||
|
||||
# Sort first by points and then by original index of the song
|
||||
points.sort! { |a, b|
|
||||
if b["points"] == a["points"]
|
||||
a["index"] <=> b["index"]
|
||||
else
|
||||
b["points"] <=> a["points"]
|
||||
end
|
||||
}
|
||||
|
||||
return points
|
||||
end
|
||||
|
||||
# SINGULAR COMPONENT OF RANKING ALGORITHM
|
||||
private def compare_timestamps(spotify_metadata : JSON::Any, node : VID_METADATA_CLASS) : Int32
|
||||
# puts spotify_metadata.to_pretty_json()
|
||||
actual_time = spotify_metadata["duration_ms"].as_i
|
||||
vid_time = node["duration_ms"].to_i
|
||||
|
||||
difference = (actual_time - vid_time).abs
|
||||
|
||||
# puts "actual: #{actual_time}, vid: #{vid_time}"
|
||||
# puts "\tdiff: #{difference}"
|
||||
# puts "\ttitle: #{node["title"]}"
|
||||
|
||||
if difference <= 1000
|
||||
return 3
|
||||
elsif difference <= 2000
|
||||
return 2
|
||||
elsif difference <= 5000
|
||||
return 1
|
||||
else
|
||||
return 0
|
||||
end
|
||||
end
|
||||
|
||||
# SINGULAR COMPONENT OF RANKING ALGORITHM
|
||||
# Returns an `Int` based off the number of points worth assigning to the
|
||||
# matchiness of the string. First the strings are downcased and then all
|
||||
# nonalphanumeric characters are stripped.
|
||||
# If *item2* includes *item1*, return 3 pts.
|
||||
# If after the items have been blanked, *item2* includes *item1*,
|
||||
# return 1 pts.
|
||||
# Else, return 0 pts.
|
||||
private def points_string_compare(item1 : String, item2 : String) : Int32
|
||||
if item2.includes?(item1)
|
||||
return 3
|
||||
end
|
||||
|
||||
item1 = item1.downcase.gsub(/[^a-z0-9]/, "")
|
||||
item2 = item2.downcase.gsub(/[^a-z0-9]/, "")
|
||||
|
||||
if item2.includes?(item1)
|
||||
return 1
|
||||
else
|
||||
return 0
|
||||
end
|
||||
end
|
||||
|
||||
# SINGULAR COMPONENT OF RANKING ALGORITHM
|
||||
# Checks if there are any phrases in the title of the video that would
|
||||
# indicate audio having what we want.
|
||||
# *video_name* is the title of the video, and *query* is what the
|
||||
# program searched for. *query* is needed in order to make sure we're not
|
||||
# subtracting points from something that's naturally in the title
|
||||
private def count_buzzphrases(query : String, video_name : String) : Int32
|
||||
good_phrases = 0
|
||||
bad_phrases = 0
|
||||
|
||||
GOLDEN_PHRASES.each do |gold_phrase|
|
||||
gold_phrase = gold_phrase.downcase.gsub(/[^a-z0-9]/, "")
|
||||
|
||||
if query.downcase.gsub(/[^a-z0-9]/, "").includes?(gold_phrase)
|
||||
next
|
||||
elsif video_name.downcase.gsub(/[^a-z0-9]/, "").includes?(gold_phrase)
|
||||
good_phrases += 1
|
||||
end
|
||||
end
|
||||
|
||||
GARBAGE_PHRASES.each do |garbage_phrase|
|
||||
garbage_phrase = garbage_phrase.downcase.gsub(/[^a-z0-9]/, "")
|
||||
|
||||
if query.downcase.gsub(/[^a-z0-9]/, "").includes?(garbage_phrase)
|
||||
next
|
||||
elsif video_name.downcase.gsub(/[^a-z0-9]/, "").includes?(garbage_phrase)
|
||||
bad_phrases += 1
|
||||
end
|
||||
end
|
||||
|
||||
return good_phrases - bad_phrases
|
||||
end
|
||||
end
|
|
@ -60,9 +60,10 @@ class SpotifySearcher
|
|||
# ```
|
||||
def find_item(item_type : String, item_parameters : Hash, offset = 0,
|
||||
limit = 20) : JSON::Any?
|
||||
query = generate_query(item_type, item_parameters, offset, limit)
|
||||
query = generate_query(item_type, item_parameters)
|
||||
|
||||
url = @root_url.join("search?q=#{query}").to_s
|
||||
url = "search?q=#{query}&type=#{item_type}&limit=#{limit}&offset=#{offset}"
|
||||
url = @root_url.join(url).to_s
|
||||
|
||||
response = HTTP::Client.get(url, headers: @access_header)
|
||||
error_check(response)
|
||||
|
@ -228,8 +229,7 @@ class SpotifySearcher
|
|||
|
||||
# Generates url to run a GET request against to the Spotify open API
|
||||
# Returns a `String.`
|
||||
private def generate_query(item_type : String, item_parameters : Hash,
|
||||
offset : Int32, limit : Int32) : String
|
||||
private def generate_query(item_type : String, item_parameters : Hash) : String
|
||||
query = ""
|
||||
|
||||
# parameter keys to exclude in the api request. These values will be put
|
||||
|
@ -241,9 +241,9 @@ class SpotifySearcher
|
|||
if k == "name"
|
||||
# will remove the "name:<title>" param from the query
|
||||
if item_type == "playlist"
|
||||
query += item_parameters[k].gsub(" ", "+") + "+"
|
||||
query += item_parameters[k] + "+"
|
||||
else
|
||||
query += param_encode(item_type, item_parameters[k])
|
||||
query += as_field(item_type, item_parameters[k])
|
||||
end
|
||||
|
||||
# check if the key is to be excluded
|
||||
|
@ -254,14 +254,21 @@ class SpotifySearcher
|
|||
# NOTE: playlist names will be inserted into the query normally, without
|
||||
# a parameter.
|
||||
else
|
||||
query += param_encode(k, item_parameters[k])
|
||||
query += as_field(k, item_parameters[k])
|
||||
end
|
||||
end
|
||||
|
||||
# extra api info
|
||||
query += "&type=#{item_type}&limit=#{limit}&offset=#{offset}"
|
||||
return URI.encode(query.rchop("+"))
|
||||
end
|
||||
|
||||
return query
|
||||
# Returns a `String` encoded for the spotify api
|
||||
#
|
||||
# ```
|
||||
# as_field("album", "A Night At The Opera")
|
||||
# => "album:A Night At The Opera+"
|
||||
# ```
|
||||
private def as_field(key, value) : String
|
||||
return "#{key}:#{value}+"
|
||||
end
|
||||
|
||||
# Ranks the given items based off of the info from parameters.
|
||||
|
@ -327,15 +334,6 @@ class SpotifySearcher
|
|||
end
|
||||
end
|
||||
|
||||
# Returns a `String` encoded for the spotify api
|
||||
#
|
||||
# ```
|
||||
# query_encode("album", "A Night At The Opera")
|
||||
# => "album:A+Night+At+The+Opera"
|
||||
# ```
|
||||
private def param_encode(key : String, value : String) : String
|
||||
return key.gsub(" ", "+") + ":" + value.gsub(" ", "+") + "+"
|
||||
end
|
||||
end
|
||||
|
||||
# puts SpotifySearcher.new()
|
||||
|
|
|
@ -3,6 +3,11 @@ require "xml"
|
|||
require "json"
|
||||
require "uri"
|
||||
|
||||
require "./ranking"
|
||||
|
||||
require "../bottle/config"
|
||||
require "../bottle/styles"
|
||||
|
||||
|
||||
module Youtube
|
||||
extend self
|
||||
|
@ -12,188 +17,95 @@ module Youtube
|
|||
"yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link ",
|
||||
]
|
||||
|
||||
GARBAGE_PHRASES = [
|
||||
"cover", "album", "live", "clean", "version", "full", "full album", "row",
|
||||
"at", "@", "session", "how to", "npr music", "reimagined", "hr version",
|
||||
"trailer",
|
||||
]
|
||||
|
||||
GOLDEN_PHRASES = [
|
||||
"official video", "official music video",
|
||||
]
|
||||
|
||||
alias NODES_CLASS = Array(Hash(String, String))
|
||||
|
||||
# Checks if the given URL is a valid youtube URL
|
||||
#
|
||||
# ```
|
||||
# Youtube.is_valid_url("https://www.youtube.com/watch?v=NOTANACTUALVIDEOID")
|
||||
# => false
|
||||
# ```
|
||||
def is_valid_url(url : String) : Bool
|
||||
uri = URI.parse url
|
||||
|
||||
# is it a video on youtube, with a query
|
||||
query = uri.query
|
||||
if uri.host != "www.youtube.com" || uri.path != "/watch" || !query
|
||||
return false
|
||||
end
|
||||
|
||||
|
||||
queries = query.split('&')
|
||||
|
||||
# find the video ID
|
||||
i = 0
|
||||
while i < queries.size
|
||||
if queries[i].starts_with?("v=")
|
||||
vID = queries[i][2..-1]
|
||||
break
|
||||
end
|
||||
i += 1
|
||||
end
|
||||
|
||||
if !vID
|
||||
return false
|
||||
end
|
||||
|
||||
|
||||
# this is an internal endpoint to validate the video ID
|
||||
response = HTTP::Client.get "https://www.youtube.com/get_video_info?video_id=#{vID}"
|
||||
|
||||
return response.body.includes?("status=ok")
|
||||
end
|
||||
# Note that VID_VALUE_CLASS, VID_METADATA_CLASS, and YT_METADATA_CLASS are found in ranking.cr
|
||||
|
||||
# Finds a youtube url based off of the given information.
|
||||
# The query to youtube is constructed like this:
|
||||
# "<song_name> <artist_name> <search terms>"
|
||||
# If *download_first* is provided, the first link found will be downloaded.
|
||||
# If *select_link* is provided, a menu of options will be shown for the user to choose their poison
|
||||
#
|
||||
# ```
|
||||
# Youtube.find_url("Bohemian Rhapsody", "Queen")
|
||||
# => "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
|
||||
# ```
|
||||
def find_url(song_name : String, artist_name : String, search_terms = "",
|
||||
download_first = false) : String?
|
||||
query = (song_name + " " + artist_name + " " + search_terms).strip.gsub(" ", "+")
|
||||
def find_url(spotify_metadata : JSON::Any,
|
||||
flags = {} of String => String) : String?
|
||||
|
||||
url = "https://www.youtube.com/results?search_query=" + query
|
||||
search_terms = Config.search_terms
|
||||
|
||||
response = HTTP::Client.get(url)
|
||||
select_link = flags["select"]?
|
||||
|
||||
valid_nodes = get_video_link_nodes(response.body)
|
||||
song_name = spotify_metadata["name"].as_s
|
||||
artist_name = spotify_metadata["artists"][0]["name"].as_s
|
||||
|
||||
if valid_nodes.size == 0
|
||||
puts "There were no results for that query."
|
||||
human_query = "#{song_name} #{artist_name} #{search_terms.strip}"
|
||||
params = HTTP::Params.encode({"search_query" => human_query})
|
||||
|
||||
response = HTTP::Client.get("https://www.youtube.com/results?#{params}")
|
||||
|
||||
yt_metadata = get_yt_search_metadata(response.body)
|
||||
|
||||
if yt_metadata.size == 0
|
||||
puts "There were no results for this query on youtube: \"#{human_query}\""
|
||||
return nil
|
||||
end
|
||||
|
||||
root = "https://youtube.com"
|
||||
ranked = Ranker.rank_videos(spotify_metadata, yt_metadata, human_query)
|
||||
|
||||
return root + valid_nodes[0]["href"] if download_first
|
||||
|
||||
ranked = rank_videos(song_name, artist_name, query, valid_nodes)
|
||||
if select_link
|
||||
return root + select_link_menu(spotify_metadata, yt_metadata)
|
||||
end
|
||||
|
||||
begin
|
||||
return root + valid_nodes[ranked[0]["index"]]["href"]
|
||||
puts Style.dim(" Video: ") + yt_metadata[ranked[0]["index"]]["title"]
|
||||
return root + yt_metadata[ranked[0]["index"]]["href"]
|
||||
rescue IndexError
|
||||
return nil
|
||||
end
|
||||
|
||||
exit 1
|
||||
end
|
||||
|
||||
# Will rank videos according to their title and the user input
|
||||
# Return:
|
||||
# [
|
||||
# {"points" => x, "index" => x},
|
||||
# ...
|
||||
# ]
|
||||
private def rank_videos(song_name : String, artist_name : String,
|
||||
query : String, nodes : Array(Hash(String, String))) : Array(Hash(String, Int32))
|
||||
points = [] of Hash(String, Int32)
|
||||
index = 0
|
||||
|
||||
nodes.each do |node|
|
||||
pts = 0
|
||||
|
||||
pts += points_compare(song_name, node["title"])
|
||||
pts += points_compare(artist_name, node["title"])
|
||||
pts += count_buzzphrases(query, node["title"])
|
||||
|
||||
points.push({
|
||||
"points" => pts,
|
||||
"index" => index,
|
||||
})
|
||||
# Presents a menu with song info for the user to choose which url they want to download
|
||||
private def select_link_menu(spotify_metadata : JSON::Any,
|
||||
yt_metadata : YT_METADATA_CLASS) : String
|
||||
puts Style.dim(" Spotify info: ") +
|
||||
Style.bold("\"" + spotify_metadata["name"].to_s) + "\" by \"" +
|
||||
Style.bold(spotify_metadata["artists"][0]["name"].to_s + "\"") +
|
||||
" @ " + Style.blue((spotify_metadata["duration_ms"].as_i / 1000).to_i.to_s) + "s"
|
||||
puts " Choose video to download:"
|
||||
index = 1
|
||||
yt_metadata.each do |vid|
|
||||
print " " + Style.bold(index.to_s + " ")
|
||||
puts "\"" + vid["title"] + "\" @ " + Style.blue((vid["duration_ms"].to_i / 1000).to_i.to_s) + "s"
|
||||
index += 1
|
||||
end
|
||||
|
||||
# Sort first by points and then by original index of the song
|
||||
points.sort! { |a, b|
|
||||
if b["points"] == a["points"]
|
||||
a["index"] <=> b["index"]
|
||||
else
|
||||
b["points"] <=> a["points"]
|
||||
end
|
||||
}
|
||||
|
||||
return points
|
||||
end
|
||||
|
||||
# Returns an `Int` based off the number of points worth assigning to the
|
||||
# matchiness of the string. First the strings are downcased and then all
|
||||
# nonalphanumeric characters are stripped.
|
||||
# If *item1* includes *item2*, return 3 pts.
|
||||
# If after the items have been blanked, *item1* includes *item2*,
|
||||
# return 1 pts.
|
||||
# Else, return 0 pts.
|
||||
private def points_compare(item1 : String, item2 : String) : Int32
|
||||
if item2.includes?(item1)
|
||||
return 3
|
||||
end
|
||||
|
||||
item1 = item1.downcase.gsub(/[^a-z0-9]/, "")
|
||||
item2 = item2.downcase.gsub(/[^a-z0-9]/, "")
|
||||
|
||||
if item2.includes?(item1)
|
||||
return 1
|
||||
else
|
||||
return 0
|
||||
if index > 5
|
||||
break
|
||||
end
|
||||
end
|
||||
|
||||
# Checks if there are any phrases in the title of the video that would
|
||||
# indicate audio having what we want.
|
||||
# *video_name* is the title of the video, and *query* is what the user the
|
||||
# program searched for. *query* is needed in order to make sure we're not
|
||||
# subtracting points from something that's naturally in the title
|
||||
private def count_buzzphrases(query : String, video_name : String) : Int32
|
||||
good_phrases = 0
|
||||
bad_phrases = 0
|
||||
|
||||
GOLDEN_PHRASES.each do |gold_phrase|
|
||||
gold_phrase = gold_phrase.downcase.gsub(/[^a-z0-9]/, "")
|
||||
|
||||
if query.downcase.gsub(/[^a-z0-9]/, "").includes?(gold_phrase)
|
||||
next
|
||||
elsif video_name.downcase.gsub(/[^a-z0-9]/, "").includes?(gold_phrase)
|
||||
good_phrases += 1
|
||||
input = 0
|
||||
while true # not between 1 and 5
|
||||
begin
|
||||
print Style.bold(" > ")
|
||||
input = gets.not_nil!.chomp.to_i
|
||||
if input < 6 && input > 0
|
||||
break
|
||||
end
|
||||
rescue
|
||||
puts Style.red(" Invalid input, try again.")
|
||||
end
|
||||
end
|
||||
|
||||
GARBAGE_PHRASES.each do |garbage_phrase|
|
||||
garbage_phrase = garbage_phrase.downcase.gsub(/[^a-z0-9]/, "")
|
||||
return yt_metadata[input-1]["href"]
|
||||
|
||||
if query.downcase.gsub(/[^a-z0-9]/, "").includes?(garbage_phrase)
|
||||
next
|
||||
elsif video_name.downcase.gsub(/[^a-z0-9]/, "").includes?(garbage_phrase)
|
||||
bad_phrases += 1
|
||||
end
|
||||
end
|
||||
|
||||
return good_phrases - bad_phrases
|
||||
end
|
||||
|
||||
# Finds valid video links from a `HTTP::Client.get` request
|
||||
# Returns an `Array` of `XML::Node`
|
||||
private def get_video_link_nodes(response_body : String) : NODES_CLASS
|
||||
# Returns a `YT_METADATA_CLASS` (an `Array` of `VID_METADATA_CLASS`) containing additional metadata from Youtube
|
||||
private def get_yt_search_metadata(response_body : String) : YT_METADATA_CLASS
|
||||
yt_initial_data : JSON::Any = JSON.parse("{}")
|
||||
|
||||
response_body.each_line do |line|
|
||||
|
@ -221,7 +133,7 @@ module Youtube
|
|||
# where the vid metadata lives
|
||||
yt_initial_data = yt_initial_data["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]["sectionListRenderer"]["contents"]
|
||||
|
||||
video_metadata = [] of Hash(String, String)
|
||||
video_metadata = [] of VID_METADATA_CLASS
|
||||
|
||||
i = 0
|
||||
while true
|
||||
|
@ -229,10 +141,15 @@ module Youtube
|
|||
# video title
|
||||
raw_metadata = yt_initial_data[0]["itemSectionRenderer"]["contents"][i]["videoRenderer"]
|
||||
|
||||
metadata = {} of String => String
|
||||
metadata = {} of String => VID_VALUE_CLASS
|
||||
|
||||
metadata["title"] = raw_metadata["title"]["runs"][0]["text"].as_s
|
||||
metadata["href"] = raw_metadata["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"].as_s
|
||||
timestamp = raw_metadata["lengthText"]["simpleText"].as_s
|
||||
metadata["timestamp"] = timestamp
|
||||
metadata["duration_ms"] = ((timestamp.split(":")[0].to_i * 60 +
|
||||
timestamp.split(":")[1].to_i) * 1000).to_s
|
||||
|
||||
|
||||
video_metadata.push(metadata)
|
||||
rescue IndexError
|
||||
|
@ -244,4 +161,40 @@ module Youtube
|
|||
|
||||
return video_metadata
|
||||
end
|
||||
|
||||
# Returns the URL rebuilt in "https://www.youtube.com/watch?v=<id>" form if it is valid, otherwise `nil`
|
||||
#
|
||||
# ```
|
||||
# Youtube.validate_url("https://www.youtube.com/watch?v=NOTANACTUALVIDEOID")
|
||||
# => nil
|
||||
# ```
|
||||
def validate_url(url : String) : String | Nil
|
||||
uri = URI.parse url
|
||||
return nil if !uri
|
||||
|
||||
query = uri.query
|
||||
return nil if !query
|
||||
|
||||
# find the video ID
|
||||
vID = nil
|
||||
query.split('&').each do |q|
|
||||
if q.starts_with?("v=")
|
||||
vID = q[2..-1]
|
||||
end
|
||||
end
|
||||
return nil if !vID
|
||||
|
||||
url = "https://www.youtube.com/watch?v=#{vID}"
|
||||
|
||||
# query youtube's oembed endpoint to validate the video ID
|
||||
params = HTTP::Params.encode({"format" => "json", "url" => url})
|
||||
response = HTTP::Client.get "https://www.youtube.com/oembed?#{params}"
|
||||
return nil unless response.success?
|
||||
|
||||
res_json = JSON.parse(response.body)
|
||||
title = res_json["title"].as_s
|
||||
puts Style.dim(" Video: ") + title
|
||||
|
||||
return url
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue