mirror of
https://github.com/cooperhammond/irs.git
synced 2025-02-27 23:16:46 +00:00
fixed youtube search ranking algo
This commit is contained in:
parent
ba7400819b
commit
efff04947c
|
@ -17,6 +17,8 @@ module Ripper
|
||||||
# remove the extension that will be added on by ydl
|
# remove the extension that will be added on by ydl
|
||||||
output_filename = output_filename.split(".")[..-2].join(".")
|
output_filename = output_filename.split(".")[..-2].join(".")
|
||||||
|
|
||||||
|
# TODO: update the logger for this. Explore overwriting stdout and
|
||||||
|
# injecting/removing text
|
||||||
options = {
|
options = {
|
||||||
"--output" => %("#{output_filename}.%(ext)s"), # auto-add correct ext
|
"--output" => %("#{output_filename}.%(ext)s"), # auto-add correct ext
|
||||||
# "--quiet" => "",
|
# "--quiet" => "",
|
||||||
|
|
|
@ -11,6 +11,16 @@ module Youtube
|
||||||
"yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link "
|
"yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link "
|
||||||
]
|
]
|
||||||
|
|
||||||
|
GARBAGE_PHRASES = [
|
||||||
|
"cover", "album", "live", "clean", "version", "full", "full album", "row",
|
||||||
|
"at", "@", "session", "how to", "npr music", "reimagined", "hr version",
|
||||||
|
"trailer"
|
||||||
|
]
|
||||||
|
|
||||||
|
GOLDEN_PHRASES = [
|
||||||
|
"official video", "official music video"
|
||||||
|
]
|
||||||
|
|
||||||
# Finds a youtube url based off of the given information.
|
# Finds a youtube url based off of the given information.
|
||||||
# The query to youtube is constructed like this:
|
# The query to youtube is constructed like this:
|
||||||
# "<song_name> <artist_name> <search terms>"
|
# "<song_name> <artist_name> <search terms>"
|
||||||
|
@ -38,16 +48,16 @@ module Youtube
|
||||||
|
|
||||||
return root + valid_nodes[0]["href"] if download_first
|
return root + valid_nodes[0]["href"] if download_first
|
||||||
|
|
||||||
ranked = __rank_videos(song_name, artist_name, valid_nodes)
|
ranked = __rank_videos(song_name, artist_name, query, valid_nodes)
|
||||||
|
|
||||||
return root + valid_nodes[ranked[0][1]]["href"]
|
return root + valid_nodes[ranked[0]["index"]]["href"]
|
||||||
end
|
end
|
||||||
|
|
||||||
# Will rank videos according to their title and the user input
|
# Will rank videos according to their title and the user input
|
||||||
# Returns an `Array` of Arrays each laid out like
|
# Returns an `Array` of Arrays each laid out like
|
||||||
# [<points>, <original index>].
|
# [<points>, <original index>].
|
||||||
private def __rank_videos(song_name, artist_name, nodes : Array(XML::Node))
|
private def __rank_videos(song_name : String, artist_name : String, query : String, nodes : Array(XML::Node))
|
||||||
points = [] of Array(Int32)
|
points = [] of Hash(String, Int32)
|
||||||
index = 0
|
index = 0
|
||||||
|
|
||||||
nodes.each do |node|
|
nodes.each do |node|
|
||||||
|
@ -55,12 +65,24 @@ module Youtube
|
||||||
|
|
||||||
pts += __points_compare(song_name, node["title"])
|
pts += __points_compare(song_name, node["title"])
|
||||||
pts += __points_compare(artist_name, node["title"])
|
pts += __points_compare(artist_name, node["title"])
|
||||||
|
pts += __count_buzzphrases(query, node["title"])
|
||||||
|
|
||||||
points.push([pts, index])
|
points.push({
|
||||||
|
"points" => pts,
|
||||||
|
"index" => index
|
||||||
|
})
|
||||||
index += 1
|
index += 1
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
points.sort!{ |a, b| b[0] <=> a[0] }
|
# Sort first by points and then by original index of the song
|
||||||
|
points.sort!{ |a, b|
|
||||||
|
if b["points"] == a["points"]
|
||||||
|
a["index"] <=> b["index"]
|
||||||
|
else
|
||||||
|
b["points"] <=> a["points"]
|
||||||
|
end
|
||||||
|
}
|
||||||
|
|
||||||
return points
|
return points
|
||||||
end
|
end
|
||||||
|
@ -73,20 +95,52 @@ module Youtube
|
||||||
# return 1 pts.
|
# return 1 pts.
|
||||||
# Else, return 0 pts.
|
# Else, return 0 pts.
|
||||||
private def __points_compare(item1 : String, item2 : String)
|
private def __points_compare(item1 : String, item2 : String)
|
||||||
if item1.includes?(item2)
|
if item2.includes?(item1)
|
||||||
return 3
|
return 3
|
||||||
end
|
end
|
||||||
|
|
||||||
item1 = item1.downcase.gsub(/[^a-z0-9]/, "")
|
item1 = item1.downcase.gsub(/[^a-z0-9]/, "")
|
||||||
item2 = item2.downcase.gsub(/[^a-z0-9]/, "")
|
item2 = item2.downcase.gsub(/[^a-z0-9]/, "")
|
||||||
|
|
||||||
if item1.includes?(item2)
|
if item2.includes?(item1)
|
||||||
return 1
|
return 1
|
||||||
else
|
else
|
||||||
return 0
|
return 0
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Checks if there are any phrases in the title of the video that would
|
||||||
|
# indicate audio having what we want.
|
||||||
|
# *video_name* is the title of the video, and *query* is what the
|
||||||
|
# program searched for. *query* is needed in order to make sure we're not
|
||||||
|
# subtracting points from something that's naturally in the title
|
||||||
|
# Scores a video title by the buzzphrases it contains.
#
# *query* is the search string the program sent to youtube and *video_name*
# is the title of a candidate video. Both are lowercased and stripped of
# every character outside `a-z0-9` before comparing, as is each phrase.
#
# A phrase that already appears in *query* is ignored, so a title is not
# rewarded or penalised for words the user actually asked for.
#
# Returns the number of `GOLDEN_PHRASES` found in the title minus the number
# of `GARBAGE_PHRASES` found in the title.
private def __count_buzzphrases(query : String, video_name : String)
  # Normalise once; these values do not change between phrases.
  query_key = query.downcase.gsub(/[^a-z0-9]/, "")
  title_key = video_name.downcase.gsub(/[^a-z0-9]/, "")

  # Golden phrases count in the title's favour. (Previously a golden match
  # incremented the bad counter, so it lowered the score.)
  good_phrases = GOLDEN_PHRASES.count do |gold_phrase|
    phrase_key = gold_phrase.downcase.gsub(/[^a-z0-9]/, "")
    !query_key.includes?(phrase_key) && title_key.includes?(phrase_key)
  end

  # Garbage phrases count against the title.
  # NOTE(review): matching is by substring on the stripped text, so short
  # phrases such as "at" can match inside unrelated words, and a phrase
  # that strips to "" (e.g. "@") is presumably always treated as present
  # in the query and therefore never counted - confirm this is intended.
  bad_phrases = GARBAGE_PHRASES.count do |garbage_phrase|
    phrase_key = garbage_phrase.downcase.gsub(/[^a-z0-9]/, "")
    !query_key.includes?(phrase_key) && title_key.includes?(phrase_key)
  end

  return good_phrases - bad_phrases
end
|
||||||
|
|
||||||
# Finds valid video links from a `HTTP::Client.get` request
|
# Finds valid video links from a `HTTP::Client.get` request
|
||||||
# Returns an `Array` of `XML::Node`
|
# Returns an `Array` of `XML::Node`
|
||||||
private def __get_video_link_nodes(doc : String)
|
private def __get_video_link_nodes(doc : String)
|
||||||
|
|
Loading…
Reference in a new issue