From 9d25197a5fa0ca6e67a7c0c0aea6930df6b0984f Mon Sep 17 00:00:00 2001
From: Luke Garrison <lgarriso@nd.edu>
Date: Wed, 9 Aug 2017 21:42:31 -0400
Subject: [PATCH] Improves accuracy of selected youtube video

Finds a balance between viewcount (Youtube's relevancy) and proper song
duration based on Spotify duration. Thus, a Youtube video that is 30
seconds longer than the Spotify track will not be considered.

Takes the first result from Youtube (using Youtube's original ordering
based on relevance) that has a similar duration to the Spotify song.

Fixes a bug where, if there were no suitable videos for a song, the
program would loop infinitely. The program will now try to find a song
up to 5 times before moving on (this is necessary because occasionally
the song isn't properly fetched or parsed from Youtube).

Fixes a bug where songs queued for retry were appended to the playlist
file without being separated by a newline.
---
 core/misc.py |  3 ++-
 spotdl.py    | 73 ++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 58 insertions(+), 18 deletions(-)

diff --git a/core/misc.py b/core/misc.py
index 91b5fd4..dcf310c 100755
--- a/core/misc.py
+++ b/core/misc.py
@@ -103,9 +103,10 @@ def generate_search_url(song, viewsort=False):
     # urllib.request.quote() encodes URL with special characters
     song = quote(song)
     if viewsort:
-        url = u"https://www.youtube.com/results?q={0}&sp=CAMSAhABUBQ%253D".format(song)
+        url = u"https://www.youtube.com/results?q={0}".format(song)
     else:
         url = u"https://www.youtube.com/results?sp=EgIQAQ%253D%253D&q={0}".format(song)
+
     return url
 
 
diff --git a/spotdl.py b/spotdl.py
index 6b9512e..5092871 100755
--- a/spotdl.py
+++ b/spotdl.py
@@ -53,8 +53,12 @@ def generate_metadata(raw_song):
     return meta_tags
 
 
-def generate_youtube_url(raw_song):
+def generate_youtube_url(raw_song, tries_remaining=5):
     """Search for the song on YouTube and generate a URL to its video."""
+    # prevents an infinite loop but allows for a few retries
+    if tries_remaining == 0:
+        return
+
     meta_tags = generate_metadata(raw_song)
     if meta_tags is None:
         song = raw_song
@@ -69,19 +73,30 @@ def generate_youtube_url(raw_song):
 
     videos = []
     for x in items_parse.find_all('div', {'class': 'yt-lockup-dismissable yt-uix-tile'}):
+        # ensure result is not a channel
+        if x.find('channel') is not None or 'yt-lockup-channel' in x.parent.attrs['class'] or 'yt-lockup-channel' in x.attrs['class']:
+            continue
+
+        # ensure result is not a mix/playlist
+        if 'yt-lockup-playlist' in x.parent.attrs['class']:
+            continue
+
         # confirm the video result is not an advertisement
-        if x.find('channel') is None and x.find('googleads') is None:
-            y = x.find('div', class_='yt-lockup-content')
-            link = y.find('a')['href']
-            title = y.find('a')['title']
-            try:
-                videotime = x.find('span', class_="video-time").get_text()
-            except AttributeError:
-                return generate_youtube_url(raw_song)
-            youtubedetails = {'link': link, 'title': title, 'videotime': videotime, 'seconds':misc.get_sec(videotime)}
-            videos.append(youtubedetails)
-            if meta_tags is None:
-                break
+        if x.find('googleads') is not None:
+            continue
+
+        y = x.find('div', class_='yt-lockup-content')
+        link = y.find('a')['href']
+        title = y.find('a')['title']
+        try:
+            videotime = x.find('span', class_="video-time").get_text()
+        except AttributeError:
+            return generate_youtube_url(raw_song, tries_remaining - 1)
+
+        youtubedetails = {'link': link, 'title': title, 'videotime': videotime, 'seconds':misc.get_sec(videotime)}
+        videos.append(youtubedetails)
+        if meta_tags is None:
+            break
 
     if not videos:
         return None
@@ -100,10 +115,32 @@ def generate_youtube_url(raw_song):
             return None
     else:
         if meta_tags is not None:
-            videos.sort(key=lambda x: abs(x['seconds'] - (int(meta_tags['duration_ms'])/1000)))
-        result = videos[0];
+            # filter out videos that do not have a similar length to the Spotify song
+            duration_tolerance = 10
+            max_duration_tolerance = 20
+            possible_videos_by_duration = list()
+
+            '''
+            start with a reasonable duration_tolerance, and increment duration_tolerance
+            until one of the Youtube results falls within the correct duration or
+            the duration_tolerance has reached the max_duration_tolerance
+            '''
+            while len(possible_videos_by_duration) == 0:
+                possible_videos_by_duration = list(filter(lambda x: abs(x['seconds'] - (int(meta_tags['duration_ms'])/1000)) <= duration_tolerance, videos))
+                duration_tolerance += 1
+                if duration_tolerance > max_duration_tolerance:
+                    print(meta_tags['name'], 'by', meta_tags['artists'][0]['name'], 'was not found')
+                    return None
+
+            result = possible_videos_by_duration[0]
+        else:
+            # if the metadata could not be acquired, take the first result from Youtube because the proper song length is unknown
+            result = videos[0]
+
+    full_link = None
+    if result:
+        full_link = u'youtube.com{0}'.format(result['link'])
 
-    full_link = u'youtube.com{0}'.format(result['link'])
     return full_link
 
 
@@ -214,6 +251,7 @@ def check_exists(music_file, raw_song, islist=True):
             # do not prompt and skip the current song
             # if already downloaded when using list
             if islist:
+                print('Song already exists')
                 return True
             # if downloading only single song, prompt to re-download
             else:
@@ -257,7 +295,7 @@ def grab_list(text_file):
             misc.trim_song(text_file)
             # and append it to the last line in .txt
             with open(text_file, 'a') as myfile:
-                myfile.write(raw_song)
+                myfile.write(raw_song + '\n')
             print('Failed to download song. Will retry after other songs.')
             continue
         except KeyboardInterrupt:
@@ -303,6 +341,7 @@ def grab_single(raw_song, number=None):
     content = go_pafy(raw_song)
     if content is None:
         return
+
     # print '[number]. [artist] - [song]' if downloading from list
     # otherwise print '[artist] - [song]'
     print(get_youtube_title(content, number))