Merge pull request #121 from lkgarrison/master

Improves accuracy of selected youtube video
2026-07-04 17:01:59 +00:00 · 2017-08-26 17:28:38 +02:00
parent ddd57eda88 9d25197a5f
commit 6b3dafa99b
2 changed files with 58 additions and 18 deletions
--- a/core/misc.py
+++ b/core/misc.py
@@ -103,9 +103,10 @@ def generate_search_url(song, viewsort=False):
    # urllib.request.quote() encodes URL with special characters
    song = quote(song)
    if viewsort:
-        url = u"https://www.youtube.com/results?q={0}&sp=CAMSAhABUBQ%253D".format(song)
+        url = u"https://www.youtube.com/results?q={0}".format(song)
    else:
        url = u"https://www.youtube.com/results?sp=EgIQAQ%253D%253D&q={0}".format(song)
+
    return url


--- a/spotdl.py
+++ b/spotdl.py
@@ -53,8 +53,12 @@ def generate_metadata(raw_song):
    return meta_tags


-def generate_youtube_url(raw_song):
+def generate_youtube_url(raw_song, tries_remaining=5):
    """Search for the song on YouTube and generate a URL to its video."""
+    # prevents an infinite loop but allows for a few retries
+    if tries_remaining == 0:
+        return
+
    meta_tags = generate_metadata(raw_song)
    if meta_tags is None:
        song = raw_song
@@ -69,15 +73,26 @@ def generate_youtube_url(raw_song):

    videos = []
    for x in items_parse.find_all('div', {'class': 'yt-lockup-dismissable yt-uix-tile'}):
+        # ensure result is not a channel
+        if x.find('channel') is not None or 'yt-lockup-channel' in x.parent.attrs['class'] or 'yt-lockup-channel' in x.attrs['class']:
+            continue
+
+        # ensure result is not a mix/playlist
+        if 'yt-lockup-playlist' in x.parent.attrs['class']:
+            continue
+
        # confirm the video result is not an advertisement
-        if x.find('channel') is None and x.find('googleads') is None:
+        if x.find('googleads') is not None:
+            continue
+
        y = x.find('div', class_='yt-lockup-content')
        link = y.find('a')['href']
        title = y.find('a')['title']
        try:
            videotime = x.find('span', class_="video-time").get_text()
        except AttributeError:
-                return generate_youtube_url(raw_song)
+            return generate_youtube_url(raw_song, tries_remaining - 1)
+
        youtubedetails = {'link': link, 'title': title, 'videotime': videotime, 'seconds':misc.get_sec(videotime)}
        videos.append(youtubedetails)
        if meta_tags is None:
@@ -100,10 +115,32 @@ def generate_youtube_url(raw_song):
            return None
    else:
        if meta_tags is not None:
-            videos.sort(key=lambda x: abs(x['seconds'] - (int(meta_tags['duration_ms'])/1000)))
-        result = videos[0];
+            # filter out videos that do not have a similar length to the Spotify song
+            duration_tolerance = 10
+            max_duration_tolerance = 20
+            possible_videos_by_duration = list()

+            '''
+            start with a reasonable duration_tolerance, and increment duration_tolerance
+            until one of the Youtube results falls within the correct duration or
+            the duration_tolerance has reached the max_duration_tolerance
+            '''
+            while len(possible_videos_by_duration) == 0:
+                possible_videos_by_duration = list(filter(lambda x: abs(x['seconds'] - (int(meta_tags['duration_ms'])/1000)) <= duration_tolerance, videos))
+                duration_tolerance += 1
+                if duration_tolerance > max_duration_tolerance:
+                    print(meta_tags['name'], 'by', meta_tags['artists'][0]['name'], 'was not found')
+                    return None
+
+            result = possible_videos_by_duration[0]
+        else:
+            # if the metadata could not be acquired, take the first result from Youtube because the proper song length is unknown
+            result = videos[0]
+
+    full_link = None
+    if result:
        full_link = u'youtube.com{0}'.format(result['link'])
+
    return full_link


@@ -214,6 +251,7 @@ def check_exists(music_file, raw_song, islist=True):
            # do not prompt and skip the current song
            # if already downloaded when using list
            if islist:
+                print('Song already exists')
                return True
            # if downloading only single song, prompt to re-download
            else:
@@ -257,7 +295,7 @@ def grab_list(text_file):
            misc.trim_song(text_file)
            # and append it to the last line in .txt
            with open(text_file, 'a') as myfile:
-                myfile.write(raw_song)
+                myfile.write(raw_song + '\n')
            print('Failed to download song. Will retry after other songs.')
            continue
        except KeyboardInterrupt:
@@ -303,6 +341,7 @@ def grab_single(raw_song, number=None):
    content = go_pafy(raw_song)
    if content is None:
        return
+
    # print '[number]. [artist] - [song]' if downloading from list
    # otherwise print '[artist] - [song]'
    print(get_youtube_title(content, number))