From e0362b6e8c130a527d6d956ba689eef5a335e3d8 Mon Sep 17 00:00:00 2001
From: Ritiek Malhotra <ritiekmalhotra123@gmail.com>
Date: Fri, 8 May 2020 22:14:51 +0530
Subject: [PATCH] Retry a few times to work around random YouTube issues

---
 spotdl/command_line/lib.py           | 37 +++++++++++++-----------
 spotdl/metadata/providers/youtube.py | 42 ++++++++++++++++++++++++----
 spotdl/metadata_search.py            |  9 +++---
 spotdl/util.py                       | 11 ++++++++
 4 files changed, 72 insertions(+), 27 deletions(-)

diff --git a/spotdl/command_line/lib.py b/spotdl/command_line/lib.py
index bef4004..9561f45 100644
--- a/spotdl/command_line/lib.py
+++ b/spotdl/command_line/lib.py
@@ -111,7 +111,7 @@ class Spotdl:
         )
         # Remove duplicates and empty elements
         # Also strip whitespaces from elements (if any)
-        spotdl.util.remove_duplicates(
+        tracks = spotdl.util.remove_duplicates(
             tracks,
             condition=lambda x: x,
             operation=str.strip
@@ -158,12 +158,15 @@ class Spotdl:
         logger.info('Downloading "{}"'.format(track))
         search_metadata = MetadataSearch(
             track,
-            lyrics=True,
+            lyrics=not self.arguments["no_metadata"],
             yt_search_format=self.arguments["search_format"],
             yt_manual=self.arguments["manual"]
         )
         try:
-            metadata = search_metadata.on_youtube_and_spotify()
+            if self.arguments["no_metadata"]:
+                metadata = search_metadata.on_youtube()
+            else:
+                metadata = search_metadata.on_youtube_and_spotify()
         except (NoYouTubeVideoFoundError, NoYouTubeVideoMatchError) as e:
             logger.error(e.args[0])
         else:
@@ -219,6 +222,9 @@ class Spotdl:
             logger.debug("Skip track download.")
             return
 
+        if not self.arguments["no_metadata"]:
+            metadata["lyrics"].start()
+
         logger.info('Downloading to "{filename}"'.format(filename=filename))
         if Encoder is None:
             track.download(stream, filename)
@@ -231,10 +237,10 @@ class Spotdl:
             )
 
         if not self.arguments["no_metadata"]:
+            track.metadata["lyrics"] = track.metadata["lyrics"].join()
             self.apply_metadata(track, filename, output_extension)
 
     def apply_metadata(self, track, filename, encoding):
-        track.metadata["lyrics"] = track.metadata["lyrics"].join()
         logger.info("Applying metadata")
         try:
             track.apply_metadata(filename, encoding=encoding)
@@ -245,24 +251,22 @@ class Spotdl:
         logger.info(
             "Checking and removing any duplicate tracks in {}.".format(path)
         )
-        with open(path, "r") as fin:
-            tracks = fin.read().splitlines()
-
+        tracks = spotdl.util.readlines_from_nonbinary_file(path)
         # Remove duplicates and empty elements
         # Also strip whitespaces from elements (if any)
-        spotdl.util.remove_duplicates(
+        tracks = spotdl.util.remove_duplicates(
             tracks,
             condition=lambda x: x,
             operation=str.strip
         )
-
         # Overwrite file
-        with open(path, "w") as fout:
-            fout.writelines(tracks)
+        spotdl.util.writelines_to_nonbinary_file(path, tracks)
 
-        print("", file=sys.stderr)
+        logger.info(
+            "Downloading {n} tracks.\n".format(n=len(tracks))
+        )
 
-        for number, track in enumerate(tracks, 1):
+        for position, track in enumerate(tracks, 1):
             search_metadata = MetadataSearch(
                 track,
                 lyrics=True,
@@ -271,7 +275,7 @@ class Spotdl:
             )
             try:
                 log_track_query = '{position}. Downloading "{track}"'.format(
-                    position=number,
+                    position=position,
                     track=track
                 )
                 logger.info(log_track_query)
@@ -285,14 +289,13 @@ class Spotdl:
                 )
                 tracks.append(track)
             except (NoYouTubeVideoFoundError, NoYouTubeVideoMatchError) as e:
-                logger.error(e.args[0])
+                logger.error("{err}".format(err=e.args[0]))
             else:
                 if self.arguments["write_successful"]:
                     with open(self.arguments["write_successful"], "a") as fout:
                         fout.write(track)
             finally:
-                with open(path, "w") as fout:
-                    fout.writelines(tracks[number-1:])
+                spotdl.util.writelines_to_nonbinary_file(path, tracks[position:])
                 print("", file=sys.stderr)
 
     """
diff --git a/spotdl/metadata/providers/youtube.py b/spotdl/metadata/providers/youtube.py
index aa58c8b..6deeb2d 100644
--- a/spotdl/metadata/providers/youtube.py
+++ b/spotdl/metadata/providers/youtube.py
@@ -50,7 +50,7 @@ class YouTubeSearch:
         quoted_query = urllib.request.quote(query)
         return self.base_search_url.format(quoted_query)
 
-    def _fetch_response_html(self, url):
+    def _fetch_response_html(self, url, retries=5):  # NOTE(review): `retries` is never read in this body
         response = urllib.request.urlopen(url)
         soup = BeautifulSoup(response.read(), "html.parser")
         return soup
@@ -102,12 +102,26 @@ class YouTubeSearch:
         video = not not_video
         return video
 
-    def search(self, query, limit=5):
+    def _is_server_side_invalid_response(self, videos, html):
+        """Return True if `videos` is empty and no "search-message" div exists."""
+        if not videos:
+            return html.find("div", {"class": "search-message"}) is None
+        return False
+
+    def search(self, query, limit=10, retries=5):
         """ Search and scrape YouTube to return a list of matching videos. """
         search_url = self.generate_search_url(query)
         logger.debug('Fetching YouTube results for "{}".'.format(search_url))
         html = self._fetch_response_html(search_url)
-        videos = self._fetch_search_results(html)
+        videos = self._fetch_search_results(html, limit=limit)
+        to_retry = retries > 0 and self._is_server_side_invalid_response(videos, html)
+        if to_retry:  # retry budget left and the fetched page was judged invalid
+            retries -= 1
+            logger.debug(
+                "Retrying since YouTube returned invalid response for search "
+                "results. Retries left: {retries}.".format(retries=retries)
+            )
+            return self.search(query, limit=limit, retries=retries)  # re-run the search with the decremented budget
         return YouTubeVideos(videos)
 
 
@@ -202,10 +216,26 @@ class ProviderYouTube(ProviderBase):
             )
         return self.from_url(watch_urls[0])
 
-    def from_url(self, url):
+    def from_url(self, url, retries=5):
         logger.debug('Fetching YouTube metadata for "{url}".'.format(url=url))
-        content = pytube.YouTube(url)
-        return self.from_pytube_object(content)
+        try:
+            content = pytube.YouTube(url)
+        except KeyError:
+            # YouTube sometimes returns an unexpected response (surfacing here
+            # as a KeyError); retry a few times before finally failing.
+            if retries > 0:
+                retries -= 1
+                logger.debug(
+                    "YouTube returned an unexpected response for "
+                    "`pytube.YouTube({url})`. Retries left: {retries}".format(
+                        url=url, retries=retries
+                    )
+                )
+                return self.from_url(url, retries=retries)
+            else:
+                raise  # retries exhausted: propagate the original KeyError
+        else:
+            return self.from_pytube_object(content)
 
     def from_pytube_object(self, content):
         return self.metadata_to_standard_form(content)
diff --git a/spotdl/metadata_search.py b/spotdl/metadata_search.py
index a5d17ec..89762d3 100644
--- a/spotdl/metadata_search.py
+++ b/spotdl/metadata_search.py
@@ -23,7 +23,11 @@ PROVIDERS = {
 def prompt_for_youtube_search_result(videos):
     max_index_length = len(str(len(videos)))
     max_title_length = max(len(v["title"]) for v in videos)
-    print(" 0. Skip downloading this track", file=sys.stderr)
+    msg = "{index:>{max_index}}. Skip downloading this track".format(
+        index=0,
+        max_index=max_index_length,
+    )
+    print(msg, file=sys.stderr)
     for index, video in enumerate(videos, 1):
         vid_details = "{index:>{max_index}}. {title:<{max_title}}\n{new_line_gap}  {url} [{duration}]".format(
             index=index,
@@ -92,7 +96,6 @@ class MetadataSearch:
             target=self.get_lyrics,
             args=(lyric_query,),
         )
-        metadata["lyrics"].start()
 
         return metadata
 
@@ -113,7 +116,6 @@ class MetadataSearch:
             target=self.get_lyrics,
             arguments=(lyric_query,),
         )
-        metadata["lyrics"].start()
 
         return metadata
 
@@ -134,7 +136,6 @@ class MetadataSearch:
             target=self.get_lyrics,
             arguments=(lyric_query,),
         )
-        metadata["lyrics"].start()
 
         return metadata
 
diff --git a/spotdl/util.py b/spotdl/util.py
index 008abe7..53a2a24 100644
--- a/spotdl/util.py
+++ b/spotdl/util.py
@@ -212,3 +212,14 @@ def content_available(url):
 def titlecase(string):
     return " ".join(word.capitalize() for word in string.split())
 
+
+def readlines_from_nonbinary_file(path):
+    """Return the lines of the text file at `path`, line endings stripped."""
+    with open(path, "r") as fin:
+        return fin.read().splitlines()
+
+
+def writelines_to_nonbinary_file(path, lines):
+    with open(path, "w") as fout:  # text mode; each entry gets its own "\n"
+        fout.writelines(line + "\n" for line in lines)
+