Retry a few times to work around random YouTube issues

Ritiek Malhotra
2020-05-08 22:14:51 +05:30
parent 007fc0be67
commit e0362b6e8c
4 changed files with 72 additions and 27 deletions
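
Both YouTube code paths changed below follow the same retry shape: when YouTube hands back a bad or unexpected response, decrement a retries counter, log how many attempts remain, and call the same routine again, giving up (or re-raising) only once the counter reaches zero. A minimal, self-contained sketch of that pattern, using a placeholder fetch function and error type rather than the actual spotdl APIs:

    import logging

    logger = logging.getLogger(__name__)

    def fetch_with_retries(fetch, url, retries=5):
        # `fetch` and ValueError are stand-ins for the YouTube calls and the
        # failure modes handled in the diffs below.
        try:
            return fetch(url)
        except ValueError:
            if retries > 0:
                logger.debug("Unexpected response. Retries left: %d", retries - 1)
                return fetch_with_retries(fetch, url, retries=retries - 1)
            raise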

View File

@@ -111,7 +111,7 @@ class Spotdl:
)
# Remove duplicates and empty elements
# Also strip whitespaces from elements (if any)
spotdl.util.remove_duplicates(
tracks = spotdl.util.remove_duplicates(
tracks,
condition=lambda x: x,
operation=str.strip
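
Assigning the return value is the actual fix in this hunk: remove_duplicates evidently builds and returns a cleaned list rather than mutating its argument, so the old call threw its result away. A minimal sketch of a helper with that behaviour (an assumption inferred from this change, not spotdl's exact implementation):

    def remove_duplicates(elements, condition=lambda _: True, operation=None):
        # Return a new list; the input list is left untouched.
        seen, result = set(), []
        for element in elements:
            if operation is not None:
                element = operation(element)   # e.g. str.strip
            if condition(element) and element not in seen:
                seen.add(element)
                result.append(element)
        return result

    tracks = remove_duplicates(["  song a ", "song a", "", "song b"],
                               condition=lambda x: x, operation=str.strip)
    # tracks == ["song a", "song b"]; without the assignment the cleaned list is lost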
@@ -158,11 +158,14 @@ class Spotdl:
logger.info('Downloading "{}"'.format(track))
search_metadata = MetadataSearch(
track,
lyrics=True,
lyrics=not self.arguments["no_metadata"],
yt_search_format=self.arguments["search_format"],
yt_manual=self.arguments["manual"]
)
try:
if self.arguments["no_metadata"]:
metadata = search_metadata.on_youtube()
else:
metadata = search_metadata.on_youtube_and_spotify()
except (NoYouTubeVideoFoundError, NoYouTubeVideoMatchError) as e:
logger.error(e.args[0])
@@ -219,6 +222,9 @@ class Spotdl:
logger.debug("Skip track download.")
return
if not self.arguments["no_metadata"]:
metadata["lyrics"].start()
logger.info('Downloading to "{filename}"'.format(filename=filename))
if Encoder is None:
track.download(stream, filename)
@@ -231,10 +237,10 @@ class Spotdl:
)
if not self.arguments["no_metadata"]:
track.metadata["lyrics"] = track.metadata["lyrics"].join()
self.apply_metadata(track, filename, output_extension)
def apply_metadata(self, track, filename, encoding):
track.metadata["lyrics"] = track.metadata["lyrics"].join()
logger.info("Applying metadata")
try:
track.apply_metadata(filename, encoding=encoding)
@@ -245,24 +251,22 @@ class Spotdl:
logger.info(
"Checking and removing any duplicate tracks in {}.".format(path)
)
with open(path, "r") as fin:
tracks = fin.read().splitlines()
tracks = spotdl.util.readlines_from_nonbinary_file(path)
# Remove duplicates and empty elements
# Also strip whitespaces from elements (if any)
spotdl.util.remove_duplicates(
tracks = spotdl.util.remove_duplicates(
tracks,
condition=lambda x: x,
operation=str.strip
)
# Overwrite file
with open(path, "w") as fout:
fout.writelines(tracks)
spotdl.util.writelines_to_nonbinary_file(path, tracks)
print("", file=sys.stderr)
logger.info(
"Downloading {n} tracks.\n".format(n=len(tracks))
)
for number, track in enumerate(tracks, 1):
for position, track in enumerate(tracks, 1):
search_metadata = MetadataSearch(
track,
lyrics=True,
@@ -271,7 +275,7 @@ class Spotdl:
)
try:
log_track_query = '{position}. Downloading "{track}"'.format(
position=number,
position=position,
track=track
)
logger.info(log_track_query)
@@ -285,14 +289,13 @@ class Spotdl:
)
tracks.append(track)
except (NoYouTubeVideoFoundError, NoYouTubeVideoMatchError) as e:
logger.error(e.args[0])
logger.error("{err}".format(err=e.args[0]))
else:
if self.arguments["write_successful"]:
with open(self.arguments["write_successful"], "a") as fout:
fout.write(track)
finally:
with open(path, "w") as fout:
fout.writelines(tracks[number-1:])
spotdl.util.writelines_to_nonbinary_file(path, tracks[position:])
print("", file=sys.stderr)
"""

View File

@@ -50,7 +50,7 @@ class YouTubeSearch:
quoted_query = urllib.request.quote(query)
return self.base_search_url.format(quoted_query)
def _fetch_response_html(self, url):
def _fetch_response_html(self, url, retries=5):
response = urllib.request.urlopen(url)
soup = BeautifulSoup(response.read(), "html.parser")
return soup
@@ -102,12 +102,26 @@ class YouTubeSearch:
video = not not_video
return video
def search(self, query, limit=5):
def _is_server_side_invalid_response(self, videos, html):
if videos:
return False
search_message = html.find("div", {"class": "search-message"})
return search_message is None
def search(self, query, limit=10, retries=5):
""" Search and scrape YouTube to return a list of matching videos. """
search_url = self.generate_search_url(query)
logger.debug('Fetching YouTube results for "{}".'.format(search_url))
html = self._fetch_response_html(search_url)
videos = self._fetch_search_results(html)
videos = self._fetch_search_results(html, limit=limit)
to_retry = retries > 0 and self._is_server_side_invalid_response(videos, html)
if to_retry:
retries -= 1
logger.debug(
"Retrying since YouTube returned invalid response for search "
"results. Retries left: {retries}.".format(retries=retries)
)
return self.search(query, limit=limit, retries=retries)
return YouTubeVideos(videos)
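
The new _is_server_side_invalid_response check treats an empty result set as suspect only when the page also lacks YouTube's "search-message" div, which presumably appears on a genuinely empty results page; an empty page without that marker is assumed to be one of YouTube's throwaway responses, and the search is retried. A stand-alone illustration of that heuristic (the HTML snippets are made up for the example):

    from bs4 import BeautifulSoup

    def is_invalid_response(videos, html):
        if videos:                    # results were parsed, so the page was fine
            return False
        marker = html.find("div", {"class": "search-message"})
        return marker is None         # empty page without the marker -> retry

    empty_page = BeautifulSoup("<html><body></body></html>", "html.parser")
    no_results_page = BeautifulSoup(
        '<div class="search-message">No results found</div>', "html.parser"
    )
    print(is_invalid_response([], empty_page))       # True  -> retry the search
    print(is_invalid_response([], no_results_page))  # False -> genuinely no results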
@@ -202,9 +216,25 @@ class ProviderYouTube(ProviderBase):
)
return self.from_url(watch_urls[0])
def from_url(self, url):
def from_url(self, url, retries=5):
logger.debug('Fetching YouTube metadata for "{url}".'.format(url=url))
try:
content = pytube.YouTube(url)
except KeyError:
# Sometimes YouTube can return an unexpected response; in such a case,
# retry a few times before finally failing.
if retries > 0:
retries -= 1
logger.debug(
"YouTube returned an unexpected response for "
"`pytube.YouTube({url})`. Retries left: {retries}".format(
url=url, retries=retries
)
)
return self.from_url(url, retries=retries)
else:
raise
else:
return self.from_pytube_object(content)
def from_pytube_object(self, content):
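
Written iteratively instead of recursively, the same guard around pytube.YouTube might look like the sketch below (a hedged alternative for illustration, not the code this commit ships; the debug logging is omitted for brevity):

    import pytube

    def youtube_from_url(url, retries=5):
        # pytube occasionally raises KeyError on an unexpected watch-page payload;
        # try again a few times before letting the error propagate.
        for attempt in range(retries + 1):
            try:
                return pytube.YouTube(url)
            except KeyError:
                if attempt == retries:
                    raise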

View File

@@ -23,7 +23,11 @@ PROVIDERS = {
def prompt_for_youtube_search_result(videos):
max_index_length = len(str(len(videos)))
max_title_length = max(len(v["title"]) for v in videos)
print(" 0. Skip downloading this track", file=sys.stderr)
msg = "{index:>{max_index}}. Skip downloading this track".format(
index=0,
max_index=max_index_length,
)
print(msg, file=sys.stderr)
for index, video in enumerate(videos, 1):
vid_details = "{index:>{max_index}}. {title:<{max_title}}\n{new_line_gap} {url} [{duration}]".format(
index=index,
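
The {index:>{max_index}} spec right-aligns the option number to the width of the largest index, so the new "0. Skip" line lines up with the numbered results below it. A quick illustration of the nested format specification:

    width = len(str(12))  # e.g. 12 videos -> width 2
    print("{index:>{max_index}}. Skip downloading this track".format(index=0, max_index=width))
    print("{index:>{max_index}}. {title}".format(index=12, max_index=width, title="Some video"))
    # " 0. Skip downloading this track"
    # "12. Some video"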
@@ -92,7 +96,6 @@ class MetadataSearch:
target=self.get_lyrics,
args=(lyric_query,),
)
metadata["lyrics"].start()
return metadata
@@ -113,7 +116,6 @@ class MetadataSearch:
target=self.get_lyrics,
arguments=(lyric_query,),
)
metadata["lyrics"].start()
return metadata
@@ -134,7 +136,6 @@ class MetadataSearch:
target=self.get_lyrics,
arguments=(lyric_query,),
)
metadata["lyrics"].start()
return metadata
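
The .start() calls removed from these three search methods are now issued from the download path (see the Spotdl class hunks above), so the lyrics fetch runs in the background while the audio downloads and is joined only right before the metadata is applied. Judging by the track.metadata["lyrics"].join() assignment earlier in the diff, metadata["lyrics"] is a thread-like object whose join() returns the fetched lyrics. A minimal sketch of that pattern, using a hypothetical ReturningThread wrapper rather than spotdl's actual class:

    import threading

    class ReturningThread(threading.Thread):
        # A thread whose join() hands back the target's return value.
        def __init__(self, target, args=()):
            super().__init__()
            self._call = lambda: target(*args)
            self._result = None

        def run(self):
            self._result = self._call()

        def join(self, timeout=None):
            super().join(timeout)
            return self._result

    def fetch_lyrics(query):
        return "placeholder lyrics for " + query   # stands in for the lyrics provider

    lyrics_thread = ReturningThread(target=fetch_lyrics, args=("artist - title",))
    lyrics_thread.start()              # kicked off when the download begins
    # ... download and encode the track here ...
    lyrics = lyrics_thread.join()      # collected just before tagging the file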

View File

@@ -212,3 +212,14 @@ def content_available(url):
def titlecase(string):
return " ".join(word.capitalize() for word in string.split())
def readlines_from_nonbinary_file(path):
with open(path, "r") as fin:
lines = fin.read().splitlines()
return lines
def writelines_to_nonbinary_file(path, lines):
with open(path, "w") as fout:
fout.writelines(map(lambda x: x + "\n", lines))
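
The two new helpers give the track-list handling a symmetric read/write pair: readlines_from_nonbinary_file returns the file's lines without trailing newlines, and writelines_to_nonbinary_file appends one newline per entry when writing them back. A short usage sketch (the file name is made up):

    tracks = readlines_from_nonbinary_file("tracks.txt")       # ["song a", "song b", ...]
    tracks = [t.strip() for t in tracks if t.strip()]          # drop blanks, trim whitespace
    writelines_to_nonbinary_file("tracks.txt", tracks)         # one track per line again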