From e0362b6e8c130a527d6d956ba689eef5a335e3d8 Mon Sep 17 00:00:00 2001 From: Ritiek Malhotra Date: Fri, 8 May 2020 22:14:51 +0530 Subject: [PATCH] Retry a few times to work around random YouTube issues --- spotdl/command_line/lib.py | 37 +++++++++++++----------- spotdl/metadata/providers/youtube.py | 42 ++++++++++++++++++++++++---- spotdl/metadata_search.py | 9 +++--- spotdl/util.py | 11 ++++++++ 4 files changed, 72 insertions(+), 27 deletions(-) diff --git a/spotdl/command_line/lib.py b/spotdl/command_line/lib.py index bef4004..9561f45 100644 --- a/spotdl/command_line/lib.py +++ b/spotdl/command_line/lib.py @@ -111,7 +111,7 @@ class Spotdl: ) # Remove duplicates and empty elements # Also strip whitespaces from elements (if any) - spotdl.util.remove_duplicates( + tracks = spotdl.util.remove_duplicates( tracks, condition=lambda x: x, operation=str.strip @@ -158,12 +158,15 @@ class Spotdl: logger.info('Downloading "{}"'.format(track)) search_metadata = MetadataSearch( track, - lyrics=True, + lyrics=not self.arguments["no_metadata"], yt_search_format=self.arguments["search_format"], yt_manual=self.arguments["manual"] ) try: - metadata = search_metadata.on_youtube_and_spotify() + if self.arguments["no_metadata"]: + metadata = search_metadata.on_youtube() + else: + metadata = search_metadata.on_youtube_and_spotify() except (NoYouTubeVideoFoundError, NoYouTubeVideoMatchError) as e: logger.error(e.args[0]) else: @@ -219,6 +222,9 @@ class Spotdl: logger.debug("Skip track download.") return + if not self.arguments["no_metadata"]: + metadata["lyrics"].start() + logger.info('Downloading to "{filename}"'.format(filename=filename)) if Encoder is None: track.download(stream, filename) @@ -231,10 +237,10 @@ class Spotdl: ) if not self.arguments["no_metadata"]: + track.metadata["lyrics"] = track.metadata["lyrics"].join() self.apply_metadata(track, filename, output_extension) def apply_metadata(self, track, filename, encoding): - track.metadata["lyrics"] = 
track.metadata["lyrics"].join() logger.info("Applying metadata") try: track.apply_metadata(filename, encoding=encoding) @@ -245,24 +251,22 @@ class Spotdl: logger.info( "Checking and removing any duplicate tracks in {}.".format(path) ) - with open(path, "r") as fin: - tracks = fin.read().splitlines() - + tracks = spotdl.util.readlines_from_nonbinary_file(path) # Remove duplicates and empty elements # Also strip whitespaces from elements (if any) - spotdl.util.remove_duplicates( + tracks = spotdl.util.remove_duplicates( tracks, condition=lambda x: x, operation=str.strip ) - # Overwrite file - with open(path, "w") as fout: - fout.writelines(tracks) + spotdl.util.writelines_to_nonbinary_file(path, tracks) - print("", file=sys.stderr) + logger.info( + "Downloading {n} tracks.\n".format(n=len(tracks)) + ) - for number, track in enumerate(tracks, 1): + for position, track in enumerate(tracks, 1): search_metadata = MetadataSearch( track, lyrics=True, @@ -271,7 +275,7 @@ class Spotdl: ) try: log_track_query = '{position}. 
Downloading "{track}"'.format( - position=number, + position=position, track=track ) logger.info(log_track_query) @@ -285,14 +289,13 @@ class Spotdl: ) tracks.append(track) except (NoYouTubeVideoFoundError, NoYouTubeVideoMatchError) as e: - logger.error(e.args[0]) + logger.error("{err}".format(err=e.args[0])) else: if self.arguments["write_successful"]: with open(self.arguments["write_successful"], "a") as fout: fout.write(track) finally: - with open(path, "w") as fout: - fout.writelines(tracks[number-1:]) + spotdl.util.writelines_to_nonbinary_file(path, tracks[position:]) print("", file=sys.stderr) """ diff --git a/spotdl/metadata/providers/youtube.py b/spotdl/metadata/providers/youtube.py index aa58c8b..6deeb2d 100644 --- a/spotdl/metadata/providers/youtube.py +++ b/spotdl/metadata/providers/youtube.py @@ -50,7 +50,7 @@ class YouTubeSearch: quoted_query = urllib.request.quote(query) return self.base_search_url.format(quoted_query) - def _fetch_response_html(self, url): + def _fetch_response_html(self, url, retries=5): response = urllib.request.urlopen(url) soup = BeautifulSoup(response.read(), "html.parser") return soup @@ -102,12 +102,26 @@ class YouTubeSearch: video = not not_video return video - def search(self, query, limit=5): + def _is_server_side_invalid_response(self, videos, html): + if videos: + return False + search_message = html.find("div", {"class":"search-message"}) + return search_message is None + + def search(self, query, limit=10, retries=5): """ Search and scrape YouTube to return a list of matching videos. 
""" search_url = self.generate_search_url(query) logger.debug('Fetching YouTube results for "{}".'.format(search_url)) html = self._fetch_response_html(search_url) - videos = self._fetch_search_results(html) + videos = self._fetch_search_results(html, limit=limit) + to_retry = retries > 0 and self._is_server_side_invalid_response(videos, html) + if to_retry: + retries -= 1 + logger.debug( + "Retrying since YouTube returned invalid response for search " + "results. Retries left: {retries}.".format(retries=retries) + ) + return self.search(query, limit=limit, retries=retries) return YouTubeVideos(videos) @@ -202,10 +216,26 @@ class ProviderYouTube(ProviderBase): ) return self.from_url(watch_urls[0]) - def from_url(self, url): + def from_url(self, url, retries=5): logger.debug('Fetching YouTube metadata for "{url}".'.format(url=url)) - content = pytube.YouTube(url) - return self.from_pytube_object(content) + try: + content = pytube.YouTube(url) + except KeyError: + # Sometimes YouTube can return unexpected response, in such a case + # retry a few times before finally failing. + if retries > 0: + retries -= 1 + logger.debug( + "YouTube returned an unexpected response for " + "`pytube.YouTube({url})`. Retries left: {retries}".format( + url=url, retries=retries + ) + ) + return self.from_url(url, retries=retries) + else: + raise + else: + return self.from_pytube_object(content) def from_pytube_object(self, content): return self.metadata_to_standard_form(content) diff --git a/spotdl/metadata_search.py b/spotdl/metadata_search.py index a5d17ec..89762d3 100644 --- a/spotdl/metadata_search.py +++ b/spotdl/metadata_search.py @@ -23,7 +23,11 @@ PROVIDERS = { def prompt_for_youtube_search_result(videos): max_index_length = len(str(len(videos))) max_title_length = max(len(v["title"]) for v in videos) - print(" 0. Skip downloading this track", file=sys.stderr) + msg = "{index:>{max_index}}. 
Skip downloading this track".format( + index=0, + max_index=max_index_length, + ) + print(msg, file=sys.stderr) for index, video in enumerate(videos, 1): vid_details = "{index:>{max_index}}. {title:<{max_title}}\n{new_line_gap} {url} [{duration}]".format( index=index, @@ -92,7 +96,6 @@ class MetadataSearch: target=self.get_lyrics, args=(lyric_query,), ) - metadata["lyrics"].start() return metadata @@ -113,7 +116,6 @@ class MetadataSearch: target=self.get_lyrics, arguments=(lyric_query,), ) - metadata["lyrics"].start() return metadata @@ -134,7 +136,6 @@ class MetadataSearch: target=self.get_lyrics, arguments=(lyric_query,), ) - metadata["lyrics"].start() return metadata diff --git a/spotdl/util.py b/spotdl/util.py index 008abe7..53a2a24 100644 --- a/spotdl/util.py +++ b/spotdl/util.py @@ -212,3 +212,14 @@ def content_available(url): def titlecase(string): return " ".join(word.capitalize() for word in string.split()) + +def readlines_from_nonbinary_file(path): + with open(path, "r") as fin: + lines = fin.read().splitlines() + return lines + + +def writelines_to_nonbinary_file(path, lines): + with open(path, "w") as fout: + fout.writelines(map(lambda x: x + "\n", lines)) +