Retry a few times to work around random YouTube issues

This commit is contained in:
Ritiek Malhotra
2020-05-08 22:14:51 +05:30
parent 007fc0be67
commit e0362b6e8c
4 changed files with 72 additions and 27 deletions

View File

@@ -111,7 +111,7 @@ class Spotdl:
) )
# Remove duplicates and empty elements # Remove duplicates and empty elements
# Also strip whitespaces from elements (if any) # Also strip whitespaces from elements (if any)
spotdl.util.remove_duplicates( tracks = spotdl.util.remove_duplicates(
tracks, tracks,
condition=lambda x: x, condition=lambda x: x,
operation=str.strip operation=str.strip
@@ -158,12 +158,15 @@ class Spotdl:
logger.info('Downloading "{}"'.format(track)) logger.info('Downloading "{}"'.format(track))
search_metadata = MetadataSearch( search_metadata = MetadataSearch(
track, track,
lyrics=True, lyrics=not self.arguments["no_metadata"],
yt_search_format=self.arguments["search_format"], yt_search_format=self.arguments["search_format"],
yt_manual=self.arguments["manual"] yt_manual=self.arguments["manual"]
) )
try: try:
metadata = search_metadata.on_youtube_and_spotify() if self.arguments["no_metadata"]:
metadata = search_metadata.on_youtube()
else:
metadata = search_metadata.on_youtube_and_spotify()
except (NoYouTubeVideoFoundError, NoYouTubeVideoMatchError) as e: except (NoYouTubeVideoFoundError, NoYouTubeVideoMatchError) as e:
logger.error(e.args[0]) logger.error(e.args[0])
else: else:
@@ -219,6 +222,9 @@ class Spotdl:
logger.debug("Skip track download.") logger.debug("Skip track download.")
return return
if not self.arguments["no_metadata"]:
metadata["lyrics"].start()
logger.info('Downloading to "{filename}"'.format(filename=filename)) logger.info('Downloading to "{filename}"'.format(filename=filename))
if Encoder is None: if Encoder is None:
track.download(stream, filename) track.download(stream, filename)
@@ -231,10 +237,10 @@ class Spotdl:
) )
if not self.arguments["no_metadata"]: if not self.arguments["no_metadata"]:
track.metadata["lyrics"] = track.metadata["lyrics"].join()
self.apply_metadata(track, filename, output_extension) self.apply_metadata(track, filename, output_extension)
def apply_metadata(self, track, filename, encoding): def apply_metadata(self, track, filename, encoding):
track.metadata["lyrics"] = track.metadata["lyrics"].join()
logger.info("Applying metadata") logger.info("Applying metadata")
try: try:
track.apply_metadata(filename, encoding=encoding) track.apply_metadata(filename, encoding=encoding)
@@ -245,24 +251,22 @@ class Spotdl:
logger.info( logger.info(
"Checking and removing any duplicate tracks in {}.".format(path) "Checking and removing any duplicate tracks in {}.".format(path)
) )
with open(path, "r") as fin: tracks = spotdl.util.readlines_from_nonbinary_file(path)
tracks = fin.read().splitlines()
# Remove duplicates and empty elements # Remove duplicates and empty elements
# Also strip whitespaces from elements (if any) # Also strip whitespaces from elements (if any)
spotdl.util.remove_duplicates( tracks = spotdl.util.remove_duplicates(
tracks, tracks,
condition=lambda x: x, condition=lambda x: x,
operation=str.strip operation=str.strip
) )
# Overwrite file # Overwrite file
with open(path, "w") as fout: spotdl.util.writelines_to_nonbinary_file(path, tracks)
fout.writelines(tracks)
print("", file=sys.stderr) logger.info(
"Downloading {n} tracks.\n".format(n=len(tracks))
)
for number, track in enumerate(tracks, 1): for position, track in enumerate(tracks, 1):
search_metadata = MetadataSearch( search_metadata = MetadataSearch(
track, track,
lyrics=True, lyrics=True,
@@ -271,7 +275,7 @@ class Spotdl:
) )
try: try:
log_track_query = '{position}. Downloading "{track}"'.format( log_track_query = '{position}. Downloading "{track}"'.format(
position=number, position=position,
track=track track=track
) )
logger.info(log_track_query) logger.info(log_track_query)
@@ -285,14 +289,13 @@ class Spotdl:
) )
tracks.append(track) tracks.append(track)
except (NoYouTubeVideoFoundError, NoYouTubeVideoMatchError) as e: except (NoYouTubeVideoFoundError, NoYouTubeVideoMatchError) as e:
logger.error(e.args[0]) logger.error("{err}".format(err=e.args[0]))
else: else:
if self.arguments["write_successful"]: if self.arguments["write_successful"]:
with open(self.arguments["write_successful"], "a") as fout: with open(self.arguments["write_successful"], "a") as fout:
fout.write(track) fout.write(track)
finally: finally:
with open(path, "w") as fout: spotdl.util.writelines_to_nonbinary_file(path, tracks[position:])
fout.writelines(tracks[number-1:])
print("", file=sys.stderr) print("", file=sys.stderr)
""" """

View File

@@ -50,7 +50,7 @@ class YouTubeSearch:
quoted_query = urllib.request.quote(query) quoted_query = urllib.request.quote(query)
return self.base_search_url.format(quoted_query) return self.base_search_url.format(quoted_query)
def _fetch_response_html(self, url): def _fetch_response_html(self, url, retries=5):
response = urllib.request.urlopen(url) response = urllib.request.urlopen(url)
soup = BeautifulSoup(response.read(), "html.parser") soup = BeautifulSoup(response.read(), "html.parser")
return soup return soup
@@ -102,12 +102,26 @@ class YouTubeSearch:
video = not not_video video = not not_video
return video return video
def search(self, query, limit=5): def _is_server_side_invalid_response(self, videos, html):
if videos:
return False
search_message = html.find("div", {"class":"search-message"})
return search_message is None
def search(self, query, limit=10, retries=5):
""" Search and scrape YouTube to return a list of matching videos. """ """ Search and scrape YouTube to return a list of matching videos. """
search_url = self.generate_search_url(query) search_url = self.generate_search_url(query)
logger.debug('Fetching YouTube results for "{}".'.format(search_url)) logger.debug('Fetching YouTube results for "{}".'.format(search_url))
html = self._fetch_response_html(search_url) html = self._fetch_response_html(search_url)
videos = self._fetch_search_results(html) videos = self._fetch_search_results(html, limit=limit)
to_retry = retries > 0 and self._is_server_side_invalid_response(videos, html)
if to_retry:
retries -= 1
logger.debug(
"Retrying since YouTube returned invalid response for search "
"results. Retries left: {retries}.".format(retries=retries)
)
return self.search(query, limit=limit, retries=retries)
return YouTubeVideos(videos) return YouTubeVideos(videos)
@@ -202,10 +216,26 @@ class ProviderYouTube(ProviderBase):
) )
return self.from_url(watch_urls[0]) return self.from_url(watch_urls[0])
def from_url(self, url): def from_url(self, url, retries=5):
logger.debug('Fetching YouTube metadata for "{url}".'.format(url=url)) logger.debug('Fetching YouTube metadata for "{url}".'.format(url=url))
content = pytube.YouTube(url) try:
return self.from_pytube_object(content) content = pytube.YouTube(url)
except KeyError:
# Sometimes YouTube returns an unexpected response; in such a case,
# retry a few times before finally failing.
if retries > 0:
retries -= 1
logger.debug(
"YouTube returned an unexpected response for "
"`pytube.YouTube({url})`. Retries left: {retries}".format(
url=url, retries=retries
)
)
return self.from_url(url, retries=retries)
else:
raise
else:
return self.from_pytube_object(content)
def from_pytube_object(self, content): def from_pytube_object(self, content):
return self.metadata_to_standard_form(content) return self.metadata_to_standard_form(content)

View File

@@ -23,7 +23,11 @@ PROVIDERS = {
def prompt_for_youtube_search_result(videos): def prompt_for_youtube_search_result(videos):
max_index_length = len(str(len(videos))) max_index_length = len(str(len(videos)))
max_title_length = max(len(v["title"]) for v in videos) max_title_length = max(len(v["title"]) for v in videos)
print(" 0. Skip downloading this track", file=sys.stderr) msg = "{index:>{max_index}}. Skip downloading this track".format(
index=0,
max_index=max_index_length,
)
print(msg, file=sys.stderr)
for index, video in enumerate(videos, 1): for index, video in enumerate(videos, 1):
vid_details = "{index:>{max_index}}. {title:<{max_title}}\n{new_line_gap} {url} [{duration}]".format( vid_details = "{index:>{max_index}}. {title:<{max_title}}\n{new_line_gap} {url} [{duration}]".format(
index=index, index=index,
@@ -92,7 +96,6 @@ class MetadataSearch:
target=self.get_lyrics, target=self.get_lyrics,
args=(lyric_query,), args=(lyric_query,),
) )
metadata["lyrics"].start()
return metadata return metadata
@@ -113,7 +116,6 @@ class MetadataSearch:
target=self.get_lyrics, target=self.get_lyrics,
arguments=(lyric_query,), arguments=(lyric_query,),
) )
metadata["lyrics"].start()
return metadata return metadata
@@ -134,7 +136,6 @@ class MetadataSearch:
target=self.get_lyrics, target=self.get_lyrics,
arguments=(lyric_query,), arguments=(lyric_query,),
) )
metadata["lyrics"].start()
return metadata return metadata

View File

@@ -212,3 +212,14 @@ def content_available(url):
def titlecase(string): def titlecase(string):
return " ".join(word.capitalize() for word in string.split()) return " ".join(word.capitalize() for word in string.split())
def readlines_from_nonbinary_file(path):
    """Read the text file at ``path`` and return its lines as a list.

    The file is opened in text mode, read in full, and split with
    ``str.splitlines()``, so the returned strings carry no line endings.
    An empty file yields an empty list.
    """
    with open(path, "r") as text_file:
        content = text_file.read()
    return content.splitlines()
def writelines_to_nonbinary_file(path, lines):
    """Overwrite the text file at ``path`` with ``lines``, one item per line.

    The file is opened in text write mode (truncating any existing
    content) and every item of ``lines`` is written followed by a
    newline character. An empty ``lines`` leaves the file empty.
    """
    with open(path, "w") as text_file:
        # Items are expected to come without their own line terminator,
        # so one is appended to each as it is written.
        text_file.writelines(line + "\n" for line in lines)