Retry a few times to work around random YouTube issues

Ritiek Malhotra
2020-05-08 22:14:51 +05:30
parent 007fc0be67
commit e0362b6e8c
4 changed files with 72 additions and 27 deletions
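
Both YouTube code paths changed below follow the same retry shape: when YouTube hands back a bad or unexpected response, decrement a retries counter, log how many attempts remain, and call the same routine again, giving up (or re-raising) only once the counter reaches zero. A minimal, self-contained sketch of that pattern, using a placeholder fetch function and error type rather than the actual spotdl APIs:

    import logging

    logger = logging.getLogger(__name__)

    def fetch_with_retries(fetch, url, retries=5):
        # `fetch` and ValueError are stand-ins for the YouTube calls and the
        # failure modes handled in the diffs below.
        try:
            return fetch(url)
        except ValueError:
            if retries > 0:
                logger.debug("Unexpected response. Retries left: %d", retries - 1)
                return fetch_with_retries(fetch, url, retries=retries - 1)
            raise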

View File

@@ -111,7 +111,7 @@ class Spotdl:
)
# Remove duplicates and empty elements
# Also strip whitespaces from elements (if any)
spotdl.util.remove_duplicates(
tracks = spotdl.util.remove_duplicates(
tracks,
condition=lambda x: x,
operation=str.strip
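
Assigning the return value is the actual fix in this hunk: remove_duplicates evidently builds and returns a cleaned list rather than mutating its argument, so the old call threw its result away. A minimal sketch of a helper with that behaviour (an assumption inferred from this change, not spotdl's exact implementation):

    def remove_duplicates(elements, condition=lambda _: True, operation=None):
        # Return a new list; the input list is left untouched.
        seen, result = set(), []
        for element in elements:
            if operation is not None:
                element = operation(element)   # e.g. str.strip
            if condition(element) and element not in seen:
                seen.add(element)
                result.append(element)
        return result

    tracks = remove_duplicates(["  song a ", "song a", "", "song b"],
                               condition=lambda x: x, operation=str.strip)
    # tracks == ["song a", "song b"]; without the assignment the cleaned list is lost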
@@ -158,11 +158,14 @@ class Spotdl:
logger.info('Downloading "{}"'.format(track))
search_metadata = MetadataSearch(
track,
lyrics=True,
lyrics=not self.arguments["no_metadata"],
yt_search_format=self.arguments["search_format"],
yt_manual=self.arguments["manual"]
)
try:
if self.arguments["no_metadata"]:
metadata = search_metadata.on_youtube()
else:
metadata = search_metadata.on_youtube_and_spotify()
except (NoYouTubeVideoFoundError, NoYouTubeVideoMatchError) as e:
logger.error(e.args[0])
@@ -219,6 +222,9 @@ class Spotdl:
logger.debug("Skip track download.")
return
if not self.arguments["no_metadata"]:
metadata["lyrics"].start()
logger.info('Downloading to "{filename}"'.format(filename=filename))
if Encoder is None:
track.download(stream, filename)
@@ -231,10 +237,10 @@ class Spotdl:
)
if not self.arguments["no_metadata"]:
track.metadata["lyrics"] = track.metadata["lyrics"].join()
self.apply_metadata(track, filename, output_extension)
def apply_metadata(self, track, filename, encoding):
track.metadata["lyrics"] = track.metadata["lyrics"].join()
logger.info("Applying metadata")
try:
track.apply_metadata(filename, encoding=encoding)
@@ -245,24 +251,22 @@ class Spotdl:
logger.info(
"Checking and removing any duplicate tracks in {}.".format(path)
)
with open(path, "r") as fin:
tracks = fin.read().splitlines()
tracks = spotdl.util.readlines_from_nonbinary_file(path)
# Remove duplicates and empty elements
# Also strip whitespaces from elements (if any)
spotdl.util.remove_duplicates(
tracks = spotdl.util.remove_duplicates(
tracks,
condition=lambda x: x,
operation=str.strip
)
# Overwrite file
with open(path, "w") as fout:
fout.writelines(tracks)
spotdl.util.writelines_to_nonbinary_file(path, tracks)
print("", file=sys.stderr)
logger.info(
"Downloading {n} tracks.\n".format(n=len(tracks))
)
for number, track in enumerate(tracks, 1):
for position, track in enumerate(tracks, 1):
search_metadata = MetadataSearch(
track,
lyrics=True,
@@ -271,7 +275,7 @@ class Spotdl:
)
try:
log_track_query = '{position}. Downloading "{track}"'.format(
position=number,
position=position,
track=track
)
logger.info(log_track_query)
@@ -285,14 +289,13 @@ class Spotdl:
)
tracks.append(track)
except (NoYouTubeVideoFoundError, NoYouTubeVideoMatchError) as e:
logger.error(e.args[0])
logger.error("{err}".format(err=e.args[0]))
else:
if self.arguments["write_successful"]:
with open(self.arguments["write_successful"], "a") as fout:
fout.write(track)
finally:
with open(path, "w") as fout:
fout.writelines(tracks[number-1:])
spotdl.util.writelines_to_nonbinary_file(path, tracks[position:])
print("", file=sys.stderr)
"""

View File

@@ -50,7 +50,7 @@ class YouTubeSearch:
quoted_query = urllib.request.quote(query)
return self.base_search_url.format(quoted_query)
def _fetch_response_html(self, url):
def _fetch_response_html(self, url, retries=5):
response = urllib.request.urlopen(url)
soup = BeautifulSoup(response.read(), "html.parser")
return soup
@@ -102,12 +102,26 @@ class YouTubeSearch:
video = not not_video
return video
def search(self, query, limit=5):
def _is_server_side_invalid_response(self, videos, html):
if videos:
return False
search_message = html.find("div", {"class": "search-message"})
return search_message is None
def search(self, query, limit=10, retries=5):
""" Search and scrape YouTube to return a list of matching videos. """
search_url = self.generate_search_url(query)
logger.debug('Fetching YouTube results for "{}".'.format(search_url))
html = self._fetch_response_html(search_url)
videos = self._fetch_search_results(html)
videos = self._fetch_search_results(html, limit=limit)
to_retry = retries > 0 and self._is_server_side_invalid_response(videos, html)
if to_retry:
retries -= 1
logger.debug(
"Retrying since YouTube returned invalid response for search "
"results. Retries left: {retries}.".format(retries=retries)
)
return self.search(query, limit=limit, retries=retries)
return YouTubeVideos(videos)
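
The new _is_server_side_invalid_response check treats an empty result set as suspect only when the page also lacks YouTube's "search-message" div, which presumably appears on a genuinely empty results page; an empty page without that marker is assumed to be one of YouTube's throwaway responses, and the search is retried. A stand-alone illustration of that heuristic (the HTML snippets are made up for the example):

    from bs4 import BeautifulSoup

    def is_invalid_response(videos, html):
        if videos:                    # results were parsed, so the page was fine
            return False
        marker = html.find("div", {"class": "search-message"})
        return marker is None         # empty page without the marker -> retry

    empty_page = BeautifulSoup("<html><body></body></html>", "html.parser")
    no_results_page = BeautifulSoup(
        '<div class="search-message">No results found</div>', "html.parser"
    )
    print(is_invalid_response([], empty_page))       # True  -> retry the search
    print(is_invalid_response([], no_results_page))  # False -> genuinely no results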
@@ -202,9 +216,25 @@ class ProviderYouTube(ProviderBase):
)
return self.from_url(watch_urls[0])
def from_url(self, url):
def from_url(self, url, retries=5):
logger.debug('Fetching YouTube metadata for "{url}".'.format(url=url))
try:
content = pytube.YouTube(url)
except KeyError:
# Sometimes YouTube can return an unexpected response; in such a case,
# retry a few times before finally failing.
if retries > 0:
retries -= 1
logger.debug(
"YouTube returned an unexpected response for "
"`pytube.YouTube({url})`. Retries left: {retries}".format(
url=url, retries=retries
)
)
return self.from_url(url, retries=retries)
else:
raise
else:
return self.from_pytube_object(content)
def from_pytube_object(self, content):
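
Written iteratively instead of recursively, the same guard around pytube.YouTube might look like the sketch below (a hedged alternative for illustration, not the code this commit ships; the debug logging is omitted for brevity):

    import pytube

    def youtube_from_url(url, retries=5):
        # pytube occasionally raises KeyError on an unexpected watch-page payload;
        # try again a few times before letting the error propagate.
        for attempt in range(retries + 1):
            try:
                return pytube.YouTube(url)
            except KeyError:
                if attempt == retries:
                    raise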

View File

@@ -23,7 +23,11 @@ PROVIDERS = {
def prompt_for_youtube_search_result(videos):
max_index_length = len(str(len(videos)))
max_title_length = max(len(v["title"]) for v in videos)
print(" 0. Skip downloading this track", file=sys.stderr)
msg = "{index:>{max_index}}. Skip downloading this track".format(
index=0,
max_index=max_index_length,
)
print(msg, file=sys.stderr)
for index, video in enumerate(videos, 1):
vid_details = "{index:>{max_index}}. {title:<{max_title}}\n{new_line_gap} {url} [{duration}]".format(
index=index,
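
The {index:>{max_index}} spec right-aligns the option number to the width of the largest index, so the new "0. Skip" line lines up with the numbered results below it. A quick illustration of the nested format specification:

    width = len(str(12))  # e.g. 12 videos -> width 2
    print("{index:>{max_index}}. Skip downloading this track".format(index=0, max_index=width))
    print("{index:>{max_index}}. {title}".format(index=12, max_index=width, title="Some video"))
    # " 0. Skip downloading this track"
    # "12. Some video"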
@@ -92,7 +96,6 @@ class MetadataSearch:
target=self.get_lyrics,
args=(lyric_query,),
)
metadata["lyrics"].start()
return metadata
@@ -113,7 +116,6 @@ class MetadataSearch:
target=self.get_lyrics,
arguments=(lyric_query,),
)
metadata["lyrics"].start()
return metadata
@@ -134,7 +136,6 @@ class MetadataSearch:
target=self.get_lyrics,
arguments=(lyric_query,),
)
metadata["lyrics"].start()
return metadata
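
The .start() calls removed from these three search methods are now issued from the download path (see the Spotdl class hunks above), so the lyrics fetch runs in the background while the audio downloads and is joined only right before the metadata is applied. Judging by the track.metadata["lyrics"].join() assignment earlier in the diff, metadata["lyrics"] is a thread-like object whose join() returns the fetched lyrics. A minimal sketch of that pattern, using a hypothetical ReturningThread wrapper rather than spotdl's actual class:

    import threading

    class ReturningThread(threading.Thread):
        # A thread whose join() hands back the target's return value.
        def __init__(self, target, args=()):
            super().__init__()
            self._call = lambda: target(*args)
            self._result = None

        def run(self):
            self._result = self._call()

        def join(self, timeout=None):
            super().join(timeout)
            return self._result

    def fetch_lyrics(query):
        return "placeholder lyrics for " + query   # stands in for the lyrics provider

    lyrics_thread = ReturningThread(target=fetch_lyrics, args=("artist - title",))
    lyrics_thread.start()              # kicked off when the download begins
    # ... download and encode the track here ...
    lyrics = lyrics_thread.join()      # collected just before tagging the file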

View File

@@ -212,3 +212,14 @@ def content_available(url):
def titlecase(string):
return " ".join(word.capitalize() for word in string.split())
def readlines_from_nonbinary_file(path):
with open(path, "r") as fin:
lines = fin.read().splitlines()
return lines
def writelines_to_nonbinary_file(path, lines):
with open(path, "w") as fout:
fout.writelines(map(lambda x: x + "\n", lines))
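
The two new helpers give the track-list handling a symmetric read/write pair: readlines_from_nonbinary_file returns the file's lines without trailing newlines, and writelines_to_nonbinary_file appends one newline per entry when writing them back. A short usage sketch (the file name is made up):

    tracks = readlines_from_nonbinary_file("tracks.txt")       # ["song a", "song b", ...]
    tracks = [t.strip() for t in tracks if t.strip()]          # drop blanks, trim whitespace
    writelines_to_nonbinary_file("tracks.txt", tracks)         # one track per line again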