Files
spotify-downloader/spotdl/lyrics/providers/genius.py
Ritiek Malhotra c73f55b8ce Fix crash when lyrics not yet released on Genius
There are some tracks on Genius whose lyrics are yet to be released.
When the tool previously attempted to scrape such webpages, it
resulted in a crash.
The tool will now raise a LyricsNotFound exception when such a
track is encountered, which is then handled internally by the tool.

Example of one such track on Genius:
https://genius.com/The-federal-empire-good-man-lyrics

Fixes #647.
2020-01-10 15:33:02 +05:30

50 lines
1.6 KiB
Python

from bs4 import BeautifulSoup
import urllib.request
from spotdl.lyrics.lyric_base import LyricBase
from spotdl.lyrics.exceptions import LyricsNotFound
# Root of the Genius website; the guessed lyric page path is appended to it.
BASE_URL = "https://genius.com"
class Genius(LyricBase):
    """Fetch song lyrics by scraping the track's page on Genius.

    The page address is guessed from the artist and song names, downloaded
    with ``urllib`` and parsed with BeautifulSoup.
    """

    def __init__(self, artist, song):
        """Remember which track to look up.

        Args:
            artist: Artist name, used to build the page URL.
            song: Song title, used to build the page URL.
        """
        self.artist = artist
        self.song = song
        self.base_url = BASE_URL

    def _guess_lyric_url(self):
        """Build the URL at which this track's lyrics page is expected.

        Returns:
            ``self.base_url`` followed by the percent-encoded path
            ``/<artist>-<song>-lyrics``, with every space turned into a
            hyphen.
        """
        query = "/{} {} lyrics".format(self.artist, self.song)
        query = query.replace(" ", "-")
        # quote() leaves "/" and "-" as they are, so only characters that
        # are unsafe in a URL path get percent-escaped.
        encoded_query = urllib.request.quote(query)
        return self.base_url + encoded_query

    def _fetch_page(self, url, timeout=None):
        """Download the body of the page at ``url``.

        Args:
            url: Address of the page to download.
            timeout: Forwarded unchanged to ``urllib.request.urlopen``.

        Returns:
            Whatever ``response.read()`` yields for the opened URL.

        Raises:
            LyricsNotFound: If opening the URL raises an ``HTTPError``.
        """
        request = urllib.request.Request(url)
        request.add_header("User-Agent", "urllib")
        try:
            response = urllib.request.urlopen(request, timeout=timeout)
        except urllib.request.HTTPError as error:
            # Keep the HTTP failure attached as the cause for debugging.
            raise LyricsNotFound(
                "Could not find lyrics for {} - {} at URL: {}".format(
                    self.artist, self.song, url
                )
            ) from error
        # Close the response after reading so the connection is not leaked.
        with response:
            return response.read()

    def _get_lyrics_text(self, html):
        """Extract the lyrics text from a downloaded page.

        Args:
            html: Page markup handed to BeautifulSoup's ``html.parser``.

        Returns:
            The text of the first ``<p>`` element found in ``html``.

        Raises:
            LyricsNotFound: If ``html`` contains no ``<p>`` element.
        """
        soup = BeautifulSoup(html, "html.parser")
        lyrics_paragraph = soup.find("p")
        # NOTE(review): presumably pages of unreleased tracks carry no <p>
        # element at all - confirm against a live page.
        if not lyrics_paragraph:
            raise LyricsNotFound("The lyrics for this track are yet to be released.")
        return lyrics_paragraph.get_text()

    def get_lyrics(self, linesep="\n", timeout=None):
        """Fetch the lyrics for the configured artist and song.

        Args:
            linesep: String substituted for every ``"\\n"`` in the lyrics.
            timeout: Forwarded to ``_fetch_page``.

        Returns:
            The lyrics text with newlines replaced by ``linesep``.

        Raises:
            LyricsNotFound: If the page cannot be fetched or holds no lyrics.
        """
        url = self._guess_lyric_url()
        html_page = self._fetch_page(url, timeout=timeout)
        lyrics = self._get_lyrics_text(html_page)
        return lyrics.replace("\n", linesep)