mirror of
https://github.com/KevinMidboe/spotify-downloader.git
synced 2026-01-09 02:46:01 +00:00
68 lines
2.2 KiB
Python
68 lines
2.2 KiB
Python
from bs4 import BeautifulSoup
|
|
import urllib.request
|
|
|
|
from spotdl.lyrics.lyric_base import LyricBase
|
|
from spotdl.lyrics.exceptions import LyricsNotFoundError
|
|
|
|
# Root URL of the Genius website; the guessed lyric page path is appended to it.
BASE_URL = "https://genius.com"
|
|
|
|
|
|
class Genius(LyricBase):
    """
    Lyric provider that looks up the lyrics of a track on Genius.

    The lyric page URL is guessed from the artist and track names,
    the page is downloaded and the lyric text is then extracted
    from the returned HTML.
    """

    def __init__(self, artist, track):
        """
        Stores the artist and track names that are later used to
        build the lyric page URL.
        """
        self.artist = artist
        self.track = track
        self.base_url = BASE_URL

    def _guess_lyric_url(self):
        """
        Returns the possible lyric URL for the track available
        on Genius. This may not always be a valid URL, but this
        is apparently the best we can do at the moment?
        """
        # Build a path of the form "/<artist>-<track>-lyrics": spaces
        # become hyphens and the remainder is percent-encoded.
        query = "/{} {} lyrics".format(self.artist, self.track)
        query = query.replace(" ", "-")
        encoded_query = urllib.request.quote(query)
        lyric_url = self.base_url + encoded_query
        return lyric_url

    def _fetch_page(self, url, timeout=None):
        """
        Makes a GET request to the given URL and returns the
        HTML content in the case of a valid response.

        The `timeout` value is handed over to `urlopen` unchanged.
        Raises `LyricsNotFoundError` if the server answers with an
        HTTP error status. Other errors are not handled here and
        propagate to the caller.
        """
        request = urllib.request.Request(url)
        # Send an explicit User-Agent header instead of urllib's default.
        request.add_header("User-Agent", "urllib")
        try:
            response = urllib.request.urlopen(request, timeout=timeout)
        except urllib.request.HTTPError as error:
            raise LyricsNotFoundError(
                "Could not find lyrics for {} - {} at URL: {}".format(
                    self.artist, self.track, url
                )
            ) from error
        # Read inside a context manager so that the underlying
        # connection is always closed, even when reading fails.
        with response:
            return response.read()

    def _get_lyrics_text(self, html):
        """
        Extracts and returns the lyric content from the
        provided HTML.

        The text of the first `<p>` element in the document is
        treated as the lyrics. Raises `LyricsNotFoundError` if the
        document has no such element.
        """
        soup = BeautifulSoup(html, "html.parser")
        lyrics_paragraph = soup.find("p")
        if not lyrics_paragraph:
            raise LyricsNotFoundError(
                "The lyrics for this track are yet to be released."
            )
        return lyrics_paragraph.get_text()

    def get_lyrics(self, linesep="\n", timeout=None):
        """
        Returns the lyric string for the given artist and track.

        Every newline in the lyrics is replaced with `linesep`, and
        `timeout` is forwarded to the page request. Raises
        `LyricsNotFoundError` if the page cannot be fetched or
        contains no lyrics.
        """
        url = self._guess_lyric_url()
        html_page = self._fetch_page(url, timeout=timeout)
        lyrics = self._get_lyrics_text(html_page)
        return lyrics.replace("\n", linesep)
|