mirror of
https://github.com/KevinMidboe/spotify-downloader.git
synced 2026-01-01 23:16:37 +00:00
Scrape lyrics from Genius and lyrics refactor
This commit is contained in:
47
spotdl/lyrics/providers/genius.py
Normal file
47
spotdl/lyrics/providers/genius.py
Normal file
@@ -0,0 +1,47 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import urllib.request
|
||||
|
||||
from spotdl.lyrics.lyric_base import LyricBase
|
||||
from spotdl.lyrics.exceptions import LyricsNotFound
|
||||
|
||||
# Root URL of the Genius website; guessed lyric page paths are appended to it.
BASE_URL = "https://genius.com"
|
||||
|
||||
|
||||
class Genius(LyricBase):
    """Lyrics provider that scrapes lyric pages from genius.com.

    The lyric page URL is guessed from the artist and song names rather
    than looked up through a search, so a wrong guess surfaces as
    ``LyricsNotFound``.
    """

    def __init__(self, artist, song):
        """Store the artist and song whose lyrics should be fetched."""
        self.artist = artist
        self.song = song
        self.base_url = BASE_URL

    def _guess_lyric_url(self):
        """Build the probable lyric page URL for the artist and song.

        The path has the form ``/<artist>-<song>-lyrics`` with spaces
        replaced by dashes and the result percent-encoded.
        """
        query = "/{} {} lyrics".format(self.artist, self.song)
        query = query.replace(" ", "-")
        encoded_query = urllib.request.quote(query)
        return self.base_url + encoded_query

    def _fetch_page(self, url, timeout=None):
        """Download ``url`` and return the raw response body.

        Args:
            url: Address of the page to download.
            timeout: Passed through to ``urllib.request.urlopen``.

        Raises:
            LyricsNotFound: If the server answers with an HTTP error.
        """
        request = urllib.request.Request(url)
        request.add_header("User-Agent", "urllib")
        try:
            response = urllib.request.urlopen(request, timeout=timeout)
        except urllib.request.HTTPError as err:
            raise LyricsNotFound(
                "Could not find lyrics for {} - {} at URL: {}".format(
                    self.artist, self.song, url
                )
            ) from err
        # Close the connection as soon as the body has been read instead of
        # leaving the socket open until garbage collection.
        with response:
            return response.read()

    def _get_lyrics_text(self, html):
        """Extract the lyrics text from a downloaded lyric page.

        The text of the first ``<p>`` element in the document is returned.
        NOTE(review): this assumes Genius keeps the lyrics in the first
        paragraph of the page -- confirm against the current page layout.

        Raises:
            LyricsNotFound: If the page contains no ``<p>`` element.
        """
        soup = BeautifulSoup(html, "html.parser")
        lyrics_paragraph = soup.find("p")
        if lyrics_paragraph is None:
            # find() returns None when nothing matches; report that as a
            # missing-lyrics condition rather than an AttributeError.
            raise LyricsNotFound(
                "Could not find lyrics for {} - {}".format(
                    self.artist, self.song
                )
            )
        return lyrics_paragraph.get_text()

    def get_lyrics(self, linesep="\n", timeout=None):
        """Fetch the lyrics and return them as a single string.

        Args:
            linesep: String substituted for every newline in the lyrics.
            timeout: Passed through to the page download.

        Raises:
            LyricsNotFound: If the lyric page cannot be fetched or holds
                no lyrics paragraph.
        """
        url = self._guess_lyric_url()
        html_page = self._fetch_page(url, timeout=timeout)
        lyrics = self._get_lyrics_text(html_page)
        return lyrics.replace("\n", linesep)
|
||||
Reference in New Issue
Block a user