mirror of
https://github.com/KevinMidboe/spotify-downloader.git
synced 2025-10-29 18:00:15 +00:00
Scrape lyrics from Genius and lyrics refactor
This commit is contained in:
1
spotdl/lyrics/__init__.py
Normal file
1
spotdl/lyrics/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
from spotdl.lyrics.lyric_base import LyricBase
|
||||||
5
spotdl/lyrics/exceptions.py
Normal file
5
spotdl/lyrics/exceptions.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
class LyricsNotFound(Exception):
    """Raised when a lyrics provider cannot supply lyrics for a track."""

    # Report this class under the same module name as the built-in
    # ``Exception`` (presumably so tracebacks show the bare class name
    # instead of the package's dotted path).
    __module__ = Exception.__module__

    def __init__(self, message=None):
        """Create the exception.

        Args:
            message: Optional description of the failure; it is passed on
                unchanged as the exception's single argument.
        """
        super().__init__(message)
|
||||||
14
spotdl/lyrics/lyric_base.py
Normal file
14
spotdl/lyrics/lyric_base.py
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
import lyricwikia
|
||||||
|
|
||||||
|
from abc import ABC
|
||||||
|
from abc import abstractmethod
|
||||||
|
|
||||||
|
|
||||||
|
class LyricBase(ABC):
    """Abstract interface shared by the lyrics providers.

    Both methods are abstract, so a concrete provider has to override
    ``__init__`` and ``get_lyrics`` before it can be instantiated.
    """

    @abstractmethod
    def __init__(self, artist, song):
        """Prepare a lyrics lookup.

        Args:
            artist: Name of the performing artist.
            song: Title of the song.
        """

    @abstractmethod
    def get_lyrics(self, linesep="\n", timeout=None):
        """Return the lyrics of the track.

        Args:
            linesep: Line separator the provider should use in the
                returned lyrics.
            timeout: Timeout handed to the provider's lookup; ``None``
                leaves the provider's default in place.
        """
|
||||||
2
spotdl/lyrics/providers/__init__.py
Normal file
2
spotdl/lyrics/providers/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
from spotdl.lyrics.providers.lyricwikia_wrapper import LyricWikia
|
||||||
|
from spotdl.lyrics.providers.genius import Genius
|
||||||
47
spotdl/lyrics/providers/genius.py
Normal file
47
spotdl/lyrics/providers/genius.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
from spotdl.lyrics.lyric_base import LyricBase
|
||||||
|
from spotdl.lyrics.exceptions import LyricsNotFound
|
||||||
|
|
||||||
|
BASE_URL = "https://genius.com"
|
||||||
|
|
||||||
|
|
||||||
|
class Genius(LyricBase):
    """Lyrics provider that scrapes a song page below ``BASE_URL``.

    The page URL is guessed from the artist and song names; no search is
    performed, so a wrong guess surfaces as ``LyricsNotFound``.
    """

    def __init__(self, artist, song):
        """Remember the track to look up.

        Args:
            artist: Name of the performing artist.
            song: Title of the song.
        """
        self.artist = artist
        self.song = song
        self.base_url = BASE_URL

    def _guess_lyric_url(self):
        """Build the URL the lyrics page is expected to live at.

        The path is ``/<artist> <song> lyrics`` with spaces turned into
        hyphens and the result percent-encoded.
        """
        query = "/{} {} lyrics".format(self.artist, self.song)
        query = query.replace(" ", "-")
        encoded_query = urllib.request.quote(query)
        return self.base_url + encoded_query

    def _fetch_page(self, url, timeout=None):
        """Download ``url`` and return the raw response body.

        Args:
            url: Address of the page to fetch.
            timeout: Timeout forwarded to ``urllib.request.urlopen``.

        Raises:
            LyricsNotFound: If the server answers with an HTTP error.
        """
        request = urllib.request.Request(url)
        request.add_header("User-Agent", "urllib")
        try:
            response = urllib.request.urlopen(request, timeout=timeout)
        except urllib.request.HTTPError as err:
            # Keep the HTTP error attached as the cause for debugging.
            raise LyricsNotFound(
                "Could not find lyrics for {} - {} at URL: {}".format(
                    self.artist, self.song, url
                )
            ) from err
        return response.read()

    def _get_lyrics_text(self, html):
        """Extract the text of the first ``<p>`` element from ``html``.

        Raises:
            LyricsNotFound: If the page contains no ``<p>`` element.
        """
        soup = BeautifulSoup(html, "html.parser")
        lyrics_paragraph = soup.find("p")
        if lyrics_paragraph is None:
            # ``find`` returns None when nothing matches; report that as a
            # missing-lyrics condition instead of failing on ``.get_text()``
            # with an AttributeError that callers do not expect.
            raise LyricsNotFound(
                "Could not find lyrics for {} - {} in the fetched page".format(
                    self.artist, self.song
                )
            )
        return lyrics_paragraph.get_text()

    def get_lyrics(self, linesep="\n", timeout=None):
        """Fetch the lyrics page and return its lyrics text.

        Args:
            linesep: String substituted for every ``"\\n"`` in the lyrics.
            timeout: Timeout forwarded to the page download.

        Raises:
            LyricsNotFound: If the page cannot be fetched or holds no
                lyrics paragraph.
        """
        url = self._guess_lyric_url()
        html_page = self._fetch_page(url, timeout=timeout)
        lyrics = self._get_lyrics_text(html_page)
        return lyrics.replace("\n", linesep)
|
||||||
18
spotdl/lyrics/providers/lyricwikia_wrapper.py
Normal file
18
spotdl/lyrics/providers/lyricwikia_wrapper.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
import lyricwikia
|
||||||
|
|
||||||
|
from spotdl.lyrics.lyric_base import LyricBase
|
||||||
|
from spotdl.lyrics.exceptions import LyricsNotFound
|
||||||
|
|
||||||
|
|
||||||
|
class LyricWikia(LyricBase):
    """Lyrics provider that delegates to the ``lyricwikia`` package."""

    def __init__(self, artist, song):
        """Remember the artist and song title to look up."""
        self.artist, self.song = artist, song

    def get_lyrics(self, linesep="\n", timeout=None):
        """Return the lyrics obtained from ``lyricwikia.get_lyrics``.

        Args:
            linesep: Passed to ``lyricwikia.get_lyrics`` as its third
                positional argument.
            timeout: Passed to ``lyricwikia.get_lyrics`` as its fourth
                positional argument.

        Raises:
            LyricsNotFound: Re-raised in place of
                ``lyricwikia.LyricsNotFound``, carrying the same first
                argument, so callers only deal with this package's
                exception type.
        """
        try:
            return lyricwikia.get_lyrics(self.artist, self.song, linesep, timeout)
        except lyricwikia.LyricsNotFound as err:
            raise LyricsNotFound(err.args[0])
|
||||||
0
spotdl/lyrics/providers/tests/__init__.py
Normal file
0
spotdl/lyrics/providers/tests/__init__.py
Normal file
39
spotdl/lyrics/providers/tests/test_genius.py
Normal file
39
spotdl/lyrics/providers/tests/test_genius.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
from spotdl.lyrics import LyricBase
|
||||||
|
from spotdl.lyrics import exceptions
|
||||||
|
from spotdl.lyrics.providers import Genius
|
||||||
|
|
||||||
|
import urllib.request
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
class TestGenius:
    """Tests for the ``Genius`` provider with ``urlopen`` stubbed out."""

    def test_subclass(self):
        """``Genius`` must implement the ``LyricBase`` interface."""
        assert issubclass(Genius, LyricBase)

    @pytest.fixture(scope="module")
    def track(self):
        """Provide one ``Genius`` instance shared by the tests below."""
        return Genius("artist", "song")

    def test_base_url(self, track):
        """The provider points at the Genius website."""
        assert track.base_url == "https://genius.com"

    def test_get_lyrics(self, track, monkeypatch):
        """The text of the fetched paragraph is returned as the lyrics."""

        class DummyHTTPResponse:
            def read(self):
                return "<p>amazing lyrics!</p>"

        def fake_urlopen(url, timeout=None):
            # Stand-in for the network call: always serve the canned page.
            return DummyHTTPResponse()

        monkeypatch.setattr("urllib.request.urlopen", fake_urlopen)
        lyrics = track.get_lyrics()
        assert lyrics == "amazing lyrics!"

    def test_lyrics_not_found_error(self, track, monkeypatch):
        """An HTTP error from the server surfaces as ``LyricsNotFound``."""

        def failing_urlopen(url, timeout=None):
            raise urllib.request.HTTPError("", "", "", "", "")

        monkeypatch.setattr("urllib.request.urlopen", failing_urlopen)
        with pytest.raises(exceptions.LyricsNotFound):
            track.get_lyrics()
|
||||||
31
spotdl/lyrics/providers/tests/test_lyricwikia_wrapper.py
Normal file
31
spotdl/lyrics/providers/tests/test_lyricwikia_wrapper.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
import lyricwikia
|
||||||
|
|
||||||
|
from spotdl.lyrics import LyricBase
|
||||||
|
from spotdl.lyrics import exceptions
|
||||||
|
from spotdl.lyrics.providers import LyricWikia
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
class TestLyricWikia:
    """Tests for the ``LyricWikia`` wrapper around ``lyricwikia``."""

    def test_subclass(self):
        """``LyricWikia`` must implement the ``LyricBase`` interface."""
        assert issubclass(LyricWikia, LyricBase)

    def test_get_lyrics(self, monkeypatch):
        """Whatever ``lyricwikia.get_lyrics`` returns is handed back."""

        # `LyricWikia` class uses the 3rd party method `lyricwikia.get_lyrics`
        # internally and there is no need to test a 3rd party library as they
        # have their own implementation of tests.
        def fake_get_lyrics(a, b, c, d):
            return "awesome lyrics!"

        monkeypatch.setattr("lyricwikia.get_lyrics", fake_get_lyrics)
        track = LyricWikia("Lyricwikia", "Lyricwikia")
        lyrics = track.get_lyrics()
        assert lyrics == "awesome lyrics!"

    def test_lyrics_not_found_error(self, monkeypatch):
        """The third-party error is translated into this package's error."""

        def failing_get_lyrics(a, b, c, d):
            raise lyricwikia.LyricsNotFound("Nope, no lyrics.")

        # Wrap `lyricwikia.LyricsNotFound` with `exceptions.LyricsNotFound` error.
        monkeypatch.setattr("lyricwikia.get_lyrics", failing_get_lyrics)
        track = LyricWikia("Lyricwikia", "Lyricwikia")
        with pytest.raises(exceptions.LyricsNotFound):
            track.get_lyrics()
|
||||||
@@ -1,6 +1,5 @@
|
|||||||
import spotipy
|
import spotipy
|
||||||
import spotipy.oauth2 as oauth2
|
import spotipy.oauth2 as oauth2
|
||||||
import lyricwikia
|
|
||||||
|
|
||||||
from slugify import slugify
|
from slugify import slugify
|
||||||
from titlecase import titlecase
|
from titlecase import titlecase
|
||||||
@@ -12,6 +11,8 @@ import functools
|
|||||||
|
|
||||||
from spotdl import const
|
from spotdl import const
|
||||||
from spotdl import internals
|
from spotdl import internals
|
||||||
|
from spotdl.lyrics.providers import LyricWikia
|
||||||
|
from spotdl.lyrics.exceptions import LyricsNotFound
|
||||||
|
|
||||||
spotify = None
|
spotify = None
|
||||||
|
|
||||||
@@ -74,12 +75,11 @@ def generate_metadata(raw_song):
|
|||||||
meta_tags[u"total_tracks"] = album["tracks"]["total"]
|
meta_tags[u"total_tracks"] = album["tracks"]["total"]
|
||||||
|
|
||||||
log.debug("Fetching lyrics")
|
log.debug("Fetching lyrics")
|
||||||
|
track = LyricWikia(meta_tags["artists"][0]["name"], meta_tags["name"])
|
||||||
|
|
||||||
try:
|
try:
|
||||||
meta_tags["lyrics"] = lyricwikia.get_lyrics(
|
meta_tags["lyrics"] = track.get_lyrics()
|
||||||
meta_tags["artists"][0]["name"], meta_tags["name"]
|
except LyricsNotFound:
|
||||||
)
|
|
||||||
except lyricwikia.LyricsNotFound:
|
|
||||||
meta_tags["lyrics"] = None
|
meta_tags["lyrics"] = None
|
||||||
|
|
||||||
# Some sugar
|
# Some sugar
|
||||||
|
|||||||
Reference in New Issue
Block a user