Merge pull request #585 from ritiek/refactor

Scrape lyrics from Genius and lyrics refactor
This commit is contained in:
Ritiek Malhotra
2019-07-25 12:05:25 +05:30
committed by GitHub
14 changed files with 177 additions and 12 deletions

View File

@@ -0,0 +1 @@
from spotdl.lyrics.lyric_base import LyricBase

View File

@@ -0,0 +1,5 @@
class LyricsNotFound(Exception):
    """Signal that lyrics could not be obtained for a track."""

    # Reuse the module name of the built-in ``Exception`` as this class's
    # ``__module__`` instead of the module the class is defined in.
    __module__ = Exception.__module__

    def __init__(self, message=None):
        """Store ``message`` (``None`` by default) as the exception argument."""
        super().__init__(message)

View File

@@ -0,0 +1,14 @@
import lyricwikia
from abc import ABC
from abc import abstractmethod
class LyricBase(ABC):
    """Abstract interface shared by the lyrics provider classes.

    Both ``__init__`` and ``get_lyrics`` are abstract, so a subclass has to
    override each of them before it can be instantiated.
    """

    @abstractmethod
    def __init__(self, artist, song):
        """Initialise the provider for ``artist`` and ``song``."""

    @abstractmethod
    def get_lyrics(self, linesep="\n", timeout=None):
        """Produce the lyrics; the behaviour is defined by each subclass."""

View File

@@ -0,0 +1,4 @@
from spotdl.lyrics.providers.genius import Genius
from spotdl.lyrics.providers.lyricwikia_wrapper import LyricWikia
# Order matters: generate_metadata walks this tuple from left to right and
# stops at the first provider that does not raise LyricsNotFound.
LyricClasses = (Genius, LyricWikia)

View File

@@ -0,0 +1,47 @@
from bs4 import BeautifulSoup
import urllib.request
from spotdl.lyrics.lyric_base import LyricBase
from spotdl.lyrics.exceptions import LyricsNotFound
BASE_URL = "https://genius.com"
class Genius(LyricBase):
    """Lyrics provider that scrapes a track's lyrics page on Genius."""

    def __init__(self, artist, song):
        """Remember the artist and song whose lyrics should be fetched."""
        self.artist = artist
        self.song = song
        self.base_url = BASE_URL

    def _guess_lyric_url(self):
        """Build the URL where the lyrics page is expected to be.

        The path is ``/<artist> <song> lyrics`` with every space replaced
        by a hyphen, percent-encoded and appended to ``self.base_url``.
        """
        query = "/{} {} lyrics".format(self.artist, self.song)
        query = query.replace(" ", "-")
        encoded_query = urllib.request.quote(query)
        return self.base_url + encoded_query

    def _fetch_page(self, url, timeout=None):
        """Request ``url`` and return whatever the response's ``read()`` gives.

        Raises:
            LyricsNotFound: if opening the URL raises an HTTP error.
        """
        request = urllib.request.Request(url)
        request.add_header("User-Agent", "urllib")
        try:
            response = urllib.request.urlopen(request, timeout=timeout)
        except urllib.request.HTTPError as http_error:
            # Keep the HTTP error attached as the cause for easier debugging.
            raise LyricsNotFound(
                "Could not find lyrics for {} - {} at URL: {}".format(
                    self.artist, self.song, url
                )
            ) from http_error
        else:
            return response.read()

    def _get_lyrics_text(self, html):
        """Return the text of the first ``<p>`` element found in ``html``.

        Raises:
            LyricsNotFound: if ``html`` contains no ``<p>`` element.
        """
        soup = BeautifulSoup(html, "html.parser")
        lyrics_paragraph = soup.find("p")
        if lyrics_paragraph is None:
            # ``find`` yields None when nothing matches; report that as
            # missing lyrics so callers can fall back to another provider
            # instead of crashing with an AttributeError.
            raise LyricsNotFound(
                "Could not find lyrics for {} - {}: "
                "the page has no lyrics paragraph".format(self.artist, self.song)
            )
        return lyrics_paragraph.get_text()

    def get_lyrics(self, linesep="\n", timeout=None):
        """Fetch the lyrics and return them with ``"\\n"`` replaced by ``linesep``.

        ``timeout`` is forwarded to the page download.

        Raises:
            LyricsNotFound: if the page cannot be fetched because of an HTTP
                error, or if it contains no ``<p>`` element.
        """
        url = self._guess_lyric_url()
        html_page = self._fetch_page(url, timeout=timeout)
        lyrics = self._get_lyrics_text(html_page)
        return lyrics.replace("\n", linesep)

View File

@@ -0,0 +1,18 @@
import lyricwikia
from spotdl.lyrics.lyric_base import LyricBase
from spotdl.lyrics.exceptions import LyricsNotFound
class LyricWikia(LyricBase):
    """Lyrics provider that delegates to the ``lyricwikia`` package."""

    def __init__(self, artist, song):
        """Remember the artist and song whose lyrics should be fetched."""
        self.song = song
        self.artist = artist

    def get_lyrics(self, linesep="\n", timeout=None):
        """Return the result of ``lyricwikia.get_lyrics`` for this track.

        ``lyricwikia.LyricsNotFound`` is re-raised as this package's own
        ``LyricsNotFound``, carrying the same first argument.
        """
        try:
            return lyricwikia.get_lyrics(self.artist, self.song, linesep, timeout)
        except lyricwikia.LyricsNotFound as not_found:
            raise LyricsNotFound(not_found.args[0])

View File

@@ -0,0 +1,39 @@
from spotdl.lyrics import LyricBase
from spotdl.lyrics import exceptions
from spotdl.lyrics.providers import Genius
import urllib.request
import pytest
class TestGenius:
    """Tests for the Genius provider; ``urllib.request.urlopen`` is stubbed."""

    def test_subclass(self):
        assert issubclass(Genius, LyricBase)

    @pytest.fixture(scope="module")
    def track(self):
        """Genius instance shared by the tests in this module."""
        return Genius("artist", "song")

    def test_base_url(self, track):
        assert track.base_url == "https://genius.com"

    def test_get_lyrics(self, track, monkeypatch):
        def mocked_urlopen(url, timeout=None):
            class DummyHTTPResponse:
                def read(self):
                    # A real HTTP response body is bytes, so the stub
                    # returns bytes as well.
                    return b"<p>amazing lyrics!</p>"

            return DummyHTTPResponse()

        monkeypatch.setattr("urllib.request.urlopen", mocked_urlopen)
        assert track.get_lyrics() == "amazing lyrics!"

    def test_lyrics_not_found_error(self, track, monkeypatch):
        def mocked_urlopen(url, timeout=None):
            # Build a realistic 404 and pass None for the headers and file
            # object rather than strings that are neither.
            raise urllib.request.HTTPError(
                "https://genius.com/artist-song-lyrics", 404, "Not Found", None, None
            )

        monkeypatch.setattr("urllib.request.urlopen", mocked_urlopen)
        with pytest.raises(exceptions.LyricsNotFound):
            track.get_lyrics()

View File

@@ -0,0 +1,31 @@
import lyricwikia
from spotdl.lyrics import LyricBase
from spotdl.lyrics import exceptions
from spotdl.lyrics.providers import LyricWikia
import pytest
class TestLyricWikia:
    """Tests for the LyricWikia provider; ``lyricwikia.get_lyrics`` is stubbed."""

    def test_subclass(self):
        assert issubclass(LyricWikia, LyricBase)

    def test_get_lyrics(self, monkeypatch):
        # `LyricWikia` delegates to the 3rd party function
        # `lyricwikia.get_lyrics`; that library has its own tests, so it is
        # replaced by a stub here instead of being exercised.
        def fake_get_lyrics(artist, song, linesep, timeout):
            return "awesome lyrics!"

        monkeypatch.setattr("lyricwikia.get_lyrics", fake_get_lyrics)
        provider = LyricWikia("Lyricwikia", "Lyricwikia")
        assert provider.get_lyrics() == "awesome lyrics!"

    def test_lyrics_not_found_error(self, monkeypatch):
        # `lyricwikia.LyricsNotFound` must surface as `exceptions.LyricsNotFound`.
        def failing_get_lyrics(artist, song, linesep, timeout):
            raise lyricwikia.LyricsNotFound("Nope, no lyrics.")

        monkeypatch.setattr("lyricwikia.get_lyrics", failing_get_lyrics)
        provider = LyricWikia("Lyricwikia", "Lyricwikia")
        with pytest.raises(exceptions.LyricsNotFound):
            provider.get_lyrics()

View File

@@ -1,6 +1,5 @@
import spotipy
import spotipy.oauth2 as oauth2
import lyricwikia
from slugify import slugify
from titlecase import titlecase
@@ -12,10 +11,14 @@ import functools
from spotdl import const
from spotdl import internals
from spotdl.lyrics.providers import LyricClasses
from spotdl.lyrics.exceptions import LyricsNotFound
spotify = None
def generate_token():
""" Generate the token. """
credentials = oauth2.SpotifyClientCredentials(
@@ -74,13 +77,16 @@ def generate_metadata(raw_song):
meta_tags[u"total_tracks"] = album["tracks"]["total"]
log.debug("Fetching lyrics")
meta_tags["lyrics"] = None
try:
meta_tags["lyrics"] = lyricwikia.get_lyrics(
meta_tags["artists"][0]["name"], meta_tags["name"]
)
except lyricwikia.LyricsNotFound:
meta_tags["lyrics"] = None
for LyricClass in LyricClasses:
track = LyricClass(meta_tags["artists"][0]["name"], meta_tags["name"])
try:
meta_tags["lyrics"] = track.get_lyrics()
except LyricsNotFound:
continue
else:
break
# Some sugar
meta_tags["year"], *_ = meta_tags["release_date"].split("-")