From 0d846cdcce9c7b51bfb34265821436b615c817cf Mon Sep 17 00:00:00 2001
From: Ritiek Malhotra
Date: Mon, 22 Jul 2019 15:55:05 +0530
Subject: [PATCH] Scrape lyrics from Genius and lyrics refactor

---
 spotdl/lyrics/__init__.py                     |  1 +
 spotdl/lyrics/exceptions.py                   |  9 +++
 spotdl/lyrics/lyric_base.py                   | 18 ++++++
 spotdl/lyrics/providers/__init__.py           |  2 +
 spotdl/lyrics/providers/genius.py             | 63 +++++++++++++++++++
 spotdl/lyrics/providers/lyricwikia_wrapper.py | 22 +++++++
 spotdl/lyrics/providers/tests/__init__.py     |  0
 spotdl/lyrics/providers/tests/test_genius.py  | 52 +++++++++++++++
 .../tests/test_lyricwikia_wrapper.py          | 31 +++++++++
 spotdl/spotify_tools.py                       | 10 +--
 10 files changed, 203 insertions(+), 5 deletions(-)
 create mode 100644 spotdl/lyrics/__init__.py
 create mode 100644 spotdl/lyrics/exceptions.py
 create mode 100644 spotdl/lyrics/lyric_base.py
 create mode 100644 spotdl/lyrics/providers/__init__.py
 create mode 100644 spotdl/lyrics/providers/genius.py
 create mode 100644 spotdl/lyrics/providers/lyricwikia_wrapper.py
 create mode 100644 spotdl/lyrics/providers/tests/__init__.py
 create mode 100644 spotdl/lyrics/providers/tests/test_genius.py
 create mode 100644 spotdl/lyrics/providers/tests/test_lyricwikia_wrapper.py

diff --git a/spotdl/lyrics/__init__.py b/spotdl/lyrics/__init__.py
new file mode 100644
index 0000000..c90c9ab
--- /dev/null
+++ b/spotdl/lyrics/__init__.py
@@ -0,0 +1 @@
+from spotdl.lyrics.lyric_base import LyricBase
diff --git a/spotdl/lyrics/exceptions.py b/spotdl/lyrics/exceptions.py
new file mode 100644
index 0000000..ec0afe1
--- /dev/null
+++ b/spotdl/lyrics/exceptions.py
@@ -0,0 +1,9 @@
+class LyricsNotFound(Exception):
+    """Raised when a lyrics provider cannot find lyrics for a track."""
+
+    # Present the class as a builtin exception so that tracebacks print the
+    # short name `LyricsNotFound` rather than the module-qualified one.
+    __module__ = Exception.__module__
+
+    def __init__(self, message=None):
+        super(LyricsNotFound, self).__init__(message)
diff --git a/spotdl/lyrics/lyric_base.py b/spotdl/lyrics/lyric_base.py
new file mode 100644
index 0000000..895323b
--- /dev/null
+++ b/spotdl/lyrics/lyric_base.py
@@ -0,0 +1,18 @@
+import lyricwikia
+
+from abc import ABC
+from abc import abstractmethod
+
+
+class LyricBase(ABC):
+    """Abstract interface that every lyrics provider implements."""
+
+    @abstractmethod
+    def __init__(self, artist, song):
+        """Accept the artist and song names to look lyrics up for."""
+        pass
+
+    @abstractmethod
+    def get_lyrics(self, linesep="\n", timeout=None):
+        """Return the lyrics, using `linesep` as the line separator."""
+        pass
diff --git a/spotdl/lyrics/providers/__init__.py b/spotdl/lyrics/providers/__init__.py
new file mode 100644
index 0000000..5fcb123
--- /dev/null
+++ b/spotdl/lyrics/providers/__init__.py
@@ -0,0 +1,2 @@
+from spotdl.lyrics.providers.lyricwikia_wrapper import LyricWikia
+from spotdl.lyrics.providers.genius import Genius
diff --git a/spotdl/lyrics/providers/genius.py b/spotdl/lyrics/providers/genius.py
new file mode 100644
index 0000000..49d7f53
--- /dev/null
+++ b/spotdl/lyrics/providers/genius.py
@@ -0,0 +1,63 @@
+from bs4 import BeautifulSoup
+import urllib.error
+import urllib.parse
+import urllib.request
+
+from spotdl.lyrics.lyric_base import LyricBase
+from spotdl.lyrics.exceptions import LyricsNotFound
+
+BASE_URL = "https://genius.com"
+
+
+class Genius(LyricBase):
+    """Lyrics provider that scrapes lyric pages from genius.com."""
+
+    def __init__(self, artist, song):
+        self.artist = artist
+        self.song = song
+        self.base_url = BASE_URL
+
+    def _guess_lyric_url(self):
+        """Build the URL where the lyric page is expected to live."""
+        query = "/{} {} lyrics".format(self.artist, self.song)
+        query = query.replace(" ", "-")
+        encoded_query = urllib.parse.quote(query)
+        lyric_url = self.base_url + encoded_query
+        return lyric_url
+
+    def _fetch_page(self, url, timeout=None):
+        """Download `url`; raise `LyricsNotFound` on an HTTP error."""
+        request = urllib.request.Request(url)
+        request.add_header("User-Agent", "urllib")
+        try:
+            response = urllib.request.urlopen(request, timeout=timeout)
+        except urllib.error.HTTPError:
+            raise LyricsNotFound(
+                "Could not find lyrics for {} - {} at URL: {}".format(
+                    self.artist, self.song, url
+                )
+            )
+        else:
+            return response.read()
+
+    def _get_lyrics_text(self, html):
+        """Return the text of the first paragraph element in `html`."""
+        soup = BeautifulSoup(html, "html.parser")
+        lyrics_paragraph = soup.find("p")
+        if lyrics_paragraph is None:
+            # Without this check a page lacking a paragraph would crash
+            # with AttributeError instead of the error callers handle.
+            raise LyricsNotFound(
+                "Could not find lyrics for {} - {}".format(
+                    self.artist, self.song
+                )
+            )
+        lyrics = lyrics_paragraph.get_text()
+        return lyrics
+
+    def get_lyrics(self, linesep="\n", timeout=None):
+        """Fetch the lyrics, with newlines replaced by `linesep`."""
+        url = self._guess_lyric_url()
+        html_page = self._fetch_page(url, timeout=timeout)
+        lyrics = self._get_lyrics_text(html_page)
+        return lyrics.replace("\n", linesep)
diff --git a/spotdl/lyrics/providers/lyricwikia_wrapper.py b/spotdl/lyrics/providers/lyricwikia_wrapper.py
new file mode 100644
index 0000000..2ac3690
--- /dev/null
+++ b/spotdl/lyrics/providers/lyricwikia_wrapper.py
@@ -0,0 +1,22 @@
+import lyricwikia
+
+from spotdl.lyrics.lyric_base import LyricBase
+from spotdl.lyrics.exceptions import LyricsNotFound
+
+
+class LyricWikia(LyricBase):
+    """Lyrics provider backed by the `lyricwikia` package."""
+
+    def __init__(self, artist, song):
+        self.artist = artist
+        self.song = song
+
+    def get_lyrics(self, linesep="\n", timeout=None):
+        """Return the lyrics; raise spotdl's `LyricsNotFound` if missing."""
+        try:
+            lyrics = lyricwikia.get_lyrics(self.artist, self.song, linesep, timeout)
+        except lyricwikia.LyricsNotFound as e:
+            # Re-raise as our own error type, keeping the original cause.
+            raise LyricsNotFound(e.args[0]) from e
+        else:
+            return lyrics
diff --git a/spotdl/lyrics/providers/tests/__init__.py b/spotdl/lyrics/providers/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/spotdl/lyrics/providers/tests/test_genius.py b/spotdl/lyrics/providers/tests/test_genius.py
new file mode 100644
index 0000000..e033a1c
--- /dev/null
+++ b/spotdl/lyrics/providers/tests/test_genius.py
@@ -0,0 +1,52 @@
+from spotdl.lyrics import LyricBase
+from spotdl.lyrics import exceptions
+from spotdl.lyrics.providers import Genius
+
+import urllib.request
+import pytest
+
+
+class TestGenius:
+    def test_subclass(self):
+        assert issubclass(Genius, LyricBase)
+
+    @pytest.fixture(scope="module")
+    def track(self):
+        return Genius("artist", "song")
+
+    def test_base_url(self, track):
+        assert track.base_url == "https://genius.com"
+
+    def test_get_lyrics(self, track, monkeypatch):
+
+        def mocked_urlopen(url, timeout=None):
+            class DummyHTTPResponse:
+                def read(self):
+                    return "<p>amazing lyrics!</p>"
+
+            return DummyHTTPResponse()
+
+        monkeypatch.setattr("urllib.request.urlopen", mocked_urlopen)
+        assert track.get_lyrics() == "amazing lyrics!"
+
+    def test_lyrics_not_found_error(self, track, monkeypatch):
+
+        def mocked_urlopen(url, timeout=None):
+            raise urllib.request.HTTPError("", "", "", "", "")
+
+        monkeypatch.setattr("urllib.request.urlopen", mocked_urlopen)
+        with pytest.raises(exceptions.LyricsNotFound):
+            track.get_lyrics()
+
+    def test_lyrics_missing_from_page(self, track, monkeypatch):
+
+        def mocked_urlopen(url, timeout=None):
+            class DummyHTTPResponse:
+                def read(self):
+                    return "no paragraph here"
+
+            return DummyHTTPResponse()
+
+        monkeypatch.setattr("urllib.request.urlopen", mocked_urlopen)
+        with pytest.raises(exceptions.LyricsNotFound):
+            track.get_lyrics()
diff --git a/spotdl/lyrics/providers/tests/test_lyricwikia_wrapper.py b/spotdl/lyrics/providers/tests/test_lyricwikia_wrapper.py
new file mode 100644
index 0000000..474a101
--- /dev/null
+++ b/spotdl/lyrics/providers/tests/test_lyricwikia_wrapper.py
@@ -0,0 +1,31 @@
+import lyricwikia
+
+from spotdl.lyrics import LyricBase
+from spotdl.lyrics import exceptions
+from spotdl.lyrics.providers import LyricWikia
+
+import pytest
+
+
+class TestLyricWikia:
+    def test_subclass(self):
+        assert issubclass(LyricWikia, LyricBase)
+
+    def test_get_lyrics(self, monkeypatch):
+        # `LyricWikia` class uses the 3rd party method `lyricwikia.get_lyrics`
+        # internally and there is no need to test a 3rd party library as they
+        # have their own implementation of tests.
+        monkeypatch.setattr("lyricwikia.get_lyrics", lambda a, b, c, d: "awesome lyrics!")
+        track = LyricWikia("Lyricwikia", "Lyricwikia")
+        assert track.get_lyrics() == "awesome lyrics!"
+
+    def test_lyrics_not_found_error(self, monkeypatch):
+
+        def lyricwikia_lyrics_not_found(msg):
+            raise lyricwikia.LyricsNotFound(msg)
+
+        # Wrap `lyricwikia.LyricsNotFound` with `exceptions.LyricsNotFound` error.
+        monkeypatch.setattr("lyricwikia.get_lyrics", lambda a, b, c, d: lyricwikia_lyrics_not_found("Nope, no lyrics."))
+        track = LyricWikia("Lyricwikia", "Lyricwikia")
+        with pytest.raises(exceptions.LyricsNotFound):
+            track.get_lyrics()
diff --git a/spotdl/spotify_tools.py b/spotdl/spotify_tools.py
index 7fbb060..c73174d 100644
--- a/spotdl/spotify_tools.py
+++ b/spotdl/spotify_tools.py
@@ -1,6 +1,5 @@
 import spotipy
 import spotipy.oauth2 as oauth2
-import lyricwikia
 
 from slugify import slugify
 from titlecase import titlecase
@@ -12,6 +11,8 @@ import functools
 
 from spotdl import const
 from spotdl import internals
+from spotdl.lyrics.providers import LyricWikia
+from spotdl.lyrics.exceptions import LyricsNotFound
 
 spotify = None
 
@@ -74,12 +75,11 @@ def generate_metadata(raw_song):
     meta_tags[u"total_tracks"] = album["tracks"]["total"]
 
     log.debug("Fetching lyrics")
+    track = LyricWikia(meta_tags["artists"][0]["name"], meta_tags["name"])
 
     try:
-        meta_tags["lyrics"] = lyricwikia.get_lyrics(
-            meta_tags["artists"][0]["name"], meta_tags["name"]
-        )
-    except lyricwikia.LyricsNotFound:
+        meta_tags["lyrics"] = track.get_lyrics()
+    except LyricsNotFound:
         meta_tags["lyrics"] = None
 
     # Some sugar