From 0d846cdcce9c7b51bfb34265821436b615c817cf Mon Sep 17 00:00:00 2001 From: Ritiek Malhotra Date: Mon, 22 Jul 2019 15:55:05 +0530 Subject: [PATCH 1/7] Scrape lyrics from Genius and lyrics refactor --- spotdl/lyrics/__init__.py | 1 + spotdl/lyrics/exceptions.py | 5 ++ spotdl/lyrics/lyric_base.py | 14 ++++++ spotdl/lyrics/providers/__init__.py | 2 + spotdl/lyrics/providers/genius.py | 47 +++++++++++++++++++ spotdl/lyrics/providers/lyricwikia_wrapper.py | 18 +++++++ spotdl/lyrics/providers/tests/__init__.py | 0 spotdl/lyrics/providers/tests/test_genius.py | 39 +++++++++++++++ .../tests/test_lyricwikia_wrapper.py | 31 ++++++++++++ spotdl/spotify_tools.py | 10 ++-- 10 files changed, 162 insertions(+), 5 deletions(-) create mode 100644 spotdl/lyrics/__init__.py create mode 100644 spotdl/lyrics/exceptions.py create mode 100644 spotdl/lyrics/lyric_base.py create mode 100644 spotdl/lyrics/providers/__init__.py create mode 100644 spotdl/lyrics/providers/genius.py create mode 100644 spotdl/lyrics/providers/lyricwikia_wrapper.py create mode 100644 spotdl/lyrics/providers/tests/__init__.py create mode 100644 spotdl/lyrics/providers/tests/test_genius.py create mode 100644 spotdl/lyrics/providers/tests/test_lyricwikia_wrapper.py diff --git a/spotdl/lyrics/__init__.py b/spotdl/lyrics/__init__.py new file mode 100644 index 0000000..c90c9ab --- /dev/null +++ b/spotdl/lyrics/__init__.py @@ -0,0 +1 @@ +from spotdl.lyrics.lyric_base import LyricBase diff --git a/spotdl/lyrics/exceptions.py b/spotdl/lyrics/exceptions.py new file mode 100644 index 0000000..ec0afe1 --- /dev/null +++ b/spotdl/lyrics/exceptions.py @@ -0,0 +1,5 @@ +class LyricsNotFound(Exception): + __module__ = Exception.__module__ + + def __init__(self, message=None): + super(LyricsNotFound, self).__init__(message) diff --git a/spotdl/lyrics/lyric_base.py b/spotdl/lyrics/lyric_base.py new file mode 100644 index 0000000..895323b --- /dev/null +++ b/spotdl/lyrics/lyric_base.py @@ -0,0 +1,14 @@ +import lyricwikia + +from abc import ABC +from abc import abstractmethod + + +class LyricBase(ABC): + @abstractmethod + def __init__(self, artist, song): + pass + + @abstractmethod + def get_lyrics(self, linesep="\n", timeout=None): + pass diff --git a/spotdl/lyrics/providers/__init__.py b/spotdl/lyrics/providers/__init__.py new file mode 100644 index 0000000..5fcb123 --- /dev/null +++ b/spotdl/lyrics/providers/__init__.py @@ -0,0 +1,2 @@ +from spotdl.lyrics.providers.lyricwikia_wrapper import LyricWikia +from spotdl.lyrics.providers.genius import Genius diff --git a/spotdl/lyrics/providers/genius.py b/spotdl/lyrics/providers/genius.py new file mode 100644 index 0000000..49d7f53 --- /dev/null +++ b/spotdl/lyrics/providers/genius.py @@ -0,0 +1,47 @@ +from bs4 import BeautifulSoup +import urllib.request + +from spotdl.lyrics.lyric_base import LyricBase +from spotdl.lyrics.exceptions import LyricsNotFound + +BASE_URL = "https://genius.com" + + +class Genius(LyricBase): + def __init__(self, artist, song): + self.artist = artist + self.song = song + self.base_url = BASE_URL + + def _guess_lyric_url(self): + query = "/{} {} lyrics".format(self.artist, self.song) + query = query.replace(" ", "-") + encoded_query = urllib.request.quote(query) + lyric_url = self.base_url + encoded_query + return lyric_url + + def _fetch_page(self, url, timeout=None): + request = urllib.request.Request(url) + request.add_header("User-Agent", "urllib") + try: + response = urllib.request.urlopen(request, timeout=timeout) + except urllib.request.HTTPError: + raise LyricsNotFound( + "Could not find lyrics for {} - {} at URL: {}".format( + self.artist, self.song, url + ) + ) + else: + return response.read() + + def _get_lyrics_text(self, html): + soup = BeautifulSoup(html, "html.parser") + lyrics_paragraph = soup.find("p") + lyrics = lyrics_paragraph.get_text() + return lyrics + + def get_lyrics(self, linesep="\n", timeout=None): + url = self._guess_lyric_url() + html_page = self._fetch_page(url, timeout=timeout) + lyrics = self._get_lyrics_text(html_page) + return lyrics.replace("\n", linesep) diff --git a/spotdl/lyrics/providers/lyricwikia_wrapper.py b/spotdl/lyrics/providers/lyricwikia_wrapper.py new file mode 100644 index 0000000..2ac3690 --- /dev/null +++ b/spotdl/lyrics/providers/lyricwikia_wrapper.py @@ -0,0 +1,18 @@ +import lyricwikia + +from spotdl.lyrics.lyric_base import LyricBase +from spotdl.lyrics.exceptions import LyricsNotFound + + +class LyricWikia(LyricBase): + def __init__(self, artist, song): + self.artist = artist + self.song = song + + def get_lyrics(self, linesep="\n", timeout=None): + try: + lyrics = lyricwikia.get_lyrics(self.artist, self.song, linesep, timeout) + except lyricwikia.LyricsNotFound as e: + raise LyricsNotFound(e.args[0]) + else: + return lyrics diff --git a/spotdl/lyrics/providers/tests/__init__.py b/spotdl/lyrics/providers/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/spotdl/lyrics/providers/tests/test_genius.py b/spotdl/lyrics/providers/tests/test_genius.py new file mode 100644 index 0000000..e033a1c --- /dev/null +++ b/spotdl/lyrics/providers/tests/test_genius.py @@ -0,0 +1,39 @@ +from spotdl.lyrics import LyricBase +from spotdl.lyrics import exceptions +from spotdl.lyrics.providers import Genius + +import urllib.request +import pytest + + +class TestGenius: + def test_subclass(self): + assert issubclass(Genius, LyricBase) + + @pytest.fixture(scope="module") + def track(self): + return Genius("artist", "song") + + def test_base_url(self, track): + assert track.base_url == "https://genius.com" + + def test_get_lyrics(self, track, monkeypatch): + + def mocked_urlopen(url, timeout=None): + class DummyHTTPResponse: + def read(self): + return "

amazing lyrics!

" + + return DummyHTTPResponse() + + monkeypatch.setattr("urllib.request.urlopen", mocked_urlopen) + assert track.get_lyrics() == "amazing lyrics!" + + def test_lyrics_not_found_error(self, track, monkeypatch): + + def mocked_urlopen(url, timeout=None): + raise urllib.request.HTTPError("", "", "", "", "") + + monkeypatch.setattr("urllib.request.urlopen", mocked_urlopen) + with pytest.raises(exceptions.LyricsNotFound): + track.get_lyrics() diff --git a/spotdl/lyrics/providers/tests/test_lyricwikia_wrapper.py b/spotdl/lyrics/providers/tests/test_lyricwikia_wrapper.py new file mode 100644 index 0000000..474a101 --- /dev/null +++ b/spotdl/lyrics/providers/tests/test_lyricwikia_wrapper.py @@ -0,0 +1,31 @@ +import lyricwikia + +from spotdl.lyrics import LyricBase +from spotdl.lyrics import exceptions +from spotdl.lyrics.providers import LyricWikia + +import pytest + + +class TestLyricWikia: + def test_subclass(self): + assert issubclass(LyricWikia, LyricBase) + + def test_get_lyrics(self, monkeypatch): + # `LyricWikia` class uses the 3rd party method `lyricwikia.get_lyrics` + # internally and there is no need to test a 3rd party library as they + # have their own implementation of tests. + monkeypatch.setattr("lyricwikia.get_lyrics", lambda a, b, c, d: "awesome lyrics!") + track = LyricWikia("Lyricwikia", "Lyricwikia") + assert track.get_lyrics() == "awesome lyrics!" + + def test_lyrics_not_found_error(self, monkeypatch): + + def lyricwikia_lyrics_not_found(msg): + raise lyricwikia.LyricsNotFound(msg) + + # Wrap `lyricwikia.LyricsNotFound` with `exceptions.LyricsNotFound` error. + monkeypatch.setattr("lyricwikia.get_lyrics", lambda a, b, c, d: lyricwikia_lyrics_not_found("Nope, no lyrics.")) + track = LyricWikia("Lyricwikia", "Lyricwikia") + with pytest.raises(exceptions.LyricsNotFound): + track.get_lyrics() diff --git a/spotdl/spotify_tools.py b/spotdl/spotify_tools.py index 7fbb060..c73174d 100644 --- a/spotdl/spotify_tools.py +++ b/spotdl/spotify_tools.py @@ -1,6 +1,5 @@ import spotipy import spotipy.oauth2 as oauth2 -import lyricwikia from slugify import slugify from titlecase import titlecase @@ -12,6 +11,8 @@ import functools from spotdl import const from spotdl import internals +from spotdl.lyrics.providers import LyricWikia +from spotdl.lyrics.exceptions import LyricsNotFound spotify = None @@ -74,12 +75,11 @@ def generate_metadata(raw_song): meta_tags[u"total_tracks"] = album["tracks"]["total"] log.debug("Fetching lyrics") + track = LyricWikia(meta_tags["artists"][0]["name"], meta_tags["name"]) try: - meta_tags["lyrics"] = lyricwikia.get_lyrics( - meta_tags["artists"][0]["name"], meta_tags["name"] - ) - except lyricwikia.LyricsNotFound: + meta_tags["lyrics"] = track.get_lyrics() + except LyricsNotFound: meta_tags["lyrics"] = None # Some sugar From 08566e02b5c8f83707110be022bb64e919b4e9dd Mon Sep 17 00:00:00 2001 From: Ritiek Malhotra Date: Mon, 22 Jul 2019 15:58:54 +0530 Subject: [PATCH 2/7] Update command to run tests --- .travis.yml | 2 +- CONTRIBUTING.md | 2 +- README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 690bbd0..45f6bd5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,7 +40,7 @@ install: - tinydownload 07426048687547254773 -o ~/bin/ffmpeg - chmod 755 ~/bin/ffmpeg - xdg-user-dirs-update -script: python -m pytest test --cov=. +script: python -m pytest --cov=. after_success: - pip install codecov - codecov diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3f0c566..4605586 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -24,7 +24,7 @@ don't feel bad. Open an issue any way! unless mentioned otherwise. - Code should be formatted using [black](https://github.com/ambv/black). Don't worry if you forgot or don't know how to do this, the codebase will be black-formatted with each release. - All tests are placed in the [test directory](https://github.com/ritiek/spotify-downloader/tree/master/test). We use [pytest](https://github.com/pytest-dev/pytest) -to run the test suite: `$ python3 -m pytest test`. +to run the test suite: `$ python3 -m pytest`. If you don't have pytest, you can install it with `$ pip3 install pytest`. - Add a note about the changes, your GitHub username and a reference to the PR to the `Unreleased` section of the [`CHANGES.md`](CHANGES.md) file (see existing releases for examples), add the appropriate section ("Added", "Changed", "Fixed" etc.) if necessary. You don't have to increment version numbers. See https://keepachangelog.com/en/1.0.0/ for more information. - If you are planning to work on something big, let us know through an issue. So we can discuss more about it. diff --git a/README.md b/README.md index 11f6e10..530edb5 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ Check out [CONTRIBUTING.md](CONTRIBUTING.md) for more info. ## Running Tests ```console -$ python3 -m pytest test +$ python3 -m pytest ``` Obviously this requires the `pytest` module to be installed. From 513445955423abbdef36d89b647d9c8c14259825 Mon Sep 17 00:00:00 2001 From: Ritiek Malhotra Date: Mon, 22 Jul 2019 16:10:10 +0530 Subject: [PATCH 3/7] Maybe stop calling pytest as module works? --- .travis.yml | 2 +- CONTRIBUTING.md | 2 +- README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 45f6bd5..7f74bf7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,7 +40,7 @@ install: - tinydownload 07426048687547254773 -o ~/bin/ffmpeg - chmod 755 ~/bin/ffmpeg - xdg-user-dirs-update -script: python -m pytest --cov=. +script: pytest --cov=. after_success: - pip install codecov - codecov diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4605586..a9bb414 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -24,7 +24,7 @@ don't feel bad. Open an issue any way! unless mentioned otherwise. - Code should be formatted using [black](https://github.com/ambv/black). Don't worry if you forgot or don't know how to do this, the codebase will be black-formatted with each release. - All tests are placed in the [test directory](https://github.com/ritiek/spotify-downloader/tree/master/test). We use [pytest](https://github.com/pytest-dev/pytest) -to run the test suite: `$ python3 -m pytest`. +to run the test suite: `$ pytest`. If you don't have pytest, you can install it with `$ pip3 install pytest`. - Add a note about the changes, your GitHub username and a reference to the PR to the `Unreleased` section of the [`CHANGES.md`](CHANGES.md) file (see existing releases for examples), add the appropriate section ("Added", "Changed", "Fixed" etc.) if necessary. You don't have to increment version numbers. See https://keepachangelog.com/en/1.0.0/ for more information. - If you are planning to work on something big, let us know through an issue. So we can discuss more about it. diff --git a/README.md b/README.md index 530edb5..2020a7e 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ Check out [CONTRIBUTING.md](CONTRIBUTING.md) for more info. ## Running Tests ```console -$ python3 -m pytest +$ pytest ``` Obviously this requires the `pytest` module to be installed. From 5bcacf01daa16f189d4cb982807ddc966df23ff6 Mon Sep 17 00:00:00 2001 From: Ritiek Malhotra Date: Wed, 24 Jul 2019 10:56:04 +0530 Subject: [PATCH 4/7] Fallback to LyricWikia if lyrics not found on Genius --- spotdl/lyrics/providers/__init__.py | 4 +++- spotdl/spotify_tools.py | 18 ++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/spotdl/lyrics/providers/__init__.py b/spotdl/lyrics/providers/__init__.py index 5fcb123..9453de2 100644 --- a/spotdl/lyrics/providers/__init__.py +++ b/spotdl/lyrics/providers/__init__.py @@ -1,2 +1,4 @@ -from spotdl.lyrics.providers.lyricwikia_wrapper import LyricWikia from spotdl.lyrics.providers.genius import Genius +from spotdl.lyrics.providers.lyricwikia_wrapper import LyricWikia + +LyricClasses = (Genius, LyricWikia) diff --git a/spotdl/spotify_tools.py b/spotdl/spotify_tools.py index c73174d..0f62a4f 100644 --- a/spotdl/spotify_tools.py +++ b/spotdl/spotify_tools.py @@ -11,12 +11,14 @@ import functools from spotdl import const from spotdl import internals -from spotdl.lyrics.providers import LyricWikia +from spotdl.lyrics.providers import LyricClasses from spotdl.lyrics.exceptions import LyricsNotFound spotify = None + + def generate_token(): """ Generate the token. """ credentials = oauth2.SpotifyClientCredentials( @@ -75,12 +77,16 @@ def generate_metadata(raw_song): meta_tags[u"total_tracks"] = album["tracks"]["total"] log.debug("Fetching lyrics") - track = LyricWikia(meta_tags["artists"][0]["name"], meta_tags["name"]) + meta_tags["lyrics"] = None - try: - meta_tags["lyrics"] = track.get_lyrics() - except LyricsNotFound: - meta_tags["lyrics"] = None + for LyricClass in LyricClasses: + track = LyricClass(meta_tags["artists"][0]["name"], meta_tags["name"]) + try: + meta_tags["lyrics"] = track.get_lyrics() + except LyricsNotFound: + continue + else: + break # Some sugar meta_tags["year"], *_ = meta_tags["release_date"].split("-") From 4f6cae9f80dfd945d6660ff238fed114c15076f5 Mon Sep 17 00:00:00 2001 From: Ritiek Malhotra Date: Wed, 24 Jul 2019 11:29:37 +0530 Subject: [PATCH 5/7] Update CHANGES.md --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index c0b41bd..99564dc 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - ### Changed -- +- Fetch lyrics from Genius and fallback to LyricWikia if not found ([@ritiek](https://github.com/ritiek)) (#585) ## [1.2.2] - 2019-06-03 ### Fixed From 568ddc52abd7ace0c613e5b19f8945bbea057fda Mon Sep 17 00:00:00 2001 From: Ritiek Malhotra Date: Wed, 24 Jul 2019 11:50:10 +0530 Subject: [PATCH 6/7] Automatically retry randomly failed Travis jobs --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 7f74bf7..08afa00 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,7 +40,7 @@ install: - tinydownload 07426048687547254773 -o ~/bin/ffmpeg - chmod 755 ~/bin/ffmpeg - xdg-user-dirs-update -script: pytest --cov=. +script: travis_retry pytest --cov=. after_success: - pip install codecov - codecov From 34ea3ea91b2ff955a4f3e914c7d25181b317d52c Mon Sep 17 00:00:00 2001 From: Ritiek Malhotra Date: Thu, 25 Jul 2019 11:41:07 +0530 Subject: [PATCH 7/7] Mention about Genius lyric provider --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2020a7e..2a783c9 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ - Can also download a song by entering its artist and song name (in case if you don't have the Spotify's HTTP link for some song). - Automatically applies metadata to the downloaded song which includes: - - `Title`, `Artist`, `Album`, `Album art`, `Lyrics` (if found on [lyrics wikia](http://lyrics.wikia.com)), `Album artist`, `Genre`, `Track number`, `Disc number`, `Release date`, and more... + - `Title`, `Artist`, `Album`, `Album art`, `Lyrics` (if found either on [Genius](https://genius.com/) or [LyricsWikia](http://lyrics.wikia.com)), `Album artist`, `Genre`, `Track number`, `Disc number`, `Release date`, and more... - Works straight out of the box and does not require you to generate or mess with your API keys (already included).