mirror of
				https://github.com/KevinMidboe/spotify-downloader.git
				synced 2025-10-29 18:00:15 +00:00 
			
		
		
		
	Merge pull request #585 from ritiek/refactor
Scrape lyrics from Genius and lyrics refactor
This commit is contained in:
		| @@ -40,7 +40,7 @@ install: | ||||
|   - tinydownload 07426048687547254773 -o ~/bin/ffmpeg | ||||
|   - chmod 755 ~/bin/ffmpeg | ||||
|   - xdg-user-dirs-update | ||||
| script: python -m pytest test --cov=. | ||||
| script: travis_retry pytest --cov=. | ||||
| after_success: | ||||
|   - pip install codecov | ||||
|   - codecov | ||||
|   | ||||
| @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. | ||||
| - | ||||
|  | ||||
| ### Changed | ||||
| - | ||||
| - Fetch lyrics from Genius and fall back to LyricWikia if not found ([@ritiek](https://github.com/ritiek)) (#585) | ||||
|  | ||||
| ## [1.2.2] - 2019-06-03 | ||||
| ### Fixed | ||||
|   | ||||
| @@ -24,7 +24,7 @@ don't feel bad. Open an issue any way! | ||||
| unless mentioned otherwise. | ||||
| - Code should be formatted using [black](https://github.com/ambv/black). Don't worry if you forgot or don't know how to do this, the codebase will be black-formatted with each release. | ||||
| - All tests are placed in the [test directory](https://github.com/ritiek/spotify-downloader/tree/master/test). We use [pytest](https://github.com/pytest-dev/pytest) | ||||
| to run the test suite: `$ python3 -m pytest test`. | ||||
| to run the test suite: `$ pytest`. | ||||
| If you don't have pytest, you can install it with `$ pip3 install pytest`. | ||||
| - Add a note about the changes, your GitHub username and a reference to the PR to the `Unreleased` section of the [`CHANGES.md`](CHANGES.md) file (see existing releases for examples), add the appropriate section ("Added", "Changed", "Fixed" etc.) if necessary. You don't have to increment version numbers. See https://keepachangelog.com/en/1.0.0/ for more information. | ||||
| - If you are planning to work on something big, let us know through an issue so we can discuss it further. | ||||
|   | ||||
| @@ -11,7 +11,7 @@ | ||||
| - Can also download a song by entering its artist and song name (in case you don't have the Spotify HTTP link for a song). | ||||
| - Automatically applies metadata to the downloaded song which includes: | ||||
|  | ||||
|   - `Title`, `Artist`, `Album`, `Album art`, `Lyrics` (if found on [lyrics wikia](http://lyrics.wikia.com)), `Album artist`, `Genre`, `Track number`, `Disc number`, `Release date`, and more... | ||||
|   - `Title`, `Artist`, `Album`, `Album art`, `Lyrics` (if found on either [Genius](https://genius.com/) or [LyricWikia](http://lyrics.wikia.com)), `Album artist`, `Genre`, `Track number`, `Disc number`, `Release date`, and more... | ||||
|  | ||||
| - Works straight out of the box and does not require you to generate or mess with your API keys (already included). | ||||
|  | ||||
| @@ -74,7 +74,7 @@ Check out [CONTRIBUTING.md](CONTRIBUTING.md) for more info. | ||||
| ## Running Tests | ||||
|  | ||||
| ```console | ||||
| $ python3 -m pytest test | ||||
| $ pytest | ||||
| ``` | ||||
|  | ||||
| Obviously this requires the `pytest` module to be installed. | ||||
|   | ||||
							
								
								
									
										1
									
								
								spotdl/lyrics/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								spotdl/lyrics/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | ||||
| from spotdl.lyrics.lyric_base import LyricBase | ||||
							
								
								
									
										5
									
								
								spotdl/lyrics/exceptions.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								spotdl/lyrics/exceptions.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,5 @@ | ||||
| class LyricsNotFound(Exception): | ||||
|     __module__ = Exception.__module__ | ||||
|  | ||||
|     def __init__(self, message=None): | ||||
|         super(LyricsNotFound, self).__init__(message) | ||||
							
								
								
									
										14
									
								
								spotdl/lyrics/lyric_base.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								spotdl/lyrics/lyric_base.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,14 @@ | ||||
| import lyricwikia | ||||
|  | ||||
| from abc import ABC | ||||
| from abc import abstractmethod | ||||
|  | ||||
|  | ||||
| class LyricBase(ABC): | ||||
|     @abstractmethod | ||||
|     def __init__(self, artist, song): | ||||
|         pass | ||||
|  | ||||
|     @abstractmethod | ||||
|     def get_lyrics(self, linesep="\n", timeout=None): | ||||
|         pass | ||||
							
								
								
									
										4
									
								
								spotdl/lyrics/providers/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								spotdl/lyrics/providers/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,4 @@ | ||||
| from spotdl.lyrics.providers.genius import Genius | ||||
| from spotdl.lyrics.providers.lyricwikia_wrapper import LyricWikia | ||||
|  | ||||
| LyricClasses = (Genius, LyricWikia) | ||||
							
								
								
									
										47
									
								
								spotdl/lyrics/providers/genius.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								spotdl/lyrics/providers/genius.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,47 @@ | ||||
| from bs4 import BeautifulSoup | ||||
| import urllib.request | ||||
|  | ||||
| from spotdl.lyrics.lyric_base import LyricBase | ||||
| from spotdl.lyrics.exceptions import LyricsNotFound | ||||
|  | ||||
| BASE_URL = "https://genius.com" | ||||
|  | ||||
|  | ||||
| class Genius(LyricBase): | ||||
|     def __init__(self, artist, song): | ||||
|         self.artist = artist | ||||
|         self.song = song | ||||
|         self.base_url = BASE_URL | ||||
|  | ||||
|     def _guess_lyric_url(self): | ||||
|         query = "/{} {} lyrics".format(self.artist, self.song) | ||||
|         query = query.replace(" ", "-") | ||||
|         encoded_query = urllib.request.quote(query) | ||||
|         lyric_url = self.base_url + encoded_query | ||||
|         return lyric_url | ||||
|  | ||||
|     def _fetch_page(self, url, timeout=None): | ||||
|         request = urllib.request.Request(url) | ||||
|         request.add_header("User-Agent", "urllib") | ||||
|         try: | ||||
|             response = urllib.request.urlopen(request, timeout=timeout) | ||||
|         except urllib.request.HTTPError: | ||||
|             raise LyricsNotFound( | ||||
|                 "Could not find lyrics for {} - {} at URL: {}".format( | ||||
|                     self.artist, self.song, url | ||||
|                 ) | ||||
|             ) | ||||
|         else: | ||||
|             return response.read() | ||||
|  | ||||
|     def _get_lyrics_text(self, html): | ||||
|         soup = BeautifulSoup(html, "html.parser") | ||||
|         lyrics_paragraph = soup.find("p") | ||||
|         lyrics = lyrics_paragraph.get_text() | ||||
|         return lyrics | ||||
|  | ||||
|     def get_lyrics(self, linesep="\n", timeout=None): | ||||
|         url = self._guess_lyric_url() | ||||
|         html_page = self._fetch_page(url, timeout=timeout) | ||||
|         lyrics = self._get_lyrics_text(html_page) | ||||
|         return lyrics.replace("\n", linesep) | ||||
							
								
								
									
										18
									
								
								spotdl/lyrics/providers/lyricwikia_wrapper.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								spotdl/lyrics/providers/lyricwikia_wrapper.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| import lyricwikia | ||||
|  | ||||
| from spotdl.lyrics.lyric_base import LyricBase | ||||
| from spotdl.lyrics.exceptions import LyricsNotFound | ||||
|  | ||||
|  | ||||
| class LyricWikia(LyricBase): | ||||
|     def __init__(self, artist, song): | ||||
|         self.artist = artist | ||||
|         self.song = song | ||||
|  | ||||
|     def get_lyrics(self, linesep="\n", timeout=None): | ||||
|         try: | ||||
|             lyrics = lyricwikia.get_lyrics(self.artist, self.song, linesep, timeout) | ||||
|         except lyricwikia.LyricsNotFound as e: | ||||
|             raise LyricsNotFound(e.args[0]) | ||||
|         else: | ||||
|             return lyrics | ||||
							
								
								
									
										0
									
								
								spotdl/lyrics/providers/tests/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								spotdl/lyrics/providers/tests/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										39
									
								
								spotdl/lyrics/providers/tests/test_genius.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								spotdl/lyrics/providers/tests/test_genius.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,39 @@ | ||||
| from spotdl.lyrics import LyricBase | ||||
| from spotdl.lyrics import exceptions | ||||
| from spotdl.lyrics.providers import Genius | ||||
|  | ||||
| import urllib.request | ||||
| import pytest | ||||
|  | ||||
|  | ||||
| class TestGenius: | ||||
|     def test_subclass(self): | ||||
|         assert issubclass(Genius, LyricBase) | ||||
|  | ||||
|     @pytest.fixture(scope="module") | ||||
|     def track(self): | ||||
|         return Genius("artist", "song") | ||||
|  | ||||
|     def test_base_url(self, track): | ||||
|         assert track.base_url == "https://genius.com" | ||||
|  | ||||
|     def test_get_lyrics(self, track, monkeypatch): | ||||
|  | ||||
|         def mocked_urlopen(url, timeout=None): | ||||
|             class DummyHTTPResponse: | ||||
|                 def read(self): | ||||
|                     return "<p>amazing lyrics!</p>" | ||||
|  | ||||
|             return DummyHTTPResponse() | ||||
|  | ||||
|         monkeypatch.setattr("urllib.request.urlopen", mocked_urlopen) | ||||
|         assert track.get_lyrics() == "amazing lyrics!" | ||||
|  | ||||
|     def test_lyrics_not_found_error(self, track, monkeypatch): | ||||
|  | ||||
|         def mocked_urlopen(url, timeout=None): | ||||
|             raise urllib.request.HTTPError("", "", "", "", "") | ||||
|  | ||||
|         monkeypatch.setattr("urllib.request.urlopen", mocked_urlopen) | ||||
|         with pytest.raises(exceptions.LyricsNotFound): | ||||
|             track.get_lyrics() | ||||
							
								
								
									
										31
									
								
								spotdl/lyrics/providers/tests/test_lyricwikia_wrapper.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								spotdl/lyrics/providers/tests/test_lyricwikia_wrapper.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,31 @@ | ||||
| import lyricwikia | ||||
|  | ||||
| from spotdl.lyrics import LyricBase | ||||
| from spotdl.lyrics import exceptions | ||||
| from spotdl.lyrics.providers import LyricWikia | ||||
|  | ||||
| import pytest | ||||
|  | ||||
|  | ||||
| class TestLyricWikia: | ||||
|     def test_subclass(self): | ||||
|         assert issubclass(LyricWikia, LyricBase) | ||||
|  | ||||
|     def test_get_lyrics(self, monkeypatch): | ||||
|         # The `LyricWikia` class calls the third-party function `lyricwikia.get_lyrics` | ||||
|         # internally. There is no need to test a third-party library here, as it | ||||
|         # has its own tests. | ||||
|         monkeypatch.setattr("lyricwikia.get_lyrics", lambda a, b, c, d: "awesome lyrics!") | ||||
|         track = LyricWikia("Lyricwikia", "Lyricwikia") | ||||
|         assert track.get_lyrics() == "awesome lyrics!" | ||||
|  | ||||
|     def test_lyrics_not_found_error(self, monkeypatch): | ||||
|  | ||||
|         def lyricwikia_lyrics_not_found(msg): | ||||
|             raise lyricwikia.LyricsNotFound(msg) | ||||
|  | ||||
|         # Wrap `lyricwikia.LyricsNotFound` with `exceptions.LyricsNotFound` error. | ||||
|         monkeypatch.setattr("lyricwikia.get_lyrics", lambda a, b, c, d: lyricwikia_lyrics_not_found("Nope, no lyrics.")) | ||||
|         track = LyricWikia("Lyricwikia", "Lyricwikia") | ||||
|         with pytest.raises(exceptions.LyricsNotFound): | ||||
|             track.get_lyrics() | ||||
| @@ -1,6 +1,5 @@ | ||||
| import spotipy | ||||
| import spotipy.oauth2 as oauth2 | ||||
| import lyricwikia | ||||
|  | ||||
| from slugify import slugify | ||||
| from titlecase import titlecase | ||||
| @@ -12,10 +11,14 @@ import functools | ||||
|  | ||||
| from spotdl import const | ||||
| from spotdl import internals | ||||
| from spotdl.lyrics.providers import LyricClasses | ||||
| from spotdl.lyrics.exceptions import LyricsNotFound | ||||
|  | ||||
| spotify = None | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| def generate_token(): | ||||
|     """ Generate the token. """ | ||||
|     credentials = oauth2.SpotifyClientCredentials( | ||||
| @@ -74,14 +77,17 @@ def generate_metadata(raw_song): | ||||
|     meta_tags[u"total_tracks"] = album["tracks"]["total"] | ||||
|  | ||||
|     log.debug("Fetching lyrics") | ||||
|  | ||||
|     try: | ||||
|         meta_tags["lyrics"] = lyricwikia.get_lyrics( | ||||
|             meta_tags["artists"][0]["name"], meta_tags["name"] | ||||
|         ) | ||||
|     except lyricwikia.LyricsNotFound: | ||||
|     meta_tags["lyrics"] = None | ||||
|  | ||||
|     for LyricClass in LyricClasses: | ||||
|         track = LyricClass(meta_tags["artists"][0]["name"], meta_tags["name"]) | ||||
|         try: | ||||
|             meta_tags["lyrics"] = track.get_lyrics() | ||||
|         except LyricsNotFound: | ||||
|             continue | ||||
|         else: | ||||
|             break | ||||
|  | ||||
|     # Some sugar | ||||
|     meta_tags["year"], *_ = meta_tags["release_date"].split("-") | ||||
|     meta_tags["duration"] = meta_tags["duration_ms"] / 1000.0 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user