mirror of
				https://github.com/KevinMidboe/spotify-downloader.git
				synced 2025-10-29 18:00:15 +00:00 
			
		
		
		
	Merge pull request #585 from ritiek/refactor
Scrape lyrics from Genius and lyrics refactor
This commit is contained in:
		@@ -40,7 +40,7 @@ install:
 | 
			
		||||
  - tinydownload 07426048687547254773 -o ~/bin/ffmpeg
 | 
			
		||||
  - chmod 755 ~/bin/ffmpeg
 | 
			
		||||
  - xdg-user-dirs-update
 | 
			
		||||
script: python -m pytest test --cov=.
 | 
			
		||||
script: travis_retry pytest --cov=.
 | 
			
		||||
after_success:
 | 
			
		||||
  - pip install codecov
 | 
			
		||||
  - codecov
 | 
			
		||||
 
 | 
			
		||||
@@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 | 
			
		||||
-
 | 
			
		||||
 | 
			
		||||
### Changed
 | 
			
		||||
-
 | 
			
		||||
- Fetch lyrics from Genius and fallback to LyricWikia if not found ([@ritiek](https://github.com/ritiek)) (#585)
 | 
			
		||||
 | 
			
		||||
## [1.2.2] - 2019-06-03
 | 
			
		||||
### Fixed
 | 
			
		||||
 
 | 
			
		||||
@@ -24,7 +24,7 @@ don't feel bad. Open an issue any way!
 | 
			
		||||
unless mentioned otherwise.
 | 
			
		||||
- Code should be formatted using [black](https://github.com/ambv/black). Don't worry if you forgot or don't know how to do this, the codebase will be black-formatted with each release.
 | 
			
		||||
- All tests are placed in the [test directory](https://github.com/ritiek/spotify-downloader/tree/master/test). We use [pytest](https://github.com/pytest-dev/pytest)
 | 
			
		||||
to run the test suite: `$ python3 -m pytest test`.
 | 
			
		||||
to run the test suite: `$ pytest`.
 | 
			
		||||
If you don't have pytest, you can install it with `$ pip3 install pytest`.
 | 
			
		||||
- Add a note about the changes, your GitHub username and a reference to the PR to the `Unreleased` section of the [`CHANGES.md`](CHANGES.md) file (see existing releases for examples), add the appropriate section ("Added", "Changed", "Fixed" etc.) if necessary. You don't have to increment version numbers. See https://keepachangelog.com/en/1.0.0/ for more information.
 | 
			
		||||
- If you are planning to work on something big, let us know through an issue so we can discuss it further.
 | 
			
		||||
 
 | 
			
		||||
@@ -11,7 +11,7 @@
 | 
			
		||||
- Can also download a song by entering its artist and song name (in case you don't have the Spotify HTTP link for the song).
 | 
			
		||||
- Automatically applies metadata to the downloaded song which includes:
 | 
			
		||||
 | 
			
		||||
  - `Title`, `Artist`, `Album`, `Album art`, `Lyrics` (if found on [lyrics wikia](http://lyrics.wikia.com)), `Album artist`, `Genre`, `Track number`, `Disc number`, `Release date`, and more...
 | 
			
		||||
  - `Title`, `Artist`, `Album`, `Album art`, `Lyrics` (if found either on [Genius](https://genius.com/) or [LyricsWikia](http://lyrics.wikia.com)), `Album artist`, `Genre`, `Track number`, `Disc number`, `Release date`, and more...
 | 
			
		||||
 | 
			
		||||
- Works straight out of the box and does not require you to generate or mess with your API keys (already included).
 | 
			
		||||
 | 
			
		||||
@@ -74,7 +74,7 @@ Check out [CONTRIBUTING.md](CONTRIBUTING.md) for more info.
 | 
			
		||||
## Running Tests
 | 
			
		||||
 | 
			
		||||
```console
 | 
			
		||||
$ python3 -m pytest test
 | 
			
		||||
$ pytest
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Obviously this requires the `pytest` module to be installed.
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										1
									
								
								spotdl/lyrics/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								spotdl/lyrics/__init__.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1 @@
 | 
			
		||||
from spotdl.lyrics.lyric_base import LyricBase
 | 
			
		||||
							
								
								
									
										5
									
								
								spotdl/lyrics/exceptions.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								spotdl/lyrics/exceptions.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,5 @@
 | 
			
		||||
class LyricsNotFound(Exception):
    """Raised by lyric providers when lyrics for a track cannot be obtained."""

    # Report the same module as the built-in ``Exception`` so the class is
    # displayed without this package's module path.
    __module__ = Exception.__module__

    def __init__(self, message=None):
        """Create the error, forwarding the optional ``message`` to ``Exception``."""
        super().__init__(message)
 | 
			
		||||
							
								
								
									
										14
									
								
								spotdl/lyrics/lyric_base.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								spotdl/lyrics/lyric_base.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,14 @@
 | 
			
		||||
import lyricwikia
 | 
			
		||||
 | 
			
		||||
from abc import ABC
 | 
			
		||||
from abc import abstractmethod
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class LyricBase(ABC):
    """Abstract interface that every lyrics provider has to implement."""

    @abstractmethod
    def __init__(self, artist, song):
        """Set up the provider for the given ``artist`` and ``song``."""

    @abstractmethod
    def get_lyrics(self, linesep="\n", timeout=None):
        """Return the lyrics; subclasses decide how ``linesep`` and ``timeout`` apply."""
 | 
			
		||||
							
								
								
									
										4
									
								
								spotdl/lyrics/providers/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								spotdl/lyrics/providers/__init__.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,4 @@
 | 
			
		||||
from spotdl.lyrics.providers.genius import Genius
 | 
			
		||||
from spotdl.lyrics.providers.lyricwikia_wrapper import LyricWikia
 | 
			
		||||
 | 
			
		||||
LyricClasses = (Genius, LyricWikia)
 | 
			
		||||
							
								
								
									
										47
									
								
								spotdl/lyrics/providers/genius.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								spotdl/lyrics/providers/genius.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,47 @@
 | 
			
		||||
from bs4 import BeautifulSoup
 | 
			
		||||
import urllib.request
 | 
			
		||||
 | 
			
		||||
from spotdl.lyrics.lyric_base import LyricBase
 | 
			
		||||
from spotdl.lyrics.exceptions import LyricsNotFound
 | 
			
		||||
 | 
			
		||||
BASE_URL = "https://genius.com"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Genius(LyricBase):
    """Lyric provider that scrapes lyrics from a guessed Genius page URL."""

    def __init__(self, artist, song):
        """Remember the track details used to build the lyrics URL.

        Args:
            artist: Name of the artist.
            song: Title of the song.
        """
        self.artist = artist
        self.song = song
        self.base_url = BASE_URL

    def _guess_lyric_url(self):
        """Return the guessed lyrics URL: ``<base_url>/<artist>-<song>-lyrics``.

        Spaces are replaced with hyphens and the path is percent-encoded.
        """
        query = "/{} {} lyrics".format(self.artist, self.song)
        query = query.replace(" ", "-")
        encoded_query = urllib.request.quote(query)
        lyric_url = self.base_url + encoded_query
        return lyric_url

    def _fetch_page(self, url, timeout=None):
        """Download ``url`` and return the raw response body.

        Args:
            url: Address of the page to fetch.
            timeout: Passed through to ``urllib.request.urlopen``.

        Raises:
            LyricsNotFound: If the server answers with an HTTP error.
        """
        request = urllib.request.Request(url)
        request.add_header("User-Agent", "urllib")
        try:
            response = urllib.request.urlopen(request, timeout=timeout)
        except urllib.request.HTTPError:
            raise LyricsNotFound(
                "Could not find lyrics for {} - {} at URL: {}".format(
                    self.artist, self.song, url
                )
            )
        else:
            return response.read()

    def _get_lyrics_text(self, html):
        """Extract the text of the first ``<p>`` element found in ``html``.

        Args:
            html: Markup of the lyrics page.

        Raises:
            LyricsNotFound: If the page contains no ``<p>`` element.
        """
        soup = BeautifulSoup(html, "html.parser")
        lyrics_paragraph = soup.find("p")
        # ``find`` returns None when nothing matches. Report that as
        # LyricsNotFound so callers that fall back to another provider keep
        # working instead of crashing with an AttributeError.
        if lyrics_paragraph is None:
            raise LyricsNotFound(
                "Could not find lyrics for {} - {}: no lyrics paragraph in page".format(
                    self.artist, self.song
                )
            )
        return lyrics_paragraph.get_text()

    def get_lyrics(self, linesep="\n", timeout=None):
        """Fetch the lyrics page and return its lyrics text.

        Args:
            linesep: String that replaces every newline in the lyrics.
            timeout: Passed through to the page download.

        Raises:
            LyricsNotFound: If the page cannot be fetched or holds no lyrics.
        """
        url = self._guess_lyric_url()
        html_page = self._fetch_page(url, timeout=timeout)
        lyrics = self._get_lyrics_text(html_page)
        return lyrics.replace("\n", linesep)
 | 
			
		||||
							
								
								
									
										18
									
								
								spotdl/lyrics/providers/lyricwikia_wrapper.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								spotdl/lyrics/providers/lyricwikia_wrapper.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,18 @@
 | 
			
		||||
import lyricwikia
 | 
			
		||||
 | 
			
		||||
from spotdl.lyrics.lyric_base import LyricBase
 | 
			
		||||
from spotdl.lyrics.exceptions import LyricsNotFound
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class LyricWikia(LyricBase):
    """Lyric provider that delegates to the third-party ``lyricwikia`` package."""

    def __init__(self, artist, song):
        """Keep the artist and song names for the later lookup."""
        self.artist, self.song = artist, song

    def get_lyrics(self, linesep="\n", timeout=None):
        """Return the lyrics reported by ``lyricwikia.get_lyrics``.

        ``lyricwikia.LyricsNotFound`` is translated into this package's own
        ``LyricsNotFound``, carrying over the first argument of the original
        error as its message.
        """
        try:
            return lyricwikia.get_lyrics(self.artist, self.song, linesep, timeout)
        except lyricwikia.LyricsNotFound as not_found:
            message = not_found.args[0]
            raise LyricsNotFound(message)
 | 
			
		||||
							
								
								
									
										0
									
								
								spotdl/lyrics/providers/tests/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								spotdl/lyrics/providers/tests/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										39
									
								
								spotdl/lyrics/providers/tests/test_genius.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								spotdl/lyrics/providers/tests/test_genius.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,39 @@
 | 
			
		||||
from spotdl.lyrics import LyricBase
 | 
			
		||||
from spotdl.lyrics import exceptions
 | 
			
		||||
from spotdl.lyrics.providers import Genius
 | 
			
		||||
 | 
			
		||||
import urllib.request
 | 
			
		||||
import pytest
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestGenius:
    """Tests for the ``Genius`` provider; ``urlopen`` is always patched out."""

    def test_subclass(self):
        """``Genius`` has to implement the ``LyricBase`` interface."""
        assert issubclass(Genius, LyricBase)

    @pytest.fixture(scope="module")
    def track(self):
        """Provide a ``Genius`` instance with module scope."""
        return Genius("artist", "song")

    def test_base_url(self, track):
        """The provider targets the Genius website."""
        expected_url = "https://genius.com"
        assert track.base_url == expected_url

    def test_get_lyrics(self, track, monkeypatch):
        """Lyrics are taken from the paragraph of the downloaded page."""

        class FakeResponse:
            def read(self):
                return "<p>amazing lyrics!</p>"

        def fake_urlopen(url, timeout=None):
            return FakeResponse()

        monkeypatch.setattr("urllib.request.urlopen", fake_urlopen)
        fetched = track.get_lyrics()
        assert fetched == "amazing lyrics!"

    def test_lyrics_not_found_error(self, track, monkeypatch):
        """An HTTP error while fetching surfaces as ``LyricsNotFound``."""

        def failing_urlopen(url, timeout=None):
            raise urllib.request.HTTPError("", "", "", "", "")

        monkeypatch.setattr("urllib.request.urlopen", failing_urlopen)
        with pytest.raises(exceptions.LyricsNotFound):
            track.get_lyrics()
 | 
			
		||||
							
								
								
									
										31
									
								
								spotdl/lyrics/providers/tests/test_lyricwikia_wrapper.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								spotdl/lyrics/providers/tests/test_lyricwikia_wrapper.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,31 @@
 | 
			
		||||
import lyricwikia
 | 
			
		||||
 | 
			
		||||
from spotdl.lyrics import LyricBase
 | 
			
		||||
from spotdl.lyrics import exceptions
 | 
			
		||||
from spotdl.lyrics.providers import LyricWikia
 | 
			
		||||
 | 
			
		||||
import pytest
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestLyricWikia:
    """Tests for the ``LyricWikia`` wrapper; ``lyricwikia.get_lyrics`` is patched."""

    def test_subclass(self):
        """``LyricWikia`` has to implement the ``LyricBase`` interface."""
        assert issubclass(LyricWikia, LyricBase)

    def test_get_lyrics(self, monkeypatch):
        """The wrapper hands back whatever ``lyricwikia.get_lyrics`` returns."""

        # `LyricWikia` relies on the 3rd party `lyricwikia.get_lyrics`, which
        # ships with its own tests, so it is replaced with a stub here.
        def fake_get_lyrics(artist, song, linesep, timeout):
            return "awesome lyrics!"

        monkeypatch.setattr("lyricwikia.get_lyrics", fake_get_lyrics)
        provider = LyricWikia("Lyricwikia", "Lyricwikia")
        assert provider.get_lyrics() == "awesome lyrics!"

    def test_lyrics_not_found_error(self, monkeypatch):
        """`lyricwikia.LyricsNotFound` is wrapped in `exceptions.LyricsNotFound`."""

        def failing_get_lyrics(artist, song, linesep, timeout):
            raise lyricwikia.LyricsNotFound("Nope, no lyrics.")

        monkeypatch.setattr("lyricwikia.get_lyrics", failing_get_lyrics)
        provider = LyricWikia("Lyricwikia", "Lyricwikia")
        with pytest.raises(exceptions.LyricsNotFound):
            provider.get_lyrics()
 | 
			
		||||
@@ -1,6 +1,5 @@
 | 
			
		||||
import spotipy
 | 
			
		||||
import spotipy.oauth2 as oauth2
 | 
			
		||||
import lyricwikia
 | 
			
		||||
 | 
			
		||||
from slugify import slugify
 | 
			
		||||
from titlecase import titlecase
 | 
			
		||||
@@ -12,10 +11,14 @@ import functools
 | 
			
		||||
 | 
			
		||||
from spotdl import const
 | 
			
		||||
from spotdl import internals
 | 
			
		||||
from spotdl.lyrics.providers import LyricClasses
 | 
			
		||||
from spotdl.lyrics.exceptions import LyricsNotFound
 | 
			
		||||
 | 
			
		||||
spotify = None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def generate_token():
 | 
			
		||||
    """ Generate the token. """
 | 
			
		||||
    credentials = oauth2.SpotifyClientCredentials(
 | 
			
		||||
@@ -74,13 +77,16 @@ def generate_metadata(raw_song):
 | 
			
		||||
    meta_tags[u"total_tracks"] = album["tracks"]["total"]
 | 
			
		||||
 | 
			
		||||
    log.debug("Fetching lyrics")
 | 
			
		||||
    meta_tags["lyrics"] = None
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
        meta_tags["lyrics"] = lyricwikia.get_lyrics(
 | 
			
		||||
            meta_tags["artists"][0]["name"], meta_tags["name"]
 | 
			
		||||
        )
 | 
			
		||||
    except lyricwikia.LyricsNotFound:
 | 
			
		||||
        meta_tags["lyrics"] = None
 | 
			
		||||
    for LyricClass in LyricClasses:
 | 
			
		||||
        track = LyricClass(meta_tags["artists"][0]["name"], meta_tags["name"])
 | 
			
		||||
        try:
 | 
			
		||||
            meta_tags["lyrics"] = track.get_lyrics()
 | 
			
		||||
        except LyricsNotFound:
 | 
			
		||||
            continue
 | 
			
		||||
        else:
 | 
			
		||||
            break
 | 
			
		||||
 | 
			
		||||
    # Some sugar
 | 
			
		||||
    meta_tags["year"], *_ = meta_tags["release_date"].split("-")
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user