Add additional methods to fetch lyrics

The following inputs can now be used to fetch lyrics: * artist and track names * search query * direct url
2026-07-24 10:41:53 +00:00 · 2020-04-08 21:43:58 +05:30
parent 51da0b7a29
commit 47247f7250
7 changed files with 210 additions and 76 deletions
@@ -1,6 +1,8 @@
 from spotdl.metadata.providers import ProviderSpotify
 from spotdl.metadata.providers import ProviderYouTube
 from spotdl.metadata.embedders import EmbedderDefault
+from spotdl.lyrics.providers import LyricWikia
+from spotdl.lyrics.providers import Genius

 from spotdl.track import Track

@@ -10,7 +12,7 @@ import urllib.request
 import threading


-def search_metadata(track):
+def search_metadata(track, lyrics=True):
    youtube = ProviderYouTube()
    if spotdl.util.is_spotify(track):
        spotify = ProviderSpotify()
@@ -34,8 +36,7 @@ def search_metadata(track):
    return metadata


-def download_track(metadata,
-            dry_run=False, overwrite="prompt", output_ext="mp3", file_format="{artist} - {track-name}", log_fmt="{artist} - {track_name}"):
+def download_track(metadata, arguments):
    # TODO: CONFIG.YML
    #       Exit here if config.dry_run

@@ -52,10 +53,16 @@ def download_track(metadata,

    track = Track(metadata, cache_albumart=True)
    track.download_while_re_encoding("test.mp3")
+
+    # TODO: CONFIG.YML
+    #       Skip metadata if config.no_metadata
+
    track.apply_metadata("test.mp3")


-def download_tracks_from_file(path):
+def download_tracks_from_file(path, arguments):
+    # FIXME: Can we make this function cleaner?
+
    # log.info(
    #     "Checking and removing any duplicate tracks "
    #     "in reading {}".format(path)
@@ -12,16 +12,23 @@ class LyricBase(ABC):
    """

    @abstractmethod
-    def __init__(self, artist, track):
-        """
-        This method must set any protected attributes,
-        which may be modified from outside the class
-        if the need arises.
-        """
-        pass
-
-    @abstractmethod
-    def get_lyrics(self, linesep="\n", timeout=None):
+    def from_url(self, url, linesep="\n", timeout=None):
+        """
+        This method must return the lyrics string for the
+        given track.
+        """
+        pass
+
+    @abstractmethod
+    def from_artist_and_track(self, artist, track, linesep="\n", timeout=None):
+        """
+        This method must return the lyrics string for the
+        given track.
+        """
+        pass
+
+    @abstractmethod
+    def from_query(self, query, linesep="\n", timeout=None):
        """
        This method must return the lyrics string for the
        given track.
@@ -1,34 +1,38 @@
 from bs4 import BeautifulSoup
 import urllib.request
+import json

 from spotdl.lyrics.lyric_base import LyricBase
 from spotdl.lyrics.exceptions import LyricsNotFoundError

 BASE_URL = "https://genius.com"
+BASE_SEARCH_URL = BASE_URL + "/api/search/multi?per_page=1&q="

+# FIXME: Make Genius a metadata provider instead of lyric provider
+#        Since, Genius parses additional metadata too (such as track
+#        name, artist name, albumart url). For example, fetch this URL:
+#        https://genius.com/api/search/multi?per_page=1&q=artist+trackname

 class Genius(LyricBase):
-    def __init__(self, artist, track):
-        self.artist = artist
-        self.track = track
+    def __init__(self):
        self.base_url = BASE_URL
+        self.base_search_url = BASE_SEARCH_URL

-    def _guess_lyric_url(self):
+    def guess_lyric_url_from_artist_and_track(self, artist, track):
        """
-        Returns the possible lyric URL for the track available
-        on Genius. This may not always be a valid URL, but this
-        is apparently the best we can do at the moment?
+        Returns the possible lyric URL for the track available on
+        Genius. This may not always be a valid URL.
        """
-        query = "/{} {} lyrics".format(self.artist, self.track)
+        query = "/{} {} lyrics".format(artist, track)
        query = query.replace(" ", "-")
        encoded_query = urllib.request.quote(query)
        lyric_url = self.base_url + encoded_query
        return lyric_url

-    def _fetch_page(self, url, timeout=None):
+    def _fetch_url_page(self, url, timeout=None):
        """
-        Makes a GET request to the given URL and returns the
-        HTML content in the case of a valid response.
+        Makes a GET request to the given lyrics page URL and returns
+        the HTML content in the case of a valid response.
        """
        request = urllib.request.Request(url)
        request.add_header("User-Agent", "urllib")
@@ -36,17 +40,14 @@ class Genius(LyricBase):
            response = urllib.request.urlopen(request, timeout=timeout)
        except urllib.request.HTTPError:
            raise LyricsNotFoundError(
-                "Could not find lyrics for {} - {} at URL: {}".format(
-                    self.artist, self.track, url
-                )
+                "Could not find lyrics at URL: {}".format(url)
            )
        else:
            return response.read()

    def _get_lyrics_text(self, html):
        """
-        Extracts and returns the lyric content from the
-        provided HTML.
+        Extracts and returns the lyric content from the provided HTML.
        """
        soup = BeautifulSoup(html, "html.parser")
        lyrics_paragraph = soup.find("p")
@@ -57,11 +58,52 @@ class Genius(LyricBase):
                "The lyrics for this track are yet to be released."
            )

-    def get_lyrics(self, linesep="\n", timeout=None):
+    def _fetch_search_page(self, url, timeout=None):
        """
-        Returns the lyric string for the given artist and track.
+        Returns search results from a given URL in JSON.
        """
-        url = self._guess_lyric_url()
-        html_page = self._fetch_page(url, timeout=timeout)
-        lyrics = self._get_lyrics_text(html_page)
+        request = urllib.request.Request(url)
+        request.add_header("User-Agent", "urllib")
+        response = urllib.request.urlopen(request, timeout=timeout)
+        metadata = json.loads(response.read())
+        if len(metadata["response"]["sections"][0]["hits"]) == 0:
+            raise LyricsNotFoundError(
+                "Could not find any search results for URL: {}".format(url)
+            )
+        return metadata
+
+    def best_matching_lyric_url_from_query(self, query):
+        """
+        Returns the best matching track's URL from a given query.
+        """
+        encoded_query = query.replace(" ", "+")
+        search_url = self.base_search_url + encoded_query
+        metadata = self._fetch_search_page(search_url)
+        lyric_url = metadata["response"]["sections"][0]["hits"][0]["result"]["path"]
+        return self.base_url + lyric_url
+
+    def from_query(self, query, linesep="\n", timeout=None):
+        """
+        Returns the lyric string for the track best matching the
+        given query.
+        """
+        lyric_url = self.best_matching_lyric_url_from_query(query)
+        return self.from_url(lyric_url, linesep, timeout=timeout)
+
+    def from_artist_and_track(self, artist, track, linesep="\n", timeout=None):
+        """
+        Returns the lyric string for the given artist and track
+        by making scraping search results and fetching the first
+        result.
+        """
+        lyric_url = self.guess_lyric_url_from_artist_and_track(artist, track)
+        return self.from_url(lyric_url, linesep, timeout)
+
+    def from_url(self, url, linesep="\n", timeout=None):
+        """
+        Returns the lyric string for the given URL.
+        """
+        lyric_html_page = self._fetch_url_page(url, timeout=timeout)
+        lyrics = self._get_lyrics_text(lyric_html_page)
        return lyrics.replace("\n", linesep)
+
@@ -5,17 +5,20 @@ from spotdl.lyrics.exceptions import LyricsNotFoundError


 class LyricWikia(LyricBase):
-    def __init__(self, artist, track):
-        self.artist = artist
-        self.track = track
+    def from_query(self, query, linesep="\n", timeout=None):
+        raise NotImplementedError

-    def get_lyrics(self, linesep="\n", timeout=None):
+    def from_artist_and_track(self, artist, track, linesep="\n", timeout=None):
        """
        Returns the lyric string for the given artist and track.
        """
        try:
-            lyrics = lyricwikia.get_lyrics(self.artist, self.track, linesep, timeout)
+            lyrics = lyricwikia.get_lyrics(artist, track, linesep, timeout)
        except lyricwikia.LyricsNotFound as e:
            raise LyricsNotFoundError(e.args[0])
-        else:
-            return lyrics
+
+        return lyrics
+
+    def from_url(self, url, linesep="\n", timeout=None):
+        raise NotImplementedError
+
@@ -3,35 +3,114 @@ from spotdl.lyrics import exceptions
 from spotdl.lyrics.providers import Genius

 import urllib.request
+import json
 import pytest

-
 class TestGenius:
    def test_subclass(self):
        assert issubclass(Genius, LyricBase)

+    @pytest.fixture(scope="module")
+    def expect_lyrics_count(self):
+        # This is the number of characters in lyrics found
+        # for the track in `lyric_url` fixture below
+        return 1845
+
+    @pytest.fixture(scope="module")
+    def genius(self):
+        return Genius()
+
+    def test_base_url(self, genius):
+        assert genius.base_url == "https://genius.com"
+
+    @pytest.fixture(scope="module")
+    def artist(self):
+        return "selena gomez"
+
    @pytest.fixture(scope="module")
    def track(self):
-        return Genius("artist", "track")
+        return "wolves"

-    def test_base_url(self, track):
-        assert track.base_url == "https://genius.com"
+    @pytest.fixture(scope="module")
+    def query(self, artist, track):
+        return "{} {}".format(artist, track)

-    def test_get_lyrics(self, track, monkeypatch):
-        def mocked_urlopen(url, timeout=None):
-            class DummyHTTPResponse:
-                def read(self):
-                    return "<p>amazing lyrics!</p>"
+    @pytest.fixture(scope="module")
+    def guess_url(self, query):
+        return "https://genius.com/selena-gomez-wolves-lyrics"

-            return DummyHTTPResponse()
+    @pytest.fixture(scope="module")
+    def lyric_url(self):
+        return "https://genius.com/Selena-gomez-and-marshmello-wolves-lyrics"

-        monkeypatch.setattr("urllib.request.urlopen", mocked_urlopen)
-        assert track.get_lyrics() == "amazing lyrics!"
+    def test_guess_lyric_url_from_artist_and_track(self, genius, artist, track, guess_url):
+        url = genius.guess_lyric_url_from_artist_and_track(artist, track)
+        assert url == guess_url

-    def test_lyrics_not_found_error(self, track, monkeypatch):
-        def mocked_urlopen(url, timeout=None):
+    class MockHTTPResponse:
+        expect_lyrics = ""
+
+        def __init__(self, request, timeout=None):
+            search_results_url = "https://genius.com/api/search/multi?per_page=1&q=selena+gomez+wolves"
+            if request._full_url == search_results_url:
+                read_method = lambda: json.dumps({
+                    "response": {"sections": [{"hits": [{"result": {
+                        "path": "/Selena-gomez-and-marshmello-wolves-lyrics"
+                    } }] }] }
+                })
+            else:
+                read_method = lambda: "<p>" + self.expect_lyrics + "</p>"
+
+            self.read = read_method
+
+    @pytest.mark.network
+    def test_best_matching_lyric_url_from_query(self, genius, query, lyric_url):
+        url = genius.best_matching_lyric_url_from_query(query)
+        assert url == lyric_url
+
+    def test_mock_best_matching_lyric_url_from_query(self, genius, query, lyric_url, monkeypatch):
+        monkeypatch.setattr("urllib.request.urlopen", self.MockHTTPResponse)
+        self.test_best_matching_lyric_url_from_query(genius, query, lyric_url)
+
+    @pytest.mark.network
+    def test_from_url(self, genius, lyric_url, expect_lyrics_count):
+        lyrics = genius.from_url(lyric_url)
+        assert len(lyrics) == expect_lyrics_count
+
+    def test_mock_from_url(self, genius, lyric_url, expect_lyrics_count, monkeypatch):
+        self.MockHTTPResponse.expect_lyrics = "a" * expect_lyrics_count
+        monkeypatch.setattr("urllib.request.urlopen", self.MockHTTPResponse)
+        self.test_from_url(genius, lyric_url, expect_lyrics_count)
+
+    @pytest.mark.network
+    def test_from_artist_and_track(self, genius, artist, track, expect_lyrics_count):
+        lyrics = genius.from_artist_and_track(artist, track)
+        assert len(lyrics) == expect_lyrics_count
+
+    def test_mock_from_artist_and_track(self, genius, artist, track, expect_lyrics_count, monkeypatch):
+        self.MockHTTPResponse.expect_lyrics = "a" * expect_lyrics_count
+        monkeypatch.setattr("urllib.request.urlopen", self.MockHTTPResponse)
+        self.test_from_artist_and_track(genius, artist, track, expect_lyrics_count)
+
+    @pytest.mark.network
+    def test_from_query(self, genius, query, expect_lyrics_count):
+        lyrics = genius.from_query(query)
+        assert len(lyrics) == expect_lyrics_count
+
+    def test_mock_from_query(self, genius, query, expect_lyrics_count, monkeypatch):
+        self.MockHTTPResponse.expect_lyrics = "a" * expect_lyrics_count
+        monkeypatch.setattr("urllib.request.urlopen", self.MockHTTPResponse)
+        self.test_from_query(genius, query, expect_lyrics_count)
+
+    @pytest.mark.network
+    def test_lyrics_not_found_error(self, genius):
+        with pytest.raises(exceptions.LyricsNotFoundError):
+            genius.from_artist_and_track(self, "nonexistent_artist", "nonexistent_track")
+
+    def test_mock_lyrics_not_found_error(self, genius, monkeypatch):
+        def mock_urlopen(url, timeout=None):
            raise urllib.request.HTTPError("", "", "", "", "")

-        monkeypatch.setattr("urllib.request.urlopen", mocked_urlopen)
-        with pytest.raises(exceptions.LyricsNotFoundError):
-            track.get_lyrics()
+        monkeypatch.setattr("urllib.request.urlopen", mock_urlopen)
+        self.test_lyrics_not_found_error(genius)
+
@@ -11,15 +11,16 @@ class TestLyricWikia:
    def test_subclass(self):
        assert issubclass(LyricWikia, LyricBase)

-    def test_get_lyrics(self, monkeypatch):
+    def test_from_artist_and_track(self, monkeypatch):
        # `LyricWikia` class uses the 3rd party method `lyricwikia.get_lyrics`
        # internally and there is no need to test a 3rd party library as they
        # have their own implementation of tests.
        monkeypatch.setattr(
            "lyricwikia.get_lyrics", lambda a, b, c, d: "awesome lyrics!"
        )
-        track = LyricWikia("Lyricwikia", "Lyricwikia")
-        assert track.get_lyrics() == "awesome lyrics!"
+        artist, track = "selena gomez", "wolves"
+        lyrics = LyricWikia().from_artist_and_track(artist, track)
+        assert lyrics == "awesome lyrics!"

    def test_lyrics_not_found_error(self, monkeypatch):
        def lyricwikia_lyrics_not_found(msg):
@@ -30,6 +31,6 @@ class TestLyricWikia:
            "lyricwikia.get_lyrics",
            lambda a, b, c, d: lyricwikia_lyrics_not_found("Nope, no lyrics."),
        )
-        track = LyricWikia("Lyricwikia", "Lyricwikia")
+        artist, track = "nonexistent_artist", "nonexistent_track"
        with pytest.raises(exceptions.LyricsNotFoundError):
-            track.get_lyrics()
+            LyricWikia().from_artist_and_track(artist, track)
@@ -5,25 +5,20 @@ import pytest

 class TestAbstractBaseClass:
    def test_error_abstract_base_class_lyricbase(self):
-        artist = "awesome artist"
-        track = "amazing track"
-
        with pytest.raises(TypeError):
            # This abstract base class must be inherited from
            # for instantiation
-            LyricBase(artist, track)
-
+            LyricBase()

    def test_inherit_abstract_base_class_encoderbase(self):
        class LyricKid(LyricBase):
-            def __init__(self, artist, track):
-                super().__init__(artist, track)
+            def from_query(self, query):
+                raise NotImplementedError

-            def get_lyrics(self):
+            def from_artist_and_track(self, artist, track):
                pass

+            def from_url(self, url):
+                raise NotImplementedError

-        artist = "awesome artist"
-        track = "amazing track"
-
-        LyricKid(artist, track)
+        LyricKid()