Add additional methods to fetch lyrics

The following inputs can now be used to fetch lyrics: * artist and track names * search query * direct url
2026-02-15 05:49:25 +00:00 · 2020-04-08 21:43:58 +05:30
parent 51da0b7a29
commit 47247f7250
7 changed files with 210 additions and 76 deletions
--- a/spotdl/lyrics/providers/genius.py
+++ b/spotdl/lyrics/providers/genius.py
@@ -1,34 +1,38 @@
 from bs4 import BeautifulSoup
 import urllib.request
+import json

 from spotdl.lyrics.lyric_base import LyricBase
 from spotdl.lyrics.exceptions import LyricsNotFoundError

 BASE_URL = "https://genius.com"
+BASE_SEARCH_URL = BASE_URL + "/api/search/multi?per_page=1&q="

+# FIXME: Make Genius a metadata provider instead of lyric provider
+#        Since, Genius parses additional metadata too (such as track
+#        name, artist name, albumart url). For example, fetch this URL:
+#        https://genius.com/api/search/multi?per_page=1&q=artist+trackname

 class Genius(LyricBase):
-    def __init__(self, artist, track):
-        self.artist = artist
-        self.track = track
+    def __init__(self):
        self.base_url = BASE_URL
+        self.base_search_url = BASE_SEARCH_URL

-    def _guess_lyric_url(self):
+    def guess_lyric_url_from_artist_and_track(self, artist, track):
        """
-        Returns the possible lyric URL for the track available
-        on Genius. This may not always be a valid URL, but this
-        is apparently the best we can do at the moment?
+        Returns the possible lyric URL for the track available on
+        Genius. This may not always be a valid URL.
        """
-        query = "/{} {} lyrics".format(self.artist, self.track)
+        query = "/{} {} lyrics".format(artist, track)
        query = query.replace(" ", "-")
        encoded_query = urllib.request.quote(query)
        lyric_url = self.base_url + encoded_query
        return lyric_url

-    def _fetch_page(self, url, timeout=None):
+    def _fetch_url_page(self, url, timeout=None):
        """
-        Makes a GET request to the given URL and returns the
-        HTML content in the case of a valid response.
+        Makes a GET request to the given lyrics page URL and returns
+        the HTML content in the case of a valid response.
        """
        request = urllib.request.Request(url)
        request.add_header("User-Agent", "urllib")
@@ -36,17 +40,14 @@ class Genius(LyricBase):
            response = urllib.request.urlopen(request, timeout=timeout)
        except urllib.request.HTTPError:
            raise LyricsNotFoundError(
-                "Could not find lyrics for {} - {} at URL: {}".format(
-                    self.artist, self.track, url
-                )
+                "Could not find lyrics at URL: {}".format(url)
            )
        else:
            return response.read()

    def _get_lyrics_text(self, html):
        """
-        Extracts and returns the lyric content from the
-        provided HTML.
+        Extracts and returns the lyric content from the provided HTML.
        """
        soup = BeautifulSoup(html, "html.parser")
        lyrics_paragraph = soup.find("p")
@@ -57,11 +58,52 @@ class Genius(LyricBase):
                "The lyrics for this track are yet to be released."
            )

-    def get_lyrics(self, linesep="\n", timeout=None):
+    def _fetch_search_page(self, url, timeout=None):
        """
-        Returns the lyric string for the given artist and track.
+        Returns search results from a given URL in JSON.
        """
-        url = self._guess_lyric_url()
-        html_page = self._fetch_page(url, timeout=timeout)
-        lyrics = self._get_lyrics_text(html_page)
+        request = urllib.request.Request(url)
+        request.add_header("User-Agent", "urllib")
+        response = urllib.request.urlopen(request, timeout=timeout)
+        metadata = json.loads(response.read())
+        if len(metadata["response"]["sections"][0]["hits"]) == 0:
+            raise LyricsNotFoundError(
+                "Could not find any search results for URL: {}".format(url)
+            )
+        return metadata
+
+    def best_matching_lyric_url_from_query(self, query):
+        """
+        Returns the best matching track's URL from a given query.
+        """
+        encoded_query = query.replace(" ", "+")
+        search_url = self.base_search_url + encoded_query
+        metadata = self._fetch_search_page(search_url)
+        lyric_url = metadata["response"]["sections"][0]["hits"][0]["result"]["path"]
+        return self.base_url + lyric_url
+
+    def from_query(self, query, linesep="\n", timeout=None):
+        """
+        Returns the lyric string for the track best matching the
+        given query.
+        """
+        lyric_url = self.best_matching_lyric_url_from_query(query)
+        return self.from_url(lyric_url, linesep, timeout=timeout)
+
+    def from_artist_and_track(self, artist, track, linesep="\n", timeout=None):
+        """
+        Returns the lyric string for the given artist and track
+        by making scraping search results and fetching the first
+        result.
+        """
+        lyric_url = self.guess_lyric_url_from_artist_and_track(artist, track)
+        return self.from_url(lyric_url, linesep, timeout)
+
+    def from_url(self, url, linesep="\n", timeout=None):
+        """
+        Returns the lyric string for the given URL.
+        """
+        lyric_html_page = self._fetch_url_page(url, timeout=timeout)
+        lyrics = self._get_lyrics_text(lyric_html_page)
        return lyrics.replace("\n", linesep)
+