Add additional methods to fetch lyrics

The following inputs can now be used to fetch lyrics:
* artist and track names
* search query
* direct url
This commit is contained in:
Ritiek Malhotra
2020-04-08 21:43:58 +05:30
parent 51da0b7a29
commit 47247f7250
7 changed files with 210 additions and 76 deletions

View File

@@ -1,6 +1,8 @@
from spotdl.metadata.providers import ProviderSpotify from spotdl.metadata.providers import ProviderSpotify
from spotdl.metadata.providers import ProviderYouTube from spotdl.metadata.providers import ProviderYouTube
from spotdl.metadata.embedders import EmbedderDefault from spotdl.metadata.embedders import EmbedderDefault
from spotdl.lyrics.providers import LyricWikia
from spotdl.lyrics.providers import Genius
from spotdl.track import Track from spotdl.track import Track
@@ -10,7 +12,7 @@ import urllib.request
import threading import threading
def search_metadata(track): def search_metadata(track, lyrics=True):
youtube = ProviderYouTube() youtube = ProviderYouTube()
if spotdl.util.is_spotify(track): if spotdl.util.is_spotify(track):
spotify = ProviderSpotify() spotify = ProviderSpotify()
@@ -34,8 +36,7 @@ def search_metadata(track):
return metadata return metadata
def download_track(metadata, def download_track(metadata, arguments):
dry_run=False, overwrite="prompt", output_ext="mp3", file_format="{artist} - {track-name}", log_fmt="{artist} - {track_name}"):
# TODO: CONFIG.YML # TODO: CONFIG.YML
# Exit here if config.dry_run # Exit here if config.dry_run
@@ -52,10 +53,16 @@ def download_track(metadata,
track = Track(metadata, cache_albumart=True) track = Track(metadata, cache_albumart=True)
track.download_while_re_encoding("test.mp3") track.download_while_re_encoding("test.mp3")
# TODO: CONFIG.YML
# Skip metadata if config.no_metadata
track.apply_metadata("test.mp3") track.apply_metadata("test.mp3")
def download_tracks_from_file(path): def download_tracks_from_file(path, arguments):
# FIXME: Can we make this function cleaner?
# log.info( # log.info(
# "Checking and removing any duplicate tracks " # "Checking and removing any duplicate tracks "
# "in reading {}".format(path) # "in reading {}".format(path)

View File

@@ -12,16 +12,23 @@ class LyricBase(ABC):
""" """
@abstractmethod @abstractmethod
def __init__(self, artist, track): def from_url(self, url, linesep="\n", timeout=None):
""" """
This method must set any protected attributes, This method must return the lyrics string for the
which may be modified from outside the class given track.
if the need arises. """
""" pass
pass
@abstractmethod
@abstractmethod def from_artist_and_track(self, artist, track, linesep="\n", timeout=None):
def get_lyrics(self, linesep="\n", timeout=None): """
This method must return the lyrics string for the
given track.
"""
pass
@abstractmethod
def from_query(self, query, linesep="\n", timeout=None):
""" """
This method must return the lyrics string for the This method must return the lyrics string for the
given track. given track.

View File

@@ -1,34 +1,38 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import urllib.request import urllib.request
import json
from spotdl.lyrics.lyric_base import LyricBase from spotdl.lyrics.lyric_base import LyricBase
from spotdl.lyrics.exceptions import LyricsNotFoundError from spotdl.lyrics.exceptions import LyricsNotFoundError
BASE_URL = "https://genius.com" BASE_URL = "https://genius.com"
BASE_SEARCH_URL = BASE_URL + "/api/search/multi?per_page=1&q="
# FIXME: Make Genius a metadata provider instead of a lyric provider,
# since Genius exposes additional metadata too (such as track name,
# artist name and album art URL). For example, fetch this URL:
# https://genius.com/api/search/multi?per_page=1&q=artist+trackname
class Genius(LyricBase): class Genius(LyricBase):
def __init__(self, artist, track): def __init__(self):
self.artist = artist
self.track = track
self.base_url = BASE_URL self.base_url = BASE_URL
self.base_search_url = BASE_SEARCH_URL
def _guess_lyric_url(self): def guess_lyric_url_from_artist_and_track(self, artist, track):
""" """
Returns the possible lyric URL for the track available Returns the possible lyric URL for the track available on
on Genius. This may not always be a valid URL, but this Genius. This may not always be a valid URL.
is apparently the best we can do at the moment?
""" """
query = "/{} {} lyrics".format(self.artist, self.track) query = "/{} {} lyrics".format(artist, track)
query = query.replace(" ", "-") query = query.replace(" ", "-")
encoded_query = urllib.request.quote(query) encoded_query = urllib.request.quote(query)
lyric_url = self.base_url + encoded_query lyric_url = self.base_url + encoded_query
return lyric_url return lyric_url
def _fetch_page(self, url, timeout=None): def _fetch_url_page(self, url, timeout=None):
""" """
Makes a GET request to the given URL and returns the Makes a GET request to the given lyrics page URL and returns
HTML content in the case of a valid response. the HTML content in the case of a valid response.
""" """
request = urllib.request.Request(url) request = urllib.request.Request(url)
request.add_header("User-Agent", "urllib") request.add_header("User-Agent", "urllib")
@@ -36,17 +40,14 @@ class Genius(LyricBase):
response = urllib.request.urlopen(request, timeout=timeout) response = urllib.request.urlopen(request, timeout=timeout)
except urllib.request.HTTPError: except urllib.request.HTTPError:
raise LyricsNotFoundError( raise LyricsNotFoundError(
"Could not find lyrics for {} - {} at URL: {}".format( "Could not find lyrics at URL: {}".format(url)
self.artist, self.track, url
)
) )
else: else:
return response.read() return response.read()
def _get_lyrics_text(self, html): def _get_lyrics_text(self, html):
""" """
Extracts and returns the lyric content from the Extracts and returns the lyric content from the provided HTML.
provided HTML.
""" """
soup = BeautifulSoup(html, "html.parser") soup = BeautifulSoup(html, "html.parser")
lyrics_paragraph = soup.find("p") lyrics_paragraph = soup.find("p")
@@ -57,11 +58,52 @@ class Genius(LyricBase):
"The lyrics for this track are yet to be released." "The lyrics for this track are yet to be released."
) )
def get_lyrics(self, linesep="\n", timeout=None): def _fetch_search_page(self, url, timeout=None):
""" """
Returns the lyric string for the given artist and track. Returns search results from a given URL in JSON.
""" """
url = self._guess_lyric_url() request = urllib.request.Request(url)
html_page = self._fetch_page(url, timeout=timeout) request.add_header("User-Agent", "urllib")
lyrics = self._get_lyrics_text(html_page) response = urllib.request.urlopen(request, timeout=timeout)
metadata = json.loads(response.read())
if len(metadata["response"]["sections"][0]["hits"]) == 0:
raise LyricsNotFoundError(
"Could not find any search results for URL: {}".format(url)
)
return metadata
def best_matching_lyric_url_from_query(self, query, timeout=None):
    """
    Returns the best matching track's URL from a given query.

    The query is percent-encoded before it is appended to the search
    URL, so characters such as "&", "#", "/" or non-ASCII letters
    cannot change the meaning of the request. `timeout` is forwarded
    to the search request.

    Any exception raised by `_fetch_search_page` (for example
    LyricsNotFoundError when the search has no hits) propagates to
    the caller.
    """
    # Imported locally so this method does not depend on `urllib.parse`
    # having been loaded as a side effect of `import urllib.request`.
    from urllib.parse import quote_plus

    # quote_plus() still maps spaces to "+", so plain queries produce
    # the same search URL as before; only unsafe characters change.
    search_url = self.base_search_url + quote_plus(query)
    metadata = self._fetch_search_page(search_url, timeout=timeout)
    top_hit = metadata["response"]["sections"][0]["hits"][0]
    return self.base_url + top_hit["result"]["path"]

def from_query(self, query, linesep="\n", timeout=None):
    """
    Returns the lyric string for the track best matching the
    given query.

    Two requests are made (the search, then the lyric page) and
    `timeout` is applied to each of them.
    """
    lyric_url = self.best_matching_lyric_url_from_query(query, timeout=timeout)
    return self.from_url(lyric_url, linesep, timeout=timeout)
def from_artist_and_track(self, artist, track, linesep="\n", timeout=None):
    """
    Returns the lyric string for the given artist and track.

    No search request is made here: the lyric page URL is guessed
    from the artist and track names, and that page is then fetched
    through `from_url`.
    """
    guessed_url = self.guess_lyric_url_from_artist_and_track(artist, track)
    lyrics = self.from_url(guessed_url, linesep, timeout)
    return lyrics
def from_url(self, url, linesep="\n", timeout=None):
"""
Returns the lyric string for the given URL.
"""
lyric_html_page = self._fetch_url_page(url, timeout=timeout)
lyrics = self._get_lyrics_text(lyric_html_page)
return lyrics.replace("\n", linesep) return lyrics.replace("\n", linesep)

View File

@@ -5,17 +5,20 @@ from spotdl.lyrics.exceptions import LyricsNotFoundError
class LyricWikia(LyricBase): class LyricWikia(LyricBase):
def __init__(self, artist, track): def from_query(self, query, linesep="\n", timeout=None):
self.artist = artist raise NotImplementedError
self.track = track
def get_lyrics(self, linesep="\n", timeout=None): def from_artist_and_track(self, artist, track, linesep="\n", timeout=None):
""" """
Returns the lyric string for the given artist and track. Returns the lyric string for the given artist and track.
""" """
try: try:
lyrics = lyricwikia.get_lyrics(self.artist, self.track, linesep, timeout) lyrics = lyricwikia.get_lyrics(artist, track, linesep, timeout)
except lyricwikia.LyricsNotFound as e: except lyricwikia.LyricsNotFound as e:
raise LyricsNotFoundError(e.args[0]) raise LyricsNotFoundError(e.args[0])
else:
return lyrics return lyrics
def from_url(self, url, linesep="\n", timeout=None):
raise NotImplementedError

View File

@@ -3,35 +3,114 @@ from spotdl.lyrics import exceptions
from spotdl.lyrics.providers import Genius from spotdl.lyrics.providers import Genius
import urllib.request import urllib.request
import json
import pytest import pytest
class TestGenius: class TestGenius:
def test_subclass(self): def test_subclass(self):
assert issubclass(Genius, LyricBase) assert issubclass(Genius, LyricBase)
@pytest.fixture(scope="module")
def expect_lyrics_count(self):
# This is the number of characters in lyrics found
# for the track in `lyric_url` fixture below
return 1845
@pytest.fixture(scope="module")
def genius(self):
return Genius()
def test_base_url(self, genius):
assert genius.base_url == "https://genius.com"
@pytest.fixture(scope="module")
def artist(self):
return "selena gomez"
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def track(self): def track(self):
return Genius("artist", "track") return "wolves"
def test_base_url(self, track): @pytest.fixture(scope="module")
assert track.base_url == "https://genius.com" def query(self, artist, track):
return "{} {}".format(artist, track)
def test_get_lyrics(self, track, monkeypatch): @pytest.fixture(scope="module")
def mocked_urlopen(url, timeout=None): def guess_url(self, query):
class DummyHTTPResponse: return "https://genius.com/selena-gomez-wolves-lyrics"
def read(self):
return "<p>amazing lyrics!</p>"
return DummyHTTPResponse() @pytest.fixture(scope="module")
def lyric_url(self):
return "https://genius.com/Selena-gomez-and-marshmello-wolves-lyrics"
monkeypatch.setattr("urllib.request.urlopen", mocked_urlopen) def test_guess_lyric_url_from_artist_and_track(self, genius, artist, track, guess_url):
assert track.get_lyrics() == "amazing lyrics!" url = genius.guess_lyric_url_from_artist_and_track(artist, track)
assert url == guess_url
def test_lyrics_not_found_error(self, track, monkeypatch): class MockHTTPResponse:
def mocked_urlopen(url, timeout=None): expect_lyrics = ""
def __init__(self, request, timeout=None):
search_results_url = "https://genius.com/api/search/multi?per_page=1&q=selena+gomez+wolves"
if request._full_url == search_results_url:
read_method = lambda: json.dumps({
"response": {"sections": [{"hits": [{"result": {
"path": "/Selena-gomez-and-marshmello-wolves-lyrics"
} }] }] }
})
else:
read_method = lambda: "<p>" + self.expect_lyrics + "</p>"
self.read = read_method
@pytest.mark.network
def test_best_matching_lyric_url_from_query(self, genius, query, lyric_url):
url = genius.best_matching_lyric_url_from_query(query)
assert url == lyric_url
def test_mock_best_matching_lyric_url_from_query(self, genius, query, lyric_url, monkeypatch):
monkeypatch.setattr("urllib.request.urlopen", self.MockHTTPResponse)
self.test_best_matching_lyric_url_from_query(genius, query, lyric_url)
@pytest.mark.network
def test_from_url(self, genius, lyric_url, expect_lyrics_count):
lyrics = genius.from_url(lyric_url)
assert len(lyrics) == expect_lyrics_count
def test_mock_from_url(self, genius, lyric_url, expect_lyrics_count, monkeypatch):
self.MockHTTPResponse.expect_lyrics = "a" * expect_lyrics_count
monkeypatch.setattr("urllib.request.urlopen", self.MockHTTPResponse)
self.test_from_url(genius, lyric_url, expect_lyrics_count)
@pytest.mark.network
def test_from_artist_and_track(self, genius, artist, track, expect_lyrics_count):
lyrics = genius.from_artist_and_track(artist, track)
assert len(lyrics) == expect_lyrics_count
def test_mock_from_artist_and_track(self, genius, artist, track, expect_lyrics_count, monkeypatch):
self.MockHTTPResponse.expect_lyrics = "a" * expect_lyrics_count
monkeypatch.setattr("urllib.request.urlopen", self.MockHTTPResponse)
self.test_from_artist_and_track(genius, artist, track, expect_lyrics_count)
@pytest.mark.network
def test_from_query(self, genius, query, expect_lyrics_count):
lyrics = genius.from_query(query)
assert len(lyrics) == expect_lyrics_count
def test_mock_from_query(self, genius, query, expect_lyrics_count, monkeypatch):
self.MockHTTPResponse.expect_lyrics = "a" * expect_lyrics_count
monkeypatch.setattr("urllib.request.urlopen", self.MockHTTPResponse)
self.test_from_query(genius, query, expect_lyrics_count)
@pytest.mark.network
def test_lyrics_not_found_error(self, genius):
    """
    Requesting lyrics for an artist/track pair that does not exist
    must raise LyricsNotFoundError.
    """
    # `genius` is already a bound instance, so only the artist and the
    # track are passed. An extra leading `self` would shift every
    # argument: the test object would become the artist and
    # "nonexistent_track" would become `linesep`.
    with pytest.raises(exceptions.LyricsNotFoundError):
        genius.from_artist_and_track("nonexistent_artist", "nonexistent_track")
def test_mock_lyrics_not_found_error(self, genius, monkeypatch):
def mock_urlopen(url, timeout=None):
raise urllib.request.HTTPError("", "", "", "", "") raise urllib.request.HTTPError("", "", "", "", "")
monkeypatch.setattr("urllib.request.urlopen", mocked_urlopen) monkeypatch.setattr("urllib.request.urlopen", mock_urlopen)
with pytest.raises(exceptions.LyricsNotFoundError): self.test_lyrics_not_found_error(genius)
track.get_lyrics()

View File

@@ -11,15 +11,16 @@ class TestLyricWikia:
def test_subclass(self): def test_subclass(self):
assert issubclass(LyricWikia, LyricBase) assert issubclass(LyricWikia, LyricBase)
def test_get_lyrics(self, monkeypatch): def test_from_artist_and_track(self, monkeypatch):
# `LyricWikia` class uses the 3rd party method `lyricwikia.get_lyrics` # `LyricWikia` class uses the 3rd party method `lyricwikia.get_lyrics`
# internally and there is no need to test a 3rd party library as they # internally and there is no need to test a 3rd party library as they
# have their own implementation of tests. # have their own implementation of tests.
monkeypatch.setattr( monkeypatch.setattr(
"lyricwikia.get_lyrics", lambda a, b, c, d: "awesome lyrics!" "lyricwikia.get_lyrics", lambda a, b, c, d: "awesome lyrics!"
) )
track = LyricWikia("Lyricwikia", "Lyricwikia") artist, track = "selena gomez", "wolves"
assert track.get_lyrics() == "awesome lyrics!" lyrics = LyricWikia().from_artist_and_track(artist, track)
assert lyrics == "awesome lyrics!"
def test_lyrics_not_found_error(self, monkeypatch): def test_lyrics_not_found_error(self, monkeypatch):
def lyricwikia_lyrics_not_found(msg): def lyricwikia_lyrics_not_found(msg):
@@ -30,6 +31,6 @@ class TestLyricWikia:
"lyricwikia.get_lyrics", "lyricwikia.get_lyrics",
lambda a, b, c, d: lyricwikia_lyrics_not_found("Nope, no lyrics."), lambda a, b, c, d: lyricwikia_lyrics_not_found("Nope, no lyrics."),
) )
track = LyricWikia("Lyricwikia", "Lyricwikia") artist, track = "nonexistent_artist", "nonexistent_track"
with pytest.raises(exceptions.LyricsNotFoundError): with pytest.raises(exceptions.LyricsNotFoundError):
track.get_lyrics() LyricWikia().from_artist_and_track(artist, track)

View File

@@ -5,25 +5,20 @@ import pytest
class TestAbstractBaseClass: class TestAbstractBaseClass:
def test_error_abstract_base_class_lyricbase(self): def test_error_abstract_base_class_lyricbase(self):
artist = "awesome artist"
track = "amazing track"
with pytest.raises(TypeError): with pytest.raises(TypeError):
# This abstract base class must be inherited from # This abstract base class must be inherited from
# for instantiation # for instantiation
LyricBase(artist, track) LyricBase()
def test_inherit_abstract_base_class_encoderbase(self): def test_inherit_abstract_base_class_encoderbase(self):
class LyricKid(LyricBase): class LyricKid(LyricBase):
def __init__(self, artist, track): def from_query(self, query):
super().__init__(artist, track) raise NotImplementedError
def get_lyrics(self): def from_artist_and_track(self, artist, track):
pass pass
def from_url(self, url):
raise NotImplementedError
artist = "awesome artist" LyricKid()
track = "amazing track"
LyricKid(artist, track)