diff --git a/CHANGES.md b/CHANGES.md index d39a530..739e01d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -27,11 +27,14 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [1.1.1] - 2019-01-03 ### Added +- Use YouTube as fallback metadata if track not found on Spotify. Also added `--no-fallback-metadata` + to preserve old behaviour ([@ritiek](https://github.com/ritiek)) (#457) - Output informative message in case of no result found in YouTube search ([@Amit-L](https://github.com/Amit-L)) (#452) - Ability to pass multiple tracks with `-s` option ([@ritiek](https://github.com/ritiek)) (#442) ### Changed -- Allowed to fetch metadata from Spotify upon searching Spotify-URL and `--no-metadata` to gather YouTube custom-search fields ([@Amit-L](https://github.com/Amit-L)) (#452) +- Allowed to fetch metadata from Spotify upon searching Spotify-URL and `--no-metadata` to gather YouTube + custom-search fields ([@Amit-L](https://github.com/Amit-L)) (#452) - Change FFmpeg to use the built-in encoder `aac` instead of 3rd party `libfdk-aac` which does not ship with the apt package ([@ritiek](https://github.com/ritiek)) (#448) - Monkeypatch ever-changing network-relying tests ([@ritiek](https://github.com/ritiek)) (#448) diff --git a/spotdl/downloader.py b/spotdl/downloader.py index 8e8c243..9d0190d 100644 --- a/spotdl/downloader.py +++ b/spotdl/downloader.py @@ -163,8 +163,6 @@ class Downloader: if not refined_songname == " - ": songname = refined_songname else: - if not const.args.no_metadata: - log.warning("Could not find metadata") songname = internals.sanitize_title(songname) return songname diff --git a/spotdl/handle.py b/spotdl/handle.py index 4b6ceb6..f76f46c 100644 --- a/spotdl/handle.py +++ b/spotdl/handle.py @@ -17,6 +17,7 @@ default_conf = { "spotify-downloader": { "manual": False, "no-metadata": False, + "no-fallback-metadata": False, "avconv": False, "folder": internals.get_music_dir(), "overwrite": "prompt", @@ -132,7 +133,7 @@ def 
get_arguments(raw_args=None, to_group=True, to_merge=True): "-m", "--manual", default=config["manual"], - help="choose the track to download manually from a list " "of matching tracks", + help="choose the track to download manually from a list of matching tracks", action="store_true", ) parser.add_argument( @@ -142,6 +143,13 @@ def get_arguments(raw_args=None, to_group=True, to_merge=True): help="do not embed metadata in tracks", action="store_true", ) + parser.add_argument( + "-nf", + "--no-fallback-metadata", + default=config["no-fallback-metadata"], + help="do not use YouTube metadata as fallback if track not found on Spotify", + action="store_true", + ) parser.add_argument( "-a", "--avconv", diff --git a/spotdl/internals.py b/spotdl/internals.py index 375cc3a..d255215 100644 --- a/spotdl/internals.py +++ b/spotdl/internals.py @@ -1,6 +1,7 @@ from logzero import logger as log import os import sys +import urllib.request from spotdl import const @@ -253,3 +254,12 @@ def remove_duplicates(tracks): local_set = set() local_set_add = local_set.add return [x for x in tracks if not (x in local_set or local_set_add(x))] + + +def content_available(url): + try: + response = urllib.request.urlopen(url) + except urllib.error.HTTPError: + return False + else: + return response.getcode() < 300 diff --git a/spotdl/metadata.py b/spotdl/metadata.py index b214d27..d67fabc 100644 --- a/spotdl/metadata.py +++ b/spotdl/metadata.py @@ -46,6 +46,8 @@ class EmbedMetadata: def __init__(self, music_file, meta_tags): self.music_file = music_file self.meta_tags = meta_tags + self.spotify_metadata = meta_tags["spotify_metadata"] + self.provider = "spotify" if meta_tags["spotify_metadata"] else "youtube" def as_mp3(self): """ Embed metadata to MP3 files. 
""" @@ -62,7 +64,7 @@ class EmbedMetadata: audiofile["lyricist"] = meta_tags["artists"][0]["name"] audiofile["arranger"] = meta_tags["artists"][0]["name"] audiofile["performer"] = meta_tags["artists"][0]["name"] - audiofile["website"] = meta_tags["external_urls"]["spotify"] + audiofile["website"] = meta_tags["external_urls"][self.provider] audiofile["length"] = str(meta_tags["duration"]) if meta_tags["publisher"]: audiofile["encodedby"] = meta_tags["publisher"] @@ -78,7 +80,7 @@ class EmbedMetadata: audiofile["TYER"] = TYER(encoding=3, text=meta_tags["year"]) if meta_tags["publisher"]: audiofile["TPUB"] = TPUB(encoding=3, text=meta_tags["publisher"]) - audiofile["COMM"] = COMM(encoding=3, text=meta_tags["external_urls"]["spotify"]) + audiofile["COMM"] = COMM(encoding=3, text=meta_tags["external_urls"][self.provider]) if meta_tags["lyrics"]: audiofile["USLT"] = USLT( encoding=3, desc=u"Lyrics", text=meta_tags["lyrics"] @@ -106,7 +108,7 @@ class EmbedMetadata: audiofile = MP4(music_file) self._embed_basic_metadata(audiofile, preset=M4A_TAG_PRESET) audiofile[M4A_TAG_PRESET["year"]] = meta_tags["year"] - audiofile[M4A_TAG_PRESET["comment"]] = meta_tags["external_urls"]["spotify"] + audiofile[M4A_TAG_PRESET["comment"]] = meta_tags["external_urls"][self.provider] if meta_tags["lyrics"]: audiofile[M4A_TAG_PRESET["lyrics"]] = meta_tags["lyrics"] try: @@ -127,7 +129,7 @@ class EmbedMetadata: audiofile = FLAC(music_file) self._embed_basic_metadata(audiofile) audiofile["year"] = meta_tags["year"] - audiofile["comment"] = meta_tags["external_urls"]["spotify"] + audiofile["comment"] = meta_tags["external_urls"][self.provider] if meta_tags["lyrics"]: audiofile["lyrics"] = meta_tags["lyrics"] @@ -147,7 +149,8 @@ class EmbedMetadata: meta_tags = self.meta_tags audiofile[preset["artist"]] = meta_tags["artists"][0]["name"] audiofile[preset["albumartist"]] = meta_tags["album"]["artists"][0]["name"] - audiofile[preset["album"]] = meta_tags["album"]["name"] + if 
meta_tags["album"]["name"]: + audiofile[preset["album"]] = meta_tags["album"]["name"] audiofile[preset["title"]] = meta_tags["name"] audiofile[preset["date"]] = meta_tags["release_date"] audiofile[preset["originaldate"]] = meta_tags["release_date"] diff --git a/spotdl/patcher.py b/spotdl/patcher.py new file mode 100644 index 0000000..1b1b744 --- /dev/null +++ b/spotdl/patcher.py @@ -0,0 +1,31 @@ +import pafy + +from spotdl import internals + + +def _getbestthumb(self): + url = self._ydl_info["thumbnails"][0]["url"] + if url: + return url + + part_url = "https://i.ytimg.com/vi/%s/" % self.videoid + # Thumbnail resolution sorted in descending order + thumbs = ("maxresdefault.jpg", + "sddefault.jpg", + "hqdefault.jpg", + "mqdefault.jpg", + "default.jpg") + for thumb in thumbs: + url = part_url + thumb + if self._content_available(url): + return url + + +@classmethod +def _content_available(cls, url): + return internals.content_available(url) + +def patch_pafy(): + pafy.backend_shared.BasePafy._bestthumb = None + pafy.backend_shared.BasePafy._content_available = _content_available + pafy.backend_shared.BasePafy.getbestthumb = _getbestthumb diff --git a/spotdl/spotify_tools.py b/spotdl/spotify_tools.py index c0e6b74..2a60284 100644 --- a/spotdl/spotify_tools.py +++ b/spotdl/spotify_tools.py @@ -81,6 +81,7 @@ def generate_metadata(raw_song): # Some sugar meta_tags["year"], *_ = meta_tags["release_date"].split("-") meta_tags["duration"] = meta_tags["duration_ms"] / 1000.0 + meta_tags["spotify_metadata"] = True # Remove unwanted parameters del meta_tags["duration_ms"] del meta_tags["available_markets"] diff --git a/spotdl/youtube_tools.py b/spotdl/youtube_tools.py index 9c1705f..7b81d73 100644 --- a/spotdl/youtube_tools.py +++ b/spotdl/youtube_tools.py @@ -14,6 +14,12 @@ from spotdl import const # Read more on mps-youtube/pafy#199 pafy.g.opener.addheaders.append(("Range", "bytes=0-")) +# Implement unreleased methods on Pafy object +# More info: 
https://github.com/mps-youtube/pafy/pull/211 +if pafy.__version__ <= "0.5.4": + from spotdl import patcher + patcher.patch_pafy() + def set_api_key(): if const.args.youtube_api_key: @@ -39,28 +45,75 @@ def go_pafy(raw_song, meta_tags=None): return track_info -def match_video_and_metadata(track, force_pafy=True): +def match_video_and_metadata(track): """ Get and match track data from YouTube and Spotify. """ meta_tags = None + + def fallback_metadata(meta_tags): + fallback_metadata_info = "Track not found on Spotify, falling back on YouTube metadata" + skip_fallback_metadata_warning = "Fallback condition not met, shall not embed metadata" + if meta_tags is None: + if const.args.no_fallback_metadata: + log.warning(skip_fallback_metadata_warning) + else: + log.info(fallback_metadata_info) + meta_tags = generate_metadata(content) + return meta_tags + + if internals.is_youtube(track): log.debug("Input song is a YouTube URL") content = go_pafy(track, meta_tags=None) track = slugify(content.title).replace("-", " ") if not const.args.no_metadata: meta_tags = spotify_tools.generate_metadata(track) - else: - # Let it generate metadata, youtube doesn't know spotify slang - if not const.args.no_metadata or internals.is_spotify(track): - meta_tags = spotify_tools.generate_metadata(track) + meta_tags = fallback_metadata(meta_tags) - if force_pafy: - content = go_pafy(track, meta_tags) + elif internals.is_spotify(track): + log.debug("Input song is a Spotify URL") + # Let it generate metadata, YouTube doesn't know Spotify slang + meta_tags = spotify_tools.generate_metadata(track) + content = go_pafy(track, meta_tags) + if const.args.no_metadata: + meta_tags = None + + else: + log.debug("Input song is plain text based") + if const.args.no_metadata: + content = go_pafy(track, meta_tags=None) else: - content = None + meta_tags = spotify_tools.generate_metadata(track) + content = go_pafy(track, meta_tags=meta_tags) + meta_tags = fallback_metadata(meta_tags) + return content, meta_tags 
+def generate_metadata(content): + """ Fetch a song's metadata from YouTube. """ + meta_tags = {"spotify_metadata": False, + "name": content.title, + "artists": [{"name": content.author}], + "duration": content.length, + "external_urls": {"youtube": content.watchv_url}, + "album": {"images" : [{"url": content.getbestthumb()}], "artists": [{"name": content.author}], "name": None}, + "year": content.published.split("-")[0], + "release_date": content.published.split(" ")[0], + "type": "track", + "disc_number": 1, + "track_number": 1, + "total_tracks": 1, + "publisher": None, + "external_ids": {"isrc": None}, + "lyrics": None, + "copyright": None, + "genre": None, + } + + return meta_tags + + def get_youtube_title(content, number=None): """ Get the YouTube video's title. """ title = content.title diff --git a/test/loader.py b/test/loader.py index 33e288d..79c9771 100644 --- a/test/loader.py +++ b/test/loader.py @@ -1,6 +1,7 @@ from spotdl import const from spotdl import handle from spotdl import spotdl +import urllib.request import pytest @@ -13,3 +14,13 @@ def load_defaults(): spotdl.log = const.logzero.setup_logger( formatter=const._formatter, level=const.args.log_level ) + + +# GIST_URL is the monkeypatched version of: https://www.youtube.com/results?search_query=janji+-+heroes +# so that we get same results even if YouTube changes the list/order of videos on their page. 
+GIST_URL = "https://gist.githubusercontent.com/ritiek/e731338e9810e31c2f00f13c249a45f5/raw/c11a27f3b5d11a8d082976f1cdd237bd605ec2c2/search_results.html" + +def monkeypatch_youtube_search_page(*args, **kwargs): + fake_urlopen = urllib.request.urlopen(GIST_URL) + return fake_urlopen + diff --git a/test/test_download_with_metadata.py b/test/test_download_with_metadata.py index ad840a3..9c376c0 100644 --- a/test/test_download_with_metadata.py +++ b/test/test_download_with_metadata.py @@ -1,4 +1,3 @@ -import urllib import subprocess import os @@ -39,7 +38,7 @@ def metadata_fixture(): def test_metadata(metadata_fixture): - expect_number = 23 + expect_number = 24 assert len(metadata_fixture) == expect_number @@ -54,16 +53,11 @@ class TestFileFormat: assert title == EXPECTED_SPOTIFY_TITLE.replace(" ", "_") -def monkeypatch_youtube_search_page(*args, **kwargs): - fake_urlopen = urllib.request.urlopen(GIST_URL) - return fake_urlopen - - def test_youtube_url(metadata_fixture, monkeypatch): monkeypatch.setattr( youtube_tools.GenerateYouTubeURL, "_fetch_response", - monkeypatch_youtube_search_page, + loader.monkeypatch_youtube_search_page, ) url = youtube_tools.generate_youtube_url(SPOTIFY_TRACK_URL, metadata_fixture) assert url == EXPECTED_YOUTUBE_URL @@ -73,7 +67,7 @@ def test_youtube_title(metadata_fixture, monkeypatch): monkeypatch.setattr( youtube_tools.GenerateYouTubeURL, "_fetch_response", - monkeypatch_youtube_search_page, + loader.monkeypatch_youtube_search_page, ) content = youtube_tools.go_pafy(SPOTIFY_TRACK_URL, metadata_fixture) pytest.content_fixture = content @@ -107,22 +101,16 @@ class TestDownload: def test_m4a(self, monkeypatch, filename_fixture): expect_download = True - monkeypatch.setattr( - "pafy.backend_shared.BaseStream.download", self.blank_audio_generator - ) - download = youtube_tools.download_song( - filename_fixture + ".m4a", pytest.content_fixture - ) + monkeypatch.setattr("pafy.backend_shared.BaseStream.download", self.blank_audio_generator) + 
monkeypatch.setattr("pafy.backend_youtube_dl.YtdlStream.download", self.blank_audio_generator) + download = youtube_tools.download_song(filename_fixture + ".m4a", pytest.content_fixture) assert download == expect_download def test_webm(self, monkeypatch, filename_fixture): expect_download = True - monkeypatch.setattr( - "pafy.backend_shared.BaseStream.download", self.blank_audio_generator - ) - download = youtube_tools.download_song( - filename_fixture + ".webm", pytest.content_fixture - ) + monkeypatch.setattr("pafy.backend_shared.BaseStream.download", self.blank_audio_generator) + monkeypatch.setattr("pafy.backend_youtube_dl.YtdlStream.download", self.blank_audio_generator) + download = youtube_tools.download_song(filename_fixture + ".webm", pytest.content_fixture) assert download == expect_download diff --git a/test/test_patcher.py b/test/test_patcher.py new file mode 100644 index 0000000..baddd0e --- /dev/null +++ b/test/test_patcher.py @@ -0,0 +1,35 @@ +from spotdl import patcher +import pafy + +import pytest + +patcher.patch_pafy() + +class TestPafyContentAvailable: + pass + + +class TestMethodAssignment: + def test_pafy_getbestthumb(self): + assert pafy.backend_shared.BasePafy.getbestthumb == patcher._getbestthumb + + +class TestMethodCalls: + @pytest.fixture(scope="module") + def content_fixture(self): + content = pafy.new("http://youtube.com/watch?v=3nQNiWdeH2Q") + return content + + def test_pafy_getbestthumb(self, content_fixture): + thumbnail = patcher._getbestthumb(content_fixture) + assert thumbnail == "https://i.ytimg.com/vi/3nQNiWdeH2Q/maxresdefault.jpg" + + def test_pafy_getbestthumb_without_ytdl(self, content_fixture): + content_fixture._ydl_info["thumbnails"][0]["url"] = None + thumbnail = patcher._getbestthumb(content_fixture) + assert thumbnail == "https://i.ytimg.com/vi/3nQNiWdeH2Q/maxresdefault.jpg" + + + def test_pafy_content_available(self): + TestPafyContentAvailable._content_available = patcher._content_available + assert 
TestPafyContentAvailable()._content_available("https://youtube.com/") diff --git a/test/test_spotify_tools.py b/test/test_spotify_tools.py index 6ed6d03..1981c20 100644 --- a/test/test_spotify_tools.py +++ b/test/test_spotify_tools.py @@ -2,6 +2,9 @@ from spotdl import spotify_tools import os import pytest +import loader + +loader.load_defaults() def test_generate_token(): @@ -23,7 +26,7 @@ class TestGenerateMetadata: return metadata def test_len(self, metadata_fixture): - assert len(metadata_fixture) == 23 + assert len(metadata_fixture) == 24 def test_trackname(self, metadata_fixture): assert metadata_fixture["name"] == "Spectre" diff --git a/test/test_youtube_tools.py b/test/test_youtube_tools.py index eb5393c..364760e 100644 --- a/test/test_youtube_tools.py +++ b/test/test_youtube_tools.py @@ -17,7 +17,6 @@ YT_API_KEY = "AIzaSyAnItl3udec-Q1d5bkjKJGL-RgrKO_vU90" TRACK_SEARCH = "Tony's Videos VERY SHORT VIDEO 28.10.2016" EXPECTED_TITLE = TRACK_SEARCH EXPECTED_YT_URL = "http://youtube.com/watch?v=qOOcy2-tmbk" -EXPECTED_YT_URLS = (EXPECTED_YT_URL, "http://youtube.com/watch?v=5USR1Omo7f0") RESULT_COUNT_SEARCH = "she is still sleeping SAO" @@ -71,8 +70,7 @@ class TestYouTubeURL: def test_only_music_category(self, metadata_fixture): const.args.music_videos_only = True url = youtube_tools.generate_youtube_url(TRACK_SEARCH, metadata_fixture) - # YouTube keeps changing its results - assert url in EXPECTED_YT_URLS + assert url == EXPECTED_YT_URL def test_all_categories(self, metadata_fixture): const.args.music_videos_only = False @@ -99,6 +97,56 @@ def content_fixture(metadata_fixture): return content +# True = Metadata must be fetched from Spotify +# False = Metadata must be fetched from YouTube +# None = Metadata must be `None` + +MATCH_METADATA_NO_FALLBACK_TEST_TABLE = [ + ("https://open.spotify.com/track/5nWduGwBGBn1PSqYTJUDbS", True), + ("http://youtube.com/watch?v=3nQNiWdeH2Q", None), + ("Linux Talk | Working with Drives and Filesystems", None) +] + 
+MATCH_METADATA_FALLBACK_TEST_TABLE = [ + ("https://open.spotify.com/track/5nWduGwBGBn1PSqYTJUDbS", True), + ("http://youtube.com/watch?v=3nQNiWdeH2Q", False), + ("Linux Talk | Working with Drives and Filesystems", False) +] + +MATCH_METADATA_NO_METADATA_TEST_TABLE = [ + ("https://open.spotify.com/track/5nWduGwBGBn1PSqYTJUDbS", None), + ("http://youtube.com/watch?v=3nQNiWdeH2Q", None), + ("Linux Talk | Working with Drives and Filesystems", None) +] + + +class TestMetadataOrigin: + def match_metadata(self, track, metadata_type): + _, metadata = youtube_tools.match_video_and_metadata(track) + if metadata_type is None: + assert metadata == metadata_type + else: + assert metadata["spotify_metadata"] == metadata_type + + @pytest.mark.parametrize("track, metadata_type", MATCH_METADATA_NO_FALLBACK_TEST_TABLE) + def test_match_metadata_with_no_fallback(self, track, metadata_type, content_fixture, monkeypatch): + monkeypatch.setattr(youtube_tools, "go_pafy", lambda track, meta_tags: content_fixture) + const.args.no_fallback_metadata = True + self.match_metadata(track, metadata_type) + + @pytest.mark.parametrize("track, metadata_type", MATCH_METADATA_FALLBACK_TEST_TABLE) + def test_match_metadata_with_fallback(self, track, metadata_type, content_fixture, monkeypatch): + monkeypatch.setattr(youtube_tools, "go_pafy", lambda track, meta_tags: content_fixture) + const.args.no_fallback_metadata = False + self.match_metadata(track, metadata_type) + + @pytest.mark.parametrize("track, metadata_type", MATCH_METADATA_NO_METADATA_TEST_TABLE) + def test_match_metadata_with_no_metadata(self, track, metadata_type, content_fixture, monkeypatch): + monkeypatch.setattr(youtube_tools, "go_pafy", lambda track, meta_tags: content_fixture) + const.args.no_metadata = True + self.match_metadata(track, metadata_type) + + @pytest.fixture(scope="module") def title_fixture(content_fixture): title = youtube_tools.get_youtube_title(content_fixture) @@ -136,6 +184,26 @@ def 
test_check_exists(metadata_fixture, filename_fixture, tmpdir): assert check == expect_check +def test_generate_m3u(tmpdir, monkeypatch): + monkeypatch.setattr(youtube_tools.GenerateYouTubeURL, "_fetch_response", loader.monkeypatch_youtube_search_page) + expect_m3u = ( + "#EXTM3U\n\n" + "#EXTINF:208,Janji - Heroes Tonight (feat. Johnning) [NCS Release]\n" + "http://www.youtube.com/watch?v=3nQNiWdeH2Q\n" + "#EXTINF:226,Alan Walker - Spectre [NCS Release]\n" + "http://www.youtube.com/watch?v=AOeY-nDp7hI\n" + ) + m3u_track_file = os.path.join(str(tmpdir), "m3u_test.txt") + with open(m3u_track_file, "w") as track_file: + track_file.write("\nhttps://open.spotify.com/track/3SipFlNddvL0XNZRLXvdZD") + track_file.write("\nhttp://www.youtube.com/watch?v=AOeY-nDp7hI") + youtube_tools.generate_m3u(m3u_track_file) + m3u_file = "{}.m3u".format(m3u_track_file.split(".")[0]) + with open(m3u_file, "r") as m3u_in: + m3u = m3u_in.readlines() + assert "".join(m3u) == expect_m3u + + class TestDownload: def test_webm(self, content_fixture, filename_fixture, monkeypatch): # content_fixture does not have any .webm audiostream