Merge pull request #457 from ritiek/youtube-metadata

Use YouTube as fallback for track metadata if not found on Spotify
This commit is contained in:
Ritiek Malhotra
2019-02-25 22:24:15 -08:00
committed by GitHub
13 changed files with 254 additions and 42 deletions

View File

@@ -27,11 +27,14 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
## [1.1.1] - 2019-01-03
### Added
- Use YouTube as fallback metadata if track not found on Spotify. Also added `--no-fallback-metadata`
to preserve old behaviour ([@ritiek](https://github.com/ritiek)) (#457)
- Output informative message in case of no result found in YouTube search ([@Amit-L](https://github.com/Amit-L)) (#452)
- Ability to pass multiple tracks with `-s` option ([@ritiek](https://github.com/ritiek)) (#442)
### Changed
- Allowed to fetch metadata from Spotify upon searching Spotify-URL and `--no-metadata` to gather YouTube custom-search fields ([@Amit-L](https://github.com/Amit-L)) (#452)
- Allowed to fetch metadata from Spotify upon searching Spotify-URL and `--no-metadata` to gather YouTube
custom-search fields ([@Amit-L](https://github.com/Amit-L)) (#452)
- Change FFmpeg to use the built-in encoder `aac` instead of 3rd party `libfdk-aac` which does not
ship with the apt package ([@ritiek](https://github.com/ritiek)) (#448)
- Monkeypatch ever-changing network-relying tests ([@ritiek](https://github.com/ritiek)) (#448)

View File

@@ -163,8 +163,6 @@ class Downloader:
if not refined_songname == " - ":
songname = refined_songname
else:
if not const.args.no_metadata:
log.warning("Could not find metadata")
songname = internals.sanitize_title(songname)
return songname

View File

@@ -17,6 +17,7 @@ default_conf = {
"spotify-downloader": {
"manual": False,
"no-metadata": False,
"no-fallback-metadata": False,
"avconv": False,
"folder": internals.get_music_dir(),
"overwrite": "prompt",
@@ -132,7 +133,7 @@ def get_arguments(raw_args=None, to_group=True, to_merge=True):
"-m",
"--manual",
default=config["manual"],
help="choose the track to download manually from a list " "of matching tracks",
help="choose the track to download manually from a list of matching tracks",
action="store_true",
)
parser.add_argument(
@@ -142,6 +143,13 @@ def get_arguments(raw_args=None, to_group=True, to_merge=True):
help="do not embed metadata in tracks",
action="store_true",
)
parser.add_argument(
"-nf",
"--no-fallback-metadata",
default=config["no-fallback-metadata"],
help="use YouTube metadata as fallback if track not found on Spotify",
action="store_true",
)
parser.add_argument(
"-a",
"--avconv",

View File

@@ -1,6 +1,7 @@
from logzero import logger as log
import os
import sys
import urllib.request
from spotdl import const
@@ -253,3 +254,12 @@ def remove_duplicates(tracks):
local_set = set()
local_set_add = local_set.add
return [x for x in tracks if not (x in local_set or local_set_add(x))]
def content_available(url):
try:
response = urllib.request.urlopen(url)
except HTTPError:
return False
else:
return response.getcode() < 300

View File

@@ -46,6 +46,8 @@ class EmbedMetadata:
def __init__(self, music_file, meta_tags):
self.music_file = music_file
self.meta_tags = meta_tags
self.spotify_metadata = meta_tags["spotify_metadata"]
self.provider = "spotify" if meta_tags["spotify_metadata"] else "youtube"
def as_mp3(self):
""" Embed metadata to MP3 files. """
@@ -62,7 +64,7 @@ class EmbedMetadata:
audiofile["lyricist"] = meta_tags["artists"][0]["name"]
audiofile["arranger"] = meta_tags["artists"][0]["name"]
audiofile["performer"] = meta_tags["artists"][0]["name"]
audiofile["website"] = meta_tags["external_urls"]["spotify"]
audiofile["website"] = meta_tags["external_urls"][self.provider]
audiofile["length"] = str(meta_tags["duration"])
if meta_tags["publisher"]:
audiofile["encodedby"] = meta_tags["publisher"]
@@ -78,7 +80,7 @@ class EmbedMetadata:
audiofile["TYER"] = TYER(encoding=3, text=meta_tags["year"])
if meta_tags["publisher"]:
audiofile["TPUB"] = TPUB(encoding=3, text=meta_tags["publisher"])
audiofile["COMM"] = COMM(encoding=3, text=meta_tags["external_urls"]["spotify"])
audiofile["COMM"] = COMM(encoding=3, text=meta_tags["external_urls"][self.provider])
if meta_tags["lyrics"]:
audiofile["USLT"] = USLT(
encoding=3, desc=u"Lyrics", text=meta_tags["lyrics"]
@@ -106,7 +108,7 @@ class EmbedMetadata:
audiofile = MP4(music_file)
self._embed_basic_metadata(audiofile, preset=M4A_TAG_PRESET)
audiofile[M4A_TAG_PRESET["year"]] = meta_tags["year"]
audiofile[M4A_TAG_PRESET["comment"]] = meta_tags["external_urls"]["spotify"]
audiofile[M4A_TAG_PRESET["comment"]] = meta_tags["external_urls"][self.provider]
if meta_tags["lyrics"]:
audiofile[M4A_TAG_PRESET["lyrics"]] = meta_tags["lyrics"]
try:
@@ -127,7 +129,7 @@ class EmbedMetadata:
audiofile = FLAC(music_file)
self._embed_basic_metadata(audiofile)
audiofile["year"] = meta_tags["year"]
audiofile["comment"] = meta_tags["external_urls"]["spotify"]
audiofile["comment"] = meta_tags["external_urls"][self.provider]
if meta_tags["lyrics"]:
audiofile["lyrics"] = meta_tags["lyrics"]
@@ -147,7 +149,8 @@ class EmbedMetadata:
meta_tags = self.meta_tags
audiofile[preset["artist"]] = meta_tags["artists"][0]["name"]
audiofile[preset["albumartist"]] = meta_tags["album"]["artists"][0]["name"]
audiofile[preset["album"]] = meta_tags["album"]["name"]
if meta_tags["album"]["name"]:
audiofile[preset["album"]] = meta_tags["album"]["name"]
audiofile[preset["title"]] = meta_tags["name"]
audiofile[preset["date"]] = meta_tags["release_date"]
audiofile[preset["originaldate"]] = meta_tags["release_date"]

31
spotdl/patcher.py Normal file
View File

@@ -0,0 +1,31 @@
import pafy
from spotdl import internals
def _getbestthumb(self):
url = self._ydl_info["thumbnails"][0]["url"]
if url:
return url
part_url = "https://i.ytimg.com/vi/%s/" % self.videoid
# Thumbnail resolution sorted in descending order
thumbs = ("maxresdefault.jpg",
"sddefault.jpg",
"hqdefault.jpg",
"mqdefault.jpg",
"default.jpg")
for thumb in thumbs:
url = part_url + thumb
if self._content_available(url):
return url
@classmethod
def _content_available(cls, url):
return internals.content_available(url)
def patch_pafy():
pafy.backend_shared.BasePafy._bestthumb = None
pafy.backend_shared.BasePafy._content_available = _content_available
pafy.backend_shared.BasePafy.getbestthumb = _getbestthumb

View File

@@ -81,6 +81,7 @@ def generate_metadata(raw_song):
# Some sugar
meta_tags["year"], *_ = meta_tags["release_date"].split("-")
meta_tags["duration"] = meta_tags["duration_ms"] / 1000.0
meta_tags["spotify_metadata"] = True
# Remove unwanted parameters
del meta_tags["duration_ms"]
del meta_tags["available_markets"]

View File

@@ -14,6 +14,12 @@ from spotdl import const
# Read more on mps-youtube/pafy#199
pafy.g.opener.addheaders.append(("Range", "bytes=0-"))
# Implement unreleased methods on Pafy object
# More info: https://github.com/mps-youtube/pafy/pull/211
if pafy.__version__ <= "0.5.4":
from spotdl import patcher
patcher.patch_pafy()
def set_api_key():
if const.args.youtube_api_key:
@@ -39,28 +45,75 @@ def go_pafy(raw_song, meta_tags=None):
return track_info
def match_video_and_metadata(track, force_pafy=True):
def match_video_and_metadata(track):
""" Get and match track data from YouTube and Spotify. """
meta_tags = None
def fallback_metadata(meta_tags):
fallback_metadata_info = "Track not found on Spotify, falling back on YouTube metadata"
skip_fallback_metadata_warning = "Fallback condition not met, shall not embed metadata"
if meta_tags is None:
if const.args.no_fallback_metadata:
log.warning(skip_fallback_metadata_warning)
else:
log.info(fallback_metadata_info)
meta_tags = generate_metadata(content)
return meta_tags
if internals.is_youtube(track):
log.debug("Input song is a YouTube URL")
content = go_pafy(track, meta_tags=None)
track = slugify(content.title).replace("-", " ")
if not const.args.no_metadata:
meta_tags = spotify_tools.generate_metadata(track)
else:
# Let it generate metadata, youtube doesn't know spotify slang
if not const.args.no_metadata or internals.is_spotify(track):
meta_tags = spotify_tools.generate_metadata(track)
meta_tags = fallback_metadata(meta_tags)
if force_pafy:
content = go_pafy(track, meta_tags)
elif internals.is_spotify(track):
log.debug("Input song is a Spotify URL")
# Let it generate metadata, YouTube doesn't know Spotify slang
meta_tags = spotify_tools.generate_metadata(track)
content = go_pafy(track, meta_tags)
if const.args.no_metadata:
meta_tags = None
else:
log.debug("Input song is plain text based")
if const.args.no_metadata:
content = go_pafy(track, meta_tags=None)
else:
content = None
meta_tags = spotify_tools.generate_metadata(track)
content = go_pafy(track, meta_tags=meta_tags)
meta_tags = fallback_metadata(meta_tags)
return content, meta_tags
def generate_metadata(content):
""" Fetch a song's metadata from YouTube. """
meta_tags = {"spotify_metadata": False,
"name": content.title,
"artists": [{"name": content.author}],
"duration": content.length,
"external_urls": {"youtube": content.watchv_url},
"album": {"images" : [{"url": content.getbestthumb()}], "name": None},
"year": content.published.split("-")[0],
"release_date": content.published.split(" ")[0],
"type": "track",
"disc_number": 1,
"track_number": 1,
"total_tracks": 1,
"publisher": None,
"external_ids": {"isrc": None},
"lyrics": None,
"copyright": None,
"genre": None,
}
return meta_tags
def get_youtube_title(content, number=None):
""" Get the YouTube video's title. """
title = content.title

View File

@@ -1,6 +1,7 @@
from spotdl import const
from spotdl import handle
from spotdl import spotdl
import urllib
import pytest
@@ -13,3 +14,13 @@ def load_defaults():
spotdl.log = const.logzero.setup_logger(
formatter=const._formatter, level=const.args.log_level
)
# GIST_URL is the monkeypatched version of: https://www.youtube.com/results?search_query=janji+-+heroes
# so that we get same results even if YouTube changes the list/order of videos on their page.
GIST_URL = "https://gist.githubusercontent.com/ritiek/e731338e9810e31c2f00f13c249a45f5/raw/c11a27f3b5d11a8d082976f1cdd237bd605ec2c2/search_results.html"
def monkeypatch_youtube_search_page(*args, **kwargs):
fake_urlopen = urllib.request.urlopen(GIST_URL)
return fake_urlopen

View File

@@ -1,4 +1,3 @@
import urllib
import subprocess
import os
@@ -39,7 +38,7 @@ def metadata_fixture():
def test_metadata(metadata_fixture):
expect_number = 23
expect_number = 24
assert len(metadata_fixture) == expect_number
@@ -54,16 +53,11 @@ class TestFileFormat:
assert title == EXPECTED_SPOTIFY_TITLE.replace(" ", "_")
def monkeypatch_youtube_search_page(*args, **kwargs):
fake_urlopen = urllib.request.urlopen(GIST_URL)
return fake_urlopen
def test_youtube_url(metadata_fixture, monkeypatch):
monkeypatch.setattr(
youtube_tools.GenerateYouTubeURL,
"_fetch_response",
monkeypatch_youtube_search_page,
loader.monkeypatch_youtube_search_page,
)
url = youtube_tools.generate_youtube_url(SPOTIFY_TRACK_URL, metadata_fixture)
assert url == EXPECTED_YOUTUBE_URL
@@ -73,7 +67,7 @@ def test_youtube_title(metadata_fixture, monkeypatch):
monkeypatch.setattr(
youtube_tools.GenerateYouTubeURL,
"_fetch_response",
monkeypatch_youtube_search_page,
loader.monkeypatch_youtube_search_page,
)
content = youtube_tools.go_pafy(SPOTIFY_TRACK_URL, metadata_fixture)
pytest.content_fixture = content
@@ -107,22 +101,16 @@ class TestDownload:
def test_m4a(self, monkeypatch, filename_fixture):
expect_download = True
monkeypatch.setattr(
"pafy.backend_shared.BaseStream.download", self.blank_audio_generator
)
download = youtube_tools.download_song(
filename_fixture + ".m4a", pytest.content_fixture
)
monkeypatch.setattr("pafy.backend_shared.BaseStream.download", self.blank_audio_generator)
monkeypatch.setattr("pafy.backend_youtube_dl.YtdlStream.download", self.blank_audio_generator)
download = youtube_tools.download_song(filename_fixture + ".m4a", pytest.content_fixture)
assert download == expect_download
def test_webm(self, monkeypatch, filename_fixture):
expect_download = True
monkeypatch.setattr(
"pafy.backend_shared.BaseStream.download", self.blank_audio_generator
)
download = youtube_tools.download_song(
filename_fixture + ".webm", pytest.content_fixture
)
monkeypatch.setattr("pafy.backend_shared.BaseStream.download", self.blank_audio_generator)
monkeypatch.setattr("pafy.backend_youtube_dl.YtdlStream.download", self.blank_audio_generator)
download = youtube_tools.download_song(filename_fixture + ".webm", pytest.content_fixture)
assert download == expect_download

35
test/test_patcher.py Normal file
View File

@@ -0,0 +1,35 @@
from spotdl import patcher
import pafy
import pytest
patcher.patch_pafy()
class TestPafyContentAvailable:
pass
class TestMethodAssignment:
def test_pafy_getbestthumb(self):
pafy.backend_shared.BasePafy.getbestthumb == patcher._getbestthumb
class TestMethodCalls:
@pytest.fixture(scope="module")
def content_fixture(self):
content = pafy.new("http://youtube.com/watch?v=3nQNiWdeH2Q")
return content
def test_pafy_getbestthumb(self, content_fixture):
thumbnail = patcher._getbestthumb(content_fixture)
assert thumbnail == "https://i.ytimg.com/vi/3nQNiWdeH2Q/maxresdefault.jpg"
def test_pafy_getbestthumb_without_ytdl(self, content_fixture):
content_fixture._ydl_info["thumbnails"][0]["url"] = None
thumbnail = patcher._getbestthumb(content_fixture)
assert thumbnail == "https://i.ytimg.com/vi/3nQNiWdeH2Q/maxresdefault.jpg"
def test_pafy_content_available(self):
TestPafyContentAvailable._content_available = patcher._content_available
assert TestPafyContentAvailable()._content_available("https://youtube.com/")

View File

@@ -2,6 +2,9 @@ from spotdl import spotify_tools
import os
import pytest
import loader
loader.load_defaults()
def test_generate_token():
@@ -23,7 +26,7 @@ class TestGenerateMetadata:
return metadata
def test_len(self, metadata_fixture):
assert len(metadata_fixture) == 23
assert len(metadata_fixture) == 24
def test_trackname(self, metadata_fixture):
assert metadata_fixture["name"] == "Spectre"

View File

@@ -17,7 +17,6 @@ YT_API_KEY = "AIzaSyAnItl3udec-Q1d5bkjKJGL-RgrKO_vU90"
TRACK_SEARCH = "Tony's Videos VERY SHORT VIDEO 28.10.2016"
EXPECTED_TITLE = TRACK_SEARCH
EXPECTED_YT_URL = "http://youtube.com/watch?v=qOOcy2-tmbk"
EXPECTED_YT_URLS = (EXPECTED_YT_URL, "http://youtube.com/watch?v=5USR1Omo7f0")
RESULT_COUNT_SEARCH = "she is still sleeping SAO"
@@ -71,8 +70,7 @@ class TestYouTubeURL:
def test_only_music_category(self, metadata_fixture):
const.args.music_videos_only = True
url = youtube_tools.generate_youtube_url(TRACK_SEARCH, metadata_fixture)
# YouTube keeps changing its results
assert url in EXPECTED_YT_URLS
assert url == EXPECTED_YT_URL
def test_all_categories(self, metadata_fixture):
const.args.music_videos_only = False
@@ -99,6 +97,56 @@ def content_fixture(metadata_fixture):
return content
# True = Metadata must be fetched from Spotify
# False = Metadata must be fetched from YouTube
# None = Metadata must be `None`
MATCH_METADATA_NO_FALLBACK_TEST_TABLE = [
("https://open.spotify.com/track/5nWduGwBGBn1PSqYTJUDbS", True),
("http://youtube.com/watch?v=3nQNiWdeH2Q", None),
("Linux Talk | Working with Drives and Filesystems", None)
]
MATCH_METADATA_FALLBACK_TEST_TABLE = [
("https://open.spotify.com/track/5nWduGwBGBn1PSqYTJUDbS", True),
("http://youtube.com/watch?v=3nQNiWdeH2Q", False),
("Linux Talk | Working with Drives and Filesystems", False)
]
MATCH_METADATA_NO_METADATA_TEST_TABLE = [
("https://open.spotify.com/track/5nWduGwBGBn1PSqYTJUDbS", None),
("http://youtube.com/watch?v=3nQNiWdeH2Q", None),
("Linux Talk | Working with Drives and Filesystems", None)
]
class TestMetadataOrigin:
def match_metadata(self, track, metadata_type):
_, metadata = youtube_tools.match_video_and_metadata(track)
if metadata_type is None:
assert metadata == metadata_type
else:
assert metadata["spotify_metadata"] == metadata_type
@pytest.mark.parametrize("track, metadata_type", MATCH_METADATA_NO_FALLBACK_TEST_TABLE)
def test_match_metadata_with_no_fallback(self, track, metadata_type, content_fixture, monkeypatch):
monkeypatch.setattr(youtube_tools, "go_pafy", lambda track, meta_tags: content_fixture)
const.args.no_fallback_metadata = True
self.match_metadata(track, metadata_type)
@pytest.mark.parametrize("track, metadata_type", MATCH_METADATA_FALLBACK_TEST_TABLE)
def test_match_metadata_with_fallback(self, track, metadata_type, content_fixture, monkeypatch):
monkeypatch.setattr(youtube_tools, "go_pafy", lambda track, meta_tags: content_fixture)
const.args.no_fallback_metadata = False
self.match_metadata(track, metadata_type)
@pytest.mark.parametrize("track, metadata_type", MATCH_METADATA_NO_METADATA_TEST_TABLE)
def test_match_metadata_with_no_metadata(self, track, metadata_type, content_fixture, monkeypatch):
monkeypatch.setattr(youtube_tools, "go_pafy", lambda track, meta_tags: content_fixture)
const.args.no_metadata = True
self.match_metadata(track, metadata_type)
@pytest.fixture(scope="module")
def title_fixture(content_fixture):
title = youtube_tools.get_youtube_title(content_fixture)
@@ -136,6 +184,26 @@ def test_check_exists(metadata_fixture, filename_fixture, tmpdir):
assert check == expect_check
def test_generate_m3u(tmpdir, monkeypatch):
monkeypatch.setattr(youtube_tools.GenerateYouTubeURL, "_fetch_response", loader.monkeypatch_youtube_search_page)
expect_m3u = (
"#EXTM3U\n\n"
"#EXTINF:208,Janji - Heroes Tonight (feat. Johnning) [NCS Release]\n"
"http://www.youtube.com/watch?v=3nQNiWdeH2Q\n"
"#EXTINF:226,Alan Walker - Spectre [NCS Release]\n"
"http://www.youtube.com/watch?v=AOeY-nDp7hI\n"
)
m3u_track_file = os.path.join(str(tmpdir), "m3u_test.txt")
with open(m3u_track_file, "w") as track_file:
track_file.write("\nhttps://open.spotify.com/track/3SipFlNddvL0XNZRLXvdZD")
track_file.write("\nhttp://www.youtube.com/watch?v=AOeY-nDp7hI")
youtube_tools.generate_m3u(m3u_track_file)
m3u_file = "{}.m3u".format(m3u_track_file.split(".")[0])
with open(m3u_file, "r") as m3u_in:
m3u = m3u_in.readlines()
assert "".join(m3u) == expect_m3u
class TestDownload:
def test_webm(self, content_fixture, filename_fixture, monkeypatch):
# content_fixture does not have any .webm audiostream